# Reinforcement Learning

## Minihack

### Install NLE

In [None]:
# Optional system packages, left commented out. NOTE(review): presumably these
# are NLE's build prerequisites for a fresh Debian/Ubuntu-style machine
# (e.g. Colab) -- confirm the list before relying on it.
# !apt update -qq && apt install -qq -y flex bison libbz2-dev libglib2.0-0 libsm6 libxext6 cmake
# Install (or upgrade) NLE from the main branch of its GitHub repository.
%pip install -U --quiet git+https://github.com/facebookresearch/nle.git@main

### Install Minihack

In [None]:
# Install (or upgrade) MiniHack from the main branch of its GitHub repository.
%pip install -U --quiet git+https://github.com/facebookresearch/minihack.git@main

### Install RLlib

In [None]:
%pip install -U --quiet ray[rllib] ray[tune] ray[default]

### Install other dependencies

In [None]:
# Supporting packages: hydra-core (config loading), wandb (experiment logging),
# opencv-python (frame resizing via cv2) and pipdeptree (version report below).
# comet_ml is installed as well but is not referenced in the cells shown here.
%pip install -U --quiet comet_ml hydra-core pipdeptree wandb opencv-python

### Versions

In [None]:
# Record the interpreter version used for this run.
!python --version
# Show the listed packages in reverse mode (-r), i.e. together with the
# installed packages that depend on each of them.
!pipdeptree -r --packages pip,gym,nle,minihack,ray,wandb

### Imports

In [None]:
import random
import gym
import nle
import minihack
import ray

import numpy as np
import cv2
from collections import OrderedDict

In [None]:
# Start the Ray runtime for this notebook, declaring one GPU as schedulable.
# NOTE(review): re-running this cell in a live kernel presumably raises unless
# ray.shutdown() ran first (or ignore_reinit_error=True is passed) -- confirm.
ray.init(num_gpus=1)

### Custom

In [None]:
from gym.spaces import Box
from minihack.envs.skills_quest import MiniHackQuestHard
from ray.tune.registry import register_env


class dotdict(dict):
    """A ``dict`` whose items can also be read, written and removed as attributes.

    * ``d.key`` returns ``d["key"]``, or ``None`` when the key is absent
      (it never raises ``AttributeError``).
    * ``d.key = value`` stores ``d["key"] = value``.
    * ``del d.key`` removes ``d["key"]`` and raises ``KeyError`` when absent.
    """

    def __getattr__(self, name, default=None):
        # Only reached when normal attribute lookup fails, so real dict
        # methods such as ``items`` keep working.
        return dict.get(self, name, default)

    def __setattr__(self, name, value):
        # Attributes are stored as dictionary items, not in ``__dict__``.
        dict.__setitem__(self, name, value)

    def __delattr__(self, name):
        dict.__delitem__(self, name)


class CustomEnv(MiniHackQuestHard):
    """MiniHackQuestHard variant that returns a resized ``pixel`` observation.

    ``config`` is a mapping with:

    * ``obs_keys``: comma-separated observation keys (e.g. ``"glyphs,pixel"``).
    * ``input_shape``: mapping with ``height`` and ``width``, the size the
      ``pixel`` frame is resized to.
    """

    def __init__(self, config):
        # Hack to resolve error "'CustomEnv' object has no attribute 'env'":
        # install a stand-in that exposes ``_vardir`` before the parent
        # constructor runs.
        self.env = dotdict({"_vardir": "/tmp/run"})

        config = dotdict(config)

        # Strip blanks so "glyphs, pixel" and "glyphs,pixel" name the same keys.
        self._obs_keys = [key.strip() for key in config.obs_keys.split(",")]
        super().__init__(observation_keys=self._obs_keys)

        self.shape = dotdict(config.input_shape)
        # Advertise the resized frame only when "pixel" is actually returned;
        # otherwise the declared space would list a key that observations lack.
        if "pixel" in self._obs_keys:
            self.observation_space["pixel"] = Box(
                0, 255, (self.shape.height, self.shape.width, 3), np.uint8
            )

    def _resize_frame(self, frame):
        """Return ``frame`` resized to the configured width and height."""
        # cv2 takes the target size as (width, height).
        target_size = (self.shape.width, self.shape.height)
        return cv2.resize(frame, dsize=target_size, interpolation=cv2.INTER_LINEAR)

    def _process_obs(self, obs):
        """Return the requested keys of ``obs`` in order, resizing ``pixel``."""
        processed = OrderedDict()
        for key in self._obs_keys:
            value = obs[key]
            processed[key] = self._resize_frame(value) if key == "pixel" else value
        return processed

    def reset(self, *args, **kwargs):
        """Reset the parent environment and return the processed observation.

        Any arguments are forwarded unchanged to the parent ``reset``.
        """
        return self._process_obs(super().reset(*args, **kwargs))

    def step(self, action):
        """Step the parent environment; only the observation is post-processed.

        Returns the ``(obs, reward, done, info)`` tuple from the parent with
        ``obs`` replaced by its processed form.
        """
        obs, reward, done, info = super().step(action)
        return self._process_obs(obs), reward, done, info


# Register CustomEnv with Ray under the name "MiniHack-D3QN-v0".
# NOTE(review): presumably config.yaml selects this name as the `env` of the
# DQN run -- confirm against the config file.
register_env("MiniHack-D3QN-v0", CustomEnv)


### Train

In [None]:
from hydra import initialize, compose
from omegaconf import OmegaConf

import ray
from ray import tune
from ray.air.callbacks.wandb import WandbLoggerCallback

# Load config.yaml from the directory given by config_path (".") via Hydra's
# compose API.
with initialize(version_base=None, config_path="."):
    cfg = compose(config_name="config.yaml")

# OmegaConf.to_object() only accepts OmegaConf containers, so a plain ``{}``
# fallback cannot work here; report a missing section explicitly instead.
dqn_section = cfg.get("minihack-d3qn")
if dqn_section is None:
    raise ValueError("config.yaml has no 'minihack-d3qn' section")
dqn_cfg = OmegaConf.to_object(dqn_section)

# Weights & Biases settings live under logger_config.wandb; an absent or empty
# section yields an empty dict.
wandb_cfg = (dqn_cfg.get("logger_config") or {}).get("wandb") or {}

# Attach the W&B logger only when it is configured. ``project`` is required;
# ``group`` and ``api_key_file`` are optional and default to None.
callbacks = []
if wandb_cfg:
    callbacks.append(
        WandbLoggerCallback(
            project=wandb_cfg["project"],
            group=wandb_cfg.get("group"),
            api_key_file=wandb_cfg.get("api_key_file"),
        )
    )
else:
    print("No 'logger_config.wandb' section found; running without W&B logging.")

# Run the "DQN" trainable with the loaded config, stopping once
# training_iteration reaches 10. Results go under ./results.
analysis = tune.run(
    "DQN",
    callbacks=callbacks,
    config=dqn_cfg,
    stop={"training_iteration": 10},
    local_dir="./results",
    log_to_file=True,
)


In [None]:
# Shut down the Ray runtime started by ray.init() earlier in this notebook.
ray.shutdown()