# Reinforcement Learning

## MiniHack

### OS Update

In [None]:
!sudo apt -qq update
!sudo apt -q install -y \
    build-essential \
    autoconf \
    libtool \
    pkg-config \
    python3-dev \
    python3-pip \
    python3-numpy \
    git \
    flex \
    bison \
    libbz2-dev

!wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | sudo apt-key add -
!sudo apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main'
!sudo apt -qq update
!apt -qq --allow-unauthenticated install -y \
    cmake \
    kitware-archive-keyring

# feel free to use a more elegant solution to make /usr/bin/cmake the default one
!sudo rm $(which cmake)
!$(which cmake) --version

### Installs

In [None]:
!pip3 install -U --quiet nle
!pip3 install -U --quiet minihack
!pip3 install -U --quiet comet_ml

### Comet Config

In [None]:
# NOTE(review): comet_ml is imported in its own cell ahead of the other
# libraries — presumably so its automatic logging can hook them; confirm.
from comet_ml import Experiment

# Create the Comet experiment under the "minihack" project. No API key is
# passed here, so it presumably has to come from comet_ml's own configuration
# (environment variable / config file) — verify before running.
experiment = Experiment(project_name="minihack")

### Imports

In [None]:
import random
import gym
import nle
import minihack
import numpy as np

from tqdm.auto import trange

from minihack_rl.dqn.agent import D3QNAgent

### Constants

In [None]:
# Run configuration: environment selection plus the values handed to
# D3QNAgent and to the prefill / training calls in the Train cell.
params = {
    'seed': 1,
    'env': 'MiniHack-Quest-Hard-v0',
    'obs_keys': ('pixel', 'message'),
    'replay_size': 50000,
    'batch_size': 32,
    'double_dqn': True,
    'target_update_freq': 100,
    'steps': int(1e7),
    'learn_starts': int(1e4),
    'learn_freq': 1000,
    'learn_rate': 0.001,
    'gamma': 0.99,
    # The Train cell passes params['epsilon'] to agent.train(); without this
    # entry the notebook raises KeyError only after the whole prefill loop.
    # NOTE(review): 0.1 is an assumed exploration rate — confirm the value and
    # type that D3QNAgent.train expects for its epsilon argument.
    'epsilon': 0.1,
}

### Train

In [None]:
# Seed the Python and NumPy global RNGs for reproducibility.
np.random.seed(params['seed'])
random.seed(params['seed'])

env = gym.make(params['env'], observation_keys=params['obs_keys'])
# The random prefill actions below come from the action space's own RNG,
# which the global seeds above do not affect, so seed it explicitly.
env.action_space.seed(params['seed'])


def _reset_env(env):
    """Reset ``env`` and return only the initial observation.

    Depending on the installed gym version, ``env.reset()`` returns either the
    observation alone or an ``(observation, info)`` tuple. Unpacking a bare
    two-key observation dict as ``obs, _`` would silently bind the dict's
    keys instead of the observation, so the tuple case is detected explicitly.
    """
    result = env.reset()
    return result[0] if isinstance(result, tuple) else result


# Build the agent from the run configuration in `params`.
agent = D3QNAgent(
    env,
    N_replay=params['replay_size'],
    N_batch=params['batch_size'],
    N_=params['target_update_freq'],
    double_dqn=params['double_dqn'],
    gamma=params['gamma'],
    lr=params['learn_rate'],
    lf=params['learn_freq']
)

agent.state = _reset_env(env)
env.render()

# Prefill memory: take `learn_starts` uniformly random actions through the
# agent, restarting the episode whenever it terminates.
for t in trange(params['learn_starts']):
    action = env.action_space.sample()
    _, _, terminal = agent.step(action)

    if terminal:
        agent.state = _reset_env(env)

# Train agent (requires an 'epsilon' entry in `params`).
agent.train(params['steps'], params['epsilon'])

### Comet Commit

In [None]:
experiment.end()