# Reinforcement Learning

## MiniHack

### Install NLE

In [None]:
# !apt update -qq && apt install -qq -y flex bison libbz2-dev libglib2.0-0 libsm6 libxext6 cmake 
# %pip install -U --quiet git+https://github.com/facebookresearch/nle.git@main

### Install MiniHack

In [None]:
# %pip install -U --quiet git+https://github.com/facebookresearch/minihack.git@main

### Install RLlib

In [None]:
%pip install -U --quiet ray[rllib] ray[tune] ray[default]

### Install other dependencies

In [None]:
%pip install -U --quiet comet_ml hydra-core pipdeptree wandb opencv-python

### Versions

In [1]:
!python --version
!pipdeptree -r --packages pip,gym,nle,minihack,ray,wandb

Python 3.10.6
gym==0.23.0
  - minihack==0.1.3+2f022b0 [requires: gym>=0.15,<=0.23]
  - nle==0.8.1+68b9362 [requires: gym>=0.15,<=0.23]
    - minihack==0.1.3+2f022b0 [requires: nle>=0.8.1]
pip==22.3.1
ray==2.0.1
wandb==0.13.5


### Imports

In [2]:
import random
import gym
import nle
import minihack
import ray

import numpy as np
import cv2
from collections import OrderedDict

In [3]:
# Start the local Ray instance and declare one GPU to it.
# `ignore_reinit_error=True` keeps this cell re-runnable: without it, executing
# the cell a second time on a live kernel raises because Ray is already running.
ray.init(num_gpus=1, ignore_reinit_error=True)

2022-11-08 17:28:57,445	INFO worker.py:1509 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m


0,1
Python version:,3.10.6
Ray version:,2.0.1
Dashboard:,http://127.0.0.1:8265


### Custom environment

In [6]:
from gym.spaces import Box
from minihack.envs.skills_quest import MiniHackQuestHard
from ray.tune.registry import register_env


class dotdict(dict):
    """Dictionary whose items can also be accessed as attributes.

    * ``d.key`` returns ``d["key"]`` when the key exists and ``None`` when an
      ordinary key is missing (same result as ``d.get("key")``).
    * ``d.key = value`` stores ``d["key"] = value``.
    * ``del d.key`` removes ``d["key"]`` and raises ``KeyError`` if it is absent.

    Missing *dunder* names (``__deepcopy__``, ``__getstate__``, ...) raise
    ``AttributeError`` instead of returning ``None``. ``copy`` and ``pickle``
    probe for those hooks with ``getattr``; answering ``None`` makes them try
    to call ``None`` on interpreters where ``object`` does not define the hook.
    """

    def __getattr__(self, name):
        # Only reached when normal attribute lookup has already failed.
        if name in self:
            return self[name]
        if name.startswith("__") and name.endswith("__"):
            # Let protocol probes see the hook as absent.
            raise AttributeError(name)
        # Ordinary missing keys keep the lenient ``dict.get`` behaviour.
        return None

    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__


class CustomEnv(MiniHackQuestHard):
    """``MiniHackQuestHard`` variant that returns a resized ``pixel`` frame.

    The constructor takes a single config mapping, so the class itself can be
    registered as an environment creator.

    Config entries read by ``__init__``:
        obs_keys: comma-separated observation keys to request and return.
        input_shape: mapping with ``height`` and ``width``; required when
            ``"pixel"`` is one of the observation keys.
    """

    def __init__(self, config):
        """Build the environment from ``config``.

        Raises:
            ValueError: if ``obs_keys`` is missing/empty, or if ``"pixel"`` is
                requested without ``input_shape.height`` / ``input_shape.width``.
        """
        # Hack to resolve error "'CustomEnv' object has no attribute 'env'"
        self.env = dotdict({"_vardir": "/tmp/run"})

        config = dotdict(config)

        if not config.obs_keys:
            raise ValueError("env config must provide a non-empty 'obs_keys' string")

        # Tolerate "a, b" style lists by stripping whitespace around each key.
        self._obs_keys = [
            key.strip() for key in config.obs_keys.split(",") if key.strip()
        ]
        super().__init__(observation_keys=self._obs_keys)

        self.shape = dotdict(config.input_shape or {})

        # Only redeclare the pixel space when pixel frames are actually
        # returned; otherwise the observation space would advertise a key that
        # `_process_obs` never emits.
        if "pixel" in self._obs_keys:
            if self.shape.height is None or self.shape.width is None:
                raise ValueError(
                    "env config 'input_shape' must define 'height' and 'width' "
                    "when 'pixel' is one of the observation keys"
                )
            # Declared as uint8 RGB; presumably matches the dtype of the frames
            # produced by the parent environment -- TODO confirm.
            self.observation_space["pixel"] = Box(
                low=0,
                high=255,
                shape=(self.shape.height, self.shape.width, 3),
                dtype=np.uint8,
            )

    def _resize_frame(self, frame):
        """Resize ``frame`` to the configured size using linear interpolation."""
        return cv2.resize(
            frame,
            # OpenCV takes the target size as (width, height).
            dsize=(self.shape.width, self.shape.height),
            interpolation=cv2.INTER_LINEAR,
        )

    def _process_obs(self, obs):
        """Return the requested keys of ``obs`` in order, resizing ``pixel``."""
        return OrderedDict(
            (key, self._resize_frame(obs[key]) if key == "pixel" else obs[key])
            for key in self._obs_keys
        )

    def reset(self, *args, **kwargs):
        """Reset the parent environment and return the processed observation.

        Any arguments are forwarded unchanged to the parent ``reset``.
        """
        return self._process_obs(super().reset(*args, **kwargs))

    def step(self, action):
        """Step the parent environment and post-process its observation.

        Returns:
            ``(obs, reward, done, info)`` where ``obs`` went through
            ``_process_obs`` and the other items are passed through untouched.
        """
        obs, reward, done, info = super().step(action)
        return self._process_obs(obs), reward, done, info


# Make the custom environment available to RLlib/Tune under a string id.
# The class is passed as the creator, so it is instantiated with the env config.
register_env(
    name="MiniHack-D3QN-v0",
    env_creator=CustomEnv,
)


### Train

In [5]:
from hydra import initialize, compose
from omegaconf import OmegaConf

import ray
from ray import tune
from ray.air.callbacks.wandb import WandbLoggerCallback

# Compose `config.yaml` from the config directory "." with Hydra.
with initialize(version_base=None, config_path="."):
    cfg = compose(config_name="config.yaml")

# `OmegaConf.to_object` only accepts OmegaConf containers, so a missing section
# must be reported here rather than defaulted to a plain `{}`.
dqn_section = cfg.get("minihack-d3qn")
if dqn_section is None:
    raise KeyError("config.yaml does not define a 'minihack-d3qn' section")

# Plain Python containers, as expected by `tune.run(config=...)`.
dqn_cfg = OmegaConf.to_object(dqn_section)

# Tolerate an absent or empty `logger_config` / `wandb` entry.
wandb_cfg = (dqn_cfg.get("logger_config") or {}).get("wandb") or {}

# Weights & Biases logging is attached only when a project is configured;
# `group` and `api_key_file` are optional.
callbacks = []
if wandb_cfg.get("project"):
    callbacks.append(
        WandbLoggerCallback(
            project=wandb_cfg["project"],
            group=wandb_cfg.get("group"),
            api_key_file=wandb_cfg.get("api_key_file"),
        )
    )
else:
    print("No 'logger_config.wandb.project' in config: running without W&B logging.")

analysis = tune.run(
    "DQN",
    callbacks=callbacks,
    config=dqn_cfg,
    # Each trial stops after 10 training iterations.
    stop={"training_iteration": 10},
    local_dir="./results",
    log_to_file=True,
)


Trial name,status,loc,gamma,hiddens,lr,target_network_up...
DQN_MiniHack-D3QN-v0_ab7af_00011,RUNNING,,0.999,[512],0.0001,10000
DQN_MiniHack-D3QN-v0_ab7af_00012,PENDING,,0.99,[256],0.001,10000
DQN_MiniHack-D3QN-v0_ab7af_00013,PENDING,,0.999,[256],0.001,10000
DQN_MiniHack-D3QN-v0_ab7af_00014,PENDING,,0.99,[512],0.001,10000
DQN_MiniHack-D3QN-v0_ab7af_00015,PENDING,,0.999,[512],0.001,10000
DQN_MiniHack-D3QN-v0_ab7af_00016,PENDING,,0.99,[256],0.0001,50000
DQN_MiniHack-D3QN-v0_ab7af_00017,PENDING,,0.999,[256],0.0001,50000
DQN_MiniHack-D3QN-v0_ab7af_00018,PENDING,,0.99,[512],0.0001,50000
DQN_MiniHack-D3QN-v0_ab7af_00019,PENDING,,0.999,[512],0.0001,50000
DQN_MiniHack-D3QN-v0_ab7af_00020,PENDING,,0.99,[256],0.001,50000

Trial name,# failures,error file
DQN_MiniHack-D3QN-v0_ab7af_00000,1,"/home/develop/github/minihack-rl/notebooks/results/DQN/DQN_MiniHack-D3QN-v0_ab7af_00000_0_gamma=0.9900,hiddens=256,lr=0.0001,target_network_update_freq=5000_2022-11-08_17-47-37/error.txt"
DQN_MiniHack-D3QN-v0_ab7af_00001,1,"/home/develop/github/minihack-rl/notebooks/results/DQN/DQN_MiniHack-D3QN-v0_ab7af_00001_1_gamma=0.9990,hiddens=256,lr=0.0001,target_network_update_freq=5000_2022-11-08_17-47-58/error.txt"
DQN_MiniHack-D3QN-v0_ab7af_00002,1,"/home/develop/github/minihack-rl/notebooks/results/DQN/DQN_MiniHack-D3QN-v0_ab7af_00002_2_gamma=0.9900,hiddens=512,lr=0.0001,target_network_update_freq=5000_2022-11-08_17-48-15/error.txt"
DQN_MiniHack-D3QN-v0_ab7af_00003,1,"/home/develop/github/minihack-rl/notebooks/results/DQN/DQN_MiniHack-D3QN-v0_ab7af_00003_3_gamma=0.9990,hiddens=512,lr=0.0001,target_network_update_freq=5000_2022-11-08_17-48-31/error.txt"
DQN_MiniHack-D3QN-v0_ab7af_00004,1,"/home/develop/github/minihack-rl/notebooks/results/DQN/DQN_MiniHack-D3QN-v0_ab7af_00004_4_gamma=0.9900,hiddens=256,lr=0.0010,target_network_update_freq=5000_2022-11-08_17-48-49/error.txt"
DQN_MiniHack-D3QN-v0_ab7af_00005,1,"/home/develop/github/minihack-rl/notebooks/results/DQN/DQN_MiniHack-D3QN-v0_ab7af_00005_5_gamma=0.9990,hiddens=256,lr=0.0010,target_network_update_freq=5000_2022-11-08_17-49-09/error.txt"
DQN_MiniHack-D3QN-v0_ab7af_00006,1,"/home/develop/github/minihack-rl/notebooks/results/DQN/DQN_MiniHack-D3QN-v0_ab7af_00006_6_gamma=0.9900,hiddens=512,lr=0.0010,target_network_update_freq=5000_2022-11-08_17-49-26/error.txt"
DQN_MiniHack-D3QN-v0_ab7af_00007,1,"/home/develop/github/minihack-rl/notebooks/results/DQN/DQN_MiniHack-D3QN-v0_ab7af_00007_7_gamma=0.9990,hiddens=512,lr=0.0010,target_network_update_freq=5000_2022-11-08_17-49-43/error.txt"
DQN_MiniHack-D3QN-v0_ab7af_00008,1,"/home/develop/github/minihack-rl/notebooks/results/DQN/DQN_MiniHack-D3QN-v0_ab7af_00008_8_gamma=0.9900,hiddens=256,lr=0.0001,target_network_update_freq=10000_2022-11-08_17-50-03/error.txt"
DQN_MiniHack-D3QN-v0_ab7af_00009,1,"/home/develop/github/minihack-rl/notebooks/results/DQN/DQN_MiniHack-D3QN-v0_ab7af_00009_9_gamma=0.9990,hiddens=256,lr=0.0001,target_network_update_freq=10000_2022-11-08_17-50-24/error.txt"


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmamello-justice[0m ([33mcoms-4061a-team[0m). Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016670803333302803, max=1.0…

wandb: ERROR Failed to sample metric: Not Supported
[2m[36m(DQN pid=2041)[0m 2022-11-08 17:47:45,746	INFO simple_q.py:293 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting `simple_optimizer=True` if this doesn't work for you.
[2m[36m(DQN pid=2041)[0m 2022-11-08 17:47:45,748	INFO algorithm.py:355 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


2022-11-08 17:47:51,386	ERROR trial_runner.py:987 -- Trial DQN_MiniHack-D3QN-v0_ab7af_00000: Error processing event.
ray.tune.error._TuneNoNextExecutorEventError: Traceback (most recent call last):
  File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/tune/execution/ray_trial_executor.py", line 996, in get_next_executor_event
    future_result = ray.get(ready_future)
  File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/_private/client_mode_hook.py", line 105, in wrapper
    return func(*args, **kwargs)
  File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/_private/worker.py", line 2282, in get
    raise value
ray.exceptions.RayActorError: The actor died because of an error raised in its creation task, [36mray::DQN.__init__()[39m (pid=2041, ip=172.21.111.144, repr=DQN)
  File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 312, in __init_

[2m[36m(DQN pid=2041)[0m 2022-11-08 17:47:51,360	ERROR worker.py:756 -- Exception raised in creation task: The actor died because of an error raised in its creation task, [36mray::DQN.__init__()[39m (pid=2041, ip=172.21.111.144, repr=DQN)
[2m[36m(DQN pid=2041)[0m   File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 312, in __init__
[2m[36m(DQN pid=2041)[0m     super().__init__(config=config, logger_creator=logger_creator, **kwargs)
[2m[36m(DQN pid=2041)[0m   File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/tune/trainable/trainable.py", line 159, in __init__
[2m[36m(DQN pid=2041)[0m     self.setup(copy.deepcopy(self.config))
[2m[36m(DQN pid=2041)[0m   File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 422, in setup
[2m[36m(DQN pid=2041)[0m     self.workers = WorkerSet(
[2m[36m(DQN pid=2041)[0m   File 

VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.173712…

Result for DQN_MiniHack-D3QN-v0_ab7af_00000:
  trial_id: ab7af_00000
  


2022-11-08 17:47:57,903	ERROR ray_trial_executor.py:103 -- An exception occurred when trying to stop the Ray actor:Traceback (most recent call last):
  File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/tune/execution/ray_trial_executor.py", line 94, in _post_stop_cleanup
    ray.get(future, timeout=0)
  File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/_private/client_mode_hook.py", line 105, in wrapper
    return func(*args, **kwargs)
  File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/_private/worker.py", line 2282, in get
    raise value
ray.exceptions.RayActorError: The actor died because of an error raised in its creation task, [36mray::DQN.__init__()[39m (pid=2041, ip=172.21.111.144, repr=DQN)
  File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 312, in __init__
    super().__init__(config=config, logger_creator=logger_creat

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016671568333307126, max=1.0…

wandb: ERROR Failed to sample metric: Not Supported
[2m[36m(DQN pid=2227)[0m 2022-11-08 17:48:04,957	INFO simple_q.py:293 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting `simple_optimizer=True` if this doesn't work for you.
[2m[36m(DQN pid=2227)[0m 2022-11-08 17:48:04,960	INFO algorithm.py:355 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


2022-11-08 17:48:09,032	ERROR trial_runner.py:987 -- Trial DQN_MiniHack-D3QN-v0_ab7af_00001: Error processing event.
ray.tune.error._TuneNoNextExecutorEventError: Traceback (most recent call last):
  File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/tune/execution/ray_trial_executor.py", line 996, in get_next_executor_event
    future_result = ray.get(ready_future)
  File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/_private/client_mode_hook.py", line 105, in wrapper
    return func(*args, **kwargs)
  File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/_private/worker.py", line 2282, in get
    raise value
ray.exceptions.RayActorError: The actor died because of an error raised in its creation task, [36mray::DQN.__init__()[39m (pid=2227, ip=172.21.111.144, repr=DQN)
  File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 312, in __init_

[2m[36m(DQN pid=2227)[0m 2022-11-08 17:48:09,020	ERROR worker.py:756 -- Exception raised in creation task: The actor died because of an error raised in its creation task, [36mray::DQN.__init__()[39m (pid=2227, ip=172.21.111.144, repr=DQN)
[2m[36m(DQN pid=2227)[0m   File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 312, in __init__
[2m[36m(DQN pid=2227)[0m     super().__init__(config=config, logger_creator=logger_creator, **kwargs)
[2m[36m(DQN pid=2227)[0m   File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/tune/trainable/trainable.py", line 159, in __init__
[2m[36m(DQN pid=2227)[0m     self.setup(copy.deepcopy(self.config))
[2m[36m(DQN pid=2227)[0m   File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 422, in setup
[2m[36m(DQN pid=2227)[0m     self.workers = WorkerSet(
[2m[36m(DQN pid=2227)[0m   File 

VBox(children=(Label(value='0.002 MB of 0.011 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.173697…

Result for DQN_MiniHack-D3QN-v0_ab7af_00001:
  trial_id: ab7af_00001
  


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmamello-justice[0m ([33mcoms-4061a-team[0m). Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016669480000079298, max=1.0…

wandb: ERROR Failed to sample metric: Not Supported
[2m[36m(DQN pid=2412)[0m 2022-11-08 17:48:20,935	INFO simple_q.py:293 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting `simple_optimizer=True` if this doesn't work for you.
[2m[36m(DQN pid=2412)[0m 2022-11-08 17:48:20,936	INFO algorithm.py:355 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
2022-11-08 17:48:24,563	ERROR trial_runner.py:987 -- Trial DQN_MiniHack-D3QN-v0_ab7af_00002: Error processing event.
ray.tune.error._TuneNoNextExecutorEventError: Traceback (most recent call last):
  File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/tune/execution/ray_trial_executor.py", line 996, in get_next_executor_event
    future_result = ray.get(ready_future)
  File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/_private/client_mode_hook.py", line 105, in

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Result for DQN_MiniHack-D3QN-v0_ab7af_00002:
  trial_id: ab7af_00002
  


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmamello-justice[0m ([33mcoms-4061a-team[0m). Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016669351666253836, max=1.0…

[2m[36m(DQN pid=2594)[0m 2022-11-08 17:48:36,898	INFO simple_q.py:293 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting `simple_optimizer=True` if this doesn't work for you.
[2m[36m(DQN pid=2594)[0m 2022-11-08 17:48:36,899	INFO algorithm.py:355 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
wandb: ERROR Failed to sample metric: Not Supported
2022-11-08 17:48:40,672	ERROR trial_runner.py:987 -- Trial DQN_MiniHack-D3QN-v0_ab7af_00003: Error processing event.
ray.tune.error._TuneNoNextExecutorEventError: Traceback (most recent call last):
  File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/tune/execution/ray_trial_executor.py", line 996, in get_next_executor_event
    future_result = ray.get(ready_future)
  File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/_private/client_mode_hook.py", line 105, in

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Result for DQN_MiniHack-D3QN-v0_ab7af_00003:
  trial_id: ab7af_00003
  


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmamello-justice[0m ([33mcoms-4061a-team[0m). Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016669869999653506, max=1.0…

wandb: ERROR Failed to sample metric: Not Supported
[2m[36m(DQN pid=2777)[0m 2022-11-08 17:48:55,456	INFO simple_q.py:293 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting `simple_optimizer=True` if this doesn't work for you.
[2m[36m(DQN pid=2777)[0m 2022-11-08 17:48:55,458	INFO algorithm.py:355 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
2022-11-08 17:48:59,233	ERROR trial_runner.py:987 -- Trial DQN_MiniHack-D3QN-v0_ab7af_00004: Error processing event.
ray.tune.error._TuneNoNextExecutorEventError: Traceback (most recent call last):
  File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/tune/execution/ray_trial_executor.py", line 996, in get_next_executor_event
    future_result = ray.get(ready_future)
  File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/_private/client_mode_hook.py", line 105, in

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Result for DQN_MiniHack-D3QN-v0_ab7af_00004:
  trial_id: ab7af_00004
  


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmamello-justice[0m ([33mcoms-4061a-team[0m). Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016669206666604926, max=1.0…

wandb: ERROR Failed to sample metric: Not Supported
[2m[36m(DQN pid=2972)[0m 2022-11-08 17:49:14,967	INFO simple_q.py:293 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting `simple_optimizer=True` if this doesn't work for you.
[2m[36m(DQN pid=2972)[0m 2022-11-08 17:49:14,968	INFO algorithm.py:355 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
2022-11-08 17:49:18,366	ERROR trial_runner.py:987 -- Trial DQN_MiniHack-D3QN-v0_ab7af_00005: Error processing event.
ray.tune.error._TuneNoNextExecutorEventError: Traceback (most recent call last):
  File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/tune/execution/ray_trial_executor.py", line 996, in get_next_executor_event
    future_result = ray.get(ready_future)
  File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/_private/client_mode_hook.py", line 105, in

[2m[36m(DQN pid=2972)[0m 2022-11-08 17:49:18,353	ERROR worker.py:756 -- Exception raised in creation task: The actor died because of an error raised in its creation task, [36mray::DQN.__init__()[39m (pid=2972, ip=172.21.111.144, repr=DQN)
[2m[36m(DQN pid=2972)[0m   File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 312, in __init__
[2m[36m(DQN pid=2972)[0m     super().__init__(config=config, logger_creator=logger_creator, **kwargs)
[2m[36m(DQN pid=2972)[0m   File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/tune/trainable/trainable.py", line 159, in __init__
[2m[36m(DQN pid=2972)[0m     self.setup(copy.deepcopy(self.config))
[2m[36m(DQN pid=2972)[0m   File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 422, in setup
[2m[36m(DQN pid=2972)[0m     self.workers = WorkerSet(
[2m[36m(DQN pid=2972)[0m   File 

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Result for DQN_MiniHack-D3QN-v0_ab7af_00005:
  trial_id: ab7af_00005
  


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmamello-justice[0m ([33mcoms-4061a-team[0m). Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01666979833341126, max=1.0)…

wandb: ERROR Failed to sample metric: Not Supported
[2m[36m(DQN pid=3155)[0m 2022-11-08 17:49:32,056	INFO simple_q.py:293 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting `simple_optimizer=True` if this doesn't work for you.
[2m[36m(DQN pid=3155)[0m 2022-11-08 17:49:32,058	INFO algorithm.py:355 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


2022-11-08 17:49:36,209	ERROR trial_runner.py:987 -- Trial DQN_MiniHack-D3QN-v0_ab7af_00006: Error processing event.
ray.tune.error._TuneNoNextExecutorEventError: Traceback (most recent call last):
  File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/tune/execution/ray_trial_executor.py", line 996, in get_next_executor_event
    future_result = ray.get(ready_future)
  File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/_private/client_mode_hook.py", line 105, in wrapper
    return func(*args, **kwargs)
  File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/_private/worker.py", line 2282, in get
    raise value
ray.exceptions.RayActorError: The actor died because of an error raised in its creation task, [36mray::DQN.__init__()[39m (pid=3155, ip=172.21.111.144, repr=DQN)
  File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 312, in __init_

[2m[36m(DQN pid=3155)[0m 2022-11-08 17:49:36,187	ERROR worker.py:756 -- Exception raised in creation task: The actor died because of an error raised in its creation task, [36mray::DQN.__init__()[39m (pid=3155, ip=172.21.111.144, repr=DQN)
[2m[36m(DQN pid=3155)[0m   File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 312, in __init__
[2m[36m(DQN pid=3155)[0m     super().__init__(config=config, logger_creator=logger_creator, **kwargs)
[2m[36m(DQN pid=3155)[0m   File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/tune/trainable/trainable.py", line 159, in __init__
[2m[36m(DQN pid=3155)[0m     self.setup(copy.deepcopy(self.config))
[2m[36m(DQN pid=3155)[0m   File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 422, in setup
[2m[36m(DQN pid=3155)[0m     self.workers = WorkerSet(
[2m[36m(DQN pid=3155)[0m   File 

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Result for DQN_MiniHack-D3QN-v0_ab7af_00006:
  trial_id: ab7af_00006
  


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmamello-justice[0m ([33mcoms-4061a-team[0m). Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016671781666809694, max=1.0…

wandb: ERROR Failed to sample metric: Not Supported
[2m[36m(DQN pid=3334)[0m 2022-11-08 17:49:49,921	INFO simple_q.py:293 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting `simple_optimizer=True` if this doesn't work for you.
[2m[36m(DQN pid=3334)[0m 2022-11-08 17:49:49,923	INFO algorithm.py:355 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


2022-11-08 17:49:54,746	ERROR trial_runner.py:987 -- Trial DQN_MiniHack-D3QN-v0_ab7af_00007: Error processing event.
ray.tune.error._TuneNoNextExecutorEventError: Traceback (most recent call last):
  File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/tune/execution/ray_trial_executor.py", line 996, in get_next_executor_event
    future_result = ray.get(ready_future)
  File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/_private/client_mode_hook.py", line 105, in wrapper
    return func(*args, **kwargs)
  File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/_private/worker.py", line 2282, in get
    raise value
ray.exceptions.RayActorError: The actor died because of an error raised in its creation task, [36mray::DQN.__init__()[39m (pid=3334, ip=172.21.111.144, repr=DQN)
  File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 312, in __init_

[2m[36m(DQN pid=3334)[0m 2022-11-08 17:49:54,729	ERROR worker.py:756 -- Exception raised in creation task: The actor died because of an error raised in its creation task, [36mray::DQN.__init__()[39m (pid=3334, ip=172.21.111.144, repr=DQN)
[2m[36m(DQN pid=3334)[0m   File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 312, in __init__
[2m[36m(DQN pid=3334)[0m     super().__init__(config=config, logger_creator=logger_creator, **kwargs)
[2m[36m(DQN pid=3334)[0m   File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/tune/trainable/trainable.py", line 159, in __init__
[2m[36m(DQN pid=3334)[0m     self.setup(copy.deepcopy(self.config))
[2m[36m(DQN pid=3334)[0m   File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 422, in setup
[2m[36m(DQN pid=3334)[0m     self.workers = WorkerSet(
[2m[36m(DQN pid=3334)[0m   File 

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Result for DQN_MiniHack-D3QN-v0_ab7af_00007:
  trial_id: ab7af_00007
  


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmamello-justice[0m ([33mcoms-4061a-team[0m). Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01666930833356067, max=1.0)…

[2m[36m(DQN pid=3524)[0m 2022-11-08 17:50:09,383	INFO simple_q.py:293 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting `simple_optimizer=True` if this doesn't work for you.
[2m[36m(DQN pid=3524)[0m 2022-11-08 17:50:09,385	INFO algorithm.py:355 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
wandb: ERROR Failed to sample metric: Not Supported
2022-11-08 17:50:14,476	ERROR trial_runner.py:987 -- Trial DQN_MiniHack-D3QN-v0_ab7af_00008: Error processing event.
ray.tune.error._TuneNoNextExecutorEventError: Traceback (most recent call last):
  File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/tune/execution/ray_trial_executor.py", line 996, in get_next_executor_event
    future_result = ray.get(ready_future)
  File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/_private/client_mode_hook.py", line 105, in

Result for DQN_MiniHack-D3QN-v0_ab7af_00008:
  trial_id: ab7af_00008
  


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


[34m[1mwandb[0m: Currently logged in as: [33mmamello-justice[0m ([33mcoms-4061a-team[0m). Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01667124666661645, max=1.0)…

[2m[36m(DQN pid=3691)[0m 2022-11-08 17:50:30,720	INFO simple_q.py:293 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting `simple_optimizer=True` if this doesn't work for you.
[2m[36m(DQN pid=3691)[0m 2022-11-08 17:50:30,722	INFO algorithm.py:355 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
2022-11-08 17:50:30,867	ERROR trial_runner.py:987 -- Trial DQN_MiniHack-D3QN-v0_ab7af_00009: Error processing event.
ray.tune.error._TuneNoNextExecutorEventError: Traceback (most recent call last):
  File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/tune/execution/ray_trial_executor.py", line 996, in get_next_executor_event
    future_result = ray.get(ready_future)
  File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/_private/client_mode_hook.py", line 105, in wrapper
    return func(*args, **kwargs)
  File "/h

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Result for DQN_MiniHack-D3QN-v0_ab7af_00009:
  trial_id: ab7af_00009
  


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmamello-justice[0m ([33mcoms-4061a-team[0m). Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01666995166694202, max=1.0)…

wandb: ERROR Failed to sample metric: Not Supported
[2m[36m(DQN pid=3870)[0m 2022-11-08 17:50:46,600	INFO simple_q.py:293 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting `simple_optimizer=True` if this doesn't work for you.
[2m[36m(DQN pid=3870)[0m 2022-11-08 17:50:46,602	INFO algorithm.py:355 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(DQN pid=3870)[0m 2022-11-08 17:50:50,979	ERROR worker.py:756 -- Exception raised in creation task: The actor died because of an error raised in its creation task, [36mray::DQN.__init__()[39m (pid=3870, ip=172.21.111.144, repr=DQN)
[2m[36m(DQN pid=3870)[0m   File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 312, in __init__
[2m[36m(DQN pid=3870)[0m     super().__init__(config=config, logger_creator=logger_creator, **kwargs)
[2m[36m(

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Result for DQN_MiniHack-D3QN-v0_ab7af_00010:
  trial_id: ab7af_00010
  


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmamello-justice[0m ([33mcoms-4061a-team[0m). Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016669704999852306, max=1.0…

wandb: ERROR Failed to sample metric: Not Supported
[2m[36m(DQN pid=4073)[0m 2022-11-08 17:51:07,727	INFO simple_q.py:293 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting `simple_optimizer=True` if this doesn't work for you.
[2m[36m(DQN pid=4073)[0m 2022-11-08 17:51:07,729	INFO algorithm.py:355 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(DQN pid=4073)[0m 2022-11-08 17:51:12,448	ERROR worker.py:756 -- Exception raised in creation task: The actor died because of an error raised in its creation task, [36mray::DQN.__init__()[39m (pid=4073, ip=172.21.111.144, repr=DQN)
[2m[36m(DQN pid=4073)[0m   File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 312, in __init__
[2m[36m(DQN pid=4073)[0m     super().__init__(config=config, logger_creator=logger_creator, **kwargs)
[2m[36m(DQN pid=4073)[0m   File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/tune/trainable/trainable.py", line 159, in __init__
[2m[36m(DQN pid=4073)[0m     self.setup(copy.deepcopy(self.config))
[2m[36m(DQN pid=4073)[0m   File "/home/develop/anaconda3/envs/minihack_rl/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 422, in setup
[2m[36m(DQN pid=4073)[0m     self.workers = WorkerSet(
[2m[36m(DQN pid=4073)[0m   File 

In [None]:
# Shut down the Ray instance that was started with `ray.init` earlier in this notebook.
ray.shutdown()