In [1]:
import gym
import ray

In [2]:
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F

from ray import tune
from ray.tune.schedulers import ASHAScheduler

In [3]:
# Initialize Ray.
# ray.shutdown() is called first so that re-running this cell starts from a
# clean state; ignore_reinit_error=True additionally keeps ray.init() from
# raising if an instance is already running.
ray.shutdown()
ray.init(ignore_reinit_error=True)

2020-09-06 13:00:00,509	INFO resource_spec.py:231 -- Starting Ray with 15.43 GiB memory available for workers and up to 7.72 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).
2020-09-06 13:00:00,928	INFO services.py:1193 -- View the Ray dashboard at [1m[32mlocalhost:8266[39m[22m


{'node_ip_address': '192.168.7.73',
 'raylet_ip_address': '192.168.7.73',
 'redis_address': '192.168.7.73:6379',
 'object_store_address': '/tmp/ray/session_2020-09-06_13-00-00_508116_16843/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2020-09-06_13-00-00_508116_16843/sockets/raylet',
 'webui_url': 'localhost:8266',
 'session_dir': '/tmp/ray/session_2020-09-06_13-00-00_508116_16843'}

In [4]:
# Show where the Ray dashboard for this session can be reached.
print(f"Dashboard URL: http://{ray.get_webui_url()}")

Dashboard URL: http://localhost:8266


In [5]:
import chainerrl
from chainerrl.wrappers import ContinuingTimeLimit
from chainerrl.wrappers.atari_wrappers import FrameStack, ScaledFloatFrame

# Environment wrapper borrowed from minerl sample code: 
# https://github.com/minerllabs/baselines/tree/master/general/chainerrl
from env_wrappers import (
    SerialDiscreteActionWrapper, CombineActionWrapper, SerialDiscreteCombineActionWrapper,
    ContinuingTimeLimitMonitor,
    MoveAxisWrapper, FrameSkip, ObtainPoVWrapper, PoVWithCompassAngleWrapper, GrayScaleWrapper)


In [6]:
# Arguments for the environment wrapper (read by wrap_env)
class Args:
    """Plain settings container read by ``wrap_env`` through the global ``args``.

    Stands in for the command-line arguments of the MineRL baseline script
    that ``wrap_env`` was borrowed from.
    """

    def __init__(self):
        # Passed to FrameSkip as ``skip``; None means no FrameSkip wrapper.
        self.frame_skip = None
        # When True, GrayScaleWrapper is applied to the 'pov' observation.
        self.gray_scale = False
        # Environment name prefix; names starting with 'MineRLNavigate' get
        # PoVWithCompassAngleWrapper, everything else ObtainPoVWrapper.
        self.env = 'MineRLNavigateDense'
        # Passed to FrameStack; None (or a value <= 0) means no frame stacking.
        self.frame_stack = None
        # False -> SerialDiscreteActionWrapper,
        # True  -> CombineActionWrapper + SerialDiscreteCombineActionWrapper.
        self.disable_action_prior = False
        # wrap_env reads these two when called with test=True; without them
        # that call raised AttributeError.
        # When True (and test=True) the env is wrapped in ContinuingTimeLimitMonitor.
        self.monitor = False
        # Base directory under which the monitor writes to '<outdir>/monitor'.
        self.outdir = 'results'


args = Args()

In [7]:
# This entire function is borrowed from MineRL demo files:
# https://github.com/minerllabs/baselines/blob/master/general/chainerrl/baselines/ppo.py#L124
def wrap_env(env, test):
    """Apply the time-limit, observation and action wrappers to a MineRL env.

    Adapted from the MineRL ChainerRL baseline (``ppo.py``). All options are
    read from the module-level ``args`` object.

    Args:
        env: The gym environment to wrap.
        test: Truthy when the environment is used for evaluation; the monitor
            wrapper is only added when this and ``args.monitor`` are both
            truthy.

    Returns:
        The environment with all configured wrappers applied.
    """
    # Local import: ``os`` is only needed for the monitor output path and is
    # not imported by the notebook's visible import cells.
    import os

    if isinstance(env, gym.wrappers.TimeLimit):
        # TODO re-enable this line by importing logger
        # logger.info('Detected `gym.wrappers.TimeLimit`! Unwrap it and re-wrap our own time limit.')
        env = env.env
        max_episode_steps = env.spec.max_episode_steps
        env = ContinuingTimeLimit(env, max_episode_steps=max_episode_steps)

    # wrap env: observation...
    # NOTE: wrapping order matters!

    # ``monitor`` and ``outdir`` are optional on ``args``: fall back to
    # "monitoring off" / 'results' instead of raising AttributeError.
    monitor_enabled = getattr(args, 'monitor', False)
    if test and monitor_enabled:
        outdir = getattr(args, 'outdir', 'results')
        env = ContinuingTimeLimitMonitor(
            env, os.path.join(outdir, 'monitor'),
            mode='evaluation' if test else 'training', video_callable=lambda episode_id: True)
    if args.frame_skip is not None:
        env = FrameSkip(env, skip=args.frame_skip)
    if args.gray_scale:
        env = GrayScaleWrapper(env, dict_space_key='pov')
    if args.env.startswith('MineRLNavigate'):
        env = PoVWithCompassAngleWrapper(env)
    else:
        env = ObtainPoVWrapper(env)
    env = MoveAxisWrapper(env, source=-1, destination=0)  # convert hwc -> chw as Chainer requires.
    env = ScaledFloatFrame(env)
    if args.frame_stack is not None and args.frame_stack > 0:
        env = FrameStack(env, args.frame_stack, channel_order='chw')

    # wrap env: action...
    if not args.disable_action_prior:
        env = SerialDiscreteActionWrapper(
            env,
            always_keys=[], reverse_keys=[], exclude_keys=['camera'], exclude_noop=False)
    else:
        env = CombineActionWrapper(env)
        env = SerialDiscreteCombineActionWrapper(env)

    return env

### Register MineRL Gym Environment to RLlib

In [8]:
import minerl
from gym import envs



In [9]:
# Register MineRL Gym Environment to RLLIB
# https://docs.ray.io/en/latest/rllib-env.html
from ray.tune.registry import register_env

def minerl_env_creator(env_config):
    """Build the wrapped MineRL environment for RLlib.

    Args:
        env_config: Mapping of environment options. If it contains the key
            'minerl_env_name', that gym id is used; otherwise the creator
            falls back to 'MineRLNavigateDense-v0'.

    Returns:
        The gym environment produced by ``gym.make`` after being passed
        through ``wrap_env`` with ``test=False``.
    """
    # Imported inside the creator; presumably so the MineRL gym ids are also
    # registered in the process that calls this function -- TODO confirm.
    import minerl

    # TODO use logger instead of print for both messages below
    if 'minerl_env_name' not in env_config:
        print('No MineRL Env name specified, using MineRLNavigateDense-v0')
        env_name = 'MineRLNavigateDense-v0'
    else:
        print('MineRL Env Name found...')
        env_name = env_config['minerl_env_name']

    # The bare gym.make(env_name) env does not work with the trainer as-is:
    # the wrappers are needed to discretize the action space.
    return wrap_env(gym.make(env_name), test=False)


register_env("minerl", minerl_env_creator)

In [10]:
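# Register a custom model (simple version: a fully connected network).
# We need this because the default model configurations are not compatible
# with the shape of the observations coming from the wrapped MineRL environment.
# The observation is the 64 x 64 pixel game-play P.O.V. with 4 channels --> shape = (4, 64, 64)
# (originally noted as RGBA; with PoVWithCompassAngleWrapper the 4th channel is
# presumably the compass angle rather than alpha -- TODO confirm).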

# Otherwise we will get:
# ValueError: No default configuration for obs shape [4, 64, 64], you must specify 
# `conv_filters` manually as a model option. Default configurations are only available
# for inputs of shape [42, 42, K] and [84, 84, K]. You may alternatively want to use 
# a custom model or preprocessor.

from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFC

Instructions for updating:
non-resource variables are not supported in the long term


For now, use a fully connected network?

In [11]:
# https://docs.ray.io/en/releases-0.8.5/rllib-examples.html
# The register custom env and model links to custom_env.py
# https://github.com/ray-project/ray/blob/master/rllib/examples/custom_env.py
class TorchCustomModel(TorchModelV2, nn.Module):
    """Custom PyTorch model that hands all computation to a wrapped fc-net."""

    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Initialise both base classes and build the wrapped TorchFC model.

        All five arguments are forwarded unchanged to ``TorchModelV2`` and to
        the inner ``TorchFC`` network.
        """
        # Both bases are initialised explicitly, TorchModelV2 first.
        TorchModelV2.__init__(
            self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)

        # The wrapped network that does the actual work.
        self.torch_sub_model = TorchFC(
            obs_space, action_space, num_outputs, model_config, name)

    def forward(self, input_dict, state, seq_lens):
        """Cast the observation to float and run it through the wrapped model.

        Note that ``input_dict["obs"]`` is replaced in place by its float
        version. Returns the wrapped model's output and an empty state list.
        """
        input_dict["obs"] = input_dict["obs"].float()
        sub_out, _sub_state = self.torch_sub_model(input_dict, state, seq_lens)
        return sub_out, []

    def value_function(self):
        """Return the wrapped model's value output flattened to one dimension."""
        flat_shape = [-1]
        return torch.reshape(self.torch_sub_model.value_function(), flat_shape)


In [12]:
# Register the model class under the name "fc_pov"; the trainer config selects
# it through model["custom_model"] = "fc_pov".
ModelCatalog.register_custom_model("fc_pov", TorchCustomModel)

In [13]:
from ray import tune
from ray.rllib.agents.ppo import PPOTrainer
from ray.rllib.agents.dqn import DQNTrainer

# Train DQN on the registered "minerl" env using the registered "fc_pov" model.
# No stop criterion is passed; a previously tried (now disabled) one was:
#   stop={"training_iteration": 2, "timesteps_total": 1000}
# Config notes:
# "log_level": "INFO" for verbose,
# "eager": True for eager execution,
tune.run(
    DQNTrainer,
    config=dict(
        env="minerl",
        use_pytorch=True,
        monitor=True,
        model=dict(custom_model="fc_pov"),
    ),
)

Trial name,status,loc
DQN_minerl_8edd7_00000,RUNNING,


[2m[36m(pid=16967)[0m Instructions for updating:
[2m[36m(pid=16967)[0m non-resource variables are not supported in the long term
[2m[36m(pid=16967)[0m 2020-09-06 13:00:07,737	INFO trainer.py:632 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(pid=16967)[0m No MineRL Env name specified, using MineRLNavigateDense-v0


[2m[36m(pid=16967)[0m 2020-09-06 13:00:42,193	INFO trainable.py:251 -- Trainable.setup took 34.820 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


Result for DQN_minerl_8edd7_00000:
  custom_metrics: {}
  date: 2020-09-06_13-01-05
  done: false
  episode_len_mean: .nan
  episode_reward_max: .nan
  episode_reward_mean: .nan
  episode_reward_min: .nan
  episodes_this_iter: 0
  episodes_total: 0
  experiment_id: 1968fe337450470482b03c543a767807
  experiment_tag: '0'
  hostname: blackbox
  info:
    last_target_update_ts: 1000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 0.0005
        grad_gnorm: 0.19334669411182404
        max_q: 0.12279994040727615
        mean_q: 0.016800804063677788
        mean_td_error: -0.0336371511220932
        min_q: -0.04623394459486008
    num_steps_sampled: 1000
    num_steps_trained: 32
    num_target_updates: 1
  iterations_since_restore: 1
  node_ip: 192.168.7.73
  num_healthy_workers: 0
  off_policy_estimator: {}
  perf:
    cpu_util_percent: 39.358823529411765
    ram_util_percent: 35.77647058823529
  pid: 16967
  policy_reward_max: {}
  policy_reward_mean: {}
 

Trial name,status,loc,iter,total time (s),ts,reward
DQN_minerl_8edd7_00000,RUNNING,192.168.7.73:16967,1,23.6565,1000,


Result for DQN_minerl_8edd7_00000:
  custom_metrics: {}
  date: 2020-09-06_13-01-39
  done: false
  episode_len_mean: .nan
  episode_reward_max: .nan
  episode_reward_mean: .nan
  episode_reward_min: .nan
  episodes_this_iter: 0
  episodes_total: 0
  experiment_id: 1968fe337450470482b03c543a767807
  experiment_tag: '0'
  hostname: blackbox
  info:
    last_target_update_ts: 1504
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 0.0005
        grad_gnorm: 0.07108347117900848
        max_q: 1.20137619972229
        mean_q: 1.0611594915390015
        mean_td_error: 0.08147308975458145
        min_q: 1.0115444660186768
    num_steps_sampled: 2000
    num_steps_trained: 8032
    num_target_updates: 2
  iterations_since_restore: 2
  node_ip: 192.168.7.73
  num_healthy_workers: 0
  off_policy_estimator: {}
  perf:
    cpu_util_percent: 27.977083333333336
    ram_util_percent: 37.43541666666667
  pid: 16967
  policy_reward_max: {}
  policy_reward_mean: {}
  poli

Trial name,status,loc,iter,total time (s),ts,reward
DQN_minerl_8edd7_00000,RUNNING,192.168.7.73:16967,2,57.4533,2000,


Result for DQN_minerl_8edd7_00000:
  custom_metrics: {}
  date: 2020-09-06_13-02-15
  done: false
  episode_len_mean: .nan
  episode_reward_max: .nan
  episode_reward_mean: .nan
  episode_reward_min: .nan
  episodes_this_iter: 0
  episodes_total: 0
  experiment_id: 1968fe337450470482b03c543a767807
  experiment_tag: '0'
  hostname: blackbox
  info:
    last_target_update_ts: 2512
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 0.0005
        grad_gnorm: 0.037536777555942535
        max_q: 1.0626637935638428
        mean_q: 1.017970085144043
        mean_td_error: 0.049974989145994186
        min_q: 0.9332568645477295
    num_steps_sampled: 3000
    num_steps_trained: 16032
    num_target_updates: 4
  iterations_since_restore: 3
  node_ip: 192.168.7.73
  num_healthy_workers: 0
  off_policy_estimator: {}
  perf:
    cpu_util_percent: 27.486538461538462
    ram_util_percent: 37.93269230769231
  pid: 16967
  policy_reward_max: {}
  policy_reward_mean: {}
  

Trial name,status,loc,iter,total time (s),ts,reward
DQN_minerl_8edd7_00000,RUNNING,192.168.7.73:16967,3,93.4831,3000,


Result for DQN_minerl_8edd7_00000:
  custom_metrics: {}
  date: 2020-09-06_13-02-52
  done: false
  episode_len_mean: .nan
  episode_reward_max: .nan
  episode_reward_mean: .nan
  episode_reward_min: .nan
  episodes_this_iter: 0
  episodes_total: 0
  experiment_id: 1968fe337450470482b03c543a767807
  experiment_tag: '0'
  hostname: blackbox
  info:
    last_target_update_ts: 3520
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 0.0005
        grad_gnorm: 0.022014491260051727
        max_q: 0.9397761821746826
        mean_q: 0.8950495719909668
        mean_td_error: -0.043474163860082626
        min_q: 0.8329958319664001
    num_steps_sampled: 4000
    num_steps_trained: 24032
    num_target_updates: 6
  iterations_since_restore: 4
  node_ip: 192.168.7.73
  num_healthy_workers: 0
  off_policy_estimator: {}
  perf:
    cpu_util_percent: 27.51153846153846
    ram_util_percent: 38.19230769230769
  pid: 16967
  policy_reward_max: {}
  policy_reward_mean: {}
 

Trial name,status,loc,iter,total time (s),ts,reward
DQN_minerl_8edd7_00000,RUNNING,192.168.7.73:16967,4,129.808,4000,


Result for DQN_minerl_8edd7_00000:
  custom_metrics: {}
  date: 2020-09-06_13-03-28
  done: false
  episode_len_mean: .nan
  episode_reward_max: .nan
  episode_reward_mean: .nan
  episode_reward_min: .nan
  episodes_this_iter: 0
  episodes_total: 0
  experiment_id: 1968fe337450470482b03c543a767807
  experiment_tag: '0'
  hostname: blackbox
  info:
    last_target_update_ts: 4528
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 0.0005
        grad_gnorm: 0.01348408218473196
        max_q: 1.0141932964324951
        mean_q: 0.9728438258171082
        mean_td_error: -0.03279804065823555
        min_q: 0.809669554233551
    num_steps_sampled: 5000
    num_steps_trained: 32032
    num_target_updates: 8
  iterations_since_restore: 5
  node_ip: 192.168.7.73
  num_healthy_workers: 0
  off_policy_estimator: {}
  perf:
    cpu_util_percent: 27.174509803921566
    ram_util_percent: 38.57647058823529
  pid: 16967
  policy_reward_max: {}
  policy_reward_mean: {}
  p

Trial name,status,loc,iter,total time (s),ts,reward
DQN_minerl_8edd7_00000,RUNNING,192.168.7.73:16967,5,165.826,5000,


2020-09-06 13:04:04,510	ERROR trial_runner.py:523 -- Trial DQN_minerl_8edd7_00000: Error processing event.
Traceback (most recent call last):
  File "/home/blackbox/anaconda3/envs/minerl/lib/python3.7/site-packages/ray/tune/trial_runner.py", line 471, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/home/blackbox/anaconda3/envs/minerl/lib/python3.7/site-packages/ray/tune/ray_trial_executor.py", line 430, in fetch_result
    result = ray.get(trial_future[0], DEFAULT_GET_TIMEOUT)
  File "/home/blackbox/anaconda3/envs/minerl/lib/python3.7/site-packages/ray/worker.py", line 1538, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(Error): [36mray::DQN.train()[39m (pid=16967, ip=192.168.7.73)
  File "python/ray/_raylet.pyx", line 479, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 432, in ray._raylet.execute_task.function_executor
  File "/home/blackbox/anaconda3/envs/minerl/lib/python3.7/site-packages/ray/rllib/agents

Trial name,status,loc,iter,total time (s),ts,reward
DQN_minerl_8edd7_00000,ERROR,,5,165.826,5000,

Trial name,# failures,error file
DQN_minerl_8edd7_00000,1,/home/blackbox/ray_results/DQN/DQN_minerl_0_2020-09-06_13-00-0581weh38t/error.txt


Trial name,status,loc,iter,total time (s),ts,reward
DQN_minerl_8edd7_00000,ERROR,,5,165.826,5000,

Trial name,# failures,error file
DQN_minerl_8edd7_00000,1,/home/blackbox/ray_results/DQN/DQN_minerl_0_2020-09-06_13-00-0581weh38t/error.txt


[2m[36m(pid=16967)[0m *** Aborted at 1599422647 (unix time) try "date -d @1599422647" if you are using GNU date ***
[2m[36m(pid=16967)[0m PC: @                0x0 (unknown)
[2m[36m(pid=16967)[0m *** SIGSEGV (@0x7f45e8ff99d0) received by PID 17270 (TID 0x7f460ba94740) from PID 18446744073323649488; stack trace: ***
[2m[36m(pid=16967)[0m     @     0x7f460be053c0 (unknown)
[2m[36m(pid=16967)[0m     @     0x7f460bdfaaab __pthread_clockjoin_ex
[2m[36m(pid=16967)[0m     @     0x7f46099d02d3 std::thread::join()
[2m[36m(pid=16967)[0m     @     0x7f4609f01493 ray::gcs::GlobalStateAccessor::Disconnect()
[2m[36m(pid=16967)[0m     @     0x7f4609da0fbc __pyx_pw_3ray_7_raylet_19GlobalStateAccessor_5disconnect()
[2m[36m(pid=16967)[0m     @     0x5588dccaeb71 _PyMethodDef_RawFastCallKeywords
[2m[36m(pid=16967)[0m     @     0x5588dccb5aef _PyMethodDescr_FastCallKeywords
[2m[36m(pid=16967)[0m     @     0x5588dcd1a37c _PyEval_EvalFrameDefault
[2m[36m(pid=16967)[0m     

TuneError: ('Trials did not complete', [DQN_minerl_8edd7_00000])