In [2]:
import gym
import ray

In [3]:
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F

from ray import tune
from ray.tune.schedulers import ASHAScheduler

In [4]:
# Initialize Ray
# ray.shutdown() is called before ray.init() so that re-running this cell
# starts Ray again (presumably to get a fresh session -- TODO confirm).
# ignore_reinit_error=True is passed so a repeated init is not treated as an error.
ray.shutdown()
ray.init(ignore_reinit_error=True)

2020-09-26 17:18:18,361	INFO resource_spec.py:231 -- Starting Ray with 14.01 GiB memory available for workers and up to 7.02 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).
2020-09-26 17:18:18,774	INFO services.py:1193 -- View the Ray dashboard at [1m[32mlocalhost:8265[39m[22m


{'node_ip_address': '192.168.7.73',
 'raylet_ip_address': '192.168.7.73',
 'redis_address': '192.168.7.73:6379',
 'object_store_address': '/tmp/ray/session_2020-09-26_17-18-18_360183_30339/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2020-09-26_17-18-18_360183_30339/sockets/raylet',
 'webui_url': 'localhost:8265',
 'session_dir': '/tmp/ray/session_2020-09-26_17-18-18_360183_30339'}

In [5]:
# Print the address of the Ray dashboard as reported by ray.get_webui_url().
print("Dashboard URL: http://{}".format(ray.get_webui_url()))

Dashboard URL: http://localhost:8265


In [6]:
import chainerrl
from chainerrl.wrappers import ContinuingTimeLimit
from chainerrl.wrappers.atari_wrappers import FrameStack, ScaledFloatFrame

# Environment wrapper borrowed from minerl sample code: 
# https://github.com/minerllabs/baselines/tree/master/general/chainerrl
from env_wrappers import (
    SerialDiscreteActionWrapper, CombineActionWrapper, SerialDiscreteCombineActionWrapper,
    ContinuingTimeLimitMonitor,
    MoveAxisWrapper, FrameSkip, ObtainPoVWrapper, PoVWithCompassAngleWrapper, GrayScaleWrapper)


In [7]:
# Arguments for wrapper
class Args:
    """Plain settings holder read by ``wrap_env`` when wrapping an environment.

    Attributes:
        frame_skip: ``skip`` value for the ``FrameSkip`` wrapper; ``None``
            disables frame skipping.
        gray_scale: When true, the ``GrayScaleWrapper`` is applied to 'pov'.
        env: Environment name; names starting with 'MineRLNavigate' get the
            ``PoVWithCompassAngleWrapper`` instead of ``ObtainPoVWrapper``.
        frame_stack: Number of frames for ``FrameStack``; ``None`` (or a
            value <= 0) disables frame stacking.
        disable_action_prior: Selects the action wrapper
            (False=Discrete or True=CombineDiscrete).
        monitor: When true (and ``wrap_env`` is called with ``test=True``),
            the environment is wrapped in ``ContinuingTimeLimitMonitor``.
        outdir: Base directory; the monitor writes to ``<outdir>/monitor``.
    """

    def __init__(self):
        self.frame_skip = None
        self.gray_scale = False
        self.env = 'MineRLNavigateDense'
        self.frame_stack = None
        self.disable_action_prior = False  # False=Discrete or True=CombineDiscrete
        # wrap_env reads these two attributes when test=True. They were
        # missing before, which made wrap_env(env, test=True) raise
        # AttributeError. Monitoring stays off by default.
        self.monitor = False
        self.outdir = 'results'
args = Args()

In [8]:
# This entire function is borrowed from MineRL demo files:
# https://github.com/minerllabs/baselines/blob/master/general/chainerrl/baselines/ppo.py#L124
def wrap_env(env, test):
    """Wrap a MineRL gym environment with observation and action wrappers.

    Adapted from the MineRL baselines (chainerrl/baselines/ppo.py). The
    wrappers are driven by the module-level ``args`` settings object.

    Args:
        env: The gym environment to wrap.
        test: Truthy when the environment is used for evaluation; the
            monitor wrapper is only added in that case (and only if
            ``args.monitor`` is set).

    Returns:
        The wrapped environment.
    """
    # Local import: none of the notebook's import cells bring in `os`, so the
    # monitor branch below previously raised NameError.
    import os

    if isinstance(env, gym.wrappers.TimeLimit):
        # TODO re-enable this line by importing logger
        # logger.info('Detected `gym.wrappers.TimeLimit`! Unwrap it and re-wrap our own time limit.')
        env = env.env
        max_episode_steps = env.spec.max_episode_steps
        env = ContinuingTimeLimit(env, max_episode_steps=max_episode_steps)

    # wrap env: observation...
    # NOTE: wrapping order matters!

    # getattr with defaults: `args` may not define `monitor` / `outdir`, in
    # which case monitoring is simply skipped instead of raising
    # AttributeError.
    if test and getattr(args, 'monitor', False):
        outdir = getattr(args, 'outdir', 'results')
        env = ContinuingTimeLimitMonitor(
            env, os.path.join(outdir, 'monitor'),
            # This branch only runs when `test` is truthy, so the mode is
            # always 'evaluation'.
            mode='evaluation', video_callable=lambda episode_id: True)
    if args.frame_skip is not None:
        env = FrameSkip(env, skip=args.frame_skip)
    if args.gray_scale:
        env = GrayScaleWrapper(env, dict_space_key='pov')
    if args.env.startswith('MineRLNavigate'):
        env = PoVWithCompassAngleWrapper(env)
    else:
        env = ObtainPoVWrapper(env)
    env = MoveAxisWrapper(env, source=-1, destination=0)  # convert hwc -> chw as Chainer requires.
    env = ScaledFloatFrame(env)
    if args.frame_stack is not None and args.frame_stack > 0:
        env = FrameStack(env, args.frame_stack, channel_order='chw')

    # wrap env: action...
    if not args.disable_action_prior:
        env = SerialDiscreteActionWrapper(
            env,
            always_keys=[], reverse_keys=[], exclude_keys=['camera'], exclude_noop=False)
    else:
        env = CombineActionWrapper(env)
        env = SerialDiscreteCombineActionWrapper(env)

    return env

### Register MineRL Gym Environment to RLlib

In [9]:
import minerl
from gym import envs



In [10]:
# Register MineRL Gym Environment to RLLIB
# https://docs.ray.io/en/latest/rllib-env.html
from ray.tune.registry import register_env

def minerl_env_creator(env_config):
    """Build the wrapped MineRL environment for RLlib.

    Args:
        env_config: Mapping of environment options. If it contains the key
            'minerl_env_name', that value is used as the gym environment id;
            otherwise 'MineRLNavigateDense-v0' is used.

    Returns:
        The environment returned by ``wrap_env(..., test=False)``.
    """
    # Imported inside the creator, as in the original cell (presumably so the
    # MineRL environments are registered with gym in whichever process calls
    # this function -- TODO confirm).
    import minerl

    if 'minerl_env_name' not in env_config:
        # TODO use logger
        print('No MineRL Env name specified, using MineRLNavigateDense-v0')
        env_name = 'MineRLNavigateDense-v0'
    else:
        # TODO use logger
        print('MineRL Env Name found...')
        env_name = env_config['minerl_env_name']

    # Debugging aid kept from the original: list registered gym env ids with
    #     [env_spec.id for env_spec in envs.registry.all()]

    # A bare gym.make(env_name) doesn't work on its own here: the action space
    # needs to be discretized, which wrap_env takes care of.
    return wrap_env(gym.make(env_name), test=False)


register_env("minerl", minerl_env_creator)

In [11]:
# Registering a custom model, simple version --> fully connected network.
# We need this because the default configurations for the model network are
# not compatible with the shape of the observations from the MineRL environment.
# Each observation is a 64 x 64 pixel RGBA image of the gameplay P.O.V. --> shape = (4, 64, 64)
# (NOTE: with PoVWithCompassAngleWrapper the 4th channel may be the compass angle rather than alpha -- TODO confirm.)

# Otherwise we will get:
# ValueError: No default configuration for obs shape [4, 64, 64], you must specify 
# `conv_filters` manually as a model option. Default configurations are only available
# for inputs of shape [42, 42, K] and [84, 84, K]. You may alternatively want to use 
# a custom model or preprocessor.

from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFC

Instructions for updating:
non-resource variables are not supported in the long term


For now, use a fully connected network?

In [12]:
# https://docs.ray.io/en/releases-0.8.5/rllib-examples.html
# The register custom env and model links to custom_env.py
# https://github.com/ray-project/ray/blob/master/rllib/examples/custom_env.py
class TorchCustomModel(TorchModelV2, nn.Module):
    """Custom PyTorch model that hands all the work to a fully connected net.

    The wrapped ``TorchFC`` instance is built from the same constructor
    arguments this model receives and is stored as ``torch_sub_model``.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        # Both base classes are initialized explicitly, TorchModelV2 first.
        TorchModelV2.__init__(
            self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)

        self.torch_sub_model = TorchFC(
            obs_space, action_space, num_outputs, model_config, name)

    def forward(self, input_dict, state, seq_lens):
        """Run the sub-model on the observations cast to float.

        Note that ``input_dict["obs"]`` is replaced in place by its float
        version. Returns the sub-model output and an empty state list.
        """
        input_dict["obs"] = input_dict["obs"].float()
        sub_model_out, _ = self.torch_sub_model(input_dict, state, seq_lens)
        return sub_model_out, []

    def value_function(self):
        """Return the sub-model's value output flattened to one dimension."""
        values = self.torch_sub_model.value_function()
        return torch.reshape(values, [-1])


In [13]:
ModelCatalog.register_custom_model("fc_pov", TorchCustomModel)

In [15]:
from ray import tune
from ray.rllib.agents.ppo import PPOTrainer
from ray.rllib.agents.dqn import DQNTrainer

# Launch a short DQN run on the registered "minerl" environment, using the
# "fc_pov" custom model. Left as a bare expression so the notebook displays
# the object returned by tune.run.
tune.run(
    DQNTrainer,
    config={
        "env": "minerl",
        "use_pytorch": True,
        "monitor": True,
        "model": {
            "custom_model": "fc_pov",
        },
    },
    stop={"training_iteration": 2, "timesteps_total": 1000},
)
# Config notes:
# "log_level": "INFO" for verbose,
# "eager": True for eager execution,

Trial name,status,loc
DQN_minerl_0af08_00000,RUNNING,


[2m[36m(pid=30652)[0m Instructions for updating:
[2m[36m(pid=30652)[0m non-resource variables are not supported in the long term
[2m[36m(pid=30652)[0m 2020-09-26 17:19:03,172	INFO trainer.py:632 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(pid=30652)[0m No MineRL Env name specified, using MineRLNavigateDense-v0


[2m[36m(pid=30652)[0m 2020-09-26 17:19:37,321	INFO trainable.py:251 -- Trainable.setup took 34.521 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


Result for DQN_minerl_0af08_00000:
  custom_metrics: {}
  date: 2020-09-26_17-20-05
  done: true
  episode_len_mean: .nan
  episode_reward_max: .nan
  episode_reward_mean: .nan
  episode_reward_min: .nan
  episodes_this_iter: 0
  episodes_total: 0
  experiment_id: 230bd0b49c5a4629afe1ef45bbdd6021
  experiment_tag: '0'
  hostname: blackbox
  info:
    last_target_update_ts: 1000
    learner:
      default_policy:
        allreduce_latency: 0.0
        cur_lr: 0.0005
        grad_gnorm: 0.518851101398468
        max_q: 0.12171119451522827
        mean_q: -0.004640852101147175
        mean_td_error: -0.051513463258743286
        min_q: -0.12466123700141907
    num_steps_sampled: 1000
    num_steps_trained: 32
    num_target_updates: 1
  iterations_since_restore: 1
  node_ip: 192.168.7.73
  num_healthy_workers: 0
  off_policy_estimator: {}
  perf:
    cpu_util_percent: 36.55609756097561
    ram_util_percent: 43.8829268292683
  pid: 30652
  policy_reward_max: {}
  policy_reward_mean: {}
  p

Trial name,status,loc,iter,total time (s),ts,reward
DQN_minerl_0af08_00000,TERMINATED,,1,28.0572,1000,


Trial name,status,loc,iter,total time (s),ts,reward
DQN_minerl_0af08_00000,TERMINATED,,1,28.0572,1000,


[2m[36m(pid=30652)[0m *** Aborted at 1601166008 (unix time) try "date -d @1601166008" if you are using GNU date ***
[2m[36m(pid=30652)[0m PC: @                0x0 (unknown)
[2m[36m(pid=30652)[0m *** SIGSEGV (@0x7ff2fcff99d0) received by PID 31010 (TID 0x7ff31f930740) from PID 18446744073659193808; stack trace: ***
[2m[36m(pid=30652)[0m     @     0x7ff31fca13c0 (unknown)


<ray.tune.analysis.experiment_analysis.ExperimentAnalysis at 0x7fd68f975f10>

[2m[36m(pid=30652)[0m     @     0x7ff31fc96aab __pthread_clockjoin_ex
[2m[36m(pid=30652)[0m     @     0x7ff31d86c2d3 std::thread::join()
[2m[36m(pid=30652)[0m     @     0x7ff31dd9d493 ray::gcs::GlobalStateAccessor::Disconnect()
[2m[36m(pid=30652)[0m     @     0x7ff31dc3cfbc __pyx_pw_3ray_7_raylet_19GlobalStateAccessor_5disconnect()
[2m[36m(pid=30652)[0m     @     0x5622711deb71 _PyMethodDef_RawFastCallKeywords
[2m[36m(pid=30652)[0m     @     0x5622711e5aef _PyMethodDescr_FastCallKeywords
[2m[36m(pid=30652)[0m     @     0x56227124a37c _PyEval_EvalFrameDefault
[2m[36m(pid=30652)[0m     @     0x5622711de20b _PyFunction_FastCallKeywords
[2m[36m(pid=30652)[0m     @     0x562271245e70 _PyEval_EvalFrameDefault
[2m[36m(pid=30652)[0m     @     0x56227118e2b9 _PyEval_EvalCodeWithName
[2m[36m(pid=30652)[0m     @     0x5622711de435 _PyFunction_FastCallKeywords
[2m[36m(pid=30652)[0m     @     0x562271245be6 _PyEval_EvalFrameDefault
[2m[36m(pid=30652)[0m     @ 