In [None]:
import gym
import ray

In [None]:
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F

from ray import tune
from ray.tune.schedulers import ASHAScheduler

In [None]:
# Shut down any Ray runtime that is still active in this kernel.
# Ray itself is (re)initialized by the ray.init(...) call in a later cell.
ray.shutdown()

In [None]:
from rayflow import Rayflow

# Build the Rayflow instance from the YAML description, pointing it at the
# local MinIO and MLflow volume directories. no_ray=True is passed here; Ray
# is initialized separately through ray.init(...) in a later cell.
r = Rayflow.load(
    "test_input.yml",
    minio_volume_path="./minio_volume/",
    mlflow_volume_path="./mlflow_volume/",
    no_ray=True,
)

In [None]:
# Start the Rayflow instance loaded above (presumably the Docker containers
# that the last cell stops again via r.stop() -- TODO confirm).
r.start()

In [None]:
# Start Ray with a fixed resource budget. The CPU/GPU counts requested by the
# trainer and Tune configuration further down must fit within these limits.
ray.init(
    ignore_reinit_error=True,
    num_cpus=15,
    num_gpus=1,
)

In [None]:
# Register the MineRL environments with RLlib using code by Julius Frost
from minerl_rllib.envs import register
register()

In [None]:
# Register a custom model (simple version): a fully connected network.
# The input is the 64 x 64 pixel RGBA game-play point-of-view (P.O.V.) frame --> shape = (4, 64, 64),
# plus other status information such as inventory state. However, the MineRL competition
# environment uses an encoder-decoder model to vectorize this information.

# Side note, we can also get around the following error:
# ValueError: No default configuration for obs shape [4, 64, 64], you must specify 
# `conv_filters` manually as a model option. Default configurations are only available
# for inputs of shape [42, 42, K] and [84, 84, K]. You may alternatively want to use 
# a custom model or preprocessor.

# This works because the environment wrappers by Julius Frost reshape the output to
# [64, 64, 4], which is what RLlib expects. (Not confirmed)
# This way we don't need to work around the error by customizing the input layer of
# the model network.

from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFC
import torch.nn as nn
# https://docs.ray.io/en/releases-0.8.5/rllib-examples.html
# The register custom env and model links to custom_env.py
# https://github.com/ray-project/ray/blob/master/rllib/examples/custom_env.py
class TorchCustomModel(TorchModelV2, nn.Module):
    """Custom PyTorch model that forwards all work to a fully connected net.

    The wrapped ``TorchFC`` instance is built from the same constructor
    arguments this class receives and is stored as ``torch_sub_model``.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Initialize both base classes, then create the wrapped fc-net."""
        # Both parents are initialized explicitly, TorchModelV2 first.
        TorchModelV2.__init__(
            self, obs_space, action_space, num_outputs, model_config, name
        )
        nn.Module.__init__(self)

        self.torch_sub_model = TorchFC(
            obs_space, action_space, num_outputs, model_config, name
        )

    def forward(self, input_dict, state, seq_lens):
        """Run the wrapped network on ``input_dict``.

        The ``"obs"`` entry is converted to float in place on ``input_dict``
        before the call. Returns the sub-model output together with an empty
        state list; the state returned by the sub-model is discarded.
        """
        observations = input_dict["obs"]
        input_dict["obs"] = observations.float()
        model_out, _unused_state = self.torch_sub_model(
            input_dict, state, seq_lens
        )
        return model_out, []

    def value_function(self):
        """Return the wrapped network's value output flattened to 1-D."""
        values = self.torch_sub_model.value_function()
        return torch.reshape(values, [-1])

In [None]:
# Register the custom model class under the name "fc_pov" so that the trainer
# configuration can select it by name through its "custom_model" model option.
ModelCatalog.register_custom_model("fc_pov", TorchCustomModel)

In [None]:
# Name of the MLflow experiment that runs are logged under
mlflow_exp_name = "RF_MineRL_Test1"

# CPUs requested per worker / per trial below; ray.init(...) must have been
# given at least this many CPUs
cpu_alloc_count = 8

# Name under which the custom model is registered with the ModelCatalog
custom_model_name = "fc_pov"

In [None]:
# Configuration for Ray and MLflow, parsed by the Rayflow wrapper functions
train_name = "mlflow_train"

# MLflow logging settings for the training run
mlflow_logging_config = {
    "run_name": "rayflow_intial_test_run",
    "run_tags": {},
    "experiment_name": mlflow_exp_name,
    "create_new_experiment": True,
}

# Trainer settings: environment, resources, framework and model selection
trainer_config = {
    "env": "MineRLNavigateDenseVectorObf-v0",
    "num_gpus": 1.0,
    "num_workers": 1,
    # Doesn't work for now... was a fix for MLflow loading.
    # "num_envs_per_worker": 1,
    # "num_gpus_per_worker": 1,
    "num_cpus_per_worker": cpu_alloc_count,
    "eager": False,
    "use_pytorch": True,
    "monitor": True,
    "model": {
        "custom_model": custom_model_name,
    },
}

# NOTE(review): this dictionary uses the key "checkpoint_on_end" while
# tune_args below uses "checkpoint_at_end" -- confirm which spelling the
# Rayflow wrapper expects.
saver_config = {
    "checkpoint_on_end": True,
    "checkpoint_freq": 2,
}

# Stopping criteria for the run
stop = {
    "training_iteration": 3,
}

tune_args = {
    "checkpoint_at_end": True,
    "checkpoint_freq": 2,
    # NOTE: This assumes one ray worker with multiple envs.
    # Might end up as a bottleneck for the number of trials depending
    # on the configuration. Refer to:
    # https://docs.ray.io/en/latest/tune/user-guide.html#parallelism-gpus
    "resources_per_trial": {"gpu": 1, "cpu": cpu_alloc_count},
}

# Trainer and logging configuration bundled into one dictionary
config = {
    "trainer_config": trainer_config,
    "logging_config": mlflow_logging_config,
}

In [None]:
# Instantiate Rayflow's custom wrapper class around the Ray Tune library;
# the instance is used below for training and for restoring a trainer.
from rayflow.tune import Tune
t = Tune()

In [None]:
import ray.rllib.agents.ppo as ppo
from ray.rllib.agents.ppo import PPOTrainer

In [None]:
# Train a PPO model with the configurations defined above. The result is
# unpacked into run_id, output_config and experiment_analysis; run_id is
# reused later when restoring the trainer.
run_id, output_config, experiment_analysis = t.run_training(
    trainer=PPOTrainer,
    name=train_name,
    stop=stop,
    mlflow_logging_config=mlflow_logging_config,
    trainer_config=trainer_config,
    saver_config=saver_config,
    tune_args=tune_args,
)

In [None]:
# Example: load the stored model back from MLflow to retrain / fine-tune /
# evaluate it. The logging settings match the training run, except that no
# new experiment is created.
mlflow_logging_config = {
    "run_name": "rayflow_intial_test_run",
    "run_tags": {},
    "experiment_name": mlflow_exp_name,
    "create_new_experiment": False,
}

config = {
    "trainer_config": trainer_config,
    "logging_config": mlflow_logging_config,
    "trainer": PPOTrainer,
}

# Restore from the "checkpoint_2" checkpoint of the run identified by run_id.
mlflow_trainer = t.restore_trainer(
    PPOTrainer,
    trainer_config,
    mlflow_logging_config,
    run_id=run_id,
    checkpoint_path="checkpoint_2",
    artifact_dir="./mlflow_artifacts/",
)

In [None]:
# Stop the RayFlow Docker containers once the experiment is finished
r.stop()