# Parameters

In [1]:
# Test-run configuration: change these values to match the specifics of the test.
# NOTE(review): none of these constants are read by the cells visible in this part of
# the notebook; presumably a later evaluation section consumes them - confirm.
TEST_EPISODES = 1 # Number of tests to run for each model
TEST_STEPS = 10000 # Total timesteps to run for each model
USING_CUSTOM_ENV = True # Whether a custom environment is being used
DIRECTORY_PATH = "/content/drive/MyDrive/packages/minerl_saved_models" # Directory the models are saved in
SAVE_LOCATION = "/content/drive/MyDrive/packages/minerl_test_outputs" # Directory the videos are saved to
FORCE_STOP = False # Force a stop after one test (for code-testing purposes)

# Installations

In [2]:
import sys
from google.colab import drive
# Mount Google Drive at /content/drive so Colab can access the files stored there.
# Every Drive path used elsewhere in this notebook starts with this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# When True, the MineRL checkout stored on Google Drive is used; the install cell
# installs that checkout in editable mode instead of the pinned PyPI release.
LOCAL_MINERL = True

if LOCAL_MINERL:
  # For each Drive package directory: recursively set mode 555 (read + execute for
  # everyone, no write), then append the directory to sys.path.
  !chmod 555 -R "/content/drive/MyDrive/packages/minerl"
  sys.path.append("/content/drive/MyDrive/packages/minerl")
  !chmod 555 -R "/content/drive/MyDrive/packages/MixinGradle-dcfaf61"
  sys.path.append("/content/drive/MyDrive/packages/MixinGradle-dcfaf61")


In [4]:
%%capture
# Environment setup. %%capture hides all output of this cell, so a failing install
# will not be visible here.
# NOTE(review): several apt commands below are run without -y and may wait for a
# confirmation that cannot be seen while output is captured - confirm they complete.

# System packages: OpenJDK 8 from the openjdk-r PPA (any other openjdk-* package is
# purged first), virtual display / VNC servers, OpenGL bindings and ffmpeg.
!sudo add-apt-repository -y ppa:openjdk-r/ppa
!sudo apt-get purge openjdk-*
!sudo apt-get install openjdk-8-jdk
!sudo apt-get install xvfb
!sudo apt-get install xserver-xephyr
!sudo apt install tigervnc-standalone-server
!sudo apt-get install -y python3-opengl
!sudo apt-get install ffmpeg
# Python packages: gym is pinned; MineRL comes either from the Drive checkout
# (editable install) or from PyPI at the pinned version.
!pip3 install gym==0.13.1
if LOCAL_MINERL:
  !pip3 install -e /content/drive/MyDrive/packages/minerl
else:
  !pip3 install minerl==0.4.4 --verbose
# NOTE(review): pyvirtualdisplay, colabgymrender and opencv-python are not pinned.
!pip3 install pyvirtualdisplay
!pip3 install -U colabgymrender
# NOTE(review): xvfb was already installed earlier in this cell; this line is a repeat.
!sudo apt-get install xvfb
!pip3 install opencv-python
!pip3 install imageio==2.4.1

# Custom Environment Setup

In [5]:
# Copyright (c) 2020 All Rights Reserved
# Author: William H. Guss, Brandon Houghton
from minerl.herobraine.env_specs.simple_embodiment import SimpleEmbodimentEnvSpec
from minerl.herobraine.hero.handler import Handler
from typing import List

import minerl.herobraine.hero.handlers as handlers
from minerl.herobraine.hero.mc import ALL_ITEMS


"""
The intent of this env_spec is to create a survival environment for our agent to be evaluated in.
This environment allows us to tailor the observation and action spaces to our agent's and UI's needs.
"""

# NOTE(review): NONE and OTHER are not referenced by any code visible in this notebook.
NONE = 'none'
OTHER = 'other'

# Milliseconds per environment step; 50 ms matches the 20 steps per second used below.
MS_PER_STEP = 50

# Maximum episode length in steps (one hour at 20 steps per second = 72000 steps).
ML4MC_SURVIVAL_LENGTH = 1 * 60 * 60 * 20  # 1 hour * 60 minutes * 60 seconds * 20 ticks/steps per second

class ML4MCSurvival(SimpleEmbodimentEnvSpec):
    """Survival environment spec used to evaluate the ML4MC agent.

    Extends ``SimpleEmbodimentEnvSpec`` with a current-location observation,
    item-collection and compass-distance rewards, a starting inventory of two
    iron pickaxes, and episode termination when the agent possesses a diamond
    or the time limit runs out.
    """

    def __init__(self, *args, **kwargs):
        """Create the spec, defaulting the environment name to 'ML4MCSurvival-v0'."""
        # Only fill in the name when the caller did not provide one.
        kwargs.setdefault('name', 'ML4MCSurvival-v0')

        super().__init__(*args, max_episode_steps=ML4MC_SURVIVAL_LENGTH, **kwargs)

    def create_observables(self) -> List[Handler]:
        """Return the parent's observation handlers plus the current-location handler."""
        inherited = super().create_observables()
        return inherited + [handlers.ObservationFromCurrentLocation()]

    def create_actionables(self):
        """Return the parent's action handlers unchanged (nothing is added here)."""
        inherited = super().create_actionables()
        return inherited

    def create_rewardables(self) -> List[Handler]:
        """Reward collecting diamond, cobblestone and dirt, and compass-target distance."""
        item_rewards = [
            {"type": "diamond", "amount": 1, "reward": 4096},
            {"type": "cobblestone", "amount": 1, "reward": 4},
            {"type": "dirt", "amount": 1, "reward": 4},
        ]
        collecting = handlers.RewardForCollectingItems(item_rewards)
        compass = handlers.RewardForDistanceTraveledToCompassTarget(reward_per_block=50.0)
        return [collecting, compass]

    def create_agent_start(self) -> List[Handler]:
        """Start the agent with two iron pickaxes; adjust the list for other tests."""
        starting_items = [
            {"type": "iron_pickaxe", "quantity": 2},
        ]
        return [handlers.SimpleInventoryAgentStart(starting_items)]

    def create_agent_handlers(self) -> List[Handler]:
        """Make the agent quit once it possesses one diamond."""
        quit_items = [
            {"type": "diamond", "amount": 1},
        ]
        return [handlers.AgentQuitFromPossessingItem(quit_items)]

    def create_server_world_generators(self) -> List[Handler]:
        """Use the default world generator with ``force_reset=True``."""
        generator = handlers.DefaultWorldGenerator(force_reset=True)
        return [generator]

    def create_server_quit_producers(self) -> List[Handler]:
        """End the episode on timeout or as soon as any agent finishes."""
        # The time limit is expressed in milliseconds so the episode cannot run forever.
        time_limit_ms = self.max_episode_steps * MS_PER_STEP
        return [
            handlers.ServerQuitFromTimeUp(time_limit_ms=time_limit_ms),
            handlers.ServerQuitWhenAnyAgentFinishes(),
        ]

    def create_server_decorators(self) -> List[Handler]:
        """Return no decorators.

        This hook could be used to change other things about the world, such as
        drawing shapes or spawning a village; ML4MCSurvival does not need it.
        """
        return []

    def create_server_initial_conditions(self) -> List[Handler]:
        """Set the world's initial conditions: the passage of time is disabled."""
        frozen_time = handlers.TimeInitialCondition(
            allow_passage_of_time=False,
        )
        return [frozen_time]

    def is_from_folder(self, folder: str) -> bool:
        """Return True only for the 'ml4mc_survival' folder."""
        return folder == 'ml4mc_survival'

    def get_docstring(self):
        """Return an empty docstring; this environment is not published on MineRL's website."""
        return ""

    def determine_success_from_rewards(self, rewards: list) -> bool:
        """Report success when the summed rewards reach 4096.

        NOTE(review): 4096 equals the diamond reward, but every reward is summed,
        so the total could reach the threshold without a diamond - confirm intent.
        """
        total_reward = sum(rewards)
        return total_reward >= 4096

# Setup

In [6]:
import os
import numpy as np
import torch as th
from torch import nn
import gym
import minerl
import pandas as pd
from tqdm.notebook import tqdm
from colabgymrender.recorder import Recorder
from pyvirtualdisplay import Display
import logging
logging.disable(logging.ERROR)
from datetime import datetime

In [7]:
# Start a virtual display (not visible, 400x300) so videos can be saved on Colab.
# NOTE(review): Display is already imported in the setup cell above, and `path` is
# not used by any cell visible in this notebook.
from pyvirtualdisplay import Display
from os import path
display = Display(visible=False, size=(400, 300))
# The trailing semicolon suppresses the cell's output.
display.start();

In [8]:
class NatureCNN(nn.Module):
    """
    Convolutional network with a fully connected head, after the DQN nature paper:
        Mnih, Volodymyr, et al.
        "Human-level control through deep reinforcement learning."
        Nature 518.7540 (2015): 529-533.

    :param input_shape: A three-item tuple telling image dimensions in (C, H, W)
    :param output_dim: Dimensionality of the output vector
    """

    def __init__(self, input_shape, output_dim):
        super().__init__()
        in_channels = input_shape[0]

        # Three convolution + ReLU stages, then flatten to one vector per sample.
        conv_stack = [
            nn.Conv2d(in_channels, 32, kernel_size=8, stride=4, padding=0),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2, padding=0),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=0),
            nn.ReLU(),
            nn.Flatten(),
        ]
        self.cnn = nn.Sequential(*conv_stack)

        # Push one all-zero sample through the conv stack to learn the flattened size.
        probe = th.zeros(1, *input_shape)
        with th.no_grad():
            flat_features = self.cnn(probe).shape[1]

        # Fully connected head: flattened features -> 512 hidden units -> output_dim.
        head = [
            nn.Linear(flat_features, 512),
            nn.ReLU(),
            nn.Linear(512, output_dim),
        ]
        self.linear = nn.Sequential(*head)

    def forward(self, observations: th.Tensor) -> th.Tensor:
        """Run the conv stack and then the linear head on ``observations``."""
        features = self.cnn(observations)
        return self.linear(features)

In [9]:
class ActionShaping(gym.ActionWrapper):
    """Expose a fixed list of dictionary actions as a ``Discrete`` action space.

    Index -> action:
        0 attack, 1 forward, 2 forward + jump,
        3 camera [-angle, 0], 4 camera [angle, 0],
        5 camera [0, angle], 6 camera [0, -angle].

    :param env: Environment to wrap; its action space must provide ``noop()``.
    :param camera_angle: Magnitude used for each camera action.
    :param always_attack: When true, every shaped action also sets 'attack' to 1.
    """

    def __init__(self, env, camera_angle=10, always_attack=False):
        super().__init__(env)

        self.camera_angle = camera_angle
        self.always_attack = always_attack

        # Each entry lists the (key, value) overrides applied on top of a no-op action.
        angle = self.camera_angle
        self._actions = [
            [('attack', 1)],
            [('forward', 1)],
            [('forward', 1), ('jump', 1)],
            [('camera', [-angle, 0])],
            [('camera', [angle, 0])],
            [('camera', [0, angle])],
            [('camera', [0, -angle])],
        ]

        def shape(overrides):
            # Start from a fresh no-op and apply this entry's overrides to it.
            shaped = self.env.action_space.noop()
            for key, value in overrides:
                shaped[key] = value
            if self.always_attack:
                shaped['attack'] = 1
            return shaped

        self.actions = [shape(overrides) for overrides in self._actions]

        self.action_space = gym.spaces.Discrete(len(self.actions))

    def action(self, action):
        """Translate a discrete index into the pre-built dictionary action."""
        return self.actions[action]

In [10]:
def dataset_action_batch_to_actions(dataset_actions, camera_margin=5):
  """Map a batch of dataset actions onto the discrete indices used by ActionShaping.

  Each sample receives the first matching index, checked in this priority order:
    3: camera[0] < -camera_margin      4: camera[0] > camera_margin
    5: camera[1] > camera_margin       6: camera[1] < -camera_margin
    2: forward and jump                1: forward only
    0: attack                         -1: none of the above (would be a no-op)

  :param dataset_actions: Mapping with "camera", "attack", "forward" and "jump"
      entries. Assumes "camera" holds two values per sample and the others one
      value per sample, possibly with extra dummy dimensions of size one -
      TODO confirm against the data loader.
  :param camera_margin: A camera component must exceed this magnitude to count
      as a camera move.
  :return: 1-D integer array with one action index per sample.
  """
  # Reshape explicitly rather than squeeze(): squeeze() also drops the batch
  # axis when the batch holds a single sample, which breaks per-sample indexing.
  camera = np.asarray(dataset_actions["camera"]).reshape(-1, 2)
  attack = np.asarray(dataset_actions["attack"]).reshape(-1)
  forward = np.asarray(dataset_actions["forward"]).reshape(-1)
  jump = np.asarray(dataset_actions["jump"]).reshape(-1)

  moving_forward = forward == 1
  # np.select takes the first true condition per sample, so list order is the priority.
  conditions = [
    camera[:, 0] < -camera_margin,
    camera[:, 0] > camera_margin,
    camera[:, 1] > camera_margin,
    camera[:, 1] < -camera_margin,
    moving_forward & (jump == 1),
    moving_forward,
    attack == 1,
  ]
  choices = [3, 4, 5, 6, 2, 1, 0]

  # The builtin int replaces the np.int alias, which newer NumPy releases removed.
  return np.select(conditions, choices, default=-1).astype(int)

In [11]:
def str_to_act(env, actions):
    """Build an action dictionary from a whitespace-separated action string.

    Starts from ``env.action_space.noop()``. A bare token such as ``forward`` sets
    that key to 1. A ``key:value`` token stores the value as the raw string, except
    for ``camera``, whose value is evaluated as a Python expression.

    :param env: Environment whose action space provides ``noop()``.
    :param actions: Action string, e.g. ``"forward camera:[0,10]"``.
    :return: The no-op action with the requested keys overridden.
    """
    act = env.action_space.noop()
    for token in actions.split():
        if ":" not in token:
            # Bare key: switch the action on.
            act[token] = 1
            continue

        key, value = token.split(':')
        # NOTE(review): eval() executes arbitrary code; only pass trusted strings
        # here, or consider ast.literal_eval if the values are always literals.
        act[key] = eval(value) if key == 'camera' else value
    return act

In [12]:
# Instantiate the custom environment spec (default name 'ML4MCSurvival-v0') and
# register it; the training section later creates it via gym.make with that name.
abs_CUSTOM = ML4MCSurvival()
abs_CUSTOM.register()

In [13]:
# Install stable-baselines3, which provides the PPO implementation used for training.
# NOTE(review): the version is not pinned; the log below shows 2.3.0 being resolved.
!pip3 install stable-baselines3

Collecting stable-baselines3
  Downloading stable_baselines3-2.3.0-py3-none-any.whl (182 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m182.1/182.1 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting gymnasium<0.30,>=0.28.1 (from stable-baselines3)
  Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m953.9/953.9 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
Collecting farama-notifications>=0.0.1 (from gymnasium<0.30,>=0.28.1->stable-baselines3)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.13->stable-baselines3)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m15.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.13->stable-baselines3)
  D

# Training

In [14]:
from stable_baselines3.common import results_plotter
from stable_baselines3.common import monitor
from stable_baselines3.common.results_plotter import load_results, ts2xy
from stable_baselines3.common.callbacks import BaseCallback

In [15]:
from stable_baselines3 import PPO

# Create the custom survival environment under the name it was registered with.
env = gym.make('ML4MCSurvival-v0')

In [16]:
# Wrap the environment in colabgymrender's Recorder so episodes are recorded.
# NOTE(review): "videos" is presumably the output directory and fps the playback
# rate - confirm against the colabgymrender documentation. Recorder is also already
# imported in the setup cell above.
from colabgymrender.recorder import Recorder
env = Recorder(env, "videos", fps=60)

In [17]:
# NOTE(review): shimmy is presumably required so that stable-baselines3 (which pulled
# in gymnasium in the install log above) can work with this gym environment - confirm.
# The version is not pinned; the log below shows 1.3.0 being installed.
!pip install shimmy

Collecting shimmy
  Downloading Shimmy-1.3.0-py3-none-any.whl (37 kB)
Installing collected packages: shimmy
Successfully installed shimmy-1.3.0


In [18]:
class ExtractPOV(gym.ObservationWrapper):
    """Observation wrapper that keeps only the 'pov' entry of each observation.

    The avatar's point-of-view frame is what stable_baselines needs from the
    environment, so both the observation space and every observation are
    narrowed to their 'pov' component.
    """

    def __init__(self, env):
        super().__init__(env)
        # Advertise the 'pov' sub-space as this wrapper's observation space.
        pov_space = self.env.observation_space['pov']
        self.observation_space = pov_space

    def observation(self, observation):
        """Return the 'pov' entry of the wrapped environment's observation."""
        pov_frame = observation['pov']
        return pov_frame

In [19]:
# Extract the avatar's POV from the environment's observations, which is what
# stable_baselines needs.
obs_wrapped_diamond = ExtractPOV(env)
# Apply action shaping so the environment's dictionary actions are selected through
# a discrete index; with always_attack=True every shaped action also attacks.
obs_action_wrapped_diamond = ActionShaping(obs_wrapped_diamond, always_attack=True)
# Reset the wrapped environment.
# NOTE(review): `obs` is not used by any cell visible in this notebook.
obs = obs_action_wrapped_diamond.reset()

# Create a PPO model with a CnnPolicy. This was simply the model used by the
# tutorials; the best model will be determined by experimenting later.
model = PPO(policy="CnnPolicy", env=obs_action_wrapped_diamond, verbose=1)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [None]:
# Train the model for 50000 environment timesteps.
# At the 20 steps per second used for ML4MC_SURVIVAL_LENGTH this is roughly 42
# minutes of game time; the training log below reports about 8 fps, which puts a
# full run at well over an hour of wall-clock time.
model.learn(total_timesteps=50000)
# Release the recorded environment (the Recorder wrapper) to actually make the video on Colab.
env.release()

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 562      |
|    ep_rew_mean     | -2e-06   |
| time/              |          |
|    fps             | 8        |
|    iterations      | 1        |
|    time_elapsed    | 252      |
|    total_timesteps | 2048     |
---------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1.51e+03  |
|    ep_rew_mean          | 29        |
| time/                   |           |
|    fps                  | 8         |
|    iterations           | 2         |
|    time_elapsed         | 455       |
|    total_timesteps      | 4096      |
| train/                  |           |
|    approx_kl            | 0.0402933 |
|    clip_fraction        | 0.275     |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.92     |
|    explained_variance   | -0.00176  |
|    learning_rate        | 0.0003    |
|    loss           

In [None]:
# Save the trained model under the relative name 'diamond.pth'.
# NOTE(review): this path is relative to the current working directory, not to
# DIRECTORY_PATH on Google Drive - confirm the file ends up where later cells expect.
model.save('diamond.pth' )