<a href="https://colab.research.google.com/github/lincolnschick/ML4MC/blob/main/docs/reports/requirement-23/Stone_BC_Iron_Data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setup

In [1]:
import numpy as np
from pathlib import Path

# Path to the folder holding all the data
DATASET_PATH = "/content/drive/MyDrive/packages/data/MineRLObtainIronPickaxe-v0"

# Custom script to modify the rewards to encourage stone collection
def update_rewards(filename):
  """"
  Function that replaces the rewards in the given numpy file
  to 0 by default or 1 when the amount of cobblestone in the player's
  inventory increases
  """
  data = dict(np.load(filename)) # Load numpy arrays as dictionary for modification
  rewards = data["reward"]
  stone_inventory = data["observation$inventory$cobblestone"]

  last_stone_count = 0 # Initialize stone count in inventory to 0
  camera_actions = data["action$camera"]
  for i in range(len(stone_inventory) - 1): # The rewards array is smaller than the observation array by 1
    rewards[i] = 0 # Remove rewards unrelated to stone collection

    if stone_inventory[i] > last_stone_count:
      rewards[i] = 1 # Add reward of 1 if we detect the player acquired stone

    last_stone_count = stone_inventory[i] # Update stone count

    camera_action = camera_actions[i]
    pitch = camera_action[1] # Access the pitch directly using the index
    if pitch > -10: # Add a penalty for looking up at a pitch greater than -10
      rewards[i] = -1

  np.savez(filename, **data) # Save each numpy array in the original format

# Update rewards for all .npz files in the dataset
for path in Path(DATASET_PATH).rglob("*.npz"):
  update_rewards(path)


In [2]:
import sys
from google.colab import drive
# Allow colab to access google drive
drive.mount('/content/drive')
# Add minerl's folder to path, so we can install it with pip
sys.path.append("/content/drive/MyDrive/packages/minerl")

Mounted at /content/drive


In [3]:
!sudo add-apt-repository -y ppa:openjdk-r/ppa
!sudo apt-get purge openjdk-*
!sudo apt-get install openjdk-8-jdk
!sudo apt-get install xvfb
!sudo apt-get install xserver-xephyr
!sudo apt install tigervnc-standalone-server
!sudo apt-get install -y python3-opengl
!sudo apt-get install ffmpeg
!pip3 install gym==0.13.1
!pip3 install -e /content/drive/MyDrive/packages/minerl
!pip3 install pyvirtualdisplay
!pip3 install -U colabgymrender

PPA publishes dbgsym, you may need to include 'main/debug' component
Repository: 'deb https://ppa.launchpadcontent.net/openjdk-r/ppa/ubuntu/ jammy main'
More info: https://launchpad.net/~openjdk-r/+archive/ubuntu/ppa
Adding repository.
Adding deb entry to /etc/apt/sources.list.d/openjdk-r-ubuntu-ppa-jammy.list
Adding disabled deb-src entry to /etc/apt/sources.list.d/openjdk-r-ubuntu-ppa-jammy.list
Adding key to /etc/apt/trusted.gpg.d/openjdk-r-ubuntu-ppa.gpg with fingerprint DA1A4A13543B466853BAF164EB9B1D8886F44E2A
Get:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,626 B]
Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Get:3 http://security.ubuntu.com/ubuntu jammy-security InRelease [110 kB]
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:5 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [119 kB]
Get:6 https://ppa.launchpadcontent.net/c2d4u.team/c2d4u4.0+/ubuntu jammy InRelease [18.1 kB]
Hit:7 ht

# Import Libraries

In [4]:
import torch as th
from torch import nn
import gym
import minerl
from minerl.herobraine.env_specs.stone_collection_specs import StoneCollection
from tqdm.notebook import tqdm
from colabgymrender.recorder import Recorder
from pyvirtualdisplay import Display
import logging

# Neural network

In [5]:
class NatureCNN(nn.Module):
    """
    CNN from DQN nature paper:
        Mnih, Volodymyr, et al.
        "Human-level control through deep reinforcement learning."
        Nature 518.7540 (2015): 529-533.

    :param input_shape: A three-item tuple telling image dimensions in (C, H, W)
    :param output_dim: Dimensionality of the output vector
    """

    def __init__(self, input_shape, output_dim):
        super().__init__()
        n_input_channels = input_shape[0]
        self.cnn = nn.Sequential(
            nn.Conv2d(n_input_channels, 32, kernel_size=8, stride=4, padding=0),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2, padding=0),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=0),
            nn.ReLU(),
            nn.Flatten(),
        )

        # Compute shape by doing one forward pass
        with th.no_grad():
            n_flatten = self.cnn(th.zeros(1, *input_shape)).shape[1]

        self.linear = nn.Sequential(
            nn.Linear(n_flatten, 512),
            nn.ReLU(),
            nn.Linear(512, output_dim)
        )

    def forward(self, observations: th.Tensor) -> th.Tensor:
        return self.linear(self.cnn(observations))

# Environment wrappers

In [6]:
class ActionShaping(gym.ActionWrapper):
    """
    The default MineRL action space is the following dict:

    Dict(attack:Discrete(2),
         back:Discrete(2),
         camera:Box(low=-180.0, high=180.0, shape=(2,)),
         craft:Enum(crafting_table,none,planks,stick,torch),
         equip:Enum(air,iron_axe,iron_pickaxe,none,stone_axe,stone_pickaxe,wooden_axe,wooden_pickaxe),
         forward:Discrete(2),
         jump:Discrete(2),
         left:Discrete(2),
         nearbyCraft:Enum(furnace,iron_axe,iron_pickaxe,none,stone_axe,stone_pickaxe,wooden_axe,wooden_pickaxe),
         nearbySmelt:Enum(coal,iron_ingot,none),
         place:Enum(cobblestone,crafting_table,dirt,furnace,none,stone,torch),
         right:Discrete(2),
         sneak:Discrete(2),
         sprint:Discrete(2))

    It can be viewed as:
         - buttons, like attack, back, forward, sprint that are either pressed or not.
         - mouse, i.e. the continuous camera action in degrees. The two values are pitch (up/down), where up is
           negative, down is positive, and yaw (left/right), where left is negative, right is positive.
         - craft/equip/place actions for items specified above.
    So an example action could be sprint + forward + jump + attack + turn camera, all in one action.

    This wrapper makes the action space much smaller by selecting a few common actions and making the camera actions
    discrete. You can change these actions by changing self._actions below. That should just work with the RL agent,
    but would require some further tinkering below with the BC one.
    """
    def __init__(self, env, camera_angle=10, always_attack=False):
        super().__init__(env)

        self.camera_angle = camera_angle
        self.always_attack = always_attack
        self._actions = [
            [('attack', 1)],
            [('forward', 1)],
            # Actions below not needed
            # [('back', 1)],
            # [('left', 1)],
            # [('right', 1)],
            # [('jump', 1)],
            # [('forward', 1), ('attack', 1)],
            # [('craft', 'planks')],
            [('forward', 1), ('jump', 1)],
            [('camera', [-self.camera_angle, 0])],
            [('camera', [self.camera_angle, 0])],
            [('camera', [0, self.camera_angle])],
            [('camera', [0, -self.camera_angle])],
        ]

        self.actions = []
        for actions in self._actions:
            act = self.env.action_space.noop()
            for a, v in actions:
                act[a] = v
            if self.always_attack:
                act['attack'] = 1
            self.actions.append(act)

        self.action_space = gym.spaces.Discrete(len(self.actions))

    def action(self, action):
        return self.actions[action]

# Data parser

In [7]:
def dataset_action_batch_to_actions(dataset_actions, camera_margin=5):
    """
    Turn a batch of actions from dataset (`batch_iter`) to a numpy
    array that corresponds to batch of actions of ActionShaping wrapper (_actions).

    Camera margin sets the threshold what is considered "moving camera".

    Note: Hardcoded to work for actions in ActionShaping._actions, with "intuitive"
        ordering of actions.
        If you change ActionShaping._actions, remember to change this!

    Array elements are integers corresponding to actions, or "-1"
    for actions that did not have any corresponding discrete match.
    """
    # There are dummy dimensions of shape one
    camera_actions = dataset_actions["camera"].squeeze()
    attack_actions = dataset_actions["attack"].squeeze()
    forward_actions = dataset_actions["forward"].squeeze()
    jump_actions = dataset_actions["jump"].squeeze()
    batch_size = len(camera_actions)
    actions = np.zeros((batch_size,), dtype=np.int)

    for i in range(len(camera_actions)):
        # Moving camera is most important (horizontal first)
        if camera_actions[i][0] < -camera_margin:
            actions[i] = 3
        elif camera_actions[i][0] > camera_margin:
            actions[i] = 4
        elif camera_actions[i][1] > camera_margin:
            actions[i] = 5
        elif camera_actions[i][1] < -camera_margin:
            actions[i] = 6
        elif forward_actions[i] == 1:
            if jump_actions[i] == 1:
                actions[i] = 2
            else:
                actions[i] = 1
        elif attack_actions[i] == 1:
            actions[i] = 0
        else:
            # No reasonable mapping (would be no-op)
            actions[i] = -1
    return actions

# Parameters

In [8]:
# Parameters:
TRAIN_MODEL_NAME = 'behavioral_cloning.pth'  # name to use when saving the trained agent.
TEST_MODEL_NAME = 'behavioral_cloning.pth'  # name to use when loading the trained agent.

TEST_EPISODES = 5  # number of episodes to test the agent for.
MAX_TEST_EPISODE_LEN = 5000  # 18k is the default for MineRLObtainDiamond.
FINDCAVE_STEPS = 3000  # number of steps to run BC for in evaluations.

# Setup training

In [9]:
def train(epochs, learning_rate, batch_size):
    """
    :param epochs: How many times we train over the dataset
    :param learning_rate: Learning rate for the neural network
    :param batch_size: How many samples before the model is updated
    """

    # abs_STONE = StoneCollection()
    # abs_STONE.register() # Register with gym
    data = minerl.data.make("MineRLObtainIronPickaxe-v0",  data_dir='drive/MyDrive/packages/data', num_workers=4)

    # We know ActionShaping has seven discrete actions, so we create
    # a network to map images to seven values (logits), which represent
    # likelihoods of selecting those actions
    network = NatureCNN((3, 64, 64), 7).cuda()
    optimizer = th.optim.Adam(network.parameters(), lr=learning_rate)
    loss_function = nn.CrossEntropyLoss()

    iter_count = 0
    losses = []
    for dataset_obs, dataset_actions, _, _, _ in tqdm(data.batch_iter(num_epochs=epochs, batch_size=batch_size, seq_len=1)):
        # We only use pov observations (also remove dummy dimensions)
        obs = dataset_obs["pov"].squeeze().astype(np.float32)
        # Transpose observations to be channel-first (BCHW instead of BHWC)
        obs = obs.transpose(0, 3, 1, 2)
        # Normalize observations
        obs /= 255.0

        # Actions need bit more work
        actions = dataset_action_batch_to_actions(dataset_actions)

        # Remove samples that had no corresponding action
        mask = actions != -1
        obs = obs[mask]
        actions = actions[mask]

        # Obtain logits of each action
        logits = network(th.from_numpy(obs).float().cuda())

        # Minimize cross-entropy with target labels.
        # We could also compute the probability of demonstration actions and
        # maximize them.
        loss = loss_function(logits, th.from_numpy(actions).long().cuda())

        # Standard PyTorch update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        iter_count += 1
        losses.append(loss.item())
        if (iter_count % 1000) == 0:
            mean_loss = sum(losses) / len(losses)
            tqdm.write("Iteration {}. Loss {:<10.3f}".format(iter_count, mean_loss))
            losses.clear()

    th.save(network.state_dict(), TRAIN_MODEL_NAME)

# Download the data

In [10]:
# Download the Iron Pickaxe dataset
minerl.data.download(directory='drive/MyDrive/packages/data', environment='MineRLObtainIronPickaxe-v0');

  full_bar = Bar(frac,

Download: https://minerl.s3.amazonaws.com/v4/MineRLObtainIronPickaxe-v0.tar: 100%|██████████| 2802.0/2801.89952 [00:44<00:00, 62.51MB/s]


# Train

In [None]:
# Train the model for the designated epochs, learning rate, and batch size
train(15, 0.0001, 21)

0it [00:00, ?it/s]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  actions = np.zeros((batch_size,), dtype=np.int)



Iteration 1000. Loss 0.773     
Iteration 2000. Loss 0.715     
Iteration 3000. Loss 0.879     
Iteration 4000. Loss 0.859     
Iteration 5000. Loss 0.940     
Iteration 6000. Loss 0.875     
Iteration 7000. Loss 0.785     
Iteration 8000. Loss 0.812     
Iteration 9000. Loss 0.956     
Iteration 10000. Loss 0.898     
Iteration 11000. Loss 0.836     
Iteration 12000. Loss 0.886     
Iteration 13000. Loss 0.805     
Iteration 14000. Loss 0.782     
Iteration 15000. Loss 0.890     
Iteration 16000. Loss 0.818     
Iteration 17000. Loss 0.775     
Iteration 18000. Loss 0.776     
Iteration 19000. Loss 0.719     
Iteration 20000. Loss 0.774     
Iteration 21000. Loss 0.847     
Iteration 22000. Loss 0.719     
Iteration 23000. Loss 0.676     
Iteration 24000. Loss 0.841     
Iteration 25000. Loss 0.973     
Iteration 26000. Loss 0.772     
Iteration 27000. Loss 0.790     
Iteration 28000. Loss 0.831     
Iteration 29000. Loss 0.671     
Iteration 30000. Loss 0.734     
Iteration 31000. Lo