# Demo of the MAGICAL benchmark suite for robust imitation learning (IL)

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/qxcv/magical/blob/pyglet1.5/demo-notebook.ipynb)

This self-contained Colab notebook shows how to train a simple imitation learning agent on MAGICAL using behavioural cloning (BC).

In [None]:
# Install MAGICAL, Xvfb, and a prerelease version of the 'imitation' library (https://github.com/HumanCompatibleAI/imitation)
!sudo DEBIAN_FRONTEND=noninteractive apt-get install -yq xvfb
!pip install -qU pip
# This usually gives errors of the form "package W requires version X of package Y, but you'll have version Z which is incompatible"
# You can safely ignore those errors; they are dependency conflicts in the underlying libraries.
!pip install --use-feature=2020-resolver -q xvfbwrapper magical-il 'git+git://github.com/HumanCompatibleAI/imitation@556f5d8384d99fa5ab8bc54a9828887a2db8c669#egg=imitation'
if 'vdisplay' not in globals():
    # start a virtual X display for MAGICAL rendering
    import xvfbwrapper
    vdisplay = xvfbwrapper.Xvfb()
    vdisplay.start()

In [None]:
import glob
import logging

import gym
from imitation.algorithms.bc import BC
import imitation.augment as il_augment
import imitation.data.types as il_types
import numpy as np
import stable_baselines3.common.policies as sb3_pols
import torch
from torch import nn
import torch.utils.data as th_data

import magical

# Register the MAGICAL environments (presumably with Gym, since a later cell builds the env via gym.make).
magical.register_envs()
# Show INFO-level log messages in the notebook output.
logging.basicConfig(level=logging.INFO)
# download trajectories into the local "demos" directory, which is where later cells look for demo files
magical.try_download_demos(dest="demos")

In [None]:
# Task to train on; used as the key for looking up demonstration files and as the
# first component of the Gym environment name.
env_ident = 'MoveToCorner'
# Name of the observation preprocessor; it is embedded in the Gym environment name and
# also passed to magical.preprocess_demos_with_wrapper when the demos are preprocessed.
preproc_name = 'LoRes4E'

In [None]:
# Demonstration files for each supported environment. glob.glob() returns matches in
# arbitrary order, so the paths are sorted: any subset taken from the list is then the
# same on every machine and every run.
demo_paths_by_env = {
    'MoveToCorner': sorted(glob.glob('demos/move-to-corner/demo-*.pkl.gz')),
}
demo_paths = demo_paths_by_env[env_ident]
# Gym env name with preprocessor
env_name = f'{env_ident}-Demo-{preproc_name}-v0'

In [None]:
# Instantiate the environment under its preprocessed name.
env = gym.make(env_name)

# Load only the first ten demonstration files.
demo_dicts = magical.load_demos(demo_paths[:10])

# Every loaded demo is a dict with keys ['trajectory', 'score', 'env_name']: 'trajectory'
# holds the actual data, and 'score' is generally 1.0 for demonstrations.
orig_env_name = None  # filled in from the demo dicts below (the last demo's value is kept)
demo_trajs = []
for loaded_demo in demo_dicts:
    orig_env_name = loaded_demo['env_name']
    demo_trajs.append(loaded_demo['trajectory'])

# Apply the chosen preprocessor to the raw trajectories.
demo_trajs_preproc = magical.preprocess_demos_with_wrapper(
    demo_trajs,
    orig_env_name,
    preproc_name=preproc_name,
)

In [None]:
class MAGICALNet(nn.Module):
    """Custom CNN for MAGICAL policies.

    The network is five Conv2d -> ReLU -> BatchNorm2d blocks followed by Flatten,
    a Linear layer and a final ReLU. The first block uses stride 1; each of the
    remaining four uses stride 2.

    Args:
        observation_space: object whose ``shape`` attribute is the per-sample
            input shape; ``shape[0]`` is used as the number of input channels.
        out_chans: length of the feature vector returned by ``forward``. It is
            also stored on the instance as ``features_dim``.
        width: multiplier applied to the channel count of every conv block.
    """
    def __init__(self, observation_space, out_chans=256, width=2):
        super().__init__()
        w = width

        def conv_block(i, o, k, s, p, b=False):
            """Return the layers of one Conv2d -> ReLU -> BatchNorm2d block."""
            return [
                # batch norm has its own bias, so don't add one to conv layers by default
                nn.Conv2d(i, o, kernel_size=k, stride=s, padding=p, bias=b,
                          padding_mode='zeros'),
                nn.ReLU(),
                nn.BatchNorm2d(o)
            ]

        conv_layers = [
            *conv_block(i=observation_space.shape[0], o=32*w, k=5, s=1, p=2, b=True),
            *conv_block(i=32*w, o=64*w, k=3, s=2, p=1),
            *conv_block(i=64*w, o=64*w, k=3, s=2, p=1),
            *conv_block(i=64*w, o=64*w, k=3, s=2, p=1),
            *conv_block(i=64*w, o=64*w, k=3, s=2, p=1),
        ]

        # Work out the flattened size of the conv output by pushing a single all-zero
        # sample through the conv stack. The probe runs in eval mode under no_grad so
        # that it (a) leaves the BatchNorm running statistics at their initial values
        # and (b) does not hit BatchNorm's training-time "more than 1 value per
        # channel" check when the feature map shrinks to 1x1.
        conv_stack = nn.Sequential(*conv_layers)
        conv_stack.eval()
        with torch.no_grad():
            probe_out = conv_stack(torch.zeros((1,) + tuple(observation_space.shape)))
        conv_stack.train()  # the layer objects are shared, so put them back in training mode
        fc_in_size = probe_out[0].numel()  # plain Python int

        # final FC layer to make feature maps the right size
        reduction_layers = [
            nn.Flatten(),
            nn.Linear(fc_in_size, out_chans),
            # Stable Baselines will add extra affine layer on top of this reLU
            nn.ReLU(),
        ]
        self.features_dim = out_chans
        all_layers = [*conv_layers, *reduction_layers]
        self.feature_generator = nn.Sequential(*all_layers)

    def forward(self, x, traj_info=None):
        """Return the feature vector for the batch ``x``; ``traj_info`` is accepted but unused."""
        return self.feature_generator(x)

In [None]:
# Assemble the demonstrations into the flat format that imitation consumes. traj.obs also
# contains the observation that follows the last action, so the final entry of each
# trajectory is dropped before the trajectories are concatenated.
per_traj_obs = [traj.obs[:-1] for traj in demo_trajs_preproc]
all_obs = np.concatenate(per_traj_obs, axis=0)
per_traj_acts = [traj.acts for traj in demo_trajs_preproc]
all_acts = np.concatenate(per_traj_acts, axis=0)

# One (empty) info entry per observation.
dataset = il_types.TransitionsMinimal(
    obs=all_obs,
    acts=all_acts,
    infos=[{}] * len(all_obs),
)
data_loader = th_data.DataLoader(
    dataset,
    batch_size=32,
    shuffle=True,
    collate_fn=il_types.transitions_collate_fn,
)

# Augmentations handed to the BC trainer, built from the string spec below.
augmenter = il_augment.StandardAugmentations.from_string_spec(
    'rotate,translate,noise',
    stack_color_space=il_augment.ColorSpace.RGB,
)

# Behavioural cloning trainer: a Stable Baselines3 CNN actor-critic policy that uses
# MAGICALNet as its features extractor.
bc_trainer = BC(
    observation_space=env.observation_space,
    action_space=env.action_space,
    policy_class=sb3_pols.ActorCriticCnnPolicy,
    policy_kwargs={'features_extractor_class': MAGICALNet},
    expert_data=data_loader,
    augmentation_fn=augmenter,
)

In [None]:
# Train the BC policy for 50 batches.
# Try training for longer (e.g. 15,000 batches) to get better performance.
bc_trainer.train(n_batches=50)