In [1]:
#%matplotlib inline
%load_ext autoreload
%autoreload 2

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import wandb

import time
import random
import copy
from copy import deepcopy
import threading

from train_utils import *

import gym3
from procgen import ProcgenGym3Env
import matplotlib.pyplot as plt

# Run on the GPU when torch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cuda'

In [2]:
# Default number of parallel environments per vectorised env.
bs = 64

# Named groups of colour-theme indices: 'outer' and 'inner' split 'all' in two.
color_lookup = dict(
    all=list(range(8)),
    outer=[0, 1, 6, 7],
    inner=[2, 3, 4, 5],
)
backgrounds = list(color_lookup)  # 'all', 'outer', 'inner' -- background palette choices
roads = list(color_lookup)        # the same three choices for the road palette
backnoises = [0, 100]

# One config per (background, road, noise) combination.
configs = []
for bg in backgrounds:
    for rd in roads:
        for backnoise in backnoises:
            config = dict(
                name=f"bg:{bg} rd:{rd} noise:{backnoise}",
                color_theme=color_lookup[bg],
                color_theme_road=color_lookup[rd],
                background_noise_level=backnoise,
            )
            configs.append(config)

len(configs)

18

In [3]:
def get_env(config, bs=bs):
    """Build a vectorised Procgen "testgame" environment for one config.

    config: dict providing 'color_theme', 'color_theme_road' and
        'background_noise_level'.
    bs: number of parallel environments (passed as `num`). The default is the
        module-level `bs` as it was when this function was defined.
    """
    theme_kwargs = {
        key: config[key]
        for key in ('color_theme', 'color_theme_road', 'background_noise_level')
    }
    return ProcgenGym3Env(
        num=bs,
        env_name="testgame",
        num_levels=100_000,
        start_level=0,
        **theme_kwargs,
    )

In [4]:
"""config = configs[0]

s = np.array([[.0,.0] for _ in range(bs)], dtype=np.float32)
seq_len = 15
N_IMGS = 4

plt.figure(figsize=(20, 10))

titles=['indist', 'outdist']

for i, title in enumerate(titles):
    ax = plt.subplot(1,5, i+1)
    env = get_env(title, config)

    for i in range(seq_len):
        env.act(s)
        rew, obs, first = env.observe()
        img = obs['rgb']
        img = np.concatenate(img[:N_IMGS],0)
        info = env.get_info()
        
    plt.imshow(img)
    plt.title(f"{config['name']}: {title}")
    plt.axis("off")"""

'config = configs[0]\n\ns = np.array([[.0,.0] for _ in range(bs)], dtype=np.float32)\nseq_len = 15\nN_IMGS = 4\n\nplt.figure(figsize=(20, 10))\n\ntitles=[\'indist\', \'outdist\']\n\nfor i, title in enumerate(titles):\n    ax = plt.subplot(1,5, i+1)\n    env = get_env(title, config)\n\n    for i in range(seq_len):\n        env.act(s)\n        rew, obs, first = env.observe()\n        img = obs[\'rgb\']\n        img = np.concatenate(img[:N_IMGS],0)\n        info = env.get_info()\n        \n    plt.imshow(img)\n    plt.title(f"{config[\'name\']}: {title}")\n    plt.axis("off")'

In [5]:
def testdrive(m, use_autopilot, config, seq_len=400, bs=256):
    """Roll out a policy in a fresh environment and return the mean per-step reward.

    Args:
        m: model called as ``m(front, aux, '')``; the first element of its
            return value is used as the control output. It is put in eval mode
            for the rollout and switched to train mode before returning.
        use_autopilot: if True, every step is driven by the environment's
            ``autopilot_*`` info values and the model is never called.
        config: environment config dict, forwarded to ``get_env``.
        seq_len: number of environment steps to run.
        bs: number of parallel environments.

    Returns:
        The sum of all rewards divided by ``bs * seq_len``.

    Raises:
        ValueError: if ``seq_len`` or ``bs`` is not positive.
    """
    if seq_len <= 0 or bs <= 0:
        raise ValueError("seq_len and bs must be positive")

    # For the first few steps the autopilot drives even when the model is under test.
    TRAINING_WHEELS_WINDOW = 10

    val_env = get_env(config, bs=bs)
    m.eval()
    try:
        s = np.zeros((bs, 2), dtype=np.float32)  # initial all-zero action for every env
        reward = 0
        with torch.no_grad():
            for i in range(seq_len):
                val_env.act(s)
                rew, obs, first = val_env.observe()
                reward += rew.sum()
                info = val_env.get_info()

                if use_autopilot or i < TRAINING_WHEELS_WINDOW:
                    s = np.array([[e["autopilot_"+c] for c in control_properties] for e in info])
                else:
                    # Tensors are only built (and moved to `device`) when the model is queried.
                    img = obs['rgb']
                    aux = np.array([[e[a] for a in aux_properties] for e in info])
                    # Scale to [0, 1], add a leading length-1 axis and move the last
                    # (presumably channel) axis ahead of the two before it.
                    front = torch.from_numpy(img.astype(np.float32)/255.).unsqueeze(0).permute(0,1,4,2,3).to(device)
                    aux = torch.from_numpy(aux.astype(np.float32)).unsqueeze(0).to(device)
                    out, _ = m(front, aux, '')
                    s = out.squeeze(0).squeeze(-1).cpu().numpy()
                    s = np.clip(s, -5., 5.)
    finally:
        # Always release the environment and leave the model in train mode,
        # even if the rollout raised part-way through.
        val_env.close()
        m.train()

    return reward / (bs*seq_len)

In [6]:
# Regression criterion shared by the training loop.
# NOTE(review): MSELoss presumably holds no parameters or buffers, so `.cuda()`
# has nothing to move -- confirm before relying on it for device placement.
loss_fn = torch.nn.MSELoss().cuda()


def get_model():
    """Create a fresh (model, optimizer, scaler) triple.

    Returns:
        A ``VizCNN(use_rnn=False)`` moved to ``device``, an Adam optimizer over
        its parameters with lr=3e-4, and a new ``torch.cuda.amp.GradScaler``.
    """
    model = VizCNN(use_rnn=False).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
    return model, optimizer, torch.cuda.amp.GradScaler()

In [9]:

def train(config, n_updates=100):
    """Train a fresh model on one environment config and save its weights.

    The weights are written with ``torch.save`` to ``config['name'] + ".torch"``.

    Args:
        config: environment config dict; forwarded to ``get_env`` and used to
            name the checkpoint file.
        n_updates: update budget. The counter starts at 1 and is only compared
            against this value between chunks, so a chunk that has been
            started is always processed to its end.
    """
    m, opt, scaler = get_model()
    # `bs` is the notebook-wide batch size and is only *read* here. Nothing in
    # this function may assign to it, otherwise the global would be clobbered.
    dataloader = DataLoader(env=get_env(config, bs=bs), bs=bs, seq_len=200)

    try:
        m.train()
        # NOTE(review): never used below; kept in case constructing it has side
        # effects -- confirm and remove.
        logger = Logger()
        counter = 1
        bptt = 1
        stride = bptt*4  # only every 4th window of a chunk is trained on

        while counter < n_updates:
            front_container, aux_container, target_container = dataloader.get_chunk()
            chunk_len = front_container.shape[0]
            for len_ix in range(0, chunk_len, stride):
                front = front_container[len_ix:len_ix+bptt, :, :, :, :].to(device).half()
                aux = aux_container[len_ix:len_ix+bptt, :, :].to(device).half()
                target = target_container[len_ix:len_ix+bptt, :, :].to(device).half()
                with torch.cuda.amp.autocast():
                    pred, _ = m(front, aux, '')
                # Argument order kept as in the original; MSE is symmetric in its two inputs.
                loss = loss_fn(target, pred)
                scaler.scale(loss).backward()
                scaler.step(opt)
                scaler.update()
                opt.zero_grad()
                counter += 1

        torch.save(m.state_dict(), config['name']+".torch")
    finally:
        # Tear the loader down even when training or saving fails.
        dataloader.destroy()

In [10]:
# Train and checkpoint one model per environment configuration, printing each config first.
for config in configs:
    print("training ", config)
    train(config)

training  {'name': 'bg:all rd:all noise:0', 'color_theme': [0, 1, 2, 3, 4, 5, 6, 7], 'color_theme_road': [0, 1, 2, 3, 4, 5, 6, 7], 'background_noise_level': 0}
training  {'name': 'bg:all rd:all noise:100', 'color_theme': [0, 1, 2, 3, 4, 5, 6, 7], 'color_theme_road': [0, 1, 2, 3, 4, 5, 6, 7], 'background_noise_level': 100}
training  {'name': 'bg:all rd:outer noise:0', 'color_theme': [0, 1, 2, 3, 4, 5, 6, 7], 'color_theme_road': [0, 1, 6, 7], 'background_noise_level': 0}
training  {'name': 'bg:all rd:outer noise:100', 'color_theme': [0, 1, 2, 3, 4, 5, 6, 7], 'color_theme_road': [0, 1, 6, 7], 'background_noise_level': 100}
training  {'name': 'bg:all rd:inner noise:0', 'color_theme': [0, 1, 2, 3, 4, 5, 6, 7], 'color_theme_road': [2, 3, 4, 5], 'background_noise_level': 0}
training  {'name': 'bg:all rd:inner noise:100', 'color_theme': [0, 1, 2, 3, 4, 5, 6, 7], 'color_theme_road': [2, 3, 4, 5], 'background_noise_level': 100}
training  {'name': 'bg:outer rd:all noise:0', 'color_theme': [0, 1, 

In [11]:
# Cross-evaluation: drive every trained model in every test environment and
# record its reward relative to the autopilot in that same environment.
results = []

# One network is enough: the autopilot baseline never queries it, and every
# evaluation overwrites its weights from a checkpoint first.
m = get_model()[0]

for config_test in configs:
    # Autopilot reference score for this test environment.
    baseline_score = testdrive(m, use_autopilot=True, config=config_test)

    for config_train in configs:
        # map_location lets checkpoints saved from GPU tensors load when `device` is the CPU.
        state_dict = torch.load(config_train['name']+".torch", map_location=device)
        m.load_state_dict(state_dict)
        score = testdrive(m, use_autopilot=False, config=config_test) / baseline_score
        result = {'trn_env':config_train['name'],
                 'test_env':config_test['name'],
                 'score':score}
        results.append(result)

In [12]:
import pandas as pd
# Long-format table: one row per (training env, test env) pair with its score.
df = pd.DataFrame(results)

In [13]:
# Reshape to a matrix: one row per training env, one column per test env, cells hold the autopilot-relative score.
df.pivot(index='trn_env', columns='test_env', values='score')

test_env,bg:all rd:all noise:0,bg:all rd:all noise:100,bg:all rd:outer noise:0,bg:all rd:outer noise:100
trn_env,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bg:all rd:all noise:0,0.137656,0.128424,0.135654,0.138938
bg:all rd:all noise:100,0.173934,0.154778,0.178464,0.1834
bg:all rd:outer noise:0,0.13612,0.138584,0.149836,0.141311
bg:all rd:outer noise:100,0.165621,0.176746,0.20659,0.203475
