In [1]:
#%matplotlib inline
%load_ext autoreload
%autoreload 2

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import wandb

import time
import random
import copy
from copy import deepcopy
import threading

from train_utils import *

import gym3
from procgen import ProcgenGym3Env
import matplotlib.pyplot as plt

# Prefer the GPU whenever torch can see one; otherwise run everything on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [None]:
# Ask wandb for a fresh run identifier and echo it as the cell output.
run_id = wandb.util.generate_id()
run_id

In [None]:
# NOTE(review): the run id is a hard-coded literal rather than the `run_id`
# generated in the previous cell -- presumably to resume one specific run; confirm.
wandb.init(
    id='14s7sazo',
    project="carlita",
    resume="allow",
)

In [2]:
# Default batch size: used as the default `bs` of get_env() and by train().
bs = 64

# Named groups of colour-theme indices that a config can select.
color_lookup = {
    'all': [0, 1, 2, 3, 4, 5, 6, 7],
    'outer': [0, 1, 6, 7],
    'inner': [2, 3, 4, 5],
}

# Candidate values per axis (not consumed by any of the cells shown here).
backgrounds = ['outer', 'inner']
roads = ['outer', 'inner']
sidewalks = ['outer', 'inner']
backnoises = [0, 100]

# Selection for this run: every axis uses the full colour range, with noise on.
rd = bg = sw = 'all'
backnoise = 100

# The entries other than `name` are forwarded to ProcgenGym3Env by get_env().
config = dict(
    name=f"bg:{bg} rd:{rd} noise:{backnoise}",
    color_theme=color_lookup[bg],
    color_theme_road=color_lookup[rd],
    color_theme_sidewalk=color_lookup[sw],
    background_noise_level=backnoise,
    render_mode=None,
)
config

{'name': 'bg:all rd:all noise:100',
 'color_theme': [0, 1, 2, 3, 4, 5, 6, 7],
 'color_theme_road': [0, 1, 2, 3, 4, 5, 6, 7],
 'color_theme_sidewalk': [0, 1, 2, 3, 4, 5, 6, 7],
 'background_noise_level': 100,
 'render_mode': None}

In [3]:
def get_env(config, bs=bs):
    """Create a procgen "testgame" environment from a config dict.

    Parameters
    ----------
    config : dict
        Must contain the keys 'color_theme', 'color_theme_road',
        'color_theme_sidewalk', 'background_noise_level' and 'render_mode';
        each is forwarded unchanged as the keyword argument of the same name.
    bs : int
        Passed to ProcgenGym3Env as `num`. The default is whatever the
        module-level `bs` held when this function was defined.

    Returns
    -------
    The ProcgenGym3Env instance.
    """
    forwarded_keys = (
        'color_theme',
        'color_theme_road',
        'color_theme_sidewalk',
        'background_noise_level',
        'render_mode',
    )
    env_options = {key: config[key] for key in forwarded_keys}
    return ProcgenGym3Env(
        num=bs,
        env_name="testgame",
        num_levels=100_000,
        start_level=0,
        **env_options,
    )

In [4]:
def testdrive(m, use_autopilot, config):
    """Roll out a batch of validation episodes and return the mean per-step reward.

    Parameters
    ----------
    m : model or None
        Called as ``m(front, aux, h)`` and expected to return ``(out, h)``.
        Never touched when ``use_autopilot`` is true, so ``None`` is fine then.
    use_autopilot : bool
        If true, every step replays the ``autopilot_*`` controls reported in the
        environment info. If false, the autopilot only drives the first
        ``TRAINING_WHEELS_WINDOW`` steps and the model drives afterwards.
    config : dict
        Environment config as accepted by ``get_env``. It is copied, never
        modified; the copy gets ``render_mode='rgb_array'``.

    Returns
    -------
    Sum of all rewards over all environments and steps, divided by
    ``bs * seq_len`` (256 environments, 400 steps).

    The environment is closed and the model is put back into train mode even if
    the rollout raises.
    """
    TRAINING_WHEELS_WINDOW = 10  # initial steps always driven by the autopilot
    seq_len = 400
    bs = 256

    # Copy rather than alias: writing 'render_mode' into the caller's dict would
    # leak the validation render mode into every later get_env(config) call.
    val_config = dict(config)
    val_config['render_mode'] = 'rgb_array'

    if not use_autopilot:
        m.eval()
    val_env_base = None
    try:
        val_env_base = get_env(val_config, bs=bs)
        val_env = gym3.ViewerWrapper(val_env_base, info_key="rgb")
        # Start from an all-zero control vector for every environment.
        s = np.zeros((bs, 2), dtype=np.float32)
        reward = 0
        with torch.no_grad():
            h = get_hidden(bs)
            for i in range(seq_len):
                val_env.act(s)
                rew, obs, _ = val_env.observe()
                reward += rew.sum()
                info = val_env.get_info()

                if use_autopilot or i < TRAINING_WHEELS_WINDOW:
                    s = np.array([[e["autopilot_"+c] for c in control_properties] for e in info])
                else:
                    # Model inputs are only built on steps where the model drives.
                    img = obs['rgb']
                    aux = np.array([[e[a] for a in aux_properties] for e in info])
                    # Scale pixels to [0, 1], add a leading length-1 axis and move
                    # the last axis to position 2 (presumably channels -- TODO confirm).
                    front = torch.from_numpy(img.astype(np.float32)/255.).unsqueeze(0).permute(0,1,4,2,3).to(device)
                    aux = torch.from_numpy(aux.astype(np.float32)).unsqueeze(0).to(device)
                    out, h = m(front, aux, h)
                    s = out.squeeze(0).squeeze(-1).cpu().numpy()
                    # Keep the model's controls inside [-5, 5].
                    s = np.clip(s, -5., 5.)
    finally:
        if val_env_base is not None:
            val_env_base.close()
        if not use_autopilot:
            m.train()

    reward /= (bs*seq_len)
    return reward

In [5]:
# Mean per-step reward of the autopilot; train() divides the agent's
# score by this value. No model is needed for an autopilot-only run.
baseline_score = testdrive(m=None, use_autopilot=True, config=config)
baseline_score

building procgen...done


0.44904296875

In [6]:
# MSELoss has no parameters or buffers, so there is nothing to move to a device;
# dropping the hard-coded `.cuda()` keeps this cell consistent with the
# `device` fallback chosen in the first cell.
loss_fn = torch.nn.MSELoss()


def get_model():
    """Build a fresh model together with its optimizer and AMP gradient scaler.

    Returns
    -------
    (m, opt, scaler) where
        m      -- ``VizCNN(use_rnn=True)`` moved to ``device``,
        opt    -- Adam over ``m.parameters()`` with lr=3e-4,
        scaler -- a ``torch.cuda.amp.GradScaler`` for mixed-precision training.
    """
    m = VizCNN(use_rnn=True).to(device)
    opt = torch.optim.Adam(m.parameters(), lr=3e-4)
    scaler = torch.cuda.amp.GradScaler()
    return m, opt, scaler

In [7]:

def train(config):
    """Train a fresh model on environments built from `config`.

    Data comes from ``DataLoader.get_chunk()`` as three containers (front
    images, aux inputs, targets). Each chunk is consumed in windows of `bptt`
    steps along the first axis; the recurrent state is carried across windows
    of a chunk (detached, so gradients stop at window boundaries) and reset for
    every new chunk. Every `log_cadence` updates the model is test-driven and
    its score, divided by the module-level `baseline_score`, is printed and
    logged to wandb.

    Parameters
    ----------
    config : dict
        Environment config as accepted by ``get_env`` / ``testdrive``.

    Returns
    -------
    None.

    NOTE(review): the trained model is neither returned nor saved here, while
    the evaluation cell loads ``<config name>.torch`` checkpoints -- confirm
    where those files are meant to be written.
    """
    m, opt, scaler = get_model()
    # Read the module-level batch size; it is deliberately NOT declared `global`
    # so that nothing in this function can rebind it.
    dataloader = DataLoader(env=get_env(config, bs=bs), bs=bs, seq_len=200)

    m.train()
    # NOTE(review): `logger` is not used below; kept in case constructing a
    # Logger has side effects -- confirm and remove if it does not.
    logger = Logger()
    n_updates = 20_000
    counter = 1
    log_cadence = 1024
    bptt = 32

    while counter < n_updates:
        front_container, aux_container, target_container = dataloader.get_chunk()
        # Local name for the chunk's batch dimension, so the global `bs` is untouched.
        chunk_len, chunk_bs, _, _, _ = front_container.shape
        h = get_hidden(chunk_bs)
        for len_ix in range(0, chunk_len, bptt):
            # Stop mid-chunk once the update budget is used up.
            if counter >= n_updates:
                break
            window = slice(len_ix, len_ix + bptt)
            front = front_container[window].to(device).half()
            aux = aux_container[window].to(device).half()
            target = target_container[window].to(device).half()

            # Forward pass AND loss run under autocast, as in the PyTorch AMP
            # recipe, so the loss is not evaluated on raw half tensors.
            with torch.cuda.amp.autocast():
                pred, h = m(front, aux, h)
                loss = loss_fn(pred, target)
            # Truncated BPTT: carry the state forward without its graph.
            h = (h[0].detach(), h[1].detach())

            scaler.scale(loss).backward()
            scaler.step(opt)
            scaler.update()
            opt.zero_grad()

            if counter % log_cadence == 0:
                print("testing agent")
                score = testdrive(m, use_autopilot=False, config=config) / baseline_score
                wandb.log({"score":score})
                print(score)

            counter += 1


In [None]:
# Run training on the config defined above; train() prints "testing agent"
# followed by the baseline-normalised score each time it evaluates the model.
train(config)

testing agent
0.7997259797311992
testing agent
0.7694315166804402


In [None]:
# Cross-evaluation: for every (training config, test config) pair, load the
# checkpoint saved under the training config's name and test-drive it in the
# test config's environment, normalised by that environment's autopilot score.
#
# NOTE(review): `configs` is not defined in any cell shown in this notebook --
# confirm where the list of config dicts comes from before a Restart & Run All.
# NOTE(review): `baseline_score` below overwrites the module-level value that
# train() reads; re-running train() after this cell uses the last test
# environment's baseline.
results = []

# One model instance is enough: load_state_dict() replaces its weights before
# every evaluation, so rebuilding it per test config was redundant work.
m, _, _ = get_model()

for config_test in configs:
    # Autopilot reference for this test environment (the model is not used
    # when use_autopilot=True).
    baseline_score = testdrive(m, use_autopilot=True, config=config_test)

    for config_train in configs:
        checkpoint_path = config_train['name']+".torch"
        # map_location lets checkpoints written on a GPU load on whatever
        # `device` this kernel runs on (including CPU-only hosts).
        m.load_state_dict(torch.load(checkpoint_path, map_location=device))
        score = testdrive(m, use_autopilot=False, config=config_test) / baseline_score
        result = {'trn_env':config_train['name'],
                 'test_env':config_test['name'],
                 'score':score}
        results.append(result)

In [None]:
import pandas as pd

# Tabulate the evaluation records: one row per entry of `results`,
# with the columns 'trn_env', 'test_env' and 'score'.
df = pd.DataFrame(data=results)

In [None]:
# Score matrix: one row per training environment, one column per test environment.
df.pivot(
    index='trn_env',
    columns='test_env',
    values='score',
)