# Train the recurrent agent on GPU

In [1]:
# Mount Google Drive at /content/drive (Colab-only: needs the google.colab package).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install pyro-ppl &> /dev/null

In [4]:
import sys

# Project root on the Google Drive mounted at /content/drive. Kept absolute so that
# imports and every path derived from `cwd` do not depend on the kernel's current
# working directory.
cwd = "/content/drive/Shareddrives/Active_Inference_Interaction/"

# Guarded so re-running this cell does not stack duplicate sys.path entries.
if cwd not in sys.path:
    sys.path.append(cwd)

In [5]:
import os
import glob
import json
import pickle
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch 

from scripts.train_agent_recurrent import main

# Use the GPU when torch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"device: {device}")

device: cuda


In [6]:
# train config
def parse_args(seed):
    """Assemble the training configuration dictionary for one run.

    The settings are grouped by concern (data, agent, network, algorithm,
    training loop) and merged into a single flat dict in that order. Dataset
    and experiment locations are built from the module-level ``cwd`` project
    root.

    Args:
        seed: value stored under the ``"seed"`` key; it is the only entry
            that varies between calls.

    Returns:
        dict: flat mapping from option name to value, passed to ``main``.
    """
    data_args = {
        "data_path": os.path.join(cwd, "interaction-dataset-master"),
        "exp_path": os.path.join(cwd, "exp"),
        "scenario": "DR_CHN_Merging_ZS",
        "filenames": [
            "vehicle_tracks_000.csv",
            "vehicle_tracks_003.csv",
            "vehicle_tracks_007.csv",
        ],
        "valid_lanes": ["3", "4"],
        "checkpoint_path": "none",
        "feature_set": ["lv_s_rel", "lv_ds_rel", "lv_inv_tau"],
        "action_set": ["dds_smooth"],
    }

    # agent args
    agent_args = {
        "agent": "vin",
        "state_dim": 20,
        "act_dim": 15,
        "horizon": 30,
        "obs_model": "flow",
        "obs_cov": "tied",
        "ctl_cov": "diag",
        "hmm_rank": 0,
        "alpha": 1.0,
        "beta": 0.0,
        "rwd": "efe",
        "detach": False,
        "hyper_dim": 4,
        "hyper_cov": True,
        "train_prior": False,
    }

    # nn args
    nn_args = {
        "hidden_dim": 64,
        "num_hidden": 2,
        "gru_layers": 2,  # use 2 gru layers for inference
        "activation": "relu",
        "norm_obs": True,
    }

    # algo args
    algo_args = {
        "train_mode": "marginal",
        "bptt_steps": 500,
        "pred_steps": 5,
        "bc_penalty": 1.0,
        "obs_penalty": 1.0,
        "pred_penalty": 0.2,
        "reg_penalty": 0.1,
        "ortho_penalty": 0.4,
        "post_obs_penalty": 0.0,
        "kl_penalty": 0.01,
    }

    # training args
    training_args = {
        "min_eps_len": 50,
        "max_eps_len": 500,
        "train_ratio": 0.7,
        "batch_size": 100,
        "epochs": 1,
        "lr": 0.01,  # use 0.005 for nn models
        "lr_flow": 0.001,
        "lr_post": 0.003,
        "decay": 3e-5,
        "grad_clip": 20,
        "decay_steps": 100,
        "decay_rate": 0.9,
        "cp_every": 50,
        "seed": seed,
        "save": False,
    }

    # Keys are unique across groups, so unpacking preserves the original key order.
    return {**data_args, **agent_args, **nn_args, **algo_args, **training_args}

In [8]:
# Single training run: build the config for seed 0 and pass it to main with the torch device.
seed = 0
arglist = parse_args(seed)
# main comes from scripts.train_agent_recurrent; its two return values are kept as the
# model and (presumably) a training-history table — confirm against the script.
model, df_history = main(arglist, device)

seed: 0, device: cuda
feature set: ['lv_s_rel', 'lv_ds_rel', 'lv_inv_tau']
action set: ['dds_smooth']
train size: 615, test size: 263
action model loaded
num parameters: 7640
RecurrentBehaviorCloning(bptt_steps=500, pred_steps=5, bc_penalty=1.0, obs_penalty=1.0, pred_penalty=0.2, reg_penalty=0.1, lr=0.01, lr_flow=0.001, decay=3e-05, grad_clip=20, decay_steps=100, decay_rate=0.9,
agent=VINAgent(
  (rnn): QMDPLayer(state_dim=20, act_dim=15, rank=0, horizon=30, detach=False)
  (obs_model): ConditionalFlow(x_dim=3, z_dim=20, hidden_dim=30, cov=tied, batch_norm=True)
  (ctl_model): ConditionalGaussian(x_dim=1, z_dim=15, cov=diag, batch_norm=False)
))
e: 1/1, loss_u: 2.1402/2.1425, loss_o: 4.2226/4.0426, t: 8.46


In [7]:
# batch training
# Train one model per seed and keep every run's outputs. The config built by
# parse_args has "save": False, so anything not stored here would be lost once
# the next iteration rebinds `model` and `df_history`.
batch_seeds = range(5, 5 + 10)  # 10 consecutive seeds starting at 5
batch_results = {}  # seed -> (model, df_history) as returned by main
for seed in batch_seeds:
    arglist = parse_args(seed)
    model, df_history = main(arglist, device)
    # NOTE(review): this keeps all trained models alive at once — store only
    # df_history here if memory becomes a concern.
    batch_results[seed] = (model, df_history)