In [70]:
import copy
import os
# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# os.environ["CUDA_VISIBLE_DEVICES"] = "1"

import pickle
import time
from glob import glob

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from einops import rearrange,reduce,repeat
from tqdm import trange

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [71]:
# Show which device was selected by the setup cell.
print(device)

cpu


# MLP 

In [72]:
class MLPNet(nn.Module):
    """Feed-forward baseline mapping a flattened input to a sequence of 2-D points.

    The stack is fixed at ``in_dim -> 1024 -> 256 -> hidden_dim -> out_dim`` with
    ReLU activations and one dropout layer (p=0.5) after the first activation.

    Args:
        in_dim: number of features per sample once every non-batch dimension
            has been flattened.
        out_dim: number of values produced per sample. It must be even because
            the output is reshaped into pairs.
        hidden_dim: width of the last hidden layer.
        num_layers: accepted for interface compatibility only; the depth of
            the stack is fixed, so this value is not used.
    """

    def __init__(self,
                 in_dim, # input dimension
                 out_dim, # output dimension
                 hidden_dim, # hidden dimension
                 num_layers # number of layers
                ):

        super(MLPNet, self).__init__()

        # The last hidden layer must be `hidden_dim` wide so that it matches the
        # input size of the output layer. It used to be hard-coded to 128, which
        # only worked when hidden_dim happened to be 128.
        self.model = nn.Sequential(
            nn.Linear(in_dim, 1024), nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(1024, 256), nn.ReLU(),
            nn.Linear(256, hidden_dim), nn.ReLU(),
            nn.Linear(hidden_dim, out_dim),
        )

    def forward(self, inp):
        """Return predictions shaped ``(batch, out_dim // 2, 2)``.

        Args:
            inp: tensor whose first dimension is the batch; all remaining
                dimensions are flattened into ``in_dim`` features.
        """
        batch = inp.shape[0]

        # Flatten everything except the batch dimension
        flat = inp.reshape(batch, -1)

        out = self.model(flat)

        # batch x output length x 2
        return out.reshape(batch, -1, 2)

In [73]:
class MyLSTM(nn.Module):
    """Stacked LSTM followed by a linear layer applied at every time step.

    Args:
        input_size: number of features per time step.
        output_size: number of values produced per time step.
        hidden_dim: size of the LSTM hidden and cell states.
        n_layers: number of stacked LSTM layers.
    """

    def __init__(self, input_size, output_size, hidden_dim, n_layers):
        super(MyLSTM, self).__init__()

        # Kept on the instance because init_hidden needs them for the state shape
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        # Recurrent layer; batch_first=True means inputs are (batch, seq, feature)
        self.lstm = nn.LSTM(input_size, hidden_dim, n_layers, batch_first=True)
        # Fully connected layer mapping each hidden state to the output size
        self.fc = nn.Linear(hidden_dim, output_size)

    def forward(self, x):
        """Run the sequence through the LSTM and project every time step.

        Args:
            x: batch-first input of shape ``(batch, seq, input_size)``.

        Returns:
            ``(out, (h_t, c_t))`` where ``out`` has shape
            ``(batch, seq, output_size)`` and ``h_t`` / ``c_t`` are the final
            hidden and cell states returned by the LSTM.
        """
        batch_size = x.size(0)

        # Start from a zero state that lives on the same device as the input,
        # so the model does not depend on a notebook-level `device` variable.
        h_0, c_0 = self.init_hidden(batch_size, device=x.device)

        out, (h_t, c_t) = self.lstm(x, (h_0, c_0))

        # nn.Linear acts on the last dimension, so no reshape is needed here
        out = self.fc(out)

        return out, (h_t, c_t)

    def init_hidden(self, batch_size, device=None):
        """Return zero-filled ``(h_0, c_0)`` of shape ``(n_layers, batch_size, hidden_dim)``.

        Args:
            batch_size: number of sequences in the batch.
            device: where to allocate the state. Defaults to the device that
                holds the module's own weights.
        """
        weight = self.fc.weight
        if device is None:
            device = weight.device
        shape = (self.n_layers, batch_size, self.hidden_dim)
        # Match the weights' dtype so the state stays valid after .double()/.half()
        h_0 = torch.zeros(shape, dtype=weight.dtype, device=device)
        c_0 = torch.zeros(shape, dtype=weight.dtype, device=device)
        return h_0, c_0
    



In [74]:
# Autoregressive vs. direct mapping
# Batch Norm? 

# Dataset

In [75]:
class ArgoverseDataset(torch.utils.data.Dataset):
    """Dataset class for Argoverse: one pickled scene per item.

    Every entry found directly under ``data_path`` is treated as one pickled
    scene dictionary; scenes are addressed by their position in the sorted
    listing.

    Args:
        data_path: directory holding the scene pickles.
        sample_indices: positions in the sorted file list that belong to this
            split.
    """

    def __init__(self,
                 data_path,
                 sample_indices):
        super(ArgoverseDataset, self).__init__()

        self.data_path = data_path
        self.sample_indices = sample_indices
        # Sorting keeps the index -> file mapping stable between runs
        self.pkl_list = sorted(glob(os.path.join(self.data_path, '*')))

    def __len__(self):
        return len(self.sample_indices)

    def __getitem__(self, idx):
        """Return ``(inp, target)`` float tensors for the scene at ``idx``.

        ``inp`` is the lane-normalised ``p_in`` concatenated with the scaled
        ``v_in`` along axis 1; ``target`` is the lane-normalised ``p_out`` of
        the agent to be predicted.
        """
        # Load one scene.
        # NOTE: pickle.load can execute arbitrary code; only read trusted files.
        pkl_path = self.pkl_list[self.sample_indices[idx]]
        with open(pkl_path, 'rb') as f:
            scene = pickle.load(f)

        # Index of the agent to be predicted
        pred_id = np.where(scene["track_id"] == scene['agent_id'])[0][0]

        # Positions are min-max normalised with the bounding box of the lane points
        lane_scene = scene['lane']
        min_vecs = np.min(lane_scene, axis=0)
        span = np.max(lane_scene, axis=0) - min_vecs

        p_in_normalized = (scene['p_in'] - min_vecs) / span
        p_out_normalized = (scene['p_out'][pred_id] - min_vecs) / span

        # Velocities are divided by their norm taken along axis 1; zero norms
        # are replaced by 1 so all-zero entries stay zero instead of becoming NaN.
        # NOTE(review): if v_in is (agents, time, 2), axis=1 is the time axis,
        # not the x/y axis - confirm this is the intended normalisation.
        v_in_raw = scene['v_in']
        v_in_norm = np.linalg.norm(v_in_raw, axis=1, keepdims=True)
        v_in_norm = np.where(v_in_norm == 0.0, 1.0, v_in_norm)
        v_in_normalized = v_in_raw / v_in_norm

        inp = np.concatenate((p_in_normalized, v_in_normalized), axis=1)

        # Convert to float torch tensors
        return torch.from_numpy(inp).float(), torch.from_numpy(p_out_normalized).float()

In [76]:
class RNNdataset(torch.utils.data.Dataset):
    """Per-scene sequence dataset: the target agent plus its nearest tracked cars.

    Each item is a 2-D array whose rows are, in order: the agent's flattened
    normalised positions, the agent's flattened scaled velocities,
    ``num_neighbors`` rows of neighbour positions and ``num_neighbors`` rows of
    neighbour velocities. With the default of 10 neighbours that is 22 rows.

    Args:
        data_path: directory holding the scene pickles.
        sample_indices: positions in the sorted file list that belong to this
            split.
        num_neighbors: how many tracked cars to keep per scene (default 10).
            Scenes with fewer tracked cars are zero-padded.
    """

    def __init__(self,
                 data_path,
                 sample_indices,
                 num_neighbors=10):
        super(RNNdataset, self).__init__()
        self.data_path = data_path
        self.sample_indices = sample_indices
        self.num_neighbors = num_neighbors
        # Sorting keeps the index -> file mapping stable between runs
        self.pkl_list = sorted(glob(os.path.join(self.data_path, '*')))

    def __len__(self):
        return len(self.sample_indices)

    def __getitem__(self, idx):
        """Return ``(inp, target)`` float tensors for the scene at ``idx``.

        ``target`` is the agent's lane-normalised ``p_out`` flattened to 1-D.
        """
        # Load one scene.
        # NOTE: pickle.load can execute arbitrary code; only read trusted files.
        pkl_path = self.pkl_list[self.sample_indices[idx]]
        with open(pkl_path, 'rb') as f:
            scene = pickle.load(f)

        # Index of the agent to be predicted, and indices of the tracked cars
        pred_id = np.where(scene["track_id"] == scene['agent_id'])[0][0]
        mask = np.where(scene['car_mask'] == 1)[0]

        p_in_raw = scene['p_in']
        v_in_raw = scene['v_in']

        # Positions are min-max normalised with the bounding box of the lane points
        lane_scene = scene['lane']
        min_vecs = np.min(lane_scene, axis=0)
        span = np.max(lane_scene, axis=0) - min_vecs

        p_in_norm = (p_in_raw - min_vecs) / span
        p_out_norm = (scene['p_out'][pred_id] - min_vecs) / span

        # Velocities are divided by their norm taken along axis 1; zero norms
        # are replaced by 1 so all-zero entries stay zero instead of becoming NaN.
        # NOTE(review): if v_in is (agents, time, 2), axis=1 is the time axis,
        # not the x/y axis - confirm this is the intended normalisation.
        v_scale = np.linalg.norm(v_in_raw, axis=1, keepdims=True)
        v_scale = np.where(v_scale == 0.0, 1.0, v_scale)
        v_in_norm = v_in_raw / v_scale

        # One flat feature row per car (19 * 2 = 38 for the data this notebook
        # was written against - presumably 19 time steps of x/y; confirm).
        feat_dim = int(np.prod(p_in_norm.shape[1:]))
        p_track = p_in_norm[mask].reshape(-1, feat_dim)
        v_track = v_in_norm[mask].reshape(-1, feat_dim)

        p_agent = p_in_norm[pred_id].reshape(1, -1)
        v_agent = v_in_norm[pred_id].reshape(1, -1)

        k = self.num_neighbors
        if len(mask) > k:
            # Keep the k tracks closest to the agent (squared distance summed
            # over the flattened trajectory). argpartition does not order them.
            # NOTE(review): if the agent's own car_mask is 1 it is one of the
            # candidates and, at distance 0, is always selected - confirm intended.
            dist = ((p_track - p_agent) ** 2).sum(axis=-1)
            nearest = np.argpartition(dist, k)[:k]

            p_result = p_track[nearest, :]
            v_result = v_track[nearest, :]
        else:
            # Not enough tracks: keep them all and zero-pad up to k rows
            p_result = np.zeros((k, feat_dim))
            v_result = np.zeros((k, feat_dim))
            n_tracks = p_track.shape[0]
            p_result[:n_tracks] = p_track
            v_result[:n_tracks] = v_track

        inp = np.vstack((p_agent, v_agent, p_result, v_result))

        # Row-major flatten of the target: (steps, 2) -> (steps * 2,)
        target = p_out_norm.reshape(-1)

        # Convert to float torch tensors
        return torch.from_numpy(inp).float(), torch.from_numpy(target).float()



In [77]:
# Try different ways of normalization
# Leverage other features. 

# Hyperparameter

In [78]:
# Grid/Random Search

In [79]:
# --- Optimisation settings ---
batch_size = 1024      # samples per DataLoader batch
learning_rate = 0.01   # initial Adam learning rate
decay_rate = 0.95      # multiplicative LR decay (StepLR gamma)
num_epoch = 1000       # upper bound on training epochs

# --- Model dimensions ---
in_dim = 2 * 19        # features per input row (presumably 19 steps of x/y - confirm)
out_dim = 2 * 30       # values predicted per sample (presumably 30 steps of x/y - confirm)
hidden_dim = 128       # hidden width passed to MLPNet
num_layers = 3         # layer count passed to MLPNet

# Data Loader

In [80]:
train_path = "./train/train"

# total number of scenes
indices = np.arange(0, 205942)

# train-valid split
# A dedicated, seeded generator makes the split reproducible across kernel
# restarts and leaves NumPy's global random state untouched.
split_rng = np.random.RandomState(0)
split_rng.shuffle(indices)
train_indices = indices[:180000]
valid_indices = indices[180000:]

# define datasets
# Only RNNdataset is built: it is the one consumed by the LSTM training code.
# (ArgoverseDataset instances used to be created here and were overwritten
# immediately, costing two extra directory listings for nothing.)
train_set = RNNdataset(train_path, train_indices)
valid_set = RNNdataset(train_path, valid_indices)

# create dataloaders
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=8)
valid_loader = torch.utils.data.DataLoader(valid_set, batch_size=batch_size, shuffle=False, num_workers=8)

# Model, Loss Function and Optimizer

In [81]:
# RNN, LSTM, 1dCNN, Transformer
# The model trained below is the LSTM: the training and evaluation functions
# unpack `pred, _ = model(inp)`, which matches MyLSTM's return value. An MLPNet
# used to be constructed here and was immediately overwritten; that dead
# allocation has been removed.
model = MyLSTM(input_size=in_dim, output_size=out_dim, hidden_dim=30, n_layers=2).to(device)   #maximum number of hidden size is 120

# Adaptive Moment Estimation computes adaptive learning rates for each parameter.
# Compute the decaying averages of past and past squared gradients.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Multiply the learning rate by decay_rate on every scheduler.step() call
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=decay_rate)  # stepwise learning rate decay
loss_fun = nn.MSELoss()

# Training

In [82]:
def train_epoch(train_loader, model, optimizer, loss_function):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        train_loader: iterable of ``(inp, tgt)`` tensor pairs.
        model: module returning a tuple whose first element is shaped
            ``(batch, seq, features)``; only the last time step is scored.
        optimizer: optimizer bound to ``model``'s parameters.
        loss_function: callable ``(pred, tgt) -> scalar tensor``.

    Returns:
        Square root of the mean of the per-batch losses, rounded to 5 decimals
        (an RMSE when ``loss_function`` is a mean-reduced MSE). Batches are
        averaged unweighted, so a short final batch counts as much as a full one.
    """
    model.train()
    # Use the device the model actually lives on instead of a notebook global
    run_device = next(model.parameters()).device
    batch_losses = []
    for inp, tgt in train_loader:
        inp = inp.to(run_device)
        tgt = tgt.to(run_device)

        pred, _ = model(inp)
        # Keep the prediction at the last time step: (batch, features).
        # No squeeze here: squeezing would turn a single-feature prediction
        # into shape (batch,) and make the loss broadcast against (batch, 1).
        pred = pred[:, -1, :]
        loss = loss_function(pred, tgt)
        batch_losses.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    train_mse = round(np.sqrt(np.mean(batch_losses)), 5)
    return train_mse

def eval_epoch(valid_loader, model, loss_function):
    """Evaluate ``model`` on ``valid_loader`` without updating it.

    Args:
        valid_loader: iterable of ``(inp, tgt)`` tensor pairs.
        model: module returning a tuple whose first element is shaped
            ``(batch, seq, features)``; only the last time step is scored.
        loss_function: callable ``(pred, tgt) -> scalar tensor``.

    Returns:
        ``(valid_mse, preds, trues)``: the square root of the mean per-batch
        loss rounded to 5 decimals, and the predictions / targets of all
        batches concatenated along axis 0 as NumPy arrays.
    """
    model.eval()
    # Use the device the model actually lives on instead of a notebook global
    run_device = next(model.parameters()).device
    batch_losses = []
    preds = []
    trues = []
    with torch.no_grad():
        for inp, tgt in valid_loader:
            inp = inp.to(run_device)
            tgt = tgt.to(run_device)

            pred, _ = model(inp)
            # Keep the prediction at the last time step: (batch, features).
            # No squeeze here: squeezing would turn a single-feature prediction
            # into shape (batch,) and make the loss broadcast against (batch, 1).
            pred = pred[:, -1, :]
            loss = loss_function(pred, tgt)
            preds.append(pred.cpu().numpy())
            trues.append(tgt.cpu().numpy())
            batch_losses.append(loss.item())

    preds = np.concatenate(preds, axis=0)
    trues = np.concatenate(trues, axis=0)
    valid_mse = round(np.sqrt(np.mean(batch_losses)), 5)
    return valid_mse, preds, trues


In [83]:
# Learning Rate Decay
# Dropout
# L1/L2 Regularization

In [84]:
train_rmse = []
valid_rmse = []
min_rmse = float("inf")

for i in trange(num_epoch):
    start = time.time()

    # train_epoch switches the model to train mode itself
    train_rmse.append(train_epoch(train_loader, model, optimizer, loss_fun))

    # eval_epoch switches the model to eval mode itself
    val_rmse, val_preds, val_trues = eval_epoch(valid_loader, model, loss_fun)
    valid_rmse.append(val_rmse)

    # save the best model
    if valid_rmse[-1] < min_rmse:
        min_rmse = valid_rmse[-1]
        # Take a real snapshot. A plain `best_model = model` only aliases the
        # live model, so later epochs would silently overwrite the "best" weights.
        best_model = copy.deepcopy(model)
        print('Get best model')
        # torch.save([best_model, i, get_lr(optimizer)], name + ".pth")

    end = time.time()

    # Early Stopping: after 100 epochs, stop once the mean validation RMSE of
    # the last 5 epochs is no better than that of the 5 epochs before them.
    if (len(train_rmse) > 100 and np.mean(valid_rmse[-5:]) >= np.mean(valid_rmse[-10:-5])):
            break

    # Learning Rate Decay
    scheduler.step()

    # T is the wall-clock time of this epoch in minutes
    print("Epoch {} | T: {:0.2f} | Train RMSE: {:0.5f} | Valid RMSE: {:0.5f}".format(i + 1, (end-start) / 60, train_rmse[-1], valid_rmse[-1]))

  0%|          | 1/1000 [00:47<13:16:30, 47.84s/it]

Get best model
Epoch 1 | T: 0.80 | Train RMSE: 0.19336 | Valid RMSE: 0.07838


  0%|          | 2/1000 [01:27<11:55:28, 43.01s/it]

Get best model
Epoch 2 | T: 0.66 | Train RMSE: 0.06624 | Valid RMSE: 0.05358


  0%|          | 3/1000 [02:06<11:21:54, 41.04s/it]

Get best model
Epoch 3 | T: 0.64 | Train RMSE: 0.04851 | Valid RMSE: 0.04071


  0%|          | 4/1000 [02:48<11:26:49, 41.38s/it]

Get best model
Epoch 4 | T: 0.70 | Train RMSE: 0.03886 | Valid RMSE: 0.03646


  0%|          | 5/1000 [03:27<11:13:51, 40.63s/it]

Get best model
Epoch 5 | T: 0.66 | Train RMSE: 0.03594 | Valid RMSE: 0.03362


  1%|          | 6/1000 [04:06<11:04:24, 40.11s/it]

Epoch 6 | T: 0.65 | Train RMSE: 0.03444 | Valid RMSE: 0.03792


  1%|          | 7/1000 [04:45<10:57:07, 39.71s/it]

Epoch 7 | T: 0.65 | Train RMSE: 0.03418 | Valid RMSE: 0.03690


  1%|          | 8/1000 [05:20<10:32:13, 38.24s/it]

Epoch 8 | T: 0.59 | Train RMSE: 0.03329 | Valid RMSE: 0.03384


  1%|          | 9/1000 [06:00<10:40:39, 38.79s/it]

Epoch 9 | T: 0.67 | Train RMSE: 0.03304 | Valid RMSE: 0.03406


  1%|          | 10/1000 [06:37<10:32:06, 38.31s/it]

Get best model
Epoch 10 | T: 0.62 | Train RMSE: 0.03277 | Valid RMSE: 0.03182


  1%|          | 11/1000 [07:14<10:25:06, 37.92s/it]

Epoch 11 | T: 0.62 | Train RMSE: 0.03265 | Valid RMSE: 0.03506


  1%|          | 12/1000 [07:54<10:31:49, 38.37s/it]

Epoch 12 | T: 0.66 | Train RMSE: 0.03225 | Valid RMSE: 0.03203


  1%|▏         | 13/1000 [08:31<10:26:15, 38.07s/it]

Get best model
Epoch 13 | T: 0.62 | Train RMSE: 0.03188 | Valid RMSE: 0.03162


  1%|▏         | 14/1000 [09:10<10:30:41, 38.38s/it]

Epoch 14 | T: 0.65 | Train RMSE: 0.03173 | Valid RMSE: 0.03234


  2%|▏         | 15/1000 [09:47<10:20:47, 37.81s/it]

Get best model
Epoch 15 | T: 0.61 | Train RMSE: 0.03164 | Valid RMSE: 0.03142


  2%|▏         | 16/1000 [10:22<10:08:02, 37.08s/it]

Get best model
Epoch 16 | T: 0.59 | Train RMSE: 0.03145 | Valid RMSE: 0.03115


  2%|▏         | 17/1000 [10:57<9:57:40, 36.48s/it] 

Epoch 17 | T: 0.58 | Train RMSE: 0.03120 | Valid RMSE: 0.03210


  2%|▏         | 18/1000 [11:37<10:14:05, 37.52s/it]

Get best model
Epoch 18 | T: 0.67 | Train RMSE: 0.03120 | Valid RMSE: 0.03066


  2%|▏         | 19/1000 [12:14<10:09:03, 37.25s/it]

Get best model
Epoch 19 | T: 0.61 | Train RMSE: 0.03100 | Valid RMSE: 0.03043


  2%|▏         | 20/1000 [12:49<9:57:16, 36.57s/it] 

Epoch 20 | T: 0.58 | Train RMSE: 0.03082 | Valid RMSE: 0.03088


  2%|▏         | 21/1000 [13:27<10:03:28, 36.99s/it]

Get best model
Epoch 21 | T: 0.63 | Train RMSE: 0.03055 | Valid RMSE: 0.03036


  2%|▏         | 22/1000 [14:05<10:10:36, 37.46s/it]

Epoch 22 | T: 0.64 | Train RMSE: 0.03050 | Valid RMSE: 0.03075


  2%|▏         | 23/1000 [14:43<10:10:36, 37.50s/it]

Epoch 23 | T: 0.63 | Train RMSE: 0.03039 | Valid RMSE: 0.03088


  2%|▏         | 24/1000 [15:23<10:23:01, 38.30s/it]

Get best model
Epoch 24 | T: 0.67 | Train RMSE: 0.03046 | Valid RMSE: 0.03006


  2%|▎         | 25/1000 [16:02<10:27:22, 38.61s/it]

Epoch 25 | T: 0.66 | Train RMSE: 0.03015 | Valid RMSE: 0.03077


  3%|▎         | 26/1000 [16:41<10:27:54, 38.68s/it]

Epoch 26 | T: 0.65 | Train RMSE: 0.03025 | Valid RMSE: 0.03270


  3%|▎         | 27/1000 [17:17<10:12:58, 37.80s/it]

Epoch 27 | T: 0.60 | Train RMSE: 0.03015 | Valid RMSE: 0.03086


  3%|▎         | 28/1000 [17:57<10:23:09, 38.47s/it]

Epoch 28 | T: 0.67 | Train RMSE: 0.02999 | Valid RMSE: 0.03012


  3%|▎         | 29/1000 [18:39<10:38:18, 39.44s/it]

Epoch 29 | T: 0.70 | Train RMSE: 0.02986 | Valid RMSE: 0.03025


  3%|▎         | 30/1000 [19:14<10:17:17, 38.18s/it]

Epoch 30 | T: 0.59 | Train RMSE: 0.02976 | Valid RMSE: 0.03043


  3%|▎         | 31/1000 [19:53<10:20:21, 38.41s/it]

Get best model
Epoch 31 | T: 0.65 | Train RMSE: 0.02969 | Valid RMSE: 0.02981


  3%|▎         | 32/1000 [20:28<10:03:46, 37.42s/it]

Get best model
Epoch 32 | T: 0.59 | Train RMSE: 0.02970 | Valid RMSE: 0.02976


  3%|▎         | 33/1000 [21:11<10:28:32, 39.00s/it]

Epoch 33 | T: 0.71 | Train RMSE: 0.02961 | Valid RMSE: 0.03021


  3%|▎         | 34/1000 [21:47<10:16:10, 38.27s/it]

Get best model
Epoch 34 | T: 0.61 | Train RMSE: 0.02954 | Valid RMSE: 0.02966


  4%|▎         | 35/1000 [22:25<10:14:19, 38.20s/it]

Epoch 35 | T: 0.63 | Train RMSE: 0.02941 | Valid RMSE: 0.02979


  4%|▎         | 36/1000 [23:02<10:08:03, 37.85s/it]

Get best model
Epoch 36 | T: 0.62 | Train RMSE: 0.02940 | Valid RMSE: 0.02952


  4%|▎         | 37/1000 [23:39<10:00:27, 37.41s/it]

Get best model
Epoch 37 | T: 0.61 | Train RMSE: 0.02935 | Valid RMSE: 0.02951


  4%|▍         | 38/1000 [24:22<10:28:10, 39.18s/it]

Get best model
Epoch 38 | T: 0.72 | Train RMSE: 0.02934 | Valid RMSE: 0.02932


  4%|▍         | 39/1000 [25:03<10:35:52, 39.70s/it]

Epoch 39 | T: 0.68 | Train RMSE: 0.02924 | Valid RMSE: 0.02968


  4%|▍         | 40/1000 [25:41<10:28:44, 39.30s/it]

Epoch 40 | T: 0.64 | Train RMSE: 0.02921 | Valid RMSE: 0.02933


  4%|▍         | 41/1000 [26:16<10:06:59, 37.98s/it]

Epoch 41 | T: 0.58 | Train RMSE: 0.02914 | Valid RMSE: 0.02949


  4%|▍         | 42/1000 [26:55<10:09:28, 38.17s/it]

Epoch 42 | T: 0.64 | Train RMSE: 0.02913 | Valid RMSE: 0.02934


  4%|▍         | 43/1000 [27:33<10:09:14, 38.20s/it]

Epoch 43 | T: 0.64 | Train RMSE: 0.02898 | Valid RMSE: 0.02953


  4%|▍         | 44/1000 [28:08<9:54:41, 37.32s/it] 

Get best model
Epoch 44 | T: 0.59 | Train RMSE: 0.02904 | Valid RMSE: 0.02920


  4%|▍         | 45/1000 [28:43<9:43:27, 36.66s/it]

Epoch 45 | T: 0.59 | Train RMSE: 0.02894 | Valid RMSE: 0.02935


  4%|▍         | 45/1000 [29:00<10:15:38, 38.68s/it]


KeyboardInterrupt: 

# Evaluation and Submission

In [85]:
test_path = "./val_in/val_in/"
test_pkl_list = glob(os.path.join(test_path, '*'))
test_pkl_list.sort()

# Predict with the best checkpoint tracked by the training loop, in eval mode.
best_model.eval()

test_preds = []
# Inference only: no_grad avoids building an autograd graph for every scene.
with torch.no_grad():
    for pkl_path in test_pkl_list:
        # NOTE: pickle.load can execute arbitrary code; only read trusted files.
        with open(pkl_path, 'rb') as f:
            test_sample = pickle.load(f)

        # The preprocessing below mirrors RNNdataset.__getitem__ (minus the
        # target, which the test scenes do not provide) and must stay in sync.
        pred_id = np.where(test_sample["track_id"] == test_sample['agent_id'])[0][0]
        mask = np.where(test_sample['car_mask'] == 1)[0]

        # input: p_in & v_in
        p_in_raw = test_sample['p_in']
        v_in_raw = test_sample['v_in']
        lane_scene = test_sample['lane']

        # Min-max normalisation with the bounding box of the lane points
        min_vecs = np.min(lane_scene, axis = 0)
        max_vecs = np.max(lane_scene, axis = 0)

        p_in_norm = (p_in_raw - min_vecs)/(max_vecs - min_vecs)

        # Scale velocities by their norm along axis 1; zero norms become 1 so
        # all-zero entries stay zero instead of turning into NaN
        v_in_norm = np.linalg.norm(v_in_raw, axis=1, keepdims=True)
        v_in_norm = np.where(v_in_norm == 0.0, 1.0, v_in_norm)
        v_in_norm = v_in_raw / v_in_norm

        p_track = p_in_norm[mask].reshape(-1,19*2)
        v_track = v_in_norm[mask].reshape(-1,19*2)

        p_agent = p_in_norm[pred_id].reshape(1,-1)
        v_agent = v_in_norm[pred_id].reshape(1,-1)

        if (len(mask) > 10):
            # Keep the 10 tracked cars closest to the agent
            dist = ((p_track - p_agent)**2).sum(axis=-1)
            new_mask = np.argpartition(dist,10)[:10]

            p_result = p_track[new_mask,:]
            v_result = v_track[new_mask,:]
        else:
            # Fewer than 11 tracked cars: keep them all and zero-pad to 10 rows
            p_result = np.zeros((10,38))
            v_result = np.zeros((10,38))
            k = p_track.shape[0]
            p_result[:k] = p_track
            v_result[:k] = v_track

        # One batch of a single 22-step sequence with 38 features per step
        inp = np.vstack((p_agent,v_agent,p_result,v_result)).reshape(1,22,38)
        inp = torch.from_numpy(inp).to(device,dtype=torch.float)
        preds,_ = best_model(inp)
        preds = preds.squeeze(0)
        # The output at the last time step is the prediction
        pred = preds[-1, :].cpu().numpy()
        pred = rearrange(pred, "(b c) -> b c", c =2 )
        # De-Normalization !
        pred = pred * (max_vecs[:2] - min_vecs[:2]) +  min_vecs[:2]
        test_preds.append(pred)

# Generate Submission File

In [86]:
# Submission files
# Load the sample submission; its "ID" column is reused when the submission
# table is assembled.
sample_sub = pd.read_csv('sample_submission.csv')

In [87]:
# One row per test scene, flattened to 60 values, then converted to int.
# Round to the nearest integer first: a bare astype(int) truncates toward zero,
# which biases every coordinate and adds up to one unit of avoidable error.
predictions = np.rint(np.concatenate(test_preds).reshape(len(test_preds), -1)).astype(int)
# Flat list of column names: "ID" followed by v1 ... v60
sub_columns = ["ID"] + ["v" + str(i) for i in range(1, 61)]
sub_df = pd.DataFrame(np.c_[sample_sub["ID"], predictions], columns=sub_columns)
sub_df.to_csv('test_submission.csv', index=False)