In [1]:
import os
# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# os.environ["CUDA_VISIBLE_DEVICES"] = "1"

import pickle
from glob import glob
import numpy as np
import torch
import torch.nn as nn
import pandas as pd
import time
from tqdm import tqdm
import matplotlib.pyplot as plt

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [2]:
# torch.cuda.is_available() checks and returns a Boolean True if a GPU is available, else it'll return False
is_cuda = torch.cuda.is_available()

# If we have a GPU available, we'll set our device to GPU. We'll use this device variable later in our code.
if is_cuda:
    device = torch.device("cuda")
    print("GPU is available")
else:
    device = torch.device("cpu")
    print("GPU not available, CPU used")
    

GPU is available


In [3]:
train_path = "../train/train"
# The glob module finds all the pathnames matching a specified pattern
train_pkl_lst = glob(os.path.join(train_path, '*'))
with open(train_pkl_lst[1], 'rb') as f:
    training_sample = pickle.load(f)

# LSTM

In [4]:
class MyLSTM(nn.Module):
    def __init__(self, input_size, output_size, hidden_dim, n_layers):
        super(MyLSTM, self).__init__()

        # Defining some parameters
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        #Defining the layers
        # RNN Layer
        self.lstm = nn.LSTM(input_size, hidden_dim, n_layers, batch_first=True)   
        # Fully connected layer
        self.fc = nn.Linear(hidden_dim, output_size)
    
    def forward(self, x):
        
        batch_size = x.size(0)

        #Initializing hidden state for first input using method defined below
        h_t, c_t = self.init_hidden(batch_size)

        #print(x.size())
        #print(x.size(-1))
        # Passing in the input and hidden state into the model and obtaining outputs
        out, (h_t, c_t) = self.lstm(x, (h_t, c_t))
        
        
        # Reshaping the outputs such that it can be fit into the fully connected layer
        out = out.contiguous().view(-1, self.hidden_dim)
        out = self.fc(out)
        
        return out, h_t
    
    def init_hidden(self, batch_size):
        # This method generates the first hidden state of zeros which we'll use in the forward pass
        h_0 = torch.zeros(self.n_layers, batch_size, self.hidden_dim).to(device)
        c_0 =  torch.zeros(self.n_layers, batch_size, self.hidden_dim).to(device)       
         # We'll send the tensor holding the hidden state to the device we specified earlier as well
        return h_0, c_0

In [6]:
# Autogressive vs. direct mapping
# Batch Norm? 

# Dataset

In [5]:
class ArgoverseDataset(torch.utils.data.Dataset):
    """Dataset class for Argoverse"""
    
    def __init__(self, 
                 data_path,
                 sample_indices):
        super(ArgoverseDataset, self).__init__()
        
        self.data_path = data_path
        self.sample_indices = sample_indices
        self.pkl_list = glob(os.path.join(self.data_path, '*'))
        self.pkl_list.sort()
        
    def __len__(self):
        return len(self.sample_indices)

    def __getitem__(self, idx):
        
        # Load one scene
        pkl_path = self.pkl_list[self.sample_indices[idx]]
        with open(pkl_path, 'rb') as f:
            scene = pickle.load(f)
            
        # the index of agent to be predicted 
        pred_id = np.where(scene["track_id"] == scene['agent_id'])[0][0]
        
        # input: p_in & v_in; output: p_out
        inp_scene = np.dstack([scene['p_in'], scene['v_in']])
        out_scene = np.dstack([scene['p_out'], scene['v_out']])
        
        # Normalization 
        min_vecs = np.min(inp_scene, axis = (0,1))
        max_vecs = np.max(inp_scene, axis = (0,1))
        
        # Normalize by vectors
        inp = (inp_scene[pred_id] - min_vecs)/(max_vecs - min_vecs)
        out = (out_scene[pred_id] - min_vecs)/(max_vecs - min_vecs)
        
        dat = np.concatenate((inp, out), axis=0)
        
        # partition data into windows/intervals
        train_data = []
        window_size = 20
        interval = 7
        for i in range(0, len(dat), interval):
            if i + window_size < len(dat): 
                train_data.append(dat[i:i+window_size])
            
        input_seq = []
        target_seq = []
        # store pairs of position and velocity at consecutive timesteps
        for i in range(len(train_data)):
            input_seq.append(train_data[i][:-1])
            target_seq.append(train_data[i][1:])
        
        input_seq = np.array(input_seq, dtype=np.float32)
        target_seq = np.array(target_seq, dtype=np.float32)
        
        return torch.from_numpy(input_seq).float(), torch.from_numpy(target_seq).float()

In [6]:
training_sample['p_out'].shape

(60, 30, 2)

In [37]:
training_sample['v_out'].shape

(60, 30, 2)

In [5]:
# Try different ways of normalization
# Leverage other features. 

# Hyperparameter

In [6]:
# Grid/Random Search

In [14]:
# batch_size = 64
interval = 7 # sampling interval for LSTM
window_size = 20 # number of timesteps to take as input
batch_size = 512
#in_dim = 19*4 # MLP
#out_dim = 4 #30*2 # MLP
input_size = 4 # LSTM
output_size = 30*4 # LSTM (has to match input_size)
hidden_dim = 32 #128 #256 #128 #32 #128
num_layers = 2 #1 #3
learning_rate = 0.01
decay_rate = 0.95
num_epoch = 1000

# Data Loader

In [15]:
train_path = "../train/train"

# total number of scenes
indices = np.arange(0, 205942)

# train-valid split
np.random.shuffle(indices)
train_indices = indices[:180000]
valid_indices = indices[180000:]

# define datasets
train_set = ArgoverseDataset(train_path, train_indices)
valid_set = ArgoverseDataset(train_path, valid_indices)

# create dataloaders
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0)
valid_loader = torch.utils.data.DataLoader(valid_set, batch_size=batch_size, shuffle=False, num_workers=0)

In [16]:
len(train_set)

180000

In [80]:
# train_set[0]

In [17]:
len(train_loader)

352

# Model, Loss Function and Optimizer

In [20]:
# # RNN, LSTM, 1dCNN, Transformer
# model = MLPNet(in_dim = in_dim, 
#                out_dim = out_dim,
#                hidden_dim = hidden_dim, 
#                num_layers = num_layers).to(device) # move model to gpu 

model = MyLSTM(input_size=input_size, output_size=output_size, hidden_dim=hidden_dim, n_layers=num_layers).to(device)

# Adaptive Moment Estimation computes adaptive learning rates for each parameter. 
# Compute the decaying averages of past and past squared gradients. 

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=decay_rate)  # stepwise learning rate decay
loss_fun = nn.MSELoss()

In [19]:
model.load_state_dict(torch.load('lstm_hdim_32_wsize_20_interval_7_nlayers_1_bs_512_lr_0.01_decay_0.95_epoch_147.pt'))

RuntimeError: Error(s) in loading state_dict for MyLSTM:
	Missing key(s) in state_dict: "lstm.weight_ih_l1", "lstm.weight_hh_l1", "lstm.bias_ih_l1", "lstm.bias_hh_l1". 
	size mismatch for lstm.weight_ih_l0: copying a param with shape torch.Size([128, 76]) from checkpoint, the shape in current model is torch.Size([128, 4]).
	size mismatch for fc.weight: copying a param with shape torch.Size([60, 32]) from checkpoint, the shape in current model is torch.Size([120, 32]).
	size mismatch for fc.bias: copying a param with shape torch.Size([60]) from checkpoint, the shape in current model is torch.Size([120]).

In [12]:
model.load_state_dict(torch.load('baseline.pt'))

FileNotFoundError: [Errno 2] No such file or directory: 'baseline.pt'

In [118]:
# train_path = "./train/train/"
# train_pkl_list = glob(os.path.join(train_path, '*'))
# train_pkl_list.sort()

# train_preds = []
# for idx in range(3):
#     with open(train_pkl_list[idx], 'rb') as f:
#         train_sample = pickle.load(f)
#         pred_id = np.where(train_sample["track_id"] == train_sample['agent_id'])[0][0]
#         inp_scene = np.dstack([train_sample['p_in'], train_sample['v_in']])

#         # Normalization 
#         min_vecs = np.min(inp_scene, axis = (0,1))
#         max_vecs = np.max(inp_scene, axis = (0,1))
        
#         inp = (inp_scene[pred_id] - min_vecs)/(max_vecs - min_vecs)
        
#         inp = torch.from_numpy(inp).float().to(device).unsqueeze(0)

#         preds = model(inp).cpu().data.numpy()
        
#         # De-Normalization ! 
#         preds = preds * (max_vecs[:2] - min_vecs[:2]) +  min_vecs[:2]
#         train_preds.append(preds)

# Training

In [171]:
for inp, tgt in train_loader:
    print(inp.shape)
    print(tgt.shape)
    print(inp.view(-1, window_size-1, 4).size()) 
    print(inp[0, 0, :, :])
    break
        

torch.Size([512, 5, 19, 4])
torch.Size([512, 5, 19, 4])
torch.Size([2560, 19, 4])
tensor([[1.0000, 1.0000, 0.0566, 0.0473],
        [0.9998, 0.9996, 0.0643, 0.0610],
        [0.9995, 0.9992, 0.0543, 0.0229],
        [0.9993, 0.9988, 0.1135, 0.1166],
        [0.9991, 0.9984, 0.0722, 0.0608],
        [0.9989, 0.9981, 0.1136, 0.1087],
        [0.9987, 0.9977, 0.0845, 0.0779],
        [0.9984, 0.9973, 0.0762, 0.0627],
        [0.9982, 0.9969, 0.0633, 0.0508],
        [0.9980, 0.9965, 0.0501, 0.0407],
        [0.9978, 0.9962, 0.1454, 0.1404],
        [0.9975, 0.9957, 0.0430, 0.0333],
        [0.9973, 0.9954, 0.1270, 0.1274],
        [0.9971, 0.9950, 0.0749, 0.0670],
        [0.9969, 0.9946, 0.0619, 0.0524],
        [0.9966, 0.9942, 0.0747, 0.0485],
        [0.9964, 0.9938, 0.0602, 0.0563],
        [0.9962, 0.9935, 0.1179, 0.1185],
        [0.9959, 0.9930, 0.0306, 0.0269]])


In [21]:
def train_epoch(train_loader, model, optimizer, loss_function):

    train_mse = []
    for inp, tgt in tqdm(train_loader):
        
        inp = inp.view(-1, window_size-1, 4)
        tgt = tgt.view(-1, window_size-1, 4)
        
        #print(inp.size())
        
        inp = inp.to(device)
        tgt = tgt.to(device)
        
        output, hidden = model(inp)
        output = output.to(device)
        #print(output.shape)
        #print(hidden.shape)
        #print(tgt.view(-1, 4).size())
        
        loss = loss_function(output, tgt.view(-1, 4))
        train_mse.append(loss.item()) 
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    train_mse = round(np.sqrt(np.mean(train_mse)),5)
    
    return train_mse

def eval_epoch(valid_loader, model, loss_function):
    
    valid_mse = []
    #preds = []
    #trues = []
    
    with torch.no_grad():
        for inp, tgt in valid_loader:
            
            inp = inp.view(-1, window_size-1, 4)
            tgt = tgt.view(-1, window_size-1, 4)
            
            inp = inp.to(device)
            tgt = tgt.to(device)
            
            loss = 0
            output, hidden = model(inp)
            output = output.to(device)
                    
            loss = loss_function(output, tgt.view(-1, 4))
            
            #preds.append(pred.cpu().data.numpy())
            #trues.append(tgt.cpu().data.numpy())
            
            valid_mse.append(loss.item())
            
        #preds = np.concatenate(preds, axis = 0)  
        #trues = np.concatenate(trues, axis = 0)  
        valid_mse = round(np.sqrt(np.mean(valid_mse)), 5)
    return valid_mse#, preds, trues


In [14]:
# Learning Rate Decay
# Dropout
# L1/L2 Regulization

In [22]:
len(train_pkl_lst)

205942

In [79]:
train_rmse = []
valid_rmse = []
min_rmse = 10e8

for i in tqdm(range(num_epoch)):
    start = time.time()

    model.train() # if you use dropout or batchnorm. 
    train_rmse.append(train_epoch(train_loader, model, optimizer, loss_fun))
    print(train_rmse)
    
    model.eval()
    val_rmse = eval_epoch(valid_loader, model, loss_fun)
    valid_rmse.append(val_rmse)
    print(val_rmse)

    # save the best model
    if valid_rmse[-1] < min_rmse:
        min_rmse = valid_rmse[-1] 
        best_model = model
        
        # torch.save([best_model, i, get_lr(optimizer)], name + ".pth")

    end = time.time()
    
    # Early Stopping
    if (len(train_rmse) > 100 and np.mean(valid_rmse[-5:]) >= np.mean(valid_rmse[-10:-5])):
        torch.save(best_model.state_dict(), f'lstm_hdim_{hidden_dim}_wsize_{window_size}_interval_{interval}_nlayers_{num_layers}_bs_{batch_size}_lr_{learning_rate}_decay_{decay_rate}_epoch_{i+1}.pt')    
        break       

    # Learning Rate Decay        
    scheduler.step()
    
    print("Epoch {} | T: {:0.2f} | Train RMSE: {:0.5f} | Valid RMSE: {:0.5f}".format(i + 1, (end-start) / 60, train_rmse[-1], valid_rmse[-1]))

    plt.figure()
    plt.plot(train_rmse, label="train_rmse")
    plt.plot(valid_rmse, label="valid_rmse")
    plt.xlabel('epochs')
    plt.ylabel('RSME loss')
    plt.title(f'RMSE loss curve for LSTM, hdim: {hidden_dim}, wsize: {window_size}, nlayers: {num_layers}, bs: {batch_size}, lr: {learning_rate}, decay: {decay_rate}')
    plt.legend()
    plt.savefig(f'lstm_loss_curve_v1_hdim_{hidden_dim}_wsize_{window_size}_interval_{interval}_nlayers_{num_layers}_bs_{batch_size}_lr_{learning_rate}_decay_{decay_rate}.png')
    plt.show()
    

  0%|          | 0/1000 [00:00<?, ?it/s]
  return F.mse_loss(input, target, reduction=self.reduction)
  0%|          | 0/352 [00:00<?, ?it/s]
  0%|          | 0/1000 [00:00<?, ?it/s]


RuntimeError: The size of tensor a (120) must match the size of tensor b (4) at non-singleton dimension 1

# Evaluation and Submission

In [23]:
model.load_state_dict(torch.load('lstm_hdim_32_wsize_20_interval_7_nlayers_1_bs_512_lr_0.01_decay_0.95.pt'))
best_model = model

RuntimeError: Error(s) in loading state_dict for MyLSTM:
	Missing key(s) in state_dict: "lstm.weight_ih_l1", "lstm.weight_hh_l1", "lstm.bias_ih_l1", "lstm.bias_hh_l1". 
	size mismatch for fc.weight: copying a param with shape torch.Size([4, 32]) from checkpoint, the shape in current model is torch.Size([120, 32]).
	size mismatch for fc.bias: copying a param with shape torch.Size([4]) from checkpoint, the shape in current model is torch.Size([120]).

In [65]:
best_model

MyLSTM(
  (lstm): LSTM(4, 128, num_layers=2, batch_first=True)
  (fc): Linear(in_features=128, out_features=4, bias=True)
)

In [66]:
test_path = "../val_in/val_in/"
test_pkl_list = glob(os.path.join(test_path, '*'))
test_pkl_list.sort()

test_preds = []
for idx in range(len(test_pkl_list)):
    with open(test_pkl_list[idx], 'rb') as f:
        test_sample = pickle.load(f)
        pred_id = np.where(test_sample["track_id"] == test_sample['agent_id'])[0][0]
        inp_scene = np.dstack([test_sample['p_in'], test_sample['v_in']])

        # Normalization 
        min_vecs = np.min(inp_scene, axis = (0,1))
        max_vecs = np.max(inp_scene, axis = (0,1))
        #print(min_vecs.shape)
        #print(max_vecs.shape)
        
        inp = (inp_scene[pred_id] - min_vecs)/(max_vecs - min_vecs)
        
        inp = torch.from_numpy(inp).float().to(device).unsqueeze(0)

        #print(inp)
        # post-processing for LSTM
        predictions = [[]]
        inp_data = inp[0][-1]
        #print(inp_data.size())
        for i in range(30):
            preds = best_model(inp_data.reshape(1, 1, 4))
            predictions[0].append(preds[0].cpu().data.numpy()[0, :2])
            #print(preds)
            inp_data = preds[0]
            
#         print(inp[0][-1])
#         preds = best_model(inp)#.cpu().data.numpy()
#         print(preds)
#         print(inp.shape)
#         print(preds[0].shape)

        predictions = np.array(predictions)
        #print(predictions.shape)

        # De-Normalization ! 
        predictions = predictions * (max_vecs[:2] - min_vecs[:2]) +  min_vecs[:2]
        test_preds.append(predictions)

In [67]:
print(test_preds[0].shape)
print(test_preds[0])

(1, 30, 2)
[[[1719.4099948   336.36949324]
  [1725.20331361  336.28454933]
  [1731.18027481  336.08865915]
  [1737.28438104  335.81118443]
  [1743.46870725  335.4714306 ]
  [1749.69527642  335.08248608]
  [1755.93245834  334.65360004]
  [1762.15403327  334.19208017]
  [1768.33648664  333.70396898]
  [1774.45859294  333.19495993]
  [1780.5009995   332.67050654]
  [1786.44518599  332.13593142]
  [1792.27408876  331.59644814]
  [1797.97168464  331.05707393]
  [1803.52215851  330.52256425]
  [1808.91240051  329.99732555]
  [1814.12950884  329.48530616]
  [1819.16245456  328.98986545]
  [1824.00187347  328.51368654]
  [1828.6402742   328.05881997]
  [1833.07141396  327.62664   ]
  [1837.29113087  327.21788833]
  [1841.29734401  326.83256494]
  [1845.08838863  326.47021175]
  [1848.6650971   326.12975987]
  [1852.02955035  325.80992226]
  [1855.18507788  325.50908469]
  [1858.13667393  325.22574199]
  [1860.8907894   324.95851988]
  [1863.4559561   324.70658941]]]


# Generate Submission File

### Steps to create submission file 
Run the below cells. The last cell will generate a submission file "test_submission.csv" that you can submit to Kaggle.

In [68]:
# Submission Files
sample_sub = pd.read_csv('sample_submission.csv')

In [71]:
# Save for later use
predictions = np.concatenate(test_preds).reshape(len(test_preds), -1)#.astype(int)
sub_df = pd.DataFrame(np.c_[sample_sub["ID"], predictions], columns=[np.r_[["ID"], ["v" + str(i) for i in range(1, 61)]]])
sub_df["ID"] = sub_df["ID"].astype(int)
sub_df.to_csv(f'test_submission_lstm_hdim_{hidden_dim}_wsize_{window_size}_interval_{interval}_nlayers_{num_layers}_bs_{batch_size}_lr_{learning_rate}_decay_{decay_rate}.csv', index=None)

In [72]:
# Convert to float
predictions = np.concatenate(test_preds).reshape(len(test_preds), -1)
sub_df = pd.DataFrame(np.c_[sample_sub["ID"], predictions], columns=[np.r_[["ID"], ["v" + str(i) for i in range(1, 61)]]])
sub_df["ID"] = sub_df["ID"].astype(int)
sub_df.to_csv('test_submission.csv', index=None)

In [73]:
sub_df

Unnamed: 0,ID,v1,v2,v3,v4,v5,v6,v7,v8,v9,...,v51,v52,v53,v54,v55,v56,v57,v58,v59,v60
0,10002,1719.409995,336.369493,1725.203314,336.284549,1731.180275,336.088659,1737.284381,335.811184,1743.468707,...,1852.029550,325.809922,1855.185078,325.509085,1858.136674,325.225742,1860.890789,324.958520,1863.455956,324.706589
1,10015,727.496177,1228.487343,729.501623,1227.020683,731.551209,1225.541131,733.649858,1224.036116,735.807510,...,787.935138,1192.248323,789.823385,1191.082460,791.612788,1189.971742,793.303437,1188.914976,794.896137,1187.909617
2,10019,575.937420,1245.525411,577.949851,1246.229434,579.888580,1246.909017,581.766447,1247.556617,583.588199,...,618.134380,1208.988457,619.759209,1205.888133,621.359993,1202.839988,622.930811,1199.866418,624.466382,1196.988248
3,10028,1688.421868,315.642609,1686.011086,315.433464,1683.885100,315.277506,1682.060234,315.193592,1680.518450,...,1658.053798,317.807724,1656.362865,318.090679,1654.566922,318.396584,1652.656799,318.726605,1650.622822,319.082017
4,1003,2124.988001,677.337634,2126.253720,676.509473,2127.818240,675.686718,2129.642917,674.856144,2131.708043,...,2218.717961,644.154731,2224.051100,642.274036,2229.361461,640.417128,2234.624730,638.597277,2239.818130,636.827166
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3195,9897,256.107934,806.429025,256.087218,807.626338,256.094916,809.096276,256.088205,810.722548,256.036828,...,240.068192,856.978647,238.445174,858.486782,236.737045,859.748825,234.947427,860.738632,233.081285,861.434141
3196,99,588.224553,1155.657332,588.625253,1156.738643,589.040805,1157.748837,589.466163,1158.689328,589.894316,...,595.922750,1171.919051,596.053556,1172.410363,596.170164,1172.903513,596.272536,1173.400056,596.360345,1173.901477
3197,9905,1759.557891,444.402348,1763.731861,444.406015,1767.959124,444.195250,1772.224438,443.849696,1776.542309,...,1866.443332,422.076754,1869.704598,421.300012,1872.801255,420.583009,1875.730781,419.922832,1878.491704,419.315732
3198,9910,576.237940,1291.916656,577.363511,1294.875644,578.157823,1297.933344,578.671536,1301.083615,578.939946,...,541.070220,1382.560169,537.053684,1384.583994,532.847029,1386.130476,528.465023,1387.168444,523.925801,1387.676014


In [None]:
# Ensemble Method 