In [1]:
import os
# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# os.environ["CUDA_VISIBLE_DEVICES"] = "1"

import pickle
from glob import glob
import numpy as np
import torch
import torch.nn as nn
import pandas as pd
import time
from tqdm import tqdm
import matplotlib.pyplot as plt

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [2]:
# torch.cuda.is_available() checks and returns a Boolean True if a GPU is available, else it'll return False
is_cuda = torch.cuda.is_available()

# If we have a GPU available, we'll set our device to GPU. We'll use this device variable later in our code.
if is_cuda:
    device = torch.device("cuda")
    print("GPU is available")
else:
    device = torch.device("cpu")
    print("GPU not available, CPU used")
    

GPU is available


In [3]:
train_path = "../train/train"
# The glob module finds all the pathnames matching a specified pattern
train_pkl_lst = glob(os.path.join(train_path, '*'))
#with open(train_pkl_lst[1], 'rb') as f:
#    training_sample = pickle.load(f)

# LSTM

In [126]:
class MyLSTM(nn.Module):
    def __init__(self, input_size, output_size, hidden_dim, n_layers):
        super(MyLSTM, self).__init__()

        # Defining some parameters
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        #Defining the layers
        # RNN Layer
    
        self.lstm = nn.LSTM(input_size, hidden_dim, n_layers, batch_first=True)   
        # Fully connected layer
        self.fc = nn.Linear(hidden_dim, output_size)
    
    def forward(self, x):
        
        batch_size = x.size(0)

        #Initializing hidden state for first input using method defined below
        h_t, c_t = self.init_hidden(batch_size)

        #print(x.size())
        #print(x.size(-1))
        # Passing in the input and hidden state into the model and obtaining outputs
        #print(x.shape)
        #print(h_t.shape)
        #print(c_t.shape)

        #print(f'forward pass: input shape is {x.shape}')
        out, (h_t, c_t) = self.lstm(x, (h_t, c_t))
        #print(f'forward pass: lstm output is {out.shape}')
        
        # Reshaping the outputs such that it can be fit into the fully connected layer
        out = out.contiguous().view(-1, self.hidden_dim)
        #print(f'forward pass: result of reshaping output before passing to fc layer is {out.shape}')
        out = self.fc(out)
        #print(f'forward pass: fc output is {out.shape}')
        
        return out, h_t
    
    def init_hidden(self, batch_size):
        # This method generates the first hidden state of zeros which we'll use in the forward pass
        h_0 = torch.zeros(self.n_layers, batch_size, self.hidden_dim).to(device)
        c_0 =  torch.zeros(self.n_layers, batch_size, self.hidden_dim).to(device)       
         # We'll send the tensor holding the hidden state to the device we specified earlier as well
        return h_0, c_0

In [6]:
# Autogressive vs. direct mapping
# Batch Norm? 

# Dataset

In [154]:
class ArgoverseDataset(torch.utils.data.Dataset):
    """Dataset class for Argoverse"""
    
    def __init__(self, 
                 data_path,
                 sample_indices):
        super(ArgoverseDataset, self).__init__()
        
        self.data_path = data_path
        self.sample_indices = sample_indices
        self.pkl_list = glob(os.path.join(self.data_path, '*'))
        self.pkl_list.sort()
        
    def __len__(self):
        return len(self.sample_indices)

    def __getitem__(self, idx):
        
        # Load one scene
        pkl_path = self.pkl_list[self.sample_indices[idx]]
        with open(pkl_path, 'rb') as f:
            scene = pickle.load(f)
            
        # the index of agent to be predicted 
        pred_id = np.where(scene["track_id"] == scene['agent_id'])[0][0]
        
        # input: p_in & v_in; output: p_out
        inp_scene = np.dstack([scene['p_in'], scene['v_in']])
        out_scene = np.dstack([scene['p_out'], scene['v_out']])
        
        # Normalization 
        min_vecs = np.min(inp_scene, axis = (0,1))
        max_vecs = np.max(inp_scene, axis = (0,1))
        
        # Normalize by vectors
        inp = (inp_scene[pred_id] - min_vecs)/(max_vecs - min_vecs)
        out = (out_scene[pred_id] - min_vecs)/(max_vecs - min_vecs)
        
        #print(inp.shape)
        #print(out.shape)
        #print(inp.flatten().shape)
        #inp = inp.reshape(1, -1)
        out = out[:, :2].reshape(-1) # reshape to 60x1 so it matches shape of model output
        
        
        
        #inp = inp.flatten()
        #out = out[:, :2].flatten()
        
        #inp = inp.reshape(-1, 1)
        #out = out[:, :2].reshape(-1, 1)
        
        return torch.from_numpy(inp).float(), torch.from_numpy(out).float()
    
        #dat = np.concatenate((inp, out), axis=0)
        
        #train_data = []
        #window_size = 20
        #interval = 7
        #for i in range(0, len(dat), interval):
        #    #print(len(dat[i:i+input_length]))
        #    if i + window_size < len(dat): 
        #        train_data.append(dat[i:i+window_size])
            
        #print(len(train_data))
        #print(train_data)
        
        #input_seq = []
        #target_seq = []
        #for i in range(len(train_data)):
        #    input_seq.append(train_data[i][:-1])
        #    target_seq.append(train_data[i][1:])
        
        #print(input_seq)
        
        #input_seq = np.array(input_seq, dtype=np.float32)
        #target_seq = np.array(target_seq, dtype=np.float32)
        
        #print(input_seq.shape)
        #print(target_seq.shape)
        
        ## Convert to float torch tensor
        ##return torch.from_numpy(inp).float(), torch.from_numpy(out).float() #torch.from_numpy(out[:,:2]).float()
        #return torch.from_numpy(input_seq).float(), torch.from_numpy(target_seq).float()

In [5]:
# Try different ways of normalization
# Leverage other features. 

# Hyperparameter

In [6]:
# Grid/Random Search

In [155]:
# batch_size = 64
interval = 7 # sampling interval for LSTM
window_size = 20 # number of timesteps to take as input
batch_size = 512
#in_dim = 19*4 # MLP
#out_dim = 4 #30*2 # MLP
input_size = 4 #19*4 #1#19*4 # LSTM
output_size = 30*2 # LSTM (has to match input_size)
hidden_dim = 32 #128 #256 #128 #32 #128
num_layers = 1 #1 #3
learning_rate = 0.01
decay_rate = 0.95
num_epoch = 1000

# Data Loader

In [156]:
train_path = "../train/train"

# total number of scenes
indices = np.arange(0, 205942)

# train-valid split
np.random.shuffle(indices)
train_indices = indices[:180000]
valid_indices = indices[180000:]

# define datasets
train_set = ArgoverseDataset(train_path, train_indices)
valid_set = ArgoverseDataset(train_path, valid_indices)

# create dataloaders
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0)
valid_loader = torch.utils.data.DataLoader(valid_set, batch_size=batch_size, shuffle=False, num_workers=0)

In [157]:
len(train_set)

180000

In [80]:
# train_set[0]

In [158]:
len(train_loader)

352

# Model, Loss Function and Optimizer

In [159]:
# # RNN, LSTM, 1dCNN, Transformer
# model = MLPNet(in_dim = in_dim, 
#                out_dim = out_dim,
#                hidden_dim = hidden_dim, 
#                num_layers = num_layers).to(device) # move model to gpu 

model = MyLSTM(input_size=input_size, output_size=output_size, hidden_dim=hidden_dim, n_layers=num_layers).to(device)

# Adaptive Moment Estimation computes adaptive learning rates for each parameter. 
# Compute the decaying averages of past and past squared gradients. 

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=decay_rate)  # stepwise learning rate decay
loss_fun = nn.MSELoss()

# Training

In [144]:
for inp, tgt in train_loader:
    print(inp.shape)
    print(tgt.shape)
    break
        

(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)
(30, 4)
(19, 4)


In [152]:
def train_epoch(train_loader, model, optimizer, loss_function):

    train_mse = []
    for inp, tgt in tqdm(train_loader):
        
        #inp = inp.view(-1, window_size-1, 4)
        #tgt = tgt.view(-1, window_size-1, 4)
        
        #print(inp.size())
        
        #print(inp.shape)
        #print(tgt.shape)
        
        inp = inp.to(device)
        tgt = tgt.to(device)
        
        output, hidden = model(inp)
        output = output.to(device)
        
        print(output.shape)
        print(hidden.shape)
        print(tgt.shape)
        #print(output.shape)
        #print(hidden.shape)
        #print(tgt.view(-1, 4).size())
        
        
        
        loss = loss_function(output, tgt)
        train_mse.append(loss.item()) 
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    train_mse = round(np.sqrt(np.mean(train_mse)),5)
    
    return train_mse

def eval_epoch(valid_loader, model, loss_function):
    
    valid_mse = []
    #preds = []
    #trues = []
    
    with torch.no_grad():
        for inp, tgt in valid_loader:
            
            #inp = inp.view(-1, window_size-1, 4)
            #tgt = tgt.view(-1, window_size-1, 4)
            
            inp = inp.to(device)
            tgt = tgt.to(device)
            
            loss = 0
            output, hidden = model(inp)
            output = output.to(device)
                
            loss = loss_function(output, tgt)
            
            #preds.append(pred.cpu().data.numpy())
            #trues.append(tgt.cpu().data.numpy())
            
            valid_mse.append(loss.item())
            
        #preds = np.concatenate(preds, axis = 0)  
        #trues = np.concatenate(trues, axis = 0)  
        valid_mse = round(np.sqrt(np.mean(valid_mse)), 5)
    return valid_mse#, preds, trues


In [14]:
# Learning Rate Decay
# Dropout
# L1/L2 Regulization

In [132]:
len(train_pkl_lst)

205942

In [160]:
train_rmse = []
valid_rmse = []
min_rmse = 10e8

for i in tqdm(range(num_epoch)):
    start = time.time()

    model.train() # if you use dropout or batchnorm. 
    train_rmse.append(train_epoch(train_loader, model, optimizer, loss_fun))
    print(train_rmse)
    
    model.eval()
    val_rmse = eval_epoch(valid_loader, model, loss_fun)
    valid_rmse.append(val_rmse)
    print(val_rmse)

    # save the best model
    if valid_rmse[-1] < min_rmse:
        min_rmse = valid_rmse[-1] 
        best_model = model
        
        # torch.save([best_model, i, get_lr(optimizer)], name + ".pth")

    end = time.time()
    
    # Early Stopping
    if (len(train_rmse) > 100 and np.mean(valid_rmse[-5:]) >= np.mean(valid_rmse[-10:-5])):
        torch.save(best_model.state_dict(), f'lstm_hdim_{hidden_dim}_wsize_{window_size}_interval_{interval}_nlayers_{num_layers}_bs_{batch_size}_lr_{learning_rate}_decay_{decay_rate}_epoch_{i+1}.pt')    
        break       

    # Learning Rate Decay        
    scheduler.step()
    
    print("Epoch {} | T: {:0.2f} | Train RMSE: {:0.5f} | Valid RMSE: {:0.5f}".format(i + 1, (end-start) / 60, train_rmse[-1], valid_rmse[-1]))

    plt.figure()
    plt.plot(train_rmse, label="train_rmse")
    plt.plot(valid_rmse, label="valid_rmse")
    plt.xlabel('epochs')
    plt.ylabel('RSME loss')
    plt.title(f'RMSE loss curve for LSTM, hdim: {hidden_dim}, wsize: {window_size}, nlayers: {num_layers}, bs: {batch_size}, lr: {learning_rate}, decay: {decay_rate}')
    plt.legend()
    plt.savefig(f'lstm_loss_curve_v1_hdim_{hidden_dim}_wsize_{window_size}_interval_{interval}_nlayers_{num_layers}_bs_{batch_size}_lr_{learning_rate}_decay_{decay_rate}.png')
    plt.show()
    

  0%|          | 0/1000 [00:00<?, ?it/s]
  return F.mse_loss(input, target, reduction=self.reduction)
  0%|          | 0/352 [00:00<?, ?it/s]
  0%|          | 0/1000 [00:00<?, ?it/s]

torch.Size([9728, 60])
torch.Size([1, 512, 32])
torch.Size([512, 60])





RuntimeError: The size of tensor a (9728) must match the size of tensor b (512) at non-singleton dimension 0

# Evaluation and Submission

In [25]:
model.load_state_dict(torch.load('lstm_hdim_32_wsize_20_interval_7_nlayers_1_bs_512_lr_0.01_decay_0.95.pt'))
best_model = model

In [65]:
best_model

MyLSTM(
  (lstm): LSTM(4, 128, num_layers=2, batch_first=True)
  (fc): Linear(in_features=128, out_features=4, bias=True)
)

In [134]:
test_path = "../val_in/val_in"
test_pkl_list = glob(os.path.join(test_path, '*'))
test_pkl_list.sort()

test_preds = []
for idx in range(len(test_pkl_list)):
    with open(test_pkl_list[idx], 'rb') as f:
        test_sample = pickle.load(f)
        pred_id = np.where(test_sample["track_id"] == test_sample['agent_id'])[0][0]
        inp_scene = np.dstack([test_sample['p_in'], test_sample['v_in']])

        # Normalization 
        min_vecs = np.min(inp_scene, axis = (0,1))
        max_vecs = np.max(inp_scene, axis = (0,1))
        
        inp = (inp_scene[pred_id] - min_vecs)/(max_vecs - min_vecs)
        
        inp = inp.reshape(1, -1)
        
        inp_data = torch.from_numpy(inp).float().unsqueeze(0).to(device)

        # post-processing for LSTM
        
        
        output, hidden = best_model(inp_data)
        predictions = output.cpu().data.numpy().reshape(30, 2)

        # De-Normalization ! 
        predictions = predictions * (max_vecs[:2] - min_vecs[:2]) +  min_vecs[:2]
        test_preds.append(predictions)

In [135]:
print(test_preds[0].shape)
print(test_preds[0])

(30, 2)
[[1735.15464966  338.65479859]
 [1733.94562731  338.94213326]
 [1735.31051158  339.49985309]
 [1734.65012734  340.10556383]
 [1736.69297974  340.0074006 ]
 [1736.1030351   340.40336924]
 [1735.38636161  341.4348248 ]
 [1735.06464929  341.70955095]
 [1737.87630256  342.00207736]
 [1736.97182483  342.60859522]
 [1735.18711223  342.99300241]
 [1735.82314954  343.13570993]
 [1737.36782647  343.9364819 ]
 [1737.29353714  344.03207107]
 [1738.58891824  344.45796859]
 [1737.26482026  345.42945732]
 [1738.84851469  345.50824967]
 [1739.73207904  346.4753756 ]
 [1741.91726729  346.63976628]
 [1740.41504141  346.9293478 ]
 [1741.84245773  347.44664619]
 [1741.93256215  347.72150322]
 [1743.29234814  347.58586346]
 [1742.43354689  348.83925516]
 [1737.83187463  349.11330508]
 [1741.5369767   349.36074185]
 [1743.82818851  349.77896082]
 [1740.94463898  350.42629277]
 [1744.55370597  350.96289659]
 [1741.52386682  350.95135696]]


# Generate Submission File

### Steps to create submission file 
Run the below cells. The last cell will generate a submission file "test_submission.csv" that you can submit to Kaggle.

In [136]:
# Submission Files
sample_sub = pd.read_csv('sample_submission.csv')

In [137]:
# Save for later use
predictions = np.concatenate(test_preds).reshape(len(test_preds), -1)#.astype(int)
sub_df = pd.DataFrame(np.c_[sample_sub["ID"], predictions], columns=[np.r_[["ID"], ["v" + str(i) for i in range(1, 61)]]])
sub_df["ID"] = sub_df["ID"].astype(int)
sub_df.to_csv(f'test_submission_lstm_hdim_{hidden_dim}_wsize_{window_size}_interval_{interval}_nlayers_{num_layers}_bs_{batch_size}_lr_{learning_rate}_decay_{decay_rate}.csv', index=None)

In [138]:
# Convert to float
predictions = np.concatenate(test_preds).reshape(len(test_preds), -1)
sub_df = pd.DataFrame(np.c_[sample_sub["ID"], predictions], columns=[np.r_[["ID"], ["v" + str(i) for i in range(1, 61)]]])
sub_df["ID"] = sub_df["ID"].astype(int)
sub_df.to_csv('test_submission.csv', index=None)

In [139]:
sub_df

Unnamed: 0,ID,v1,v2,v3,v4,v5,v6,v7,v8,v9,...,v51,v52,v53,v54,v55,v56,v57,v58,v59,v60
0,10002,1735.154650,338.654799,1733.945627,338.942133,1735.310512,339.499853,1734.650127,340.105564,1736.692980,...,1741.536977,349.360742,1743.828189,349.778961,1740.944639,350.426293,1744.553706,350.962897,1741.523867,350.951357
1,10015,726.124713,1230.547001,725.077134,1230.135992,725.860099,1229.237264,725.632283,1229.980740,725.662231,...,725.162504,1224.285745,724.304063,1224.416170,724.419292,1223.590094,724.567961,1224.685135,724.055162,1224.853199
2,10019,572.624761,1245.055487,573.026634,1244.970067,573.069229,1245.186170,573.367788,1244.715931,573.703307,...,578.924870,1250.390074,579.726618,1250.394318,579.359849,1249.968399,579.784303,1251.154055,580.416134,1251.291811
3,10028,1700.241874,316.086480,1700.525460,316.488920,1700.323302,316.890609,1701.103918,317.393865,1702.043964,...,1709.319240,324.937266,1708.664393,325.239303,1709.991220,325.661801,1710.131299,326.044896,1709.573802,326.235561
4,1003,2111.054734,675.383201,2108.569109,674.975255,2109.229033,674.075435,2108.321365,673.097262,2105.109658,...,2087.494021,660.523827,2088.360739,660.092050,2084.870704,659.015644,2085.411491,658.537079,2086.287679,658.085215
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3195,9897,255.890604,805.689245,256.168465,805.650562,256.346062,805.646480,256.607315,805.495638,256.800517,...,261.586384,804.936101,261.802389,805.090005,262.007380,804.771894,262.185753,804.861020,262.504933,805.065756
3196,99,585.794282,1153.349509,585.975384,1153.354458,586.037156,1152.966003,586.233874,1152.309201,586.178494,...,588.930081,1148.492237,588.974821,1148.565545,589.194163,1148.095441,589.275727,1147.823628,589.819907,1148.155671
3197,9905,1759.216061,444.556901,1759.351763,444.932074,1759.062174,445.237485,1758.360328,445.547824,1758.130023,...,1753.131645,452.437027,1752.818720,452.611036,1752.950008,453.142609,1752.557617,453.319697,1751.935027,453.435647
3198,9910,574.463498,1289.823638,574.486791,1289.501212,574.313908,1289.067951,574.333836,1288.749067,574.467536,...,572.693804,1282.662556,572.708423,1282.348553,572.650770,1282.001251,572.421319,1282.006840,572.554907,1281.757542


In [None]:
# Ensemble Method 