In [1]:
import os
# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# os.environ["CUDA_VISIBLE_DEVICES"] = "1"

import copy
import pickle
import time
from glob import glob

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from einops import rearrange,reduce,repeat
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import random_split
from tqdm import trange

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [2]:
class MyLSTM(nn.Module):
    """LSTM over the agent track, enriched by two transformers.

    The forward pass (1) attends from the flattened agent track to the
    ``others`` sequence, (2) attends from the enriched agent track to a
    projected lane representation, (3) concatenates both results onto the
    agent features and (4) feeds the result through an LSTM followed by a
    linear head applied at every time step.

    Args:
        input_size: feature size of the LSTM input, i.e. the last dimension
            of the tensor produced by the two concatenations in ``forward``.
        output_size: feature size produced by the final linear layer.
        hidden_dim: LSTM hidden size.
        n_layers: number of stacked LSTM layers.
        trans_dim: ``d_model`` of the agent/others transformer. The flattened
            agent track and the last dimension of ``others`` must both have
            this size.
        trans_nhead: number of attention heads of that transformer.
        trans_num_encoder_layers: encoder depth used by both transformers.
        trans_num_decoder_layers: decoder depth used by both transformers.
    """

    def __init__(self, input_size, output_size, hidden_dim, n_layers, trans_dim, trans_nhead, trans_num_encoder_layers, trans_num_decoder_layers):
        super(MyLSTM, self).__init__()

        # Kept as attributes because init_hidden needs them.
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        # NOTE: sub-modules are created in the original order so parameter
        # ordering (and therefore initialisation / optimizer state) is unchanged.

        # Projects the flattened lane input; it must contain 400 values per sample.
        self.lanefc = nn.Linear(400, 400)

        # Recurrent layer over the fused per-step features.
        self.lstm = nn.LSTM(input_size, hidden_dim, n_layers, batch_first=True)

        # Attention between the "others" sequence (source) and the agent (target).
        self.transformer = nn.Transformer(d_model = trans_dim, nhead=trans_nhead, num_encoder_layers=trans_num_encoder_layers, num_decoder_layers=trans_num_decoder_layers, batch_first=True)

        # Attention between lane tokens (source, width 8) and the agent (target, width 8).
        self.lane_trans = nn.Transformer(d_model = 8, nhead=4, num_encoder_layers=trans_num_encoder_layers, num_decoder_layers=trans_num_decoder_layers, batch_first=True)

        # Fully connected head applied to every LSTM output step.
        self.fc = nn.Linear(hidden_dim, output_size)

    def forward(self, x, others, lane):
        """Run the model.

        Args:
            x: agent features, batch-first 3-D tensor (batch, steps, features).
               ``steps * features`` must equal ``trans_dim``.
            others: source sequence for the first transformer; its last
               dimension must equal ``trans_dim``.
            lane: lane input holding 400 values per sample after flattening.

        Returns:
            ``(out, (h_t, c_t))`` where ``out`` is the linear head applied to
            every LSTM step and ``(h_t, c_t)`` is the final LSTM state.
        """
        batch_size = x.size(0)
        # Previously hard-coded as 19; reading it from the input gives the
        # same result for 19-step tracks and avoids a silent magic number.
        seq_len = x.size(1)

        # The whole agent track becomes a single target token of width trans_dim.
        agent_token = x.reshape((batch_size, -1)).unsqueeze(1)
        x_trans_other = self.transformer(others, agent_token)

        # Unfold the attended token back to per-step features and append them.
        x_res = torch.cat((x, x_trans_other.reshape(batch_size, seq_len, -1)), 2)

        x_lane = self.lanefc(lane.reshape(batch_size, -1))

        # 400 projected lane values -> 50 tokens of width 8 (d_model of lane_trans).
        x_trans_lane = self.lane_trans(x_lane.reshape(batch_size, 50, 8), x_res)

        x_res_all = torch.cat((x_res, x_trans_lane.reshape(batch_size, seq_len, -1)), 2)

        # Allocate the initial state next to the data instead of on a
        # notebook-level global device.
        h_t, c_t = self.init_hidden(batch_size, device=x_res_all.device, dtype=x_res_all.dtype)

        out, (h_t, c_t) = self.lstm(x_res_all, (h_t, c_t))

        out = self.fc(out)

        return out, (h_t, c_t)

    def init_hidden(self, batch_size, device=None, dtype=None):
        """Return zero-initialised ``(h_0, c_0)`` for the LSTM.

        Args:
            batch_size: number of sequences in the batch.
            device: where to allocate the state. Defaults to the device of
                this module's parameters.
            dtype: dtype of the state. Defaults to PyTorch's default dtype.

        Returns:
            Two tensors of shape ``(n_layers, batch_size, hidden_dim)``.
        """
        if device is None:
            device = next(self.parameters()).device
        state_shape = (self.n_layers, batch_size, self.hidden_dim)
        h_0 = torch.zeros(state_shape, device=device, dtype=dtype)
        c_0 = torch.zeros(state_shape, device=device, dtype=dtype)
        return h_0, c_0

In [3]:
class PVDataset(torch.utils.data.Dataset):
    """Dataset backed by four ``.npy`` arrays indexed in parallel.

    Each item is the tuple ``(x, other, lane, y)`` converted to float32
    tensors; ``y`` is flattened to one dimension before conversion.
    """

    def __init__(self, 
                 x_data_path,
                 other_data_path,
                 lane_data_path,
                 y_data_path):
        super(PVDataset, self).__init__()

        # Load everything eagerly, in the order X, other, lane, y.
        self.X, self.other, self.lane, self.y = (
            np.load(path)
            for path in (x_data_path, other_data_path, lane_data_path, y_data_path)
        )

    def __len__(self):
        # The number of samples is defined by the X array alone.
        n_samples = len(self.X)
        return n_samples

    def __getitem__(self, idx):
        def as_float_tensor(array):
            # numpy -> torch, then cast to float32
            return torch.from_numpy(array).float()

        return (
            as_float_tensor(self.X[idx]),
            as_float_tensor(self.other[idx]),
            as_float_tensor(self.lane[idx]),
            as_float_tensor(self.y[idx].flatten()),
        )

In [4]:
# --- Optimisation settings ---
batch_size = 1024
learning_rate = 0.01      # initial Adam learning rate
decay_rate = 0.95         # multiplicative LR decay (StepLR gamma)
num_epoch = 1000          # upper bound; the training loop may stop earlier

# --- LSTM sizes ---
in_dim = 16               # LSTM input feature size
out_dim = 2 * 30          # presumably 30 predicted (x, y) points, flattened
hidden_dim = 60
num_layers = 3

# --- Agent/others transformer sizes ---
trans_dim = 76
trans_nhead = 19
trans_num_encoder_layers = 2
trans_num_decoder_layers = 2

# --- Output artefact names, all derived from one run identifier ---
name = 'rnn_2_60_layer3'
model_name = '{}_model.pth'.format(name)
submission_name = '{}_submission.csv'.format(name)

In [5]:
# Collect the constructor arguments once so the call stays readable.
model_kwargs = dict(
    input_size=in_dim,
    output_size=out_dim,
    hidden_dim=hidden_dim,
    n_layers=num_layers,
    trans_dim=trans_dim,
    trans_nhead=trans_nhead,
    trans_num_encoder_layers=trans_num_encoder_layers,
    trans_num_decoder_layers=trans_num_decoder_layers,
)
# Original author's note: maximum number of hidden size is 120
model = MyLSTM(**model_kwargs).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Stepwise learning rate decay: lr is multiplied by decay_rate at every scheduler.step().
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=decay_rate)
loss_fun = nn.MSELoss()

In [6]:
def get_lr(optimizer):
    """Return the learning rate of the optimizer's first parameter group.

    Returns ``None`` when the optimizer has no parameter groups.
    """
    first_group = next(iter(optimizer.param_groups), None)
    if first_group is None:
        return None
    return first_group['lr']

def train_epoch(train_loader, model, optimizer, loss_function):
    """Train ``model`` for one pass over ``train_loader`` and return the epoch RMSE.

    Args:
        train_loader: iterable yielding ``(inp, others, lane, tgt)`` tensor batches.
        model: module called as ``model(inp, others, lane)`` that returns
            ``(pred, state)``; only the last time step of ``pred`` is scored.
        optimizer: optimizer stepping the model's parameters.
        loss_function: criterion called as ``loss_function(pred, tgt)``.
            Assumed to return the per-batch *mean* squared error (the default
            of ``nn.MSELoss``).

    Returns:
        Square root of the sample-weighted mean of the batch losses, rounded
        to 5 decimals. ``nan`` if the loader yields no samples.
    """
    model.train()

    # Move batches to wherever the model lives rather than to a notebook
    # global, so the function also works for models placed elsewhere.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    weighted_loss_sum = 0.0
    n_samples = 0
    for inp, others, lane, tgt in train_loader:
        inp = inp.to(run_device)
        others = others.to(run_device)
        lane = lane.to(run_device)
        tgt = tgt.to(run_device)

        pred, _ = model(inp, others, lane)
        # Keep the last time step. No squeeze here: the indexing already drops
        # the time axis, and squeezing would also drop a size-1 feature axis
        # and make the loss broadcast against tgt.
        pred = pred[:, -1, :]
        loss = loss_function(pred, tgt)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Weight each batch by its size so a smaller final batch does not
        # bias the epoch metric.
        batch_n = tgt.size(0)
        weighted_loss_sum += loss.item() * batch_n
        n_samples += batch_n

    if n_samples == 0:
        return float("nan")
    train_mse = round(np.sqrt(weighted_loss_sum / n_samples), 5)
    return train_mse

def eval_epoch(valid_loader, model, loss_function):
    """Evaluate ``model`` on ``valid_loader`` without gradients and return the RMSE.

    Args:
        valid_loader: iterable yielding ``(inp, others, lane, tgt)`` tensor batches.
        model: module called as ``model(inp, others, lane)`` that returns
            ``(pred, state)``; only the last time step of ``pred`` is scored.
        loss_function: criterion called as ``loss_function(pred, tgt)``.
            Assumed to return the per-batch *mean* squared error (the default
            of ``nn.MSELoss``).

    Returns:
        Square root of the sample-weighted mean of the batch losses, rounded
        to 5 decimals. ``nan`` if the loader yields no samples.
    """
    model.eval()

    # Move batches to wherever the model lives rather than to a notebook
    # global, so the function also works for models placed elsewhere.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    weighted_loss_sum = 0.0
    n_samples = 0
    with torch.no_grad():
        for inp, others, lane, tgt in valid_loader:
            inp = inp.to(run_device)
            others = others.to(run_device)
            lane = lane.to(run_device)
            tgt = tgt.to(run_device)

            pred, _ = model(inp, others, lane)
            # Keep the last time step. No squeeze here: the indexing already
            # drops the time axis, and squeezing would also drop a size-1
            # feature axis and make the loss broadcast against tgt.
            pred = pred[:, -1, :]
            loss = loss_function(pred, tgt)

            # Weight each batch by its size so a smaller final batch does not
            # bias the epoch metric.
            batch_n = tgt.size(0)
            weighted_loss_sum += loss.item() * batch_n
            n_samples += batch_n

    if n_samples == 0:
        return float("nan")
    valid_mse = round(np.sqrt(weighted_loss_sum / n_samples), 5)
    return valid_mse

In [7]:
# Per-epoch metric history and the best validation score seen so far.
train_rmse = []
valid_rmse = []
min_rmse = float("inf")

dataset = PVDataset('train_X.npy', 'train_other.npy', 'train_lane.npy', 'train_y.npy')

# 90/10 train/validation split. The validation size is derived by subtraction
# because int(0.9*N) + int(0.1*N) can be N-1, which makes random_split raise.
n_train = int(0.9 * len(dataset))
n_valid = len(dataset) - n_train

# The split is seeded, so it is identical on every call; build it (and the
# loaders) once instead of once per epoch.
train_set, valid_set = random_split(
    dataset=dataset,
    lengths=[n_train, n_valid],
    generator=torch.Generator().manual_seed(0)
)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid_set, batch_size=batch_size, shuffle=False)

for epoch in range(num_epoch):
    start = time.time()

    # train_epoch / eval_epoch switch the model between train and eval mode themselves.
    train_rmse.append(train_epoch(train_loader, model, optimizer, loss_fun))
    valid_rmse.append(eval_epoch(valid_loader, model, loss_fun))

    # Save the best model. A deep copy is taken so best_model is a real
    # snapshot; a plain assignment would keep tracking later (worse) epochs.
    # The checkpoint layout [model, epoch, lr] is what the reload cell unpacks.
    if valid_rmse[-1] < min_rmse:
        min_rmse = valid_rmse[-1]
        best_model = copy.deepcopy(model)
        torch.save([best_model, epoch, get_lr(optimizer)], model_name)

    end = time.time()

    # Early stopping: after 100 epochs, stop once the mean validation RMSE of
    # the last 5 epochs is no better than that of the 5 epochs before them.
    if (len(train_rmse) > 100 and np.mean(valid_rmse[-5:]) >= np.mean(valid_rmse[-10:-5])):
        break

    # Learning rate decay
    scheduler.step()

    print("Epoch {} | T: {:0.2f} | Train RMSE: {:0.5f} | Valid RMSE: {:0.5f}".format(epoch + 1, (end-start) / 60, train_rmse[-1], valid_rmse[-1]))

Epoch 1 | T: 1.45 | Train RMSE: 0.56617 | Valid RMSE: 0.51253
Epoch 2 | T: 1.43 | Train RMSE: 0.51623 | Valid RMSE: 0.38776
Epoch 3 | T: 1.40 | Train RMSE: 0.39363 | Valid RMSE: 0.38135
Epoch 4 | T: 1.39 | Train RMSE: 0.38577 | Valid RMSE: 0.28267
Epoch 5 | T: 1.41 | Train RMSE: 0.28923 | Valid RMSE: 0.24143


KeyboardInterrupt: 

In [8]:
# Reload the checkpoint written by the training loop: [model, epoch, learning rate].
# map_location remaps the stored tensors onto the current device, so a
# checkpoint saved on a GPU can also be opened on a CPU-only machine.
# The values are unpacked into named variables rather than `_`, which IPython
# uses for the last cell output.
# NOTE(review): the file is a fully pickled module, so only load checkpoints
# you trust. Recent PyTorch releases may additionally require
# weights_only=False for this kind of file; confirm for the installed version.
best_model, best_epoch, best_lr = torch.load(model_name, map_location=device)
best_model

MyLSTM(
  (lanefc): Linear(in_features=400, out_features=400, bias=True)
  (lstm): LSTM(16, 60, num_layers=3, batch_first=True)
  (transformer): Transformer(
    (encoder): TransformerEncoder(
      (layers): ModuleList(
        (0): TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=76, out_features=76, bias=True)
          )
          (linear1): Linear(in_features=76, out_features=2048, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (linear2): Linear(in_features=2048, out_features=76, bias=True)
          (norm1): LayerNorm((76,), eps=1e-05, elementwise_affine=True)
          (norm2): LayerNorm((76,), eps=1e-05, elementwise_affine=True)
          (dropout1): Dropout(p=0.1, inplace=False)
          (dropout2): Dropout(p=0.1, inplace=False)
        )
        (1): TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizabl

In [8]:
test_path = "./val_in"
test_pkl_list = glob(os.path.join(test_path, '*'))
test_pkl_list.sort()

# Fixed sizes used when building the model inputs.
N_AGENT_SLOTS = 60       # entries of car_mask / p_in / v_in that are scanned
MAX_OTHERS = 20          # rows of the "others" input (zero-padded / truncated)
OTHER_FEAT_DIM = 76      # flattened feature width of one other car
MAX_LANE_POINTS = 200    # rows of the lane input (zero-padded / truncated)

# Predict with the reloaded best checkpoint, not with whatever state `model`
# was left in when training stopped. eval() turns dropout off so predictions
# are deterministic.
best_model.eval()

test_preds = []
with torch.no_grad():  # inference only: no autograd graph needed
    for pkl_path in test_pkl_list:
        # NOTE: pickle.load can execute arbitrary code; only use trusted scene files.
        with open(pkl_path, 'rb') as f:
            scene = pickle.load(f)
        # The file is closed here; everything below works on the in-memory scene.

        # Row of the agent whose future is predicted.
        pred_id = np.where(scene["track_id"] == scene['agent_id'])[0][0]

        p_in_raw = scene['p_in'][pred_id]
        v_in_raw = scene['v_in'][pred_id]

        lane_scene = scene['lane']

        # Normalization bounds come from the lane points of this scene.
        min_vecs = np.min(lane_scene, axis = 0)
        max_vecs = np.max(lane_scene, axis = 0)

        # Positions: min-max scaled. Velocities: scaled to unit length
        # (zero vectors are left as zeros by dividing by 1).
        p_in_normalized = (p_in_raw - min_vecs)/(max_vecs - min_vecs)
        v_in_norm = np.linalg.norm(v_in_raw, axis=1, keepdims=True)
        v_in_norm = np.where(v_in_norm == 0.0, 1.0, v_in_norm)
        v_in_normalized = v_in_raw / v_in_norm
        inp = np.concatenate((p_in_normalized,v_in_normalized),axis=1)

        # All other present cars, in slot order.
        other_ids = [i for i in range(N_AGENT_SLOTS) if scene['car_mask'][i] and i != pred_id]
        if other_ids:
            p_in_other_raw = np.array([scene['p_in'][i] for i in other_ids])
            p_in_other_normalized = (p_in_other_raw- min_vecs)/(max_vecs - min_vecs)
            v_in_other_raw = np.array([scene['v_in'][i] for i in other_ids])
            v_in_other_norm = np.linalg.norm(v_in_other_raw, axis=2, keepdims=True)
            v_in_other_norm = np.where(v_in_other_norm == 0.0, 1.0, v_in_other_norm)
            v_in_other_normalized = v_in_other_raw / v_in_other_norm
            other = np.concatenate((p_in_other_normalized,v_in_other_normalized),axis=2)
            other = other.reshape((other.shape[0], -1))
        else:
            # A scene with no other cars used to crash on the empty array;
            # represent it as zero rows so the padding below fills everything.
            other = np.zeros((0, OTHER_FEAT_DIM))
        if len(other) < MAX_OTHERS:
            other = np.concatenate((other, np.zeros((MAX_OTHERS-len(other), OTHER_FEAT_DIM))), axis=0)
        other = other[:MAX_OTHERS]

        lane_scene = (lane_scene - min_vecs)/(max_vecs - min_vecs)
        if len(lane_scene) < MAX_LANE_POINTS:
            lane_scene = np.concatenate((lane_scene, np.zeros((MAX_LANE_POINTS-len(lane_scene), 2))), axis=0)
        lane_scene = lane_scene[:MAX_LANE_POINTS]

        # Add a batch dimension of 1 and move the inputs to the compute device.
        inp = torch.from_numpy(inp).float().to(device).unsqueeze(0)
        other = torch.from_numpy(other).float().to(device).unsqueeze(0)
        lane_scene = torch.from_numpy(lane_scene).float().to(device).unsqueeze(0)

        preds,_ = best_model(inp, other, lane_scene)
        preds = preds.squeeze(0)
        # Only the output at the last time step is used as the prediction.
        pred = preds[-1, :].detach().cpu().numpy()
        pred = rearrange(pred, "(b c) -> b c", c =2 )
        # De-Normalization !
        pred = pred * (max_vecs[:2] - min_vecs[:2]) +  min_vecs[:2]
        test_preds.append(pred)

KeyboardInterrupt: 

In [23]:
# # Submission Files
sample_sub = pd.read_csv('sample_submission.csv')

# One row per scene, with that scene's prediction flattened. Cast to float64
# so the written values do not depend on the dtype the model produced.
predictions = np.concatenate(test_preds).reshape(len(test_preds), -1).astype(np.float64)
if len(predictions) != len(sample_sub):
    raise ValueError(
        "Got {} predictions for {} rows in sample_submission.csv".format(len(predictions), len(sample_sub))
    )

# Build the frame from a flat list of column names (a list wrapping an array
# would create a MultiIndex) and attach the IDs as integers directly instead
# of routing them through a float array.
value_columns = ["v" + str(i) for i in range(1, 61)]
sub_df = pd.DataFrame(predictions, columns=value_columns)
sub_df.insert(0, "ID", sample_sub["ID"].astype('int').to_numpy())
sub_df.to_csv(submission_name, index=False)
sub_df.dtypes

ID       int32
v1     float64
v2     float64
v3     float64
v4     float64
        ...   
v56    float64
v57    float64
v58    float64
v59    float64
v60    float64
Length: 61, dtype: object