## Install the package dependencies before running this notebook

In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import os, os.path 
import numpy 
import pickle
from glob import glob
import math

"""
    number of trajectories in each city
    # austin --  train: 43041 test: 6325 
    # miami -- train: 55029 test:7971
    # pittsburgh -- train: 43544 test: 6361
    # dearborn -- train: 24465 test: 3671
    # washington-dc -- train: 25744 test: 3829
    # palo-alto -- train:  11993 test:1686

    trajectories sampled at 10HZ rate, input 5 seconds, output 6 seconds
    
"""

'\n    number of trajectories in each city\n    # austin --  train: 43041 test: 6325 \n    # miami -- train: 55029 test:7971\n    # pittsburgh -- train: 43544 test: 6361\n    # dearborn -- train: 24465 test: 3671\n    # washington-dc -- train: 25744 test: 3829\n    # palo-alto -- train:  11993 test:1686\n\n    trajectories sampled at 10HZ rate, input 5 seconds, output 6 seconds\n    \n'

## Create a Torch.Dataset class for the training dataset

In [59]:
from glob import glob
import pickle
import numpy as np
import torch.nn as nn
import torch.nn.functional as F

ROOT_PATH = "./"

cities = ["austin", "miami", "pittsburgh", "dearborn", "washington-dc", "palo-alto"]
splits = ["train", "test"]

def get_city_trajectories(city="palo-alto", split="train", normalized=False):

    
    outputs = None
    
    if split=="train":
        f_in = ROOT_PATH + split + "/" + city + "_inputs"
        inputs = pickle.load(open(f_in, "rb"))
        n = len(inputs)
        inputs = np.asarray(inputs)[:int(n * 0.8)]
        
        f_out = ROOT_PATH + split + "/" + city + "_outputs"
        outputs = pickle.load(open(f_out, "rb"))
        outputs = np.asarray(outputs)[:int(n * 0.8)]
        
    elif split == 'val':
        f_in = ROOT_PATH + 'train' + "/" + city + "_inputs"
        inputs = pickle.load(open(f_in, "rb"))
        n = len(inputs)
        inputs = np.asarray(inputs)[int(n * 0.8):]
        
        f_out = ROOT_PATH + 'train' + "/" + city + "_outputs"
        outputs = pickle.load(open(f_out, "rb"))
        outputs = np.asarray(outputs)[int(n * 0.8):]
    
    elif split == 'test':
        f_in = ROOT_PATH + split + "/" + city + "_inputs"
        inputs = pickle.load(open(f_in, "rb"))
        n = len(inputs)
        inputs = np.asarray(inputs)

    if normalized:
        ## Normalize based on initial input coordinates
        ## Loop through agents
        all_normalized_inp = np.zeros((inputs.shape[0], 50, 2))
        all_normalized_out = np.zeros((inputs.shape[0], 60, 2))
        for i in range(len(inputs)):
            dat = inputs[i]
            x_0 = dat[0][0]
            y_0 = dat[0][1]
            temp_x = (dat[:, 0] - x_0)
            temp_y = (dat[:, 1] - y_0)
            normalized_inp = np.dstack([temp_x, temp_y])
            all_normalized_inp[i] = normalized_inp
            
            if split != 'test':
                dat = outputs[i]
                x_0 = dat[0][0]
                y_0 = dat[0][1]
                temp_x = (dat[:, 0] - x_0)
                temp_y = (dat[:, 1] - y_0)
                normalized_out = np.dstack([temp_x, temp_y])
                all_normalized_out[i] = normalized_out
    inputs = all_normalized_inp
    if split != 'test':
        outputs = all_normalized_out
    return inputs, outputs

class ArgoverseDataset(Dataset):
    """Dataset class for Argoverse"""
    def __init__(self, city: str, split:str, transform=None):
        super(ArgoverseDataset, self).__init__()
        self.transform = transform

        self.inputs, self.outputs = get_city_trajectories(city=city, split=split, normalized=True)

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        
        if self.outputs is not None:
            data = (self.inputs[idx], self.outputs[idx])
        else:
            data = self.inputs[idx]
            
        if self.transform:
            data = self.transform(data)

        return data

# intialize a dataset
city = 'palo-alto' 
split = 'train'
train_dataset  = ArgoverseDataset(city = city, split = split)

## Create a DataLoader class for training

In [3]:
batch_sz = 32  # batch size 
train_loader = DataLoader(train_dataset, batch_size=batch_sz)

## Sample a batch of data and visualize 

In [4]:
import matplotlib.pyplot as plt
# import random

# def show_sample_batch(sample_batch):
#     """visualize the trajectory for a batch of samples"""
#     inp, out = sample_batch
#     batch_sz = inp.size(0)
#     agent_sz = inp.size(1)
    
#     fig, axs = plt.subplots(1, batch_sz, figsize=(15, 3), facecolor='w', edgecolor='k')
#     fig.subplots_adjust(hspace = .5, wspace=.001)
#     axs = axs.ravel()   
#     for i in range(batch_sz):
#         axs[i].xaxis.set_ticks([])
#         axs[i].yaxis.set_ticks([])
        
        # first two feature dimensions are (x,y) positions
#         axs[i].scatter(inp[i,:,0], inp[i,:,1])
#         axs[i].scatter(out[i,:,0], out[i,:,1])

        
# for i_batch, sample_batch in enumerate(train_loader):
#     # inp[i] is a scene with 50 coordinates, input[i, j] is a coordinate
#     # gotta loop through each scene in the batch
#     inp, out = sample_batch # inp: (batch size, 50, 2), out: (batch size, 60, 2)
#     """
#     TODO:
#       implement your Deep learning model
#       implement training routine
#     """
#     show_sample_batch(sample_batch)
#     break

## I guess SKLearn is our savior :)

In [5]:
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LinearRegression
import pandas as pd

In [6]:
cities = ["austin", "miami", "pittsburgh", "dearborn", "washington-dc", "palo-alto"] 
header = ['ID'] + ['v' + str(i) for i in range(0, 120)]
df = pd.DataFrame(columns=header)

for city in cities:
    output = ''
    x_train, y_train = get_city_trajectories(city = city, split='train')
    x_val, y_val = get_city_trajectories(city = city, split='val')
    x_test, y_test = get_city_trajectories(city = city, split='test')
    
    # Reshape for model
    x_train = x_train.reshape(-1,100)
    y_train = y_train.reshape(-1,120)
    x_test = x_test.reshape(-1,100)
    x_val = x_val.reshape(-1,100)
    y_val = y_val.reshape(-1,120)
    
    ## Fit model
    lr = LinearRegression().fit(x_train, y_train)
    
    ## Try MLP???
    #mlp = MLPClassifier(random_state=1, max_iter=300).fit(x_train, y_train)
    
    ## Train Set
    train_preds = lr.predict(x_train)
    train_preds = torch.from_numpy(train_preds)
    y_out = torch.from_numpy(y_train)
    
    ## Validation Set
    val_preds = torch.from_numpy(lr.predict(x_val))
    val_out = torch.from_numpy(y_val)
    loss_fct = nn.MSELoss()
    
    print('Training Loss: {}'.format(loss_fct(train_preds, y_out).item()))
    print('Validation Loss: {}'.format(loss_fct(val_preds, val_out).item()))

    
    ## Predictions + Write Submission
    preds = lr.predict(x_test)
    indices = range(len(x_test))
    row = ['{}_{}'.format(i, city) for i in indices]
    output += ','.join(row) + '\n'
    try:
        with open('./submission.csv', 'a') as f:
            f.write(output)
    except:
        print('Error! Unsuccessful write...')




Training Loss: 22.469596603067284
Validation Loss: 23.394195340919037
Training Loss: 19.01659043726902
Validation Loss: 19.13593787667287
Training Loss: 19.100396399152963
Validation Loss: 19.622918344000684
Training Loss: 23.703932780832538
Validation Loss: 23.53079824564754
Training Loss: 20.71187504473186
Validation Loss: 22.01960472105625
Training Loss: 23.155429901404535
Validation Loss: 25.237217393136


## OR IS PYTORCH!?

In [67]:
device =  torch.device("cuda" if torch.cuda.is_available() else "cpu")
batch_sz = 64

header = ['ID'] + ['v' + str(i) for i in range(0, 120)] 
with open('./lr_pytorch_submission.csv', 'w') as f:
    f.write(','.join(header) + '\n')
    
    
lr = nn.Sequential(nn.Linear(100,120)).to(device)
loss = nn.MSELoss()
optimizer = torch.optim.Adam(lr.parameters(), lr=0.001)    
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 50, gamma=0.9)

for city in cities:
    #lr = nn.Sequential(nn.Linear(100,120)).to(device)
    #loss = nn.MSELoss()
    optimizer = torch.optim.Adam(lr.parameters(), lr=0.0001)    
    
    
    training_data = ArgoverseDataset(city=city, split='train')
    train_loader = DataLoader(training_data, batch_size=batch_sz)
    val_data = ArgoverseDataset(city=city, split='val')
    val_loader = DataLoader(val_data, batch_size=batch_sz)
    test_data = ArgoverseDataset(city=city, split='test')
    test_loader = DataLoader(test_data, batch_size=1)
    
    train_losses = []
    val_losses = []
    
    num_epochs=100
    for epoch in range(num_epochs):
        for i_batch, batch in enumerate(train_loader):
            inp, out = batch
            inp = inp.float().to(device)
            out = out.float().to(device)
            inp = inp.view(inp.shape[0] ,inp.shape[1] * 2)
            pred = lr(inp)
            pred = pred.reshape(pred.shape[0], 60, 2)
            t_l = loss(pred, out)
            optimizer.zero_grad()
            t_l.backward()
            optimizer.step()
        train_losses.append(t_l.item())
        for i_batch, batch in enumerate(val_loader):
                with torch.no_grad():
                    inp, out = batch
                    inp = inp.float().to(device)
                    out = out.float().to(device)
                    inp = inp.view(inp.shape[0] ,inp.shape[1] * 2)
                    
                    pred = lr(inp)
                    pred = pred.reshape(pred.shape[0], 60, 2)

                    v_l = loss(pred, out)
        print('epoch: {}, training loss: {}, validation loss: {}'.format(epoch + 1, t_l, v_l))
        scheduler.step()     
    
    scene = 0
    output = ''
    with torch.no_grad():
        for i_batch, batch in enumerate(test_loader):
            inp = batch
            inp = inp.float().to(device)
            inp = inp.view(inp.shape[0] ,inp.shape[1] * 2)
            
            preds = lr(inp)
            preds = preds.reshape(preds.shape[0], 60, 2)
            flat = preds[0].flatten().cpu().tolist()
            
            row = ['{}_{}'.format(scene, city)] + flat
            row = [str(i) for i in row]
            output += ','.join(row) + '\n'
            
            scene += 1
    
    try:
        with open('./lr_pytorch_submission.csv', 'a') as f:
            f.write(output)
        print('Predictions for {} generated!'.format(city))
    except:
        print('Error! Unsuccessful write...')
        
    print('Done for {}'.format(city))
    
    

epoch: 1, training loss: 109.05902099609375, validation loss: 120.27942657470703
epoch: 2, training loss: 100.95133972167969, validation loss: 108.87681579589844
epoch: 3, training loss: 97.2458724975586, validation loss: 104.93876647949219
epoch: 4, training loss: 93.10277557373047, validation loss: 100.39427185058594
epoch: 5, training loss: 88.6928482055664, validation loss: 95.49293518066406
epoch: 6, training loss: 84.19279479980469, validation loss: 90.42538452148438
epoch: 7, training loss: 79.76187133789062, validation loss: 85.36431884765625
epoch: 8, training loss: 75.53534698486328, validation loss: 80.46369934082031
epoch: 9, training loss: 71.61439514160156, validation loss: 75.84564971923828
epoch: 10, training loss: 68.0610580444336, validation loss: 71.59209442138672
epoch: 11, training loss: 64.90050506591797, validation loss: 67.74463653564453
epoch: 12, training loss: 62.128639221191406, validation loss: 64.31094360351562
epoch: 13, training loss: 59.72176742553711, 

epoch: 3, training loss: 44.19803237915039, validation loss: 26.55101203918457
epoch: 4, training loss: 44.117652893066406, validation loss: 26.440778732299805
epoch: 5, training loss: 44.04559326171875, validation loss: 26.335674285888672
epoch: 6, training loss: 43.97789764404297, validation loss: 26.23484230041504
epoch: 7, training loss: 43.912315368652344, validation loss: 26.137781143188477
epoch: 8, training loss: 43.84754180908203, validation loss: 26.04416275024414
epoch: 9, training loss: 43.78285217285156, validation loss: 25.95372772216797
epoch: 10, training loss: 43.717864990234375, validation loss: 25.866268157958984
epoch: 11, training loss: 43.65241622924805, validation loss: 25.781574249267578
epoch: 12, training loss: 43.586448669433594, validation loss: 25.699478149414062
epoch: 13, training loss: 43.519989013671875, validation loss: 25.619800567626953
epoch: 14, training loss: 43.45310974121094, validation loss: 25.542383193969727
epoch: 15, training loss: 43.38589

epoch: 4, training loss: 49.06246566772461, validation loss: 43.87137985229492
epoch: 5, training loss: 48.93068313598633, validation loss: 44.11536407470703
epoch: 6, training loss: 48.8128547668457, validation loss: 44.32532501220703
epoch: 7, training loss: 48.7057991027832, validation loss: 44.506473541259766
epoch: 8, training loss: 48.60726547241211, validation loss: 44.66313171386719
epoch: 9, training loss: 48.51560592651367, validation loss: 44.79899597167969
epoch: 10, training loss: 48.42955780029297, validation loss: 44.91717529296875
epoch: 11, training loss: 48.34819793701172, validation loss: 45.02030563354492
epoch: 12, training loss: 48.27081298828125, validation loss: 45.1106071472168
epoch: 13, training loss: 48.196800231933594, validation loss: 45.18996047973633
epoch: 14, training loss: 48.12571716308594, validation loss: 45.25996398925781
epoch: 15, training loss: 48.05718994140625, validation loss: 45.321964263916016
epoch: 16, training loss: 47.99092102050781, v

epoch: 6, training loss: 35.05636215209961, validation loss: 21.379932403564453
epoch: 7, training loss: 34.996116638183594, validation loss: 21.42118263244629
epoch: 8, training loss: 34.94198989868164, validation loss: 21.46070098876953
epoch: 9, training loss: 34.893131256103516, validation loss: 21.498374938964844
epoch: 10, training loss: 34.84881591796875, validation loss: 21.534151077270508
epoch: 11, training loss: 34.80845260620117, validation loss: 21.568008422851562
epoch: 12, training loss: 34.77152633666992, validation loss: 21.599950790405273
epoch: 13, training loss: 34.737606048583984, validation loss: 21.630001068115234
epoch: 14, training loss: 34.706329345703125, validation loss: 21.658203125
epoch: 15, training loss: 34.6773681640625, validation loss: 21.684595108032227
epoch: 16, training loss: 34.65047073364258, validation loss: 21.70924186706543
epoch: 17, training loss: 34.62538146972656, validation loss: 21.732196807861328
epoch: 18, training loss: 34.601909637

KeyboardInterrupt: 