## Install the package dependencies before running this notebook

In [2]:
import torch
from torch.utils.data import Dataset, DataLoader
import os, os.path 
import numpy 
import pickle
from glob import glob
import math

"""
    number of trajectories in each city
    # austin --  train: 43041 test: 6325 
    # miami -- train: 55029 test:7971
    # pittsburgh -- train: 43544 test: 6361
    # dearborn -- train: 24465 test: 3671
    # washington-dc -- train: 25744 test: 3829
    # palo-alto -- train:  11993 test:1686

    trajectories sampled at 10HZ rate, input 5 seconds, output 6 seconds
    
"""

'\n    number of trajectories in each city\n    # austin --  train: 43041 test: 6325 \n    # miami -- train: 55029 test:7971\n    # pittsburgh -- train: 43544 test: 6361\n    # dearborn -- train: 24465 test: 3671\n    # washington-dc -- train: 25744 test: 3829\n    # palo-alto -- train:  11993 test:1686\n\n    trajectories sampled at 10HZ rate, input 5 seconds, output 6 seconds\n    \n'

## Create a `torch.utils.data.Dataset` class for the training dataset

In [5]:
from glob import glob
import pickle
import numpy as np
import torch.nn as nn
import torch.nn.functional as F

ROOT_PATH = "./"

cities = ["austin", "miami", "pittsburgh", "dearborn", "washington-dc", "palo-alto"]
splits = ["train", "test"]

def get_city_trajectories(city="palo-alto", split="train", normalized=False, root_path=None):
    """Load the pickled trajectories of one city for the requested split.

    The labelled data on disk lives under ``<root>/train``. It is divided
    here into a training part (first 80% of the samples) and a validation
    part (remaining 20%). Any other split name is read from
    ``<root>/<split>`` and is treated as unlabelled.

    Args:
        city: city name used as the file-name prefix
            (``<city>_inputs`` / ``<city>_outputs``).
        split: ``"train"`` (first 80% of the train files), ``"val"``
            (last 20% of the train files), or any other directory name
            (e.g. ``"test"``) whose ``<city>_inputs`` file is loaded in full.
        normalized: currently unused; kept so existing callers keep working.
        root_path: directory containing the split folders. Defaults to the
            module-level ``ROOT_PATH`` when ``None``.

    Returns:
        ``(inputs, outputs)`` as numpy arrays. ``outputs`` is ``None`` for
        every split other than ``"train"`` and ``"val"``.

    Raises:
        FileNotFoundError: if the expected pickle file does not exist.
    """
    base_dir = ROOT_PATH if root_path is None else root_path
    # The validation split is carved out of the training files.
    folder = "train" if split == "val" else split

    def _load(kind):
        """Read ``<city>_<kind>`` from the split folder as a numpy array."""
        path = os.path.join(base_dir, folder, city + "_" + kind)
        # NOTE(security): pickle.load can execute arbitrary code; only use
        # this on data files from a trusted source.
        with open(path, "rb") as fh:  # context manager closes the handle
            return np.asarray(pickle.load(fh))

    inputs = _load("inputs")
    if split not in ("train", "val"):
        # Unlabelled split: there is no outputs file to read.
        return inputs, None

    outputs = _load("outputs")
    # The 80/20 boundary is derived from the number of input samples and
    # applied to both arrays so inputs and outputs stay aligned.
    cut = int(len(inputs) * 0.8)
    if split == "train":
        return inputs[:cut], outputs[:cut]
    return inputs[cut:], outputs[cut:]

class ArgoverseDataset(Dataset):
    """Dataset of per-city Argoverse trajectories.

    Items are ``(input, output)`` pairs built from the arrays returned by
    ``get_city_trajectories``. When the loader returns no targets
    (``outputs is None``, as it does for ``split="test"``), each item is the
    input trajectory alone instead of a pair.

    Args:
        city: city name forwarded to ``get_city_trajectories``.
        split: split name forwarded to ``get_city_trajectories``.
        transform: optional callable applied to every item before it is
            returned.
    """
    def __init__(self, city: str, split:str, transform=None):
        super().__init__()
        self.transform = transform

        self.inputs, self.outputs = get_city_trajectories(city=city, split=split, normalized=False)

    def __len__(self):
        """Number of trajectories in the split."""
        return len(self.inputs)

    def __getitem__(self, idx):
        """Return item ``idx``, passed through ``transform`` when one is set."""
        if self.outputs is None:
            # Unlabelled split: indexing ``None`` would raise a TypeError,
            # so only the input trajectory is returned.
            data = self.inputs[idx]
        else:
            data = (self.inputs[idx], self.outputs[idx])

        if self.transform:
            data = self.transform(data)

        return data

# Initialize the training dataset for a single city
city, split = 'palo-alto', 'train'
train_dataset = ArgoverseDataset(city=city, split=split)

## Create a DataLoader class for training

In [6]:
# Number of trajectories per mini-batch
batch_sz = 32
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_sz)

## Sample a batch of data and visualize 

In [4]:
import matplotlib.pyplot as plt
# import random

# def show_sample_batch(sample_batch):
#     """visualize the trajectory for a batch of samples"""
#     inp, out = sample_batch
#     batch_sz = inp.size(0)
#     agent_sz = inp.size(1)
    
#     fig, axs = plt.subplots(1, batch_sz, figsize=(15, 3), facecolor='w', edgecolor='k')
#     fig.subplots_adjust(hspace = .5, wspace=.001)
#     axs = axs.ravel()   
#     for i in range(batch_sz):
#         axs[i].xaxis.set_ticks([])
#         axs[i].yaxis.set_ticks([])
        
        # first two feature dimensions are (x,y) positions
#         axs[i].scatter(inp[i,:,0], inp[i,:,1])
#         axs[i].scatter(out[i,:,0], out[i,:,1])

        
# for i_batch, sample_batch in enumerate(train_loader):
#     # inp[i] is a scene with 50 coordinates, input[i, j] is a coordinate
#     # gotta loop through each scene in the batch
#     inp, out = sample_batch # inp: (batch size, 50, 2), out: (batch size, 60, 2)
#     """
#     TODO:
#       implement your Deep learning model
#       implement training routine
#     """
#     show_sample_batch(sample_batch)
#     break

## I guess scikit-learn is our savior :) — per-city linear regression baseline

In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LinearRegression
import pandas as pd

In [100]:
cities = ["austin", "miami", "pittsburgh", "dearborn", "washington-dc", "palo-alto"]
# Submission columns: a row ID followed by the 120 flattened output values.
header = ['ID'] + ['v' + str(i) for i in range(0, 120)]
loss_fct = nn.MSELoss()

# One DataFrame of test-set predictions per city; concatenated once after the
# loop instead of growing a frame (or the CSV file) city by city.
submission_parts = []

for city in cities:
    x_train, y_train = get_city_trajectories(city = city, split='train')
    x_val, y_val = get_city_trajectories(city = city, split='val')
    # The test split has no targets (the loader returns None for them).
    x_test, _ = get_city_trajectories(city = city, split='test')

    # Flatten every trajectory into a single feature row for the model:
    # 100 values per input sample, 120 values per output sample.
    x_train = x_train.reshape(-1, 100)
    y_train = y_train.reshape(-1, 120)
    x_val = x_val.reshape(-1, 100)
    y_val = y_val.reshape(-1, 120)
    x_test = x_test.reshape(-1, 100)

    ## Fit model
    lr = LinearRegression().fit(x_train, y_train)

    ## Try MLP???
    # NOTE: MLPClassifier is a classifier; for this regression target the
    # scikit-learn counterpart would be MLPRegressor.
    #mlp = MLPClassifier(random_state=1, max_iter=300).fit(x_train, y_train)

    ## Train Set
    train_preds = torch.from_numpy(lr.predict(x_train))
    y_out = torch.from_numpy(y_train)

    ## Validation Set
    val_preds = torch.from_numpy(lr.predict(x_val))
    val_out = torch.from_numpy(y_val)

    print('Training Loss: {}'.format(loss_fct(train_preds, y_out).item()))
    print('Validation Loss: {}'.format(loss_fct(val_preds, val_out).item()))

    ## Predictions for the submission: one row per test trajectory, made of
    ## the "<index>_<city>" ID followed by the 120 predicted values.
    preds = lr.predict(x_test)
    city_rows = pd.DataFrame(preds, columns=header[1:])
    city_rows.insert(0, 'ID', ['{}_{}'.format(i, city) for i in range(len(x_test))])
    submission_parts.append(city_rows)

df = pd.concat(submission_parts, ignore_index=True)

## Write Submission
# The file is written once and overwritten on every run, so re-executing the
# cell cannot append duplicate rows to an older submission.
try:
    df.to_csv('./submission.csv', index=False)
except OSError as err:
    # Keep the best-effort behaviour, but report why the write failed.
    print('Error! Unsuccessful write... {}'.format(err))




Training Loss: 22.469596603067174
Validation Loss: 23.394195340933088
Training Loss: 19.016590437268718
Validation Loss: 19.13593787664996
Training Loss: 19.10039639915304
Validation Loss: 19.62291834399323
Training Loss: 23.703932780832723
Validation Loss: 23.530798245589242
Training Loss: 20.711875044731944
Validation Loss: 22.019604721054357
Training Loss: 23.15542990140436
Validation Loss: 25.237217393145222
