In [1]:
cd /home/urwa/Documents/Projects/NYU Remote/UrbanTrafficPrediction/

/home/urwa/Documents/Projects/NYU Remote/UrbanTrafficPrediction


In [2]:
import pandas as pd
import numpy as np
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import matplotlib.pyplot as plt
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

In [3]:
from utils.lstm_utils import prepare_data_lstm, lstm_monthly_dataloader
from utils.lstm_utils import get_device

from utils.lstm_utils import evaluate_edge_monthy
from models.models import LSTM

from utils.lstm_utils import train_one_epoch, store_chekpoint, load_chekpoint
from utils.lstm_utils import evaluate_lstm_pipeline_model, run_inference

In [4]:
torch.manual_seed(2020)
np.random.seed(2020)

In [5]:
config = {'lr': 0.00034439316653688684,
 'layers': 3,
 'step_size': 11,
 'gamma': 0.761795969995615,
 'bptt': 19,
 'dropout': 0.1227497445640586}

In [6]:
device = get_device(cuda=True)
device

device(type='cpu')

In [7]:
data_path = '/home/urwa/Documents/Projects/NYU Remote/data/featureData/com_jfk.csv'
weights_path = '/home/urwa/Documents/Projects/NYU Remote/data/featureData/com_jfk_weights.csv'
test_data_path='/home/urwa/Documents/Projects/NYU Remote/data/featureData/jfk.csv'

exp_dir = '/home/urwa/Documents/Projects/NYU Remote/UrbanTrafficPrediction/data/lstm_12fold/jfk'

In [8]:
dataset, targetColumns, features_cols = prepare_data_lstm(data_path)

Raw Shape:  (8757, 113)
Cleaned Shape:  (8757, 38)
Target columns: 24
Feature coumns:  13


In [9]:
bptt = config['bptt']

R2List = []
EdgeR2List = []
residual_list = []


for m in range(1,13):
    
    print('-------------------------------------------------')
    print('-------------------------------------------------')
    print("Month: ", m)

    month_index  = pd.to_datetime(dataset.Date).dt.month == m
    testData = dataset[month_index]
    testData = testData[bptt:]
    date = testData['Date']
    hour = testData['Hour']
    
    train_inout_seq, test_inout_seq = lstm_monthly_dataloader(dataset,features_cols, targetColumns, m,
                                                              bptt, device)
    
    lstm_layers = config['layers']
    network_size = len(targetColumns)
    feat_size = len(features_cols)
    dropout = config['dropout']
    hidden_layer_size=100
    
    
    model = LSTM(feat_size, network_size, hidden_layer_size, lstm_layers, dropout).to(device)
    model.load_state_dict(torch.load(os.path.join(exp_dir, str(m)+'.pt'), 
                                     map_location=device))
    model.eval()
    residual, r2, rmse, mae = evaluate_lstm_pipeline_model(model, test_inout_seq, device)
    edge_res, edge_r2, edge_rmse, edge_mae = evaluate_edge_monthy(model, test_inout_seq, device, 
                                                                   targetColumns, weights_path, 
                                                              test_data_path, m, bptt)
    
    res_df = pd.DataFrame(residual)
    res_df.columns = targetColumns
    res_df['Date'] = testData['Date'].values
    res_df['Hour'] = testData['Hour'].values
    res_df = res_df[['Date', 'Hour'] + targetColumns]

    R2List.append(r2)
    EdgeR2List.append(edge_r2)
    residual_list.append(res_df)

-------------------------------------------------
-------------------------------------------------
Month:  1
train test split
train shape:  (8016, 38)
test shape:  (741, 38)
train feature tensor shape : torch.Size([8016, 13])
train target tensor shape : torch.Size([8016, 24])
test feature tensor shape : torch.Size([741, 13])
test target tensor shape : torch.Size([741, 24])

sequences
torch.Size([19, 13]) torch.Size([19, 24]) torch.Size([19, 24])
-------------------------------------------------
-------------------------------------------------
Month:  2
train test split
train shape:  (8085, 38)
test shape:  (672, 38)
train feature tensor shape : torch.Size([8085, 13])
train target tensor shape : torch.Size([8085, 24])
test feature tensor shape : torch.Size([672, 13])
test target tensor shape : torch.Size([672, 24])

sequences
torch.Size([19, 13]) torch.Size([19, 24]) torch.Size([19, 24])
-------------------------------------------------
------------------------------------------------

In [10]:
np.mean(R2List)

0.8551248067034599

In [11]:
np.mean(edge_r2)

0.537316842716203

In [12]:
all_res_df = pd.concat(residual_list)
all_res_df.head()

Unnamed: 0,Date,Hour,0.0,0.1,0.2,1.0,1.1,1.2,1.3,2.0,...,4.0,4.1,4.2,4.3,4.4,4.5,5.0,5.1,5.2,5.3
0,2018-01-01,22,20.328918,-8.924545,-41.586807,9.019356,-7.802254,9.949306,-29.105972,-3.065981,...,-0.000494,8.965343,12.226967,-6.22776,0.426897,-3.582016,-2.005474,-0.186229,-0.989357,-0.971815
1,2018-01-01,23,37.911087,-2.735558,6.350784,-0.249817,8.096428,1.964775,-4.059219,-5.352976,...,-0.000462,5.487869,9.29644,1.884888,2.886413,10.934929,0.028029,-0.216831,-0.990197,0.026117
2,2018-01-02,0,68.131332,10.995903,54.378342,29.573792,23.679344,5.784403,33.914406,7.880651,...,-0.00043,12.110622,42.003288,9.16448,0.681951,14.350838,-0.120802,0.502903,0.007681,0.017042
3,2018-01-02,1,-11.819832,-7.377456,-34.619598,1.791199,0.627048,-3.443614,-16.595188,0.069894,...,-2.8e-05,-0.969132,-4.02758,4.140221,1.327187,-5.748075,0.014358,0.106659,0.003759,-0.001335
4,2018-01-02,2,-12.291338,-10.166625,-34.618935,-9.256952,-6.627031,-0.996165,-11.161497,-1.699105,...,1.8e-05,-5.217205,0.921856,-3.431392,-0.120981,-11.890594,0.016052,-0.991999,-0.000932,-0.000745


In [13]:
all_res_df.shape

(8529, 26)

In [14]:
# all_res_df.to_csv(os.path.join(exp_dir,'residual.csv'))

In [15]:
a = torch.rand(5,2)
b = torch.rand(5,2)
x = torch.cat((a,b),axis=1)

TypeError: cat() got an unexpected keyword argument 'axis'

In [None]:
if torch.__version__.startswith('0.4'):
    x = torch.cat((a,b),dim=1)
else:
    x = torch.cat((a,b),axis=1)

In [18]:
import random
a = [1,2,3,4,5]
random.shuffle(a)
a

[4, 3, 5, 2, 1]