In [1]:
cd /home/urwa/Documents/Projects/NYU Remote/UrbanTrafficPrediction/

/home/urwa/Documents/Projects/NYU Remote/UrbanTrafficPrediction


In [2]:
import pandas as pd
import numpy as np
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import matplotlib.pyplot as plt
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

In [3]:
from utils.lstm_utils import prepare_data_lstm, lstm_monthly_dataloader
from utils.lstm_utils import get_device

from utils.lstm_utils import evaluate_edge_monthy
from models.models import LSTM

from utils.lstm_utils import train_one_epoch, store_chekpoint, load_chekpoint
from utils.lstm_utils import evaluate_lstm_pipeline_model, run_inference

In [4]:
torch.manual_seed(2020)
np.random.seed(2020)

In [5]:
config = {'lr': 0.00034439316653688684,
 'layers': 3,
 'step_size': 11,
 'gamma': 0.761795969995615,
 'bptt': 19,
 'dropout': 0.1227497445640586}

In [6]:
device = get_device(cuda=True)
device

device(type='cpu')

In [7]:
data_path = '/home/urwa/Documents/Projects/NYU Remote/data/featureData/com_lga.csv'
weights_path = '/home/urwa/Documents/Projects/NYU Remote/data/featureData/com_lga_weights.csv'
test_data_path='/home/urwa/Documents/Projects/NYU Remote/data/featureData/lga.csv'

exp_dir = '/home/urwa/Documents/Projects/NYU Remote/UrbanTrafficPrediction/data/lstm_12fold/lga'

In [8]:
dataset, targetColumns, features_cols = prepare_data_lstm(data_path)

Raw Shape:  (8757, 113)
Cleaned Shape:  (8757, 38)
Target columns: 24
Feature coumns:  13


In [20]:
bptt = config['bptt']

R2List = []
EdgeR2List = []
residual_list = []


for m in range(1,13):
    
    print('-------------------------------------------------')
    print('-------------------------------------------------')
    print("Month: ", m)

    month_index  = pd.to_datetime(dataset.Date).dt.month == m
    testData = dataset[month_index]
    testData = testData[bptt:]
    date = testData['Date']
    hour = testData['Hour']
    
    train_inout_seq, test_inout_seq = lstm_monthly_dataloader(dataset,features_cols, targetColumns, m,
                                                              bptt, device)
    
    lstm_layers = config['layers']
    network_size = len(targetColumns)
    feat_size = len(features_cols)
    dropout = config['dropout']
    hidden_layer_size=100
    
    
    model = LSTM(feat_size, network_size, hidden_layer_size, lstm_layers, dropout).to(device)
    model.load_state_dict(torch.load(os.path.join(exp_dir, str(m)+'.pt'), 
                                     map_location=device))
    model.eval()
    residual, r2, rmse, mae = evaluate_lstm_pipeline_model(model, test_inout_seq, device)
    edge_res, edge_r2, edge_rmse, edge_mae = evaluate_edge_monthy(model, test_inout_seq, device, 
                                                                   targetColumns, weights_path, 
                                                              test_data_path, m, bptt)
    
    res_df = pd.DataFrame(residual)
    res_df.columns = targetColumns
    res_df['Date'] = testData['Date'].values
    res_df['Hour'] = testData['Hour'].values
    res_df = res_df[['Date', 'Hour'] + targetColumns]

    R2List.append(r2)
    EdgeR2List.append(edge_r2)
    residual_list.append(res_df)

-------------------------------------------------
-------------------------------------------------
Month:  1
train test split
train shape:  (8016, 38)
test shape:  (741, 38)
train feature tensor shape : torch.Size([8016, 13])
train target tensor shape : torch.Size([8016, 24])
test feature tensor shape : torch.Size([741, 13])
test target tensor shape : torch.Size([741, 24])

sequences
torch.Size([19, 13]) torch.Size([19, 24]) torch.Size([19, 24])
-------------------------------------------------
-------------------------------------------------
Month:  2
train test split
train shape:  (8085, 38)
test shape:  (672, 38)
train feature tensor shape : torch.Size([8085, 13])
train target tensor shape : torch.Size([8085, 24])
test feature tensor shape : torch.Size([672, 13])
test target tensor shape : torch.Size([672, 24])

sequences
torch.Size([19, 13]) torch.Size([19, 24]) torch.Size([19, 24])
-------------------------------------------------
------------------------------------------------

In [21]:
np.mean(R2List)

0.9128593139648222

In [22]:
np.mean(edge_r2)

0.7266998584211612

In [23]:
all_res_df = pd.concat(residual_list)
all_res_df.head()

Unnamed: 0,Date,Hour,0.0,0.1,0.2,1.0,1.1,1.2,1.3,2.0,...,4.0,4.1,4.2,4.3,4.4,4.5,5.0,5.1,5.2,5.3
0,2018-01-01,22,-92.709473,-41.009491,-99.30864,-6.500353,1.221603,-1.610439,-31.26799,-2.661076,...,0.002633,-10.725203,-4.054335,-12.828392,0.787038,-17.905365,-0.003889,0.000402,-0.005663,0.012074
1,2018-01-01,23,-50.820511,-31.279408,-66.43222,12.491123,11.490036,-6.57434,-3.7966,-2.247045,...,0.003285,1.816553,-4.226374,-16.757883,0.820769,4.57975,-1.003454,-0.00016,-0.006927,0.013991
2,2018-01-02,0,-15.074768,-10.024288,-16.041382,5.571819,12.289932,-7.939967,-11.606377,1.376796,...,0.000697,2.412691,-3.279255,-1.33511,0.645659,4.530804,-0.003008,-1.001099,-0.007398,0.013192
3,2018-01-02,1,0.474425,1.482676,4.821479,0.336904,0.422757,0.949745,3.762868,0.641116,...,-0.000359,-0.465141,0.289057,2.292948,-0.021853,0.447434,-0.001835,-0.000214,-0.002234,0.000153
4,2018-01-02,2,-0.650693,-0.957618,-5.542782,0.537593,-0.786136,0.118516,1.101385,-0.770226,...,-0.000192,-2.772754,-1.297775,-0.380405,-1.006727,-1.178815,7e-06,0.000224,0.000296,0.000102


In [25]:
all_res_df.shape

(8529, 26)

In [30]:
(24*365 - 8529) / 12

19.25

In [31]:
all_res_df.to_csv(os.path.join(exp_dir,'residual.csv'))