In [1]:
# Switch the working directory to the repository root; the `models.*` / `utils.*`
# imports below are presumably resolved relative to it.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
cd /home/urwa/Documents/side_projects/urban/UrbanTrafficPrediction/

/home/urwa/Documents/side_projects/urban/UrbanTrafficPrediction


In [2]:
import pandas as pd
import numpy as np
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import matplotlib.pyplot as plt
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

In [3]:
from models.models import LSTM_Pipeline
from utils.lstm_utils import get_device, create_data_sequences, prepare_data_tensors, train_test_split_monthly
from utils.lstm_utils import prepare_data_lstm
from utils.lstm_utils import get_community_attachment_matix
from utils.lstm_utils import evaluate_lstm_pipeline_model

In [4]:
# Seed torch first, then NumPy, with one shared value so the random draws made
# later in the notebook start from a fixed state.
SEED = 2020
for seed_rng in (torch.manual_seed, np.random.seed):
    seed_rng(SEED)

In [5]:
# Hyper-parameters used by the fine-tuning run below.
config = dict(
    lr=0.00034439316653688684,   # Adam learning rate (rescaled in the next cell)
    layers=3,                    # lstm_layers passed to LSTM_Pipeline
    step_size=11,                # StepLR: epochs between decays
    gamma=0.761795969995615,     # StepLR: multiplicative decay factor
    bptt=19,                     # window length passed to create_data_sequences
    dropout=0.1227497445640586,  # dropout passed to LSTM_Pipeline
)

In [6]:
# Fine-tuning starts from a decayed learning rate: the base rate times gamma**4.
# NOTE(review): presumably mirrors four StepLR decays applied during
# pretraining — confirm the exponent.
# The untouched base rate is kept under 'base_lr' so that re-running this cell
# yields the same value instead of compounding the decay on every execution.
config.setdefault('base_lr', config['lr'])
config['lr'] = config['base_lr'] * config['gamma'] ** 4
config['lr']

0.00011598697153799081

In [7]:
# Device handed to the data-preparation, model and evaluation calls below.
# cuda=False is requested here; the recorded output shows a CPU device.
device = get_device(cuda=False)
device

device(type='cpu')

In [8]:
# Load the JFK feature table together with the names of its target and feature
# columns (258 targets / 13 features per the recorded output).
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
dataset, targetColumns, features_cols = prepare_data_lstm('/home/urwa/Documents/side_projects/urban/data/featureData/jfk.csv')

Raw Shape:  (8757, 1049)
Cleaned Shape:  (8757, 272)
Target columns: 258
Feature coumns:  13


In [9]:
# Preview the first rows of the loaded table (Date, Hour, per-zone columns, weather columns).
dataset.head()

Unnamed: 0,Date,Hour,1,10,100,101,102,106,107,108,...,maxtemp,mintemp,avgtemp,departure,hdd,cdd,participation,newsnow,snowdepth,ifSnow
0,2018-01-01,3,0,0,0,0,0,0,0,0,...,18,7,12.5,-21.2,52,0,0.0,0.0,0,0
1,2018-01-01,4,0,3,0,0,1,0,0,1,...,18,7,12.5,-21.2,52,0,0.0,0.0,0,0
2,2018-01-01,5,0,4,0,0,1,2,3,1,...,18,7,12.5,-21.2,52,0,0.0,0.0,0,0
3,2018-01-01,6,0,0,1,2,0,3,0,0,...,18,7,12.5,-21.2,52,0,0.0,0.0,0,0
4,2018-01-01,7,3,4,2,0,0,0,9,0,...,18,7,12.5,-21.2,52,0,0.0,0.0,0,0


In [10]:
# CSV passed to get_community_attachment_matix to build the attachment matrix.
# NOTE(review): absolute, machine-specific path.
zone_to_comm_file = '/home/urwa/Documents/side_projects/urban/UrbanTemporalNetworks/Data/ZonetoComm.csv'

In [11]:
# State dict loaded into every per-month model before fine-tuning starts.
# NOTE(review): absolute, machine-specific path.
pretrained_weights = '/home/urwa/Documents/side_projects/urban/urban_traffic_prediciton/pipeline/fix_attachment/jfk.pt'

In [12]:
def train_one_epoch(model, optimizer, loss_function, train_inout_seq):
    """Run one optimisation pass over every training sequence.

    Args:
        model: network exposing ``train()``, ``initialize_hidden_cell(device)``
            and a forward call ``model(seq, feat)``.
        optimizer: optimizer; zeroed and stepped once per sequence.
        loss_function: callable ``(prediction, labels) -> loss`` whose result
            supports ``backward()`` and ``item()``.
        train_inout_seq: iterable of ``(feat, seq, labels)`` triples.

    Returns:
        ``np.mean`` of the per-sequence loss values.

    Note:
        Reads the notebook-level ``device`` global.
    """
    model.train()

    def _fit_sequence(features, history, targets):
        # Zero the gradients and call the model's hidden-cell initialiser
        # before each forward pass.
        optimizer.zero_grad()
        model.initialize_hidden_cell(device)
        sequence_loss = loss_function(model(history, features), targets)
        sequence_loss.backward()
        optimizer.step()
        return sequence_loss.item()

    epoch_losses = [
        _fit_sequence(features, history, targets)
        for features, history, targets in train_inout_seq
    ]
    return np.mean(epoch_losses)
        

In [13]:
def store_chekpoint(exp_dir, model,optimizer,lr_scheduler):
    """Save model, optimizer and scheduler state to ``<exp_dir>/checkpoint.pth``.

    Args:
        exp_dir: directory that receives ``checkpoint.pth``; created if missing.
        model: object exposing ``state_dict()``.
        optimizer: object exposing ``state_dict()``.
        lr_scheduler: object exposing ``state_dict()``.

    The file is overwritten on every call.
    """
    print('------- Saving checkpoint---------------')
    # torch.save does not create missing parent directories, so make sure the
    # experiment directory exists ('' means the current directory).
    if exp_dir:
        os.makedirs(exp_dir, exist_ok=True)
    checkpoint_path = os.path.join(exp_dir,'checkpoint.pth')

    checkpoint = {
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': lr_scheduler.state_dict(),
    }
    torch.save(checkpoint, checkpoint_path)

In [14]:
def load_chekpoint(exp_dir, model,optimizer,lr_scheduler, map_location=None):
    """Restore model, optimizer and scheduler state from ``<exp_dir>/checkpoint.pth``.

    Args:
        exp_dir: directory containing ``checkpoint.pth``.
        model: object exposing ``load_state_dict()``.
        optimizer: object exposing ``load_state_dict()``.
        lr_scheduler: object exposing ``load_state_dict()``.
        map_location: forwarded to ``torch.load``; pass the target device to
            restore a checkpoint on a different device than it was saved on.
            ``None`` (default) keeps ``torch.load``'s default behaviour.

    Returns:
        The same ``(model, optimizer, lr_scheduler)`` objects, updated in place.
    """
    print('------- Loading checkpoint---------------')
    checkpoint_path = os.path.join(exp_dir,'checkpoint.pth')

    checkpoint = torch.load(checkpoint_path, map_location=map_location)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    lr_scheduler.load_state_dict(checkpoint['scheduler_state_dict'])

    return model, optimizer, lr_scheduler

In [15]:
def get_lstm_dataloader(dataset, month):
    """Build the train and test sequence lists for one month.

    The table is split with ``train_test_split_monthly`` (presumably holding
    ``month`` out as the test set), converted to tensors on ``device`` and cut
    into windows by ``create_data_sequences``.

    Args:
        dataset: table passed to ``train_test_split_monthly``.
        month: month selector forwarded to ``train_test_split_monthly``.

    Returns:
        ``(train_inout_seq, test_inout_seq)``.

    Note:
        Reads the notebook-level globals ``features_cols``, ``targetColumns``,
        ``device`` and ``bptt``.
    """
    train_frame, test_frame = train_test_split_monthly(dataset, month)

    tensors = prepare_data_tensors(
        train_frame, test_frame, features_cols, targetColumns, device
    )
    train_features, train_targets, test_features, test_targets = tensors

    train_inout_seq = create_data_sequences(train_features, train_targets, bptt)
    test_inout_seq = create_data_sequences(test_features, test_targets, bptt)

    # Report the shapes of the three tensors in the first training item.
    first_item = train_inout_seq[0]
    print("\nsequences")
    print(first_item[0].shape, first_item[1].shape, first_item[2].shape)

    return train_inout_seq, test_inout_seq

In [16]:
# Fine-tune the pretrained LSTM pipeline separately for each month (1-12) and
# keep, per month, the weights and residuals of the best-scoring evaluation.
# NOTE(review): the "best" epoch is chosen by R2 on test_inout_seq, the same
# data the reported R2 comes from, so the reported scores are likely optimistic.
bptt = config['bptt']
exp_dir = 'data'
# torch.save / np.save do not create missing parent directories; without this
# the first save fails on a clean checkout.
os.makedirs(exp_dir, exist_ok=True)

# Settings shared by every month (loop-invariant, so defined once).
layers = config['layers']
communities = 24  # matches the second dimension of the attachment matrix printed below
network_size = len(targetColumns)
feat_size = len(features_cols)
dropout = config['dropout']
epochs = 60

R2List = []
residual_list = []

at_mat = get_community_attachment_matix(targetColumns, zone_to_comm_file)
print("\nattachment matrix")
print(at_mat.shape)


def _save_best(month, model, residual):
    """Write the current best weights (<month>.pt) and residuals (<month>.npy) to exp_dir."""
    torch.save(model.state_dict(), os.path.join(exp_dir, str(month)+'.pt'))
    np.save(os.path.join(exp_dir, str(month)+'.npy'), residual)


for m in range(1,13):

    print('-------------------------------------------------')
    print('-------------------------------------------------')
    print("Month: ", m)

    train_inout_seq, test_inout_seq = get_lstm_dataloader(dataset, m)

    model = LSTM_Pipeline(feat_size = feat_size, hidden_layer_size=communities,
                 network_size=network_size, lstm_layers=layers,
                aggregation_size=communities, dropout=dropout, at_mat=at_mat).to(device)

    # Every month starts from the same pretrained weights.
    model.load_state_dict(torch.load(pretrained_weights, map_location=device))
    print("\n model loaded")

    loss_function = nn.L1Loss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'])
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=config['step_size'], gamma=config['gamma'])
    print("\n model inititalized")

    print(len(test_inout_seq))
    residual, r2, rmse, mae = evaluate_lstm_pipeline_model(model, test_inout_seq, device)
    print("Pretraining R2: ",r2)

    # The untouched pretrained model is the baseline: it is saved first and is
    # only replaced when a fine-tuned epoch scores a higher R2.
    best_r2 = r2
    best_residual = residual
    _save_best(m, model, best_residual)

    print("\n Training model...")
    for i in range(epochs):

        loss = train_one_epoch(model, optimizer, loss_function, train_inout_seq)
        scheduler.step()
        # Resume point; the same checkpoint file is overwritten every epoch and month.
        store_chekpoint(exp_dir, model, optimizer, scheduler)

        residual, r2, rmse, mae = evaluate_lstm_pipeline_model(model, test_inout_seq, device)
        print(f'epoch: {i:3} loss: {loss:10.8f} r2: {r2:5.3f} rmse: {rmse:5.3f} mae: {mae:5.3f}')

        if r2 > best_r2:
            best_r2 = r2
            best_residual = residual
            _save_best(m, model, best_residual)

    print("bet_r2: ", best_r2)

    R2List.append(best_r2)
    residual_list.append(best_residual)


attachment matrix
torch.Size([258, 24])
-------------------------------------------------
-------------------------------------------------
Month:  1
train test split
train shape:  (8016, 272)
test shape:  (741, 272)
train feature tensor shape : torch.Size([8016, 13])
train target tensor shape : torch.Size([8016, 258])
test feature tensor shape : torch.Size([741, 13])
test target tensor shape : torch.Size([741, 258])

sequences
torch.Size([19, 13]) torch.Size([19, 258]) torch.Size([19, 258])

 model loaded

 model inititalized
722
Pretraining R2:  0.5235885448038736

 Training model...
------- Saving checkpoint---------------
epoch:   0 loss: 0.99708172 r2: 0.479 rmse: 3.724 mae: 1.042


KeyboardInterrupt: 

In [None]:
# Best R2 per month, in the order the months were trained.
R2List

In [None]:
# Average of the per-month best R2 values (nan if R2List is empty, e.g. after an interrupted run).
np.mean(R2List)

In [None]:
# Show only the shape of each month's best residuals; echoing the arrays
# themselves floods the cell output without being readable.
[np.shape(month_residual) for month_residual in residual_list]

In [None]:
# NOTE(review): bare float literals with no accompanying code — presumably R2
# scores noted down from earlier runs (unverified). Only the last value is
# echoed when the cell is executed.
0.530
0.5506309348043397
0.5972399097890982
0.6083282515195536
0.6267548650792119

In [None]:
# Turn each month's saved residual array into a table with Date / Hour columns
# followed by one column per target.
bptt = config['bptt']
residual_dir = 'data'

resdf_list = []

# Parse the dates once instead of once per month.
month_of_row = pd.to_datetime(dataset.Date).dt.month

for m in range(1,13):

    print('-------------------------------------------------')
    print('-------------------------------------------------')
    print("Month: ", m)

    # Skip the first `bptt` rows of the month so the remaining rows line up
    # with the residual rows.
    # NOTE(review): presumably the first window has no prediction — confirm.
    testData = dataset[month_of_row == m].iloc[bptt:]

    print("\n test shape")
    print(testData.shape)

    # The training cell writes '<m>.npy'; the previous code only looked for
    # 'jfk_<m>.npy', which nothing in this notebook produces. Prefer the file
    # the notebook writes and fall back to the legacy name.
    residual_path = os.path.join(residual_dir, str(m)+'.npy')
    if not os.path.exists(residual_path):
        residual_path = os.path.join(residual_dir, 'jfk_'+str(m)+'.npy')
    residual = np.load(residual_path)
    print("\n residual")
    print(residual.shape)

    res_df = pd.DataFrame(residual)
    res_df.columns = targetColumns
    # .values drops the dataset index so rows are matched by position.
    res_df['Date'] = testData['Date'].values
    res_df['Hour'] = testData['Hour'].values
    res_df = res_df[['Date', 'Hour'] + targetColumns]

    print(res_df.head())
    resdf_list.append(res_df)

In [None]:
# Stack the monthly residual tables into one frame. Each monthly frame keeps its
# own 0-based row index (no ignore_index), so index labels repeat across months.
all_res_df = pd.concat(resdf_list)
all_res_df.head()

In [None]:
# Persist the combined residual table; to_csv's default also writes the row
# index as the first (unnamed) CSV column.
all_res_df.to_csv('data/residual_jfk.csv')

In [None]:
# attachment = torch.argmax(F.softmax(model.attachment_matrix, dim=1), dim=1).detach().cpu().numpy()
# community_assignment = dict(zip(targetColumns, attachment))
# community_assignment

In [None]:
# 20 comm
# 0.505

In [None]:
# 50 comm
# 