# Modeling of Peartree Roundabout Traffic

In [None]:
# Change directory to the project root (the parent of the notebook folder) so
# that all project folders can be reached with root-relative paths.
import os

# Only climb one level when the current directory is not already the project
# root (recognised by its `data_preprocessing` folder). This keeps the cell
# idempotent: re-running it no longer walks further up the tree and breaks the
# relative paths used by the cells below.
_cwd = os.getcwd()
path = _cwd if os.path.isdir(os.path.join(_cwd, "data_preprocessing")) else os.path.dirname(_cwd)
os.chdir(path)
from data_preprocessing.classes.load_traffic_data import Import_Traffic_Data

In [None]:
import networkx as nx
import pandas as pd
import numpy as np
import ast
import keras
import math
import matplotlib.pyplot as plt
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dropout, Dense,concatenate
from stellargraph import StellarGraph, StellarDiGraph
import stellargraph as sg
from datetime import datetime
from stellargraph.layer import GCN_LSTM
import pmdarima as pm
from pmdarima.model_selection import train_test_split
import seaborn as sns
from classes import model_performance,preprocessing
import pickle

## Load Traffic Data

In [None]:
# Peartree roundabout bounding box
top = 51.798433
bottom = 51.791451
right = -1.281979
left = -1.289524

# datetimes of interest
datetime_start = datetime(2021, 6, 23, 0, 0)
datetime_end = datetime(2021, 7, 13, 10, 50)

# create the loader for the bounding box, then load the traffic data for the
# requested window (the second return value is kept as `time`)
traffic_loader = Import_Traffic_Data(top, bottom, right, left)
traffic_data, time = traffic_loader.load_traffic_data(datetime_start, datetime_end)

In [None]:
# coordinates, taken from the first entry along axis 0 (feature indices 3 and 4)
lats = traffic_data[0, :, 3]
lons = traffic_data[0, :, 4]

# speed capped by speed limit (feature index 5), kept for every entry
sp = traffic_data[:, :, 5]

## Load WX data

In [None]:
# weather (wx) data at 5-minute intervals, stored as csv
wx_csv = "data_collection/data/wx_data/oxfordcity_wx_variables_5min_intervals.csv"
wx_df = pd.read_csv(wx_csv)

In [None]:
# Collect the weather variables of significance. After the transpose each row
# holds one variable and each column one observation.
wx_columns = ['precipitationIntensity', 'temperature', 'humidity', 'weatherCode']
wx_vars = wx_df[wx_columns].T

# Min-max scale every variable independently.
for i in range(len(wx_columns)):
    row = wx_vars.iloc[i]
    low = row.min()
    span = row.max() - low
    # A variable that never changes over the period has a zero range; map it
    # to 0 instead of dividing by zero, which would fill the whole row with NaN.
    wx_vars.iloc[i] = (row - low) / span if span != 0 else 0.0

# plain numpy array of shape (variables, observations)
wx_vars = wx_vars.values
#scaler = MinMaxScaler()
#wx_vars = scaler.fit_transform(wx_vars)


## Create Road-Network Graph

In [None]:
# read the csv of node connections from the project root
connections_csv = f"{path}/data_preprocessing/peartree_roundabout.csv"
connections = pd.read_csv(connections_csv)

# preview the first five rows
connections.head(5)

In [None]:
# Convert the feeding roads from their csv text form into Python values.
# Entries that literal_eval rejects with ValueError (e.g. missing values read
# as NaN) become NaN.
parsed_roads = []
for raw in connections["feeding_roads"]:
    try:
        parsed_roads.append(ast.literal_eval(raw))
    except ValueError:
        parsed_roads.append(np.nan)

# Assign the whole column at once. Writing element by element through
# `connections.feeding_roads.iloc[i] = ...` is chained assignment, which
# triggers SettingWithCopy warnings and does not update the frame at all when
# pandas copy-on-write is active.
connections["feeding_roads"] = pd.Series(
    parsed_roads, index=connections.index, dtype=object
)

# node connections
nodes = connections["Unnamed: 0"]
roads = connections["feeding_roads"]

# replace nans with 0's (0 acts as the "no feeding road" marker below)
connections["feeding_roads"] = connections["feeding_roads"].fillna(0)

# Establish edges as [feeding road, node]; a row may list a single feeding
# road or a sequence of them.
edge_list = []
for node1, node2 in zip(connections["Unnamed: 0"], connections["feeding_roads"]):
    feeders = node2 if isinstance(node2, (list, tuple)) else [node2]
    edge_list.extend([feeder, node1] for feeder in feeders)

# remove 0's
# NOTE(review): this also drops every edge whose feeding road is a real node
# with id 0 -- confirm that node 0 never feeds another node in the csv.
edges = [edge for edge in edge_list if edge[0] != 0]

In [None]:
# build the graph: one node per entry in `nodes`, each carrying the matching
# column of `sp` as its "spd" attribute, then add the edges
G = nx.Graph()
n_nodes = len(nodes)
for col in range(n_nodes):
    G.add_node(nodes[col], spd=sp[:, col])
G.add_edges_from(edges)

In [None]:
# convert graph to stellargraph object for modeling, using the "spd" node
# attribute as the node features
square = StellarGraph.from_networkx(G, node_features="spd")

# get feature matrix
X = square.node_features()

# get adjacency matrix as a numpy array
A = nx.to_numpy_array(G)

# Modeling: GCN_LSTM

###### Train/Test Split

In [None]:
# fraction of the data used for training
train_rate = 0.8

# replace negative values (presumably missing-value markers) with 0
is_negative = X < 0
X = np.where(is_negative, 0, X)

# split train/test
train_data, test_data = preprocessing.train_test_split(X, train_rate)

# report the shape of each split
for label, split in (("Train data: ", train_data), ("Test data: ", test_data)):
    print(label, split.shape)

###### Scale Data

In [None]:
# scale both splits with the project's max/min scaling helper
scaled_splits = preprocessing.scale_data(train_data, test_data)
train_scaled, test_scaled = scaled_splits

###### Pre-process data based on sequence and prediction length

In [None]:
# history window: the number of timesteps up to the prediction that we feed to
# the model, counted in 5-minute intervals (12 steps = 60 minutes)
seq_len = 12

# prediction horizon: how far in advance we want to predict, counted in
# 5-minute intervals (1 step = 5 minutes)
pre_len = 1

In [None]:
# build the model inputs and targets for both splits from the scaled data
trainX, trainY, testX, testY = preprocessing.sequence_data_preparation(
    seq_len, pre_len, train_scaled, test_scaled
)

# report the shape of each prepared array
for arr in (trainX, trainY, testX, testY):
    print(arr.shape)

# Model Training

## 5-min prediction horizon

In [None]:
# GCN-LSTM configuration: one graph-convolution layer of size 10 and one LSTM
# layer of size 200, both with linear activations, and no dropout
gcn_lstm = GCN_LSTM(
    seq_len=seq_len,
    adj=A,
    gc_layer_sizes=[10],
    gc_activations=["linear"],
    lstm_layer_sizes=[200],
    lstm_activations=["linear"],
    dropout=0.0,
)
# model architecture with keras
x_input, x_output = gcn_lstm.in_out_tensors()
model_tgcn = Model(inputs=x_input, outputs=x_output)

# compile model; `learning_rate` is the supported keyword -- the old `lr`
# alias is deprecated and rejected by newer Keras releases
optimizer = keras.optimizers.Adam(learning_rate=0.001)
model_tgcn.compile(optimizer=optimizer, loss="mse", metrics=["mse"])

# NOTE(review): the test split doubles as the validation data here, so the
# validation curve is not an independent hold-out estimate.
history = model_tgcn.fit(
    x=trainX,
    y=trainY,
    epochs=75,
    batch_size=64,
    shuffle=True,
    verbose=1,
    validation_data=(testX, testY),
)

In [None]:
# plot the metrics recorded in `history` while fitting `model_tgcn`
sg.utils.plot_history(history)

## Save Model Weights (T-GCN: 5-min prediction)

In [None]:
# save model to folder
#model_tgcn.save('modeling/models/tgcn-5min')


# load the model from disk
#import keras
#model_tgcn = keras.models.load_model('modeling/models/tgcn-5min')

## 15-min prediction horizon

In [None]:
# history window: timesteps fed to the model before the prediction
# (5-minute intervals)
seq_len = 12

# prediction horizon: how far in advance we predict (5-minute intervals)
pre_len = 3

# build the model inputs and targets for both splits from the scaled data
trainX, trainY, testX, testY = preprocessing.sequence_data_preparation(
    seq_len, pre_len, train_scaled, test_scaled
)

# report the shape of each prepared array
for arr in (trainX, trainY, testX, testY):
    print(arr.shape)

In [None]:
# GCN-LSTM configuration: one graph-convolution layer of size 15 and one LSTM
# layer of size 200, both with linear activations, and dropout of 0.1
gcn_lstm = GCN_LSTM(
    seq_len=seq_len,
    adj=A,
    gc_layer_sizes=[15],
    gc_activations=["linear"],
    lstm_layer_sizes=[200],
    lstm_activations=["linear"],
    dropout=0.1,
)
# model architecture with keras
x_input, x_output = gcn_lstm.in_out_tensors()
model_tgcn_15 = Model(inputs=x_input, outputs=x_output)

# compile model; `learning_rate` is the supported keyword -- the old `lr`
# alias is deprecated and rejected by newer Keras releases
optimizer = keras.optimizers.Adam(learning_rate=0.001)
model_tgcn_15.compile(optimizer=optimizer, loss="mse", metrics=["mse"])

# NOTE(review): the test split doubles as the validation data here, so the
# validation curve is not an independent hold-out estimate.
history_15 = model_tgcn_15.fit(
    x=trainX,
    y=trainY,
    epochs=50,
    batch_size=64,
    shuffle=True,
    verbose=1,
    validation_data=(testX, testY),
)

In [None]:
# display the current prediction horizon (number of 5-minute intervals)
pre_len

In [None]:
# plot the metrics recorded in `history_15` while fitting `model_tgcn_15`
sg.utils.plot_history(history_15)

In [None]:
# save model to folder
model_tgcn_15.save('modeling/models/tgcn-15min')


# load the model from disk
#import keras
#model_tgcn_15 = keras.models.load_model('modeling/models/tgcn-15min')

## 30-min prediction horizon

In [None]:
# history window: timesteps fed to the model before the prediction
# (5-minute intervals)
seq_len = 12

# prediction horizon: how far in advance we predict (5-minute intervals)
pre_len = 6

# build the model inputs and targets for both splits from the scaled data
trainX, trainY, testX, testY = preprocessing.sequence_data_preparation(
    seq_len, pre_len, train_scaled, test_scaled
)

# report the shape of each prepared array
for arr in (trainX, trainY, testX, testY):
    print(arr.shape)

In [None]:
# GCN-LSTM configuration: one linear graph-convolution layer of size 15, one
# relu LSTM layer of size 100, and dropout of 0.1
gcn_lstm = GCN_LSTM(
    seq_len=seq_len,
    adj=A,
    gc_layer_sizes=[15],
    gc_activations=["linear"],
    lstm_layer_sizes=[100],
    lstm_activations=["relu"],
    dropout=0.1,
)
# model architecture with keras
x_input, x_output = gcn_lstm.in_out_tensors()
model_tgcn_30 = Model(inputs=x_input, outputs=x_output)

# compile model; `learning_rate` is the supported keyword -- the old `lr`
# alias is deprecated and rejected by newer Keras releases
optimizer = keras.optimizers.Adam(learning_rate=0.001)
model_tgcn_30.compile(optimizer=optimizer, loss="mse", metrics=["mse"])

# NOTE(review): the test split doubles as the validation data here, so the
# validation curve is not an independent hold-out estimate.
history_30 = model_tgcn_30.fit(
    x=trainX,
    y=trainY,
    epochs=35,
    batch_size=64,
    shuffle=True,
    verbose=1,
    validation_data=(testX, testY),
)

In [None]:
# plot the metrics recorded in `history_30` while fitting `model_tgcn_30`
sg.utils.plot_history(history_30)

In [None]:
# save model to folder
model_tgcn_30.save('modeling/models/tgcn-30min')


# load the model from disk
#import keras
#model_tgcn_30 = keras.models.load_model('modeling/models/tgcn-30min')

## 60-min prediction horizon

In [None]:
# history window: timesteps fed to the model before the prediction
# (5-minute intervals; 12 per hour times 24 hours)
seq_len = 12 * 24

# prediction horizon: how far in advance we predict (5-minute intervals)
pre_len = 12

# build the model inputs and targets for both splits from the scaled data
trainX, trainY, testX, testY = preprocessing.sequence_data_preparation(
    seq_len, pre_len, train_scaled, test_scaled
)

# report the shape of each prepared array
for arr in (trainX, trainY, testX, testY):
    print(arr.shape)

In [None]:
# GCN-LSTM configuration: one linear graph-convolution layer of size 15, one
# relu LSTM layer of size 100, and dropout of 0.1
gcn_lstm = GCN_LSTM(
    seq_len=seq_len,
    adj=A,
    gc_layer_sizes=[15],
    gc_activations=["linear"],
    lstm_layer_sizes=[100],
    lstm_activations=["relu"],
    dropout=0.1,
)
# model architecture with keras
x_input, x_output = gcn_lstm.in_out_tensors()
model_tgcn_60 = Model(inputs=x_input, outputs=x_output)

# compile model; `learning_rate` is the supported keyword -- the old `lr`
# alias is deprecated and rejected by newer Keras releases
optimizer = keras.optimizers.Adam(learning_rate=0.001)
model_tgcn_60.compile(optimizer=optimizer, loss="mse", metrics=["mse"])

# NOTE(review): the test split doubles as the validation data here, so the
# validation curve is not an independent hold-out estimate.
history_60 = model_tgcn_60.fit(
    x=trainX,
    y=trainY,
    epochs=20,
    batch_size=64,
    shuffle=True,
    verbose=1,
    validation_data=(testX, testY),
)

In [None]:
# plot the metrics recorded in `history_60` while fitting `model_tgcn_60`
sg.utils.plot_history(history_60)

In [None]:
# save model to folder
#model_tgcn_60.save('modeling/models/tgcn-60min')


# load the model from disk
#import keras
#model_tgcn_60 = keras.models.load_model('modeling/models/tgcn-60min')