In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
import warnings
# Suppress every Python warning for the rest of the kernel session.
# NOTE(review): this also hides deprecation and numerical warnings raised by
# later cells; narrow the filter if a specific warning is the real target.
warnings.filterwarnings("ignore")

In [None]:
import tensorflow as tf

# Record the TensorFlow version this notebook was executed with.
print(tf.__version__)

# Last expression of the cell: the list of physical devices TensorFlow can see
# (device_type=None applies no device-type filter).
tf.config.experimental.list_physical_devices(device_type=None)

In [None]:
# Turn on memory growth for every visible GPU, then report how many physical
# and logical GPU devices TensorFlow sees.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # The setting has to be identical on all GPUs, so apply it to each one.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as err:
        # Raised when the GPUs were already initialized before this cell ran;
        # the message is shown instead of aborting the notebook.
        print(err)

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import scipy.sparse as sp

# Utils

In [None]:
def normalized_adj(adj):
    """Scale an adjacency matrix by the inverse square root of its row sums.

    With ``D`` the diagonal matrix of row sums of ``A``, the result is
    ``(A @ D^-1/2).T @ D^-1/2``.

    Args:
        adj: Any input accepted by ``scipy.sparse.coo_matrix``.

    Returns:
        The scaled matrix as a ``float32`` SciPy sparse matrix in COO format.
    """
    adjacency = sp.coo_matrix(adj)
    row_sums = np.array(adjacency.sum(1))
    inv_sqrt = np.power(row_sums, -0.5).flatten()
    # A zero row sum yields inf above; use a zero scale factor for those rows.
    inv_sqrt[np.isinf(inv_sqrt)] = 0.
    scale = sp.diags(inv_sqrt)
    scaled = adjacency.dot(scale).transpose().dot(scale)
    return scaled.tocoo().astype(np.float32)

def calculate_laplacian(adj, lambda_max=1):
    """Build the normalized, self-looped adjacency as a TensorFlow sparse tensor.

    Adds the identity to ``adj``, normalizes the sum with ``normalized_adj``
    and hands the ``float32`` CSR result to ``sparse_to_tuple``.

    Args:
        adj: Square adjacency matrix exposing ``.shape``.
        lambda_max: Accepted for interface compatibility; not used in the body.

    Returns:
        Whatever ``sparse_to_tuple`` returns for the normalized matrix.
    """
    node_count = adj.shape[0]
    with_self_loops = adj + sp.eye(node_count)
    normalized = normalized_adj(with_self_loops)
    normalized_csr = sp.csr_matrix(normalized).astype(np.float32)
    return sparse_to_tuple(normalized_csr)

def sparse_to_tuple(mx):
    """Convert a matrix into a reordered ``tf.sparse.SparseTensor``.

    Despite the name, the return value is a sparse tensor, not a tuple.

    Args:
        mx: Any input accepted by ``scipy.sparse.coo_matrix``.

    Returns:
        The result of ``tf.sparse.reorder`` applied to a ``SparseTensor``
        built from the COO coordinates, values and shape of ``mx``.
    """
    coo = sp.coo_matrix(mx)
    # One (row, col) pair per stored entry.
    indices = np.stack((coo.row, coo.col), axis=1)
    sparse_tensor = tf.sparse.SparseTensor(indices, coo.data, coo.shape)
    return tf.sparse.reorder(sparse_tensor)


def weight_variable_glorot(input_dim, output_dim, name=""):
    """Create a ``tf.Variable`` weight matrix drawn from a uniform distribution.

    Values are sampled from ``[-r, r)`` with ``r = sqrt(6 / (input_dim + output_dim))``.

    Args:
        input_dim: Number of rows of the weight matrix.
        output_dim: Number of columns of the weight matrix.
        name: Name given to the created variable.

    Returns:
        A ``float32`` ``tf.Variable`` of shape ``[input_dim, output_dim]``.
    """
    limit = np.sqrt(6.0 / (input_dim + output_dim))
    shape = [input_dim, output_dim]
    initial_value = tf.random.uniform(shape, minval=-limit, maxval=limit,
                                      dtype=tf.float32)
    return tf.Variable(initial_value, name=name)


# TGCNCell - Keras

In [None]:
from tensorflow.keras.regularizers import l1_l2

class TGCNCell(tf.keras.layers.Layer):
    """GRU-style recurrent cell with a graph convolution in front of each gate.

    At every step the per-node input is concatenated with the per-node hidden
    state, multiplied by the normalized self-looped adjacency produced by
    ``calculate_laplacian`` and projected by a dense weight matrix shared
    across nodes. The projection yields the reset/update gates and, in a
    second pass, the candidate state.

    Args:
        num_units: Hidden units per node.
        adj: Adjacency matrix of the graph; passed to ``calculate_laplacian``.
        num_nodes: Number of graph nodes.
        l1: L1 regularization factor for both weight matrices.
        l2: L2 regularization factor for both weight matrices.
        act: Activation applied to the candidate state.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, num_units, adj, num_nodes, l1, l2, act=tf.nn.tanh, **kwargs):
        super().__init__(**kwargs)
        self._act = act
        self._nodes = num_nodes
        self._units = num_units
        # Kept as a list so `_gc` can iterate over the graph supports.
        self._adj = [calculate_laplacian(adj)]
        self.l1 = l1
        self.l2 = l2
        self.build_weights()

    def build_weights(self):
        """Create the gate and candidate weights and biases.

        Both weight matrices have ``num_units + 1`` input rows: one scalar
        input feature per node plus that node's hidden state.
        """
        regularizer = l1_l2(l1=self.l1, l2=self.l2)
        self.gates_weights = self.add_weight(shape=(self._units + 1, 2 * self._units),
                                             initializer='glorot_uniform',
                                             regularizer=regularizer,
                                             name='gates_weights')
        self.gates_biases = self.add_weight(shape=(2 * self._units,),
                                            initializer='zeros',
                                            name='gates_biases')
        self.candidate_weights = self.add_weight(shape=(self._units + 1, self._units),
                                                 initializer='glorot_uniform',
                                                 regularizer=regularizer,
                                                 name='candidate_weights')
        self.candidate_biases = self.add_weight(shape=(self._units,),
                                                initializer='zeros',
                                                name='candidate_biases')

    @property
    def state_size(self):
        """Width of the flattened hidden state: ``num_nodes * num_units``."""
        return self._nodes * self._units

    @property
    def output_size(self):
        """Width of the per-step output.

        ``call`` returns the flattened hidden state as its output, so the
        output width equals ``state_size`` (previously reported as
        ``num_units``, which disagreed with the tensor actually returned).
        """
        return self._nodes * self._units

    def call(self, inputs, state):
        """Run one recurrent step.

        Args:
            inputs: Tensor of shape ``[batch, num_nodes]`` (one value per node).
            state: Previous hidden state of shape
                ``[batch, num_nodes * num_units]``, or a one-element
                list/tuple holding it.

        Returns:
            ``(output, new_state)``; both are the new hidden state with shape
            ``[batch, num_nodes * num_units]``.
        """
        # A wrapping RNN layer may pass the state inside a list or tuple.
        if isinstance(state, (list, tuple)):
            state = state[0]
        state = tf.reshape(state, [-1, self._nodes, self._units])

        # `_gc` already adds the gate bias; adding it again here would apply
        # the bias twice.
        value = tf.nn.sigmoid(self._gc(inputs, state, 2 * self._units))

        r, u = tf.split(value=value, num_or_size_splits=2, axis=1)
        r = tf.reshape(r, [-1, self._nodes, self._units])
        u = tf.reshape(u, [-1, self._nodes, self._units])
        r_state = r * state

        # Same as above: the candidate bias is applied inside `_gc`.
        c = self._act(self._gc(inputs, r_state, self._units))
        c = tf.reshape(c, [-1, self._nodes, self._units])
        new_h = u * state + (1 - u) * c
        new_h = tf.reshape(new_h, [-1, self._nodes * self._units])

        return new_h, new_h

    def _gc(self, inputs, state, output_size):
        """Graph convolution followed by a dense projection and bias.

        Args:
            inputs: Tensor of shape ``[batch, num_nodes]``.
            state: Hidden state reshapeable to ``[batch, num_nodes, num_units]``.
            output_size: ``2 * num_units`` selects the gate parameters; any
                other value selects the candidate parameters.

        Returns:
            Tensor of shape ``[batch * num_nodes, n]`` where ``n`` is the
            column count of the selected weight matrix.
        """
        inputs = tf.expand_dims(inputs, 2)
        state = tf.reshape(state, (-1, self._nodes, self._units))
        x_s = tf.concat([inputs, state], axis=2)
        input_size = x_s.get_shape()[2]
        # Move the node axis first so the adjacency can left-multiply all
        # features of all batch items in a single sparse matmul.
        x0 = tf.transpose(x_s, perm=[1, 2, 0])
        x0 = tf.reshape(x0, shape=[self._nodes, -1])

        # `self._adj` holds a single support; with more than one entry only
        # the last product would be kept.
        for m in self._adj:
            x1 = tf.sparse.sparse_dense_matmul(m, x0)

        x = tf.reshape(x1, shape=[self._nodes, input_size, -1])
        x = tf.transpose(x, perm=[2, 0, 1])
        x = tf.reshape(x, shape=[-1, input_size])

        is_gate = output_size == 2 * self._units
        weights = self.gates_weights if is_gate else self.candidate_weights
        biases = self.gates_biases if is_gate else self.candidate_biases
        x = tf.matmul(x, weights)
        x = tf.nn.bias_add(x, biases)
        return x

In [None]:
import pickle

# Directory containing the pickled train/test splits.
DATA_DIR = "/data/timestep_24"


def load_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    NOTE(review): ``pickle.load`` can execute arbitrary code while
    deserializing, so only point this at files from a trusted source.
    """
    with open(path, 'rb') as file:
        return pickle.load(file)


trainX_loaded = load_pickle(f"{DATA_DIR}/trainX_timestep_24_20240108.pkl")
trainY_loaded = load_pickle(f"{DATA_DIR}/trainY_timestep_24_20240108.pkl")
testX_loaded = load_pickle(f"{DATA_DIR}/testX_timestep_24_20240108.pkl")
testY_loaded = load_pickle(f"{DATA_DIR}/testY_timestep_24_20240108.pkl")

# TGCN Model - Keras

In [None]:
###### load data ######
road_adj = pd.read_excel("/data/road_connection.xlsx")
# np.mat was removed in NumPy 2.0; np.asmatrix is the same conversion and is
# available in both NumPy 1.x and 2.x.
adj = np.asmatrix(road_adj)
# The graph utilities add an identity of size adj.shape[0], so the adjacency
# must be square; fail here with a clear message rather than deep inside them.
assert adj.shape[0] == adj.shape[1], (
    f"road adjacency must be square, got shape {adj.shape}"
)


###### numpy ######
trainX = np.array(trainX_loaded)
trainY = np.array(trainY_loaded)
testX = np.array(testX_loaded)
testY = np.array(testY_loaded)

###### hyperparameters ######
gru_units = 128             # hidden units per node in the T-GCN cell
num_nodes = adj.shape[0]    # one graph node per adjacency row
pre_len = 12                # number of future steps predicted per sample
seq_len = 1 # avg traffic from 30 min >> 5 mins + waiting time 30 mins
# NOTE(review): seq_len is not referenced by any later cell in this notebook.
# batch_size = 32
l1 = 0.01                   # L1 factor for the cell's weight regularizer
l2 = 0.01                   # L2 factor for the cell's weight regularizer

class TGCNModel(tf.keras.Model):
    """T-GCN sequence model: a ``TGCNCell`` RNN followed by a dense read-out.

    Args:
        num_nodes: Number of graph nodes.
        gru_units: Hidden units per node inside the cell.
        adj: Adjacency matrix forwarded to ``TGCNCell``.
        pre_len: Number of future steps predicted for every node.
        l1: L1 regularization factor forwarded to ``TGCNCell``.
        l2: L2 regularization factor forwarded to ``TGCNCell``.
    """

    def __init__(self, num_nodes, gru_units, adj, pre_len, l1, l2):
        super().__init__()
        self.num_nodes = num_nodes
        self.gru_units = gru_units
        self.adj = adj
        self.pre_len = pre_len
        self.tgcn_cell = TGCNCell(gru_units, adj, num_nodes, l1, l2)
        self.rnn = tf.keras.layers.RNN(self.tgcn_cell, return_sequences=True)
        self.dense_out = tf.keras.layers.Dense(num_nodes * pre_len)

    def call(self, inputs):
        """Map an input sequence to a forecast.

        Args:
            inputs: Tensor whose axis 1 is the time axis and whose static
                length along that axis is known (it sizes the flattening
                reshape below).

        Returns:
            Tensor of shape ``[batch, pre_len, num_nodes]``.
        """
        timesteps = inputs.shape[1]
        x = self.rnn(inputs)
        # Flatten every time step's hidden state into one feature vector per
        # sample before the dense read-out.
        x = tf.reshape(x, [-1, self.num_nodes * timesteps * self.gru_units])

        x = self.dense_out(x)

        # The leftover debug print of the output shape was removed from here.
        return tf.reshape(x, [-1, self.pre_len, self.num_nodes])

In [None]:
# from sklearn.model_selection import TimeSeriesSplit
# from tensorflow.keras.callbacks import EarlyStopping

# tscv = TimeSeriesSplit(n_splits=3)
# num_epochs = 100
# batch_size = 64
# all_loss_histories = []
# all_loss_histories_train = []

# for tr_index, val_index in tscv.split(trainX):
#     print("TRAIN:", tr_index, "TEST:", val_index)
#     X_tr, X_val = trainX[tr_index], trainX[val_index]
#     y_tr, y_val = trainY[tr_index], trainY[val_index]

#     model = TGCNModel(num_nodes, gru_units, adj, pre_len, l1, l2)

#     # Define Early Stopping callback
#     early_stopping = EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)

#     model.compile(optimizer='adam', loss='mse', metrics=['mae'])

#     history = model.fit(X_tr, y_tr,
#                         validation_data=(X_val, y_val),
#                         epochs=num_epochs, batch_size=batch_size,
#                         callbacks=[early_stopping], verbose=1)

#     loss_history = history.history['val_loss']
#     all_loss_histories.append(loss_history)
#     loss_history_train = history.history['loss']
#     all_loss_histories_train.append(loss_history_train)

# # Find the epoch with the minimum validation loss
# best_epoch = np.argmin(np.mean(all_loss_histories, axis=0))

# # print("Best Epoch:", best_epoch + 1)

In [None]:
# # Plotting training and validation loss
# import matplotlib.pyplot as plt

# plt.plot(range(1, num_epochs + 1), np.mean(all_loss_histories_train, axis=0), label='Training Loss')
# plt.plot(range(1, num_epochs + 1), np.mean(all_loss_histories, axis=0), label='Validation Loss')
# plt.title('Training and Validation Loss')
# plt.xlabel('Epochs')
# plt.ylabel('Loss')
# plt.legend()
# plt.show()

In [None]:
# n_epochs = best_epoch + 1
batch_size = 16

print(trainX.shape)
print(trainY.shape)
print(testX.shape)
print(testY.shape)

# TGCNModel.__init__ requires the l1/l2 regularization factors; calling it
# without them raises TypeError on a fresh kernel.
model = TGCNModel(num_nodes, gru_units, adj, pre_len, l1, l2)

model.compile(optimizer='adam', loss='mse', metrics=['mae','mse','mape'])

# NOTE(review): the test split is also used as validation data here, so the
# reported validation metrics are not independent of the final test metrics.
model.fit(trainX, trainY, epochs = 17, batch_size=batch_size, validation_data=(testX, testY))

(40272, 816, 6)
(40272, 12, 6)
(11472, 816, 6)
(11472, 12, 6)
Epoch 1/17
x shape:  (16, 12, 6)
x shape:  (16, 12, 6)


2024-01-09 13:00:57.576514: I external/local_tsl/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2024-01-09 13:00:58.020116: I external/local_xla/xla/service/service.cc:168] XLA service 0x7f0e76e6ae90 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-01-09 13:00:58.020143: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 3080 Ti, Compute Capability 8.6
2024-01-09 13:00:58.029003: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-01-09 13:00:58.048510: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8904
I0000 00:00:1704780058.087522  320355 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/17
Epoch 3/17
Epoch 4/17
Epoch 5/17
Epoch 6/17
Epoch 7/17
Epoch 8/17

In [None]:
test_metrics = model.evaluate(testX, testY)

# The first four entries are read as loss, MAE, MSE and MAPE, in that order.
loss_value, mae, mse, mape = test_metrics[:4]

for label, value in (('Loss:', loss_value),
                     ('MAE(veh):', mae),
                     ('MSE:', mse),
                     ('MAPE(%):', mape)):
    print(label, value)

In [None]:
predictions = model.predict(testX)

# The loss object gets its own name so that `mse`, the scalar metric read from
# model.evaluate in the previous cell, is not silently replaced by a callable.
mse_loss_fn = tf.keras.losses.MeanSquaredError()
rmse = tf.sqrt(mse_loss_fn(testY, predictions))
print(f"Test RMSE: {rmse.numpy()}")

# Coefficient of determination computed over all elements of testY at once,
# using the global mean of testY as the baseline.
ss_total = tf.reduce_sum(tf.square(testY - tf.reduce_mean(testY)))
ss_res = tf.reduce_sum(tf.square(testY - predictions))
r2 = 1 - ss_res / ss_total
print(f"Test R2: {r2.numpy()}")


In [None]:
# Display the prediction array returned by model.predict.
predictions

In [None]:
# Display the shape of the prediction array.
predictions.shape

In [None]:
# Collapse all leading axes into rows while keeping the last axis as columns.
last_axis_size = predictions.shape[-1]
reshaped_predictions = predictions.reshape(-1, last_axis_size)
reshaped_predictions

In [None]:
# Column labels for the prediction table, one per column of the 2-D array.
prediction_columns = [f'N0{i}' for i in [1,3,4,6,8,9]]
df_predictions = pd.DataFrame(reshaped_predictions, columns=prediction_columns)
df_predictions

In [None]:
# Flatten the ground truth to the same column count as the predictions so the
# two tables line up row for row.
testY_columns = [f'N0{i}' for i in [1,3,4,6,8,9]]
reshaped_testY = testY.reshape(-1, predictions.shape[-1])
df_testY = pd.DataFrame(reshaped_testY, columns=testY_columns)
df_testY

In [None]:
# Write predictions and ground truth to CSV files in the working directory;
# the batch size is embedded in each file name.
predictions_csv = f"predictions_timestep_24_batch_size_{batch_size}_20240109.csv"
testY_csv = f"testY_timestep_24_batch_size_{batch_size}_20240109.csv"
df_predictions.to_csv(predictions_csv)
df_testY.to_csv(testY_csv)

In [None]:
# model.save('/content/drive/MyDrive/Thesis/TGCN/Model')
# model.save_weights('/content/drive/MyDrive/Thesis/TGCN/Model')