# Import data and functions

In [None]:
%tensorflow_version 2.x
import os
# Export a fixed hash seed through the process environment.
# NOTE(review): PYTHONHASHSEED is normally consulted when the interpreter
# starts, so this presumably only affects processes launched afterwards --
# confirm it has the intended effect on this kernel.
os.environ['PYTHONHASHSEED']=str(1)

import tensorflow as tf
import numpy as np
import random

# Abort the notebook early unless TensorFlow reports a GPU under the name
# '/device:GPU:0'; the cells below place work on that device explicitly.
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
    # Print a hint about the most likely cause before raising.
    print(
      '\n\nThis error most likely means that this notebook is not '
      'configured to use a GPU.  Change this in Notebook Settings via the '
      'command palette (cmd/ctrl-shift-P) or the Edit menu.\n\n')
    raise SystemError('GPU device not found')

def cpu():
    """Run one small random convolution on the CPU and return its summed output.

    A (100, 100, 100, 3) normal random tensor is passed through a freshly
    created ``Conv2D(32, 7)`` layer and the result is reduced to a scalar.
    The notebook calls this once as a warm-up op.

    Returns:
        The scalar tensor produced by ``tf.math.reduce_sum``.
    """
    # The statements must be nested inside the `with` block: that is what pins
    # them to the CPU, and without the indentation the cell does not parse.
    with tf.device('/cpu:0'):
        random_image_cpu = tf.random.normal((100, 100, 100, 3))
        net_cpu = tf.keras.layers.Conv2D(32, 7)(random_image_cpu)
        return tf.math.reduce_sum(net_cpu)

def gpu():
    """Run one small random convolution on the first GPU and return its summed output.

    A (100, 100, 100, 3) normal random tensor is passed through a freshly
    created ``Conv2D(32, 7)`` layer and the result is reduced to a scalar.
    The notebook calls this once as a warm-up op.

    Returns:
        The scalar tensor produced by ``tf.math.reduce_sum``.
    """
    # The statements must be nested inside the `with` block: that is what pins
    # them to the GPU, and without the indentation the cell does not parse.
    with tf.device('/device:GPU:0'):
        random_image_gpu = tf.random.normal((100, 100, 100, 3))
        net_gpu = tf.keras.layers.Conv2D(32, 7)(random_image_gpu)
        return tf.math.reduce_sum(net_gpu)

def reset_random_seeds(n=1):
    """Re-seed every random source used by the notebook with the value ``n``.

    Writes ``str(n)`` to the ``PYTHONHASHSEED`` environment variable, then
    seeds TensorFlow, NumPy's global generator and the stdlib ``random``
    module, in that order.
    """
    os.environ['PYTHONHASHSEED'] = str(n)
    for seed_fn in (tf.random.set_seed, np.random.seed, random.seed):
        seed_fn(n)

# We run each op once to warm up; see: https://stackoverflow.com/a/45067900
# (the returned values are discarded).
cpu()
gpu()

# Mount Google Drive at /content/drive; later cells read the input .mat file
# from it and write the forecast .npz files to it.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
from scipy.io import loadmat
from tensorflow import keras
from tensorflow.keras import layers
import gc
from sklearn.metrics import r2_score
# Make float64 the default float type for Keras layers and weights
# (the data prepared below is cast to np.float64 as well).
tf.keras.backend.set_floatx('float64')

def stagger_data(data, h):
    """
    Build lagged features ``X`` and aligned targets ``Y`` from a 2-D series.

    ``data`` has shape ``(n, m)`` with time along the columns.  The lags in
    ``h`` are used in ascending order; for each lag the columns of ``data``
    shifted back by that lag are stacked as a block of ``n`` rows in ``X``.
    ``Y`` holds the columns from ``max(h)`` onwards, so column ``j`` of ``X``
    contains the values ``lag`` steps before column ``j`` of ``Y``.

    Parameters
    ----------
    data : np.ndarray of shape (n, m)
    h : iterable of int
        Lags.  Any iterable is accepted and it is left untouched (a sorted
        copy is used internally).

    Returns
    -------
    X : np.ndarray of shape (n * len(h), m - max(h)), same dtype as ``data``
    Y : np.ndarray of shape (n, m - max(h)), a slice of ``data``

    Raises
    ------
    ValueError
        If ``h`` is empty.

    >>> i = np.array([[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]])
    >>> stagger_data(i, [1, 3])
    (array([[ 3,  4,  5],
           [ 9, 10, 11],
           [ 1,  2,  3],
           [ 7,  8,  9]]), array([[ 4,  5,  6],
           [10, 11, 12]]))
    """
    # Sort a copy: `h.sort()` would silently reorder the caller's list (and
    # fail outright for tuples).
    lags = sorted(h)
    if not lags:
        raise ValueError("h must contain at least one lag")

    len_h = len(lags)
    n, m = data.shape
    max_h = lags[-1]

    Y = data[:, max_h:]
    X = np.zeros((n * len_h, m - max_h), dtype=data.dtype)
    for i, lag in enumerate(lags):
        # Block i holds the series delayed by `lag` columns relative to Y.
        X[i * n:(i + 1) * n, :] = data[:, max_h - lag:m - lag]
    return X, Y


def remove_weekends(data, start=0, bs=36):
    """Drop the columns of ``data`` that fall on weekday codes 5 and 6.

    The columns are treated as consecutive days of ``bs`` columns each.  The
    first day carries weekday code ``start`` and the codes then cycle through
    0..6; only columns whose day has a code below 5 are returned.
    """
    _, n_cols = data.shape
    n_day = int(n_cols / bs)

    # Codes for the remainder of the first week, followed by whole weeks
    # starting again at 0; truncated to the number of days available.
    first_week = np.arange(start, 7) % 7
    following_days = np.arange(n_day) % 7
    day_codes = np.concatenate((first_week, following_days))[:n_day]

    keep = np.repeat(day_codes, bs) < 5
    return data[:, keep]


def get_flow1(od, s, dir='o', num_s=159):
    """Get the flow of station `s` by summing a subset of the rows of `od`.

    Parameters
    ----------
    od : np.ndarray, 2-D
        Matrix whose rows are summed column by column.
        NOTE(review): presumably one row per OD pair (num_s * num_s rows) and
        one column per time step -- confirm against the caller.
    s : int
        Station index.
    dir : {'o', 'd'}
        'o' sums rows ``s, s + num_s, s + 2 * num_s, ...``;
        'd' sums the ``num_s`` consecutive rows starting at ``s * num_s``.
        NOTE(review): which of the two is the origin-side and which the
        destination-side flow depends on the row ordering of `od` -- confirm.
    num_s : int
        Number of stations.

    Returns
    -------
    np.ndarray
        1-D array with one value per column of `od`.

    Raises
    ------
    ValueError
        If `dir` is neither 'o' nor 'd' (this used to surface as an
        UnboundLocalError on `idx`).
    """
    n = od.shape[0]
    if dir == 'o':
        idx = np.arange(s, n, num_s)
    elif dir == 'd':
        idx = np.arange(s * num_s, (s + 1) * num_s)
    else:
        raise ValueError("dir must be 'o' or 'd', got {!r}".format(dir))
    return np.sum(od[idx, :], axis=0)


def od2flow(od, s_list=None, dir='o', num_s=159):
    """Stack the per-station flows returned by ``get_flow1`` into one matrix.

    Parameters
    ----------
    od : np.ndarray, 2-D
        Passed unchanged to ``get_flow1``.
    s_list : sequence of int, optional
        Stations to compute; defaults to ``range(num_s)``.
    dir, num_s
        Forwarded to ``get_flow1``.

    Returns
    -------
    np.ndarray of shape (len(s_list), od.shape[1]) and dtype float32
        Row ``i`` is the flow of ``s_list[i]``.
    """
    stations = range(num_s) if s_list is None else s_list

    flow = np.zeros((len(stations), od.shape[1]), dtype=np.float32)
    for row, station in enumerate(stations):
        flow[row] = get_flow1(od, station, dir, num_s)
    return flow


def RMSE(f0, f1, axis=None):
    """Root-mean-square error between ``f0`` and ``f1``.

    ``axis`` is forwarded to ``np.mean``; ``None`` averages over all elements.
    """
    squared_error = np.square(f0 - f1)
    mean_squared_error = np.mean(squared_error, axis)
    return np.sqrt(mean_squared_error)


def SMAPE(real, predict):
    """Symmetric MAPE over the elements where both inputs are strictly positive.

    Both arrays are flattened; positions where either value is <= 0 are
    ignored.  Returns ``2 * mean(|a - b| / (|a| + |b|))`` over what remains.
    """
    actual = real.ravel()
    forecast = predict.ravel()

    both_positive = np.logical_and(actual > 0, forecast > 0)
    actual, forecast = actual[both_positive], forecast[both_positive]

    ratio = np.abs(actual - forecast) / (np.abs(actual) + np.abs(forecast))
    return 2 * np.mean(ratio)


def WMAPE(real, predict):
    """Weighted MAPE: total absolute error divided by the total of ``|real|``."""
    total_abs_error = np.sum(np.abs(real - predict))
    total_abs_real = np.sum(np.abs(real))
    return total_abs_error / total_abs_real


def MAE(real, predict):
    """Mean absolute error between ``real`` and ``predict`` over all elements."""
    abs_error = np.abs(real - predict)
    return np.mean(abs_error)

def MSE(f0, f1, axis=None):
    """Mean squared error between ``f0`` and ``f1``.

    ``axis`` is forwarded to ``np.mean``; ``None`` averages over all elements.
    """
    squared_error = np.square(f0 - f1)
    return np.mean(squared_error, axis)


def get_score(real, predict, real_flow, predict_flow):
    """Print RMSE, WMAPE, SMAPE, MAE and r2 for the OD matrices, then for the flows.

    ``real``/``predict`` are scored first, followed by a blank separator and
    the same metrics for ``real_flow``/``predict_flow``.  r2 is computed on
    the flattened arrays.  Nothing is returned.
    """
    od_metrics = (
        ('RMSE of OD: {}', RMSE),
        ('WMAPE of OD: {}', WMAPE),
        ('SMAPE of OD: {}', SMAPE),
        ('MAE of OD: {}', MAE),
    )
    for template, metric in od_metrics:
        print(template.format(metric(real, predict)))
    print('r2 of OD: {}'.format(r2_score(real.ravel(), predict.ravel())))

    print('\n')

    flow_metrics = (
        ('RMSE of flow: {}', RMSE),
        ('WMAPE of flow: {}', WMAPE),
        ('SMAPE of flow: {}', SMAPE),
        ('MAE of flow: {}', MAE),
    )
    for template, metric in flow_metrics:
        print(template.format(metric(real_flow, predict_flow)))
    print('r2 of flow: {}'.format(r2_score(real_flow.ravel(), predict_flow.ravel())))

# Load data

In [None]:
# Read the 'OD' variable from the .mat file.  The path is relative, so it is
# resolved against the current working directory.
# NOTE(review): presumably /content, where Drive is mounted -- confirm.
data0 = loadmat('drive//MyDrive//data//Huangzhou_OD.mat')
data0 = data0['OD']
# Keep only the columns of days with weekday code < 5; the first day in the
# file is given weekday code 1 and every day spans 36 columns (default `bs`).
data0 = remove_weekends(data0, start=1)
num_s = 80

# Subtract the mean of the training set
data = data0.astype(np.float64)
# The first 14*36 columns are folded, in Fortran order, into
# (num_s*num_s, 36, n_days) so that averaging over the last axis yields one
# mean per row and per within-day slot.
data_mean = data[:, 0:14*36].reshape([num_s*num_s, 36, -1], order='F')
data_mean = data_mean.mean(axis=2)
# Remove that per-slot mean from each of the 19 blocks of 36 columns.
for i in range(19):
    data[:, i*36:(i+1)*36] = data[:, i*36:(i+1)*36] - data_mean

# Prepare lagged flow as a feature
# `flow` stacks two blocks: the flow itself and the flow shifted one column
# to the right (the first column of the shifted block stays zero).
flow0 = od2flow(data, num_s=num_s)
flow = np.zeros((flow0.shape[0]*2, flow0.shape[1]), dtype=flow0.dtype)
flow[0:flow0.shape[0], :] = flow0
flow[flow0.shape[0]:, 1:] = flow0[:, 0:-1]

# Column indices of the training period (first 14*36 columns) and of the
# test period (the following 5*36 columns).
train_idx = np.arange(0, 14*36)
test_idx = np.arange(36*14, 36*19)

# Lags used to build the OD features.
h = [3, 4, 5, 6, 7, 8, 9, 10]
# Prepare train and validation data
X_train, Y_train = stagger_data(data[:, train_idx], h)
m_train = X_train.shape[1]
# Append the flow features taken one column before each target column,
# scaled by 1/num_s, then transpose so that every row is one sample.
X_train = np.concatenate([X_train, flow[:, train_idx][:, -m_train-1:-1]/num_s]).T
Y_train = Y_train.T

# Split training and validation set
# Random 80/20 split of the sample rows, with the seeds reset to 0 first.
# NOTE: `train_idx` is rebound here -- from now on it holds row indices into
# X_train/Y_train, not the column indices defined above.
reset_random_seeds(0)
random_idx = np.random.permutation(m_train)
train_idx = random_idx[0:int(np.floor(m_train*0.8))]
validate_idx = random_idx[int(np.floor(m_train*0.8)):]
x_train = X_train[train_idx, :]
y_train = Y_train[train_idx, :]
x_validate = X_train[validate_idx, :]
y_validate = Y_train[validate_idx, :]


# The test window is extended backwards by max(h) columns so that the first
# test target already has all of its lagged features available.
X_test, Y_test = stagger_data(data[:, (test_idx[0]-max(h)):(test_idx[-1]+1)], h)
X_test = np.concatenate([X_test, flow[:, test_idx-1]/num_s]).T
Y_test = Y_test.T

# Select model order

In [None]:
def create_net(n_input=num_s*num_s*10, n_hidden=50, activation=None):
    """Build an uncompiled two-layer dense network.

    Parameters
    ----------
    n_input : int
        Number of input features.  The default is computed from the
        module-level ``num_s`` once, when this function is defined.
    n_hidden : int
        Number of units in the hidden layer.
    activation : str or None
        Activation of the hidden layer; the output layer has none.

    Returns
    -------
    keras.Sequential
        Hidden ``Dense(n_hidden)`` followed by an output ``Dense`` with
        ``num_s * num_s`` units.  No kernel regulariser is applied.
    """
    hidden_layer = layers.Dense(
        n_hidden,
        input_shape=(n_input,),
        activation=activation,
    )
    output_layer = layers.Dense(num_s*num_s)
    return keras.Sequential([hidden_layer, output_layer])

# Early stopping on the validation loss: 20 epochs of patience, and the
# weights of the best epoch are restored when training stops.
call_back = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=20,
    verbose=0,
    mode='auto',
    baseline=None,
    restore_best_weights=True,
)

In [None]:
# Grid search over the hidden-layer activation and width.  Each candidate is
# trained from the same seeds with early stopping and ranked by its lowest
# validation loss; the best fitted model is kept in `best_model`.
activation_list = ['linear', 'sigmoid', 'relu']
# The `np.int` alias was removed in NumPy 1.24; the builtin `int` is the
# drop-in replacement.
n_hidden_list = np.linspace(10, 100, 10, dtype=int)
# Start from +inf rather than an arbitrary threshold so that the first
# candidate always becomes the current best and `best_model` is always bound.
best_e = float('inf')
best_model = None

for activation in activation_list:
    for n_hidden in n_hidden_list:
        # Drop the previous graph/state and reseed so that every
        # configuration starts from identical initial conditions.
        tf.keras.backend.clear_session()
        reset_random_seeds(0)
        seq = create_net(n_input=x_train.shape[1], n_hidden=n_hidden, activation=activation)
        gc.collect()
        seq.compile(loss="mean_squared_error", optimizer="RMSprop")
        # Use the History object returned by fit() instead of reaching into
        # the model's `history` attribute afterwards.
        history = seq.fit(
            x=x_train,
            y=y_train,
            batch_size=32,
            epochs=200,
            verbose=0,
            shuffle=True,
            validation_data=(x_validate, y_validate),
            callbacks=[call_back],
        )
        val_loss = min(history.history['val_loss'])

        if val_loss < best_e:
            best_e = val_loss
            best_model = seq
        print('Activation {} hidden_layers {}, val_loss {}, current best val_loss {}'.format(activation, n_hidden, val_loss, best_e))

Activation linear hidden_layers 10, val_loss 7.687071361927071, current best val_loss 7.687071361927071
Activation linear hidden_layers 20, val_loss 7.7264406078993675, current best val_loss 7.687071361927071
Activation linear hidden_layers 30, val_loss 7.834968065974688, current best val_loss 7.687071361927071
Activation linear hidden_layers 40, val_loss 7.881966850974343, current best val_loss 7.687071361927071
Activation linear hidden_layers 50, val_loss 7.904993095783272, current best val_loss 7.687071361927071
Activation linear hidden_layers 60, val_loss 7.723924906566889, current best val_loss 7.687071361927071
Activation linear hidden_layers 70, val_loss 7.914479140079383, current best val_loss 7.687071361927071
Activation linear hidden_layers 80, val_loss 7.839710818396674, current best val_loss 7.687071361927071
Activation linear hidden_layers 90, val_loss 7.918717567366783, current best val_loss 7.687071361927071
Activation linear hidden_layers 100, val_loss 7.888565891920918

# Multistep forecast for the test set

In [None]:
# One-step forecast
# Predict every sample at once: the training rows followed by the test rows,
# so row r of each prediction array corresponds to row r of this stack.
seq = best_model
X1 = np.concatenate([X_train, X_test], axis=0)
predict_OD1 = seq.predict(X1)
tf.keras.backend.clear_session()
gc.collect()


# Two-step forecast
# n  = number of rows of `data` (width of one lag block in the features)
# nh = number of lag blocks; the flow features start at column n*nh.
n = data.shape[0]
nh = len(h)
X2 = np.concatenate([X_train, X_test])
# Reuse one-step forecast OD
# The first lag block (columns 0:n, i.e. lag h[0] as laid out by
# stagger_data) of row r >= 3 is replaced by the one-step prediction of row r-3.
X2[3:, 0:n] = predict_OD1[0:-3, :]
# Reuse one-step forecast flow
# The first flow block of row r >= 3 is recomputed from the one-step
# prediction of row r-1, with the same 1/num_s scaling as the original feature.
X2[3:, n*nh:n*nh+num_s] = od2flow(predict_OD1[2:-1, :].T, num_s=num_s).T/num_s
predict_OD2 = seq.predict(X2)
tf.keras.backend.clear_session()
gc.collect()


# Three-step forecast
X3 = np.concatenate([X_train, X_test])
# Reuse one and two-step forecast OD
# For row r >= 4: the first lag block takes the two-step prediction of row
# r-3 and the second lag block the one-step prediction of row r-4.
X3[4:, 0:n] = predict_OD2[1:-3, :]
X3[4:, n:2*n] = predict_OD1[0:-4, :]
# Reuse one and two-step forecast flow
# For row r >= 4: the first flow block comes from the two-step prediction of
# row r-1 and the remaining flow columns from the one-step prediction of row r-2.
X3[4:, n*nh:n*nh+num_s] = od2flow(predict_OD2[3:-1, :].T, num_s=num_s).T/num_s
X3[4:, n*nh+num_s:] = od2flow(predict_OD1[2:-2, :].T, num_s=num_s).T/num_s
predict_OD3 = seq.predict(X3)
tf.keras.backend.clear_session()
gc.collect()

878

# Add mean back and save results to file

In [None]:
# Ground truth for the test period (original scale, mean not removed).
real_OD = data0[:, test_idx]
real_flow = od2flow(real_OD, num_s=num_s)

# Number of test columns; replaces the hard-coded 180 used previously.
n_test = len(test_idx)


def _finalize_forecast(pred_centered, step):
    """Restore the mean on one set of forecasts, print its scores and return it.

    Parameters
    ----------
    pred_centered : np.ndarray of shape (n_samples, n_od)
        Mean-removed model output with one row per sample; the last
        ``n_test`` rows are taken as the test period.
    step : int
        Forecast horizon, only used in the printed header.

    Returns
    -------
    (od, flow)
        ``od`` has shape (n_od, n_test) with ``data_mean`` added back,
        ``flow`` is ``od2flow(od)``.

    Raises
    ------
    ValueError
        If ``pred_centered`` does not have one column per row of
        ``data_mean``, which is what happens when this cell is re-run on
        arrays it has already processed.
    """
    if pred_centered.ndim != 2 or pred_centered.shape[1] != data_mean.shape[0]:
        raise ValueError(
            'expected raw predictions of shape (n_samples, {}), got {}; '
            're-run the forecasting cell before re-running this one'.format(
                data_mean.shape[0], pred_centered.shape))

    # Copy so that the raw prediction array is not modified through a
    # transposed view.
    od = pred_centered[-n_test:, :].T.copy()
    # Column i of the test period gets the mean of within-day slot
    # i % slots_per_day (data_mean has one column per slot).
    slot = np.arange(od.shape[1]) % data_mean.shape[1]
    od += data_mean[:, slot]

    flow = od2flow(od, num_s=num_s)
    print("\n The result of {}-step prediction: \n".format(step))
    get_score(real_OD, od, real_flow, flow)
    return od, flow


# Add mean values
predict_OD1, predict_flow1 = _finalize_forecast(predict_OD1, 1)
predict_OD2, predict_flow2 = _finalize_forecast(predict_OD2, 2)
predict_OD3, predict_flow3 = _finalize_forecast(predict_OD3, 3)

np.savez_compressed('/content/drive/MyDrive/data/Hangzhou_OD_FNN_step1.npz', data=predict_OD1)
np.savez_compressed('/content/drive/MyDrive/data/Hangzhou_OD_FNN_step2.npz', data=predict_OD2)
np.savez_compressed('/content/drive/MyDrive/data/Hangzhou_OD_FNN_step3.npz', data=predict_OD3)


 The result of 1-step prediction: 

RMSE of OD: 3.9719648466476154
WMAPE of OD: 0.3357734532637882
SMAPE of OD: 0.4712526885640867
MAE of OD: 1.8040434348136745
r2 of OD: 0.9078092935222805


RMSE of flow: 67.16141510009766
WMAPE of flow: 0.09000366181135178
SMAPE of flow: 0.13968013226985931
MAE of flow: 38.68572998046875
r2 of flow: 0.9809799633289706

 The result of 2-step prediction: 

RMSE of OD: 4.012984099561706
WMAPE of OD: 0.33630288879958387
SMAPE of OD: 0.47392898437843534
MAE of OD: 1.8068879857846496
r2 of OD: 0.9058953186107709


RMSE of flow: 68.82543182373047
WMAPE of flow: 0.0922432690858841
SMAPE of flow: 0.14330947399139404
MAE of flow: 39.64836883544922
r2 of flow: 0.9800257905127567

 The result of 3-step prediction: 

RMSE of OD: 4.047180802658473
WMAPE of OD: 0.33652981381422836
SMAPE of OD: 0.4770053964984703
MAE of OD: 1.8081072083851415
r2 of OD: 0.9042846562288088


RMSE of flow: 70.76851654052734
WMAPE of flow: 0.09501465409994125
SMAPE of flow: 0.147140979