Temporal regularized matrix factorization (TRMF) for metro OD forecasting. Code is adapted from [https://github.com/xinychen/transdim](https://github.com/xinychen/transdim)

Original paper for TRMF:
- Hsiang-Fu Yu, Nikhil Rao, Inderjit S. Dhillon, 2016. Temporal regularized matrix factorization for high-dimensional time series prediction. 30th Conference on Neural Information Processing Systems (NIPS 2016).

# Define functions

In [1]:
from functions import *
from numpy.linalg import inv as inv
import random
import time


def reset_random_seeds(n=1):
    """Seed the global Python and NumPy random generators with ``n``.

    The same value is also written, as a string, to the ``PYTHONHASHSEED``
    environment variable of the current process.
    """
    seed = n
    # The three statements are independent of each other.
    random.seed(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)


def kr_prod(a, b):
    """Return the column-wise Khatri-Rao product of two 2-D arrays.

    For ``a`` of shape (I, R) and ``b`` of shape (J, R) the result has shape
    (I * J, R); row ``i * J + j`` holds the element-wise product of row ``i``
    of ``a`` with row ``j`` of ``b``.
    """
    n_rows = a.shape[0] * b.shape[0]
    # pairwise[i, j, r] = a[i, r] * b[j, r]
    pairwise = np.einsum('ir, jr -> ijr', a, b)
    return pairwise.reshape(n_rows, -1)


def TRMF(train_data, init, time_lags, lambda_w, lambda_x, lambda_theta, eta, maxiter, multi_steps=1, display=10):
    """Fit a temporal regularized matrix factorization and forecast ahead.

    ``train_data`` is approximated by ``W @ X.T``; every row of ``X`` is tied
    to earlier rows through an element-wise, lag-weighted sum with weights
    ``theta``. ``W``, ``X`` and ``theta`` are updated in turn for ``maxiter``
    sweeps, then ``X`` is extended by ``multi_steps`` rows using ``theta``.

    Args:
        train_data: 2-D array of shape (dim1, dim2). Entries equal to 0 are
            treated as unobserved and left out of the data-fit terms.
        init: dict with the starting factors ``"W"`` (dim1, r), ``"X"``
            (dim2, r) and ``"theta"`` (len(time_lags), r). These arrays are
            modified in place.
        time_lags: 1-D integer NumPy array of lags (used in index arithmetic).
        lambda_w: ridge weight added as ``lambda_w * I`` to each row system of W.
        lambda_x: weight applied to the temporal terms of each row system of X.
        lambda_theta: ridge weight for theta, applied as ``lambda_theta / lambda_x``.
        eta: additional ridge on the rows of X, applied as ``lambda_x * eta * I``.
        maxiter: number of alternating sweeps.
        multi_steps: number of rows appended to X by rolling theta forward.
        display: elapsed time is printed every ``display`` sweeps.

    Returns:
        Tuple ``(W, X_new, theta, forecast)`` where ``X_new`` has shape
        (dim2 + multi_steps, r) and ``forecast`` equals
        ``W @ X_new[dim2:dim2 + multi_steps].T`` with shape (dim1, multi_steps).
    """
    start = time.time()
    W = init["W"]
    X = init["X"]
    theta = init["theta"]

    dim1, dim2 = train_data.shape
    # Indicator of the entries that take part in the least-squares fit.
    binary_mat = np.zeros((dim1, dim2))
    binary_mat[np.where(train_data != 0)] = 1

    d = len(time_lags)
    r = theta.shape[1]
    # Loop-invariant lag bounds.
    max_lag = np.max(time_lags)
    min_lag = np.min(time_lags)

    for it in range(maxiter):
        if (it + 1) % display == 0:
            print('Time step: {} time {}'.format(it + 1, time.time() - start))

        # --- Update W: one ridge-regularized r x r system per row of train_data.
        var1 = X.T
        var2 = kr_prod(var1, var1)
        var3 = np.matmul(var2, binary_mat.T)  # column i: flattened sum of x x^T over observed columns
        var4 = np.matmul(var1, train_data.T)
        for i in range(dim1):
            W[i, :] = np.matmul(inv((var3[:, i].reshape([r, r])) + lambda_w * np.eye(r)), var4[:, i])

        # --- Update X: one r x r system per column of train_data.
        var1 = W.T
        var2 = kr_prod(var1, var1)
        var3 = np.matmul(var2, binary_mat)
        var4 = np.matmul(var1, train_data)
        for t in range(dim2):
            Mt = np.zeros((r, r))
            Nt = np.zeros(r)
            # Pt/Qt: term in which row t is predicted from its own lagged rows;
            # only available once max_lag earlier rows exist.
            if t < max_lag:
                Pt = np.zeros((r, r))
                Qt = np.zeros(r)
            else:
                Pt = np.eye(r)
                Qt = np.einsum('ij, ij -> j', theta, X[t - time_lags, :])
            if t < dim2 - min_lag:
                # Mt/Nt: terms in which row t acts as a lagged input of a later row.
                if t >= max_lag and t < dim2 - max_lag:
                    index = list(range(0, d))
                else:
                    # Keep only the lags whose target row t + lag exists and has a full history.
                    index = np.where((t + time_lags >= max_lag) & (t + time_lags < dim2))[0]
                for k in index:
                    # theta with lag k zeroed: the contribution of the other lags to row t + lag_k.
                    theta0 = theta.copy()
                    theta0[k, :] = 0
                    Mt = Mt + np.diag(theta[k, :] ** 2)
                    Nt = Nt + np.multiply(theta[k, :], (X[t + time_lags[k], :]
                                                        - np.einsum('ij, ij -> j', theta0,
                                                                    X[t + time_lags[k] - time_lags, :])))
                X[t, :] = np.matmul(inv(var3[:, t].reshape([r, r])
                                        + lambda_x * Pt + lambda_x * Mt + lambda_x * eta * np.eye(r)),
                                    (var4[:, t] + lambda_x * Qt + lambda_x * Nt))
            else:
                # Last min_lag rows: no later row depends on them.
                # NOTE(review): Qt is not scaled by lambda_x here, unlike the branch
                # above; kept as in the original code -- confirm whether intended.
                X[t, :] = np.matmul(inv(var3[:, t].reshape([r, r]) + lambda_x * Pt
                                        + lambda_x * eta * np.eye(r)), (var4[:, t] + Qt))

        # --- Update theta: one diagonal ridge system per lag.
        for k in range(d):
            var1 = X[max_lag - time_lags[k]: dim2 - time_lags[k], :]
            var2 = inv(np.diag(np.einsum('ij, ij -> j', var1, var1)) + (lambda_theta / lambda_x) * np.eye(r))
            var3 = np.zeros(r)
            for t in range(max_lag - time_lags[k], dim2 - time_lags[k]):
                # Residual of row t + lag_k with the contribution of lag k added back.
                var3 = var3 + np.multiply(X[t, :],
                                          (X[t + time_lags[k], :]
                                           - np.einsum('ij, ij -> j', theta, X[t + time_lags[k] - time_lags, :])
                                           + np.multiply(theta[k, :], X[t, :])))
            theta[k, :] = np.matmul(var2, var3)

    # Size the extended factor matrix from the local factor width `r`; the
    # function must not depend on a module-level `rank` variable.
    X_new = np.zeros((dim2 + multi_steps, r))
    X_new[0: dim2, :] = X
    for step in range(multi_steps):
        X_new[dim2 + step, :] = np.einsum('ij, ij -> j', theta, X_new[dim2 + step - time_lags, :])

    return W, X_new, theta, np.matmul(W, X_new[dim2: dim2 + multi_steps, :].T)


def OnlineTRMF(sparse_vec, init, lambda_x, time_lags):
    """Re-estimate the last row of ``X`` from one newly observed column.

    ``W`` and ``theta`` from ``init`` are used as they are. The last row of
    ``init["X"]`` is overwritten in place with the solution of a regularized
    least-squares problem that combines the non-zero entries of ``sparse_vec``
    with the lag-weighted prediction built from the earlier rows of ``X``.

    Args:
        sparse_vec: 1-D array; entries equal to 0 are treated as unobserved.
        init: dict with ``"W"``, ``"X"`` and ``"theta"``.
        lambda_x: weight of the temporal prediction and of the ridge term.
        time_lags: 1-D integer NumPy array of lags.

    Returns:
        The same ``X`` array that was passed in, with its last row replaced.
    """
    W, X, theta = init["W"], init["X"], init["theta"]
    n_rows, rank = X.shape
    last = n_rows - 1

    # 0/1 indicator of the observed entries.
    observed = np.zeros(sparse_vec.shape[0])
    observed[np.where(sparse_vec != 0)] = 1

    # Prediction of the last row from its lagged rows.
    ar_prior = np.einsum('ij, ij -> j', theta, X[last - time_lags, :])

    factors_t = W.T
    gram = np.matmul(kr_prod(factors_t, factors_t), observed).reshape([rank, rank])
    rhs = np.matmul(factors_t, sparse_vec) + lambda_x * ar_prior
    X[last, :] = np.matmul(inv(gram + lambda_x * np.eye(rank)), rhs)
    return X


def st_prediction(train_data, time_lags, lambda_w, lambda_x, lambda_theta, eta,
                  rank, pred_time_steps, maxiter, multi_steps=1, display=100):
    """Rolling multi-step forecast over the last ``pred_time_steps`` columns.

    TRMF is fitted once on the columns preceding the forecast window, starting
    from random factors. For every later step the most recent temporal factors
    are refreshed from the newly available column (via ``OnlineTRMF``) when it
    has more than ``rank`` positive entries, otherwise carried over unchanged,
    and then rolled forward ``multi_steps`` times with ``theta``. ``W`` and
    ``theta`` are not re-estimated after the initial fit.

    Args:
        train_data: 2-D array; its last ``pred_time_steps`` columns form the
            forecast window.
        time_lags: 1-D integer NumPy array of lags.
        lambda_w, lambda_x, lambda_theta, eta, maxiter, display: passed to ``TRMF``.
        rank: number of latent factors.
        pred_time_steps: number of rolling forecast origins.
        multi_steps: number of horizons forecast from each origin.

    Returns:
        dict mapping horizon ``1..multi_steps`` to an array of shape
        (train_data.shape[0], pred_time_steps); column ``t`` holds the forecast
        issued at rolling step ``t``.
    """
    tic = time.time()
    start_time = train_data.shape[1] - pred_time_steps
    history = train_data[:, :start_time]
    dim1, dim2 = history.shape
    max_time_lag = max(time_lags)
    results = {step + 1: np.zeros((dim1, pred_time_steps)) for step in range(multi_steps)}

    for t in range(pred_time_steps):
        if t == 0:
            # Batch fit on the history; the random draws happen in the order W, X, theta.
            init = {"W": 0.1 * np.random.rand(dim1, rank), "X": 0.1 * np.random.rand(dim2, rank),
                    "theta": 0.1 * np.random.rand(time_lags.shape[0], rank)}
            W, X, theta, mat_f = TRMF(history, init, time_lags,
                                      lambda_w, lambda_x, lambda_theta, eta, maxiter, multi_steps, display=display)
            # Assign forecast to the corresponding step
            for step in range(multi_steps):
                results[step + 1][:, t] = mat_f[:, step]
            # Keep recent max_time_lag + one-step forecast
            X0 = X[dim2 - max_time_lag:dim2 + 1, :].copy()
        else:
            sparse_vec = train_data[:, start_time + t - 1]
            if np.where(sparse_vec > 0)[0].shape[0] > rank:
                # Enough positive entries: correct the newest factor row with the observation.
                X = OnlineTRMF(sparse_vec, {"W": W, "X": X0, "theta": theta}, lambda_x / dim2, time_lags)
            else:
                # Otherwise keep the previous one-step forecast as the newest row.
                X = X0.copy()

            # Drop the oldest row and roll theta forward multi_steps times.
            window = np.zeros((max_time_lag + multi_steps, rank))
            window[:max_time_lag, :] = X[1:, :]
            for step in range(multi_steps):
                step_X = np.einsum('ij, ij -> j', theta, window[max_time_lag + step - time_lags, :])
                window[max_time_lag + step, :] = step_X
                results[step + 1][:, t] = W @ step_X
            # Keep recent max_time_lag + one-step forecast
            X0 = window[:max_time_lag + 1, :]

        if (t + 1) % 40 == 0:
            print('Time step: {}, time {}'.format(t + 1, time.time() - tic))
    return results

# Import data

In [2]:
# Load the 'OD' array from the .mat file and pass it through the project
# helper remove_weekends (presumably drops weekend columns -- see functions.py).
data0 = remove_weekends(loadmat('..//data//Hangzhou_OD.mat')['OD'], start=1)

# Column split in blocks of 36 columns: the first 14 blocks are used for
# training, the following 5 blocks for testing.
num_s = 80
train_idx = np.arange(0, 36 * 14)
test_idx = np.arange(36 * 14, 36 * 19)

# Subtract the mean in the training set
data = data0.astype(np.float64)
# Fortran-order reshape gives (row, position within block, block); averaging
# over the last axis yields one mean per row and within-block position.
data_mean = data[:, train_idx].reshape([num_s * num_s, 36, -1], order='F').mean(axis=2)
for i in range(19):
    data[:, i * 36:(i + 1) * 36] -= data_mean

# Parameter tuning
## Tune weights

In [3]:
# One-step forecasts over the last 36 * 4 columns of the training span are
# used to pick a single regularization weight shared by W, X and theta.
multi_steps = 1
pred_time_steps = 36 * 4 + (multi_steps - 1)
train_data = data[:, train_idx]
time_lags = np.arange(1, 11)
d = len(time_lags)
maxiter, eta, rank = 300, 0.03, 40

weights = range(1000, 10000, 1000)
rmse_list = []
reset_random_seeds(1)
for weight in weights:
    lambda_w = lambda_x = lambda_theta = weight
    results = st_prediction(train_data, time_lags, lambda_w, lambda_x, lambda_theta,
                            eta, rank, pred_time_steps, maxiter, multi_steps, display=100)
    rmse_list.append(RMSE(train_data[:, -36 * 4:], results[1]))
    print(rmse_list)

# Keep the candidate with the lowest validation RMSE.
best_weight = weights[np.argmin(rmse_list)]
print('best_weight is {}'.format(best_weight))  # was 3000

Time step: 100 time 79.45164084434509
Time step: 200 time 159.3595895767212
Time step: 300 time 239.56135749816895
Time step: 40, time 242.61774325370789
Time step: 80, time 244.84792494773865
Time step: 120, time 247.15725874900818
[3.0052290144005642]
Time step: 100 time 77.43766212463379
Time step: 200 time 154.6269087791443
Time step: 300 time 232.30538177490234
Time step: 40, time 235.30813121795654
Time step: 80, time 237.46391034126282
Time step: 120, time 239.5571985244751
[3.0052290144005642, 2.9231605124035376]
Time step: 100 time 77.11366987228394
Time step: 200 time 155.49117469787598
Time step: 300 time 233.71165561676025
Time step: 40, time 236.46105670928955
Time step: 80, time 238.53872323036194
Time step: 120, time 240.8350956439972
[3.0052290144005642, 2.9231605124035376, 2.867187013790351]
Time step: 100 time 77.1600730419159
Time step: 200 time 155.38090705871582
Time step: 300 time 232.86850905418396
Time step: 40, time 235.86785173416138
Time step: 80, time 238.00

## Tune rank

In [4]:
# With the regularization weight fixed, scan the number of latent factors on
# the same validation window.
lambda_w = lambda_x = lambda_theta = best_weight
ranks = range(20, 65, 5)
rmse_list = []
reset_random_seeds(1)
for rank in ranks:
    results = st_prediction(train_data, time_lags, lambda_w, lambda_x, lambda_theta, eta,
                            rank=rank, pred_time_steps=pred_time_steps,
                            maxiter=maxiter, multi_steps=multi_steps)
    rmse_list.append(RMSE(train_data[:, -36 * 4:], results[1]))
    print(rmse_list)

# Keep the candidate with the lowest validation RMSE.
best_rank = ranks[np.argmin(rmse_list)]
print("best_rank is {}".format(best_rank))  # was 35

Time step: 100 time 59.36144161224365
Time step: 200 time 119.14671015739441
Time step: 300 time 179.3572609424591
Time step: 40, time 180.70070719718933
Time step: 80, time 181.64855217933655
Time step: 120, time 182.59422874450684
[2.8717235320912806]
Time step: 100 time 68.70429921150208
Time step: 200 time 138.67472863197327
Time step: 300 time 208.43986248970032
Time step: 40, time 210.33303141593933
Time step: 80, time 211.3952956199646
Time step: 120, time 212.53566813468933
[2.8717235320912806, 2.862204449291198]
Time step: 100 time 77.98514008522034
Time step: 200 time 155.91173601150513
Time step: 300 time 234.240008354187
Time step: 40, time 236.6175708770752
Time step: 80, time 238.38280320167542
Time step: 120, time 239.91370820999146
[2.8717235320912806, 2.862204449291198, 2.8817985289484516]
Time step: 100 time 105.11379528045654
Time step: 200 time 211.31941318511963
Time step: 300 time 317.82301902770996
Time step: 40, time 321.0566785335541
Time step: 80, time 323.274

# Forecast and save results

In [5]:
# Final run on the full mean-removed series with the tuned hyper-parameters.
# The forecast window is 36 * 5 columns plus (multi_steps - 1) extra origins,
# so that every horizon can be sliced to the same 180 target columns below.
multi_steps = 3
pred_time_steps = 36 * 5 + (multi_steps - 1)
train_data = data
time_lags = np.arange(1, 11)
rank = best_rank
lambda_w = lambda_x = lambda_theta = best_weight
eta = 0.03
maxiter = 300

reset_random_seeds(1)
results = st_prediction(train_data, time_lags, lambda_w, lambda_x, lambda_theta,
                        eta, rank, pred_time_steps, maxiter, multi_steps)

# RMSE on the mean-removed scale; horizon h is read with an offset of 3 - h columns.
for step in range(3):
    print(RMSE(data[:, -180:], results[step + 1][:, 2 - step:2 - step + 180]))

# Same slices as above, copied so the stored results stay untouched.
mat_hat1, mat_hat2, mat_hat3 = (results[h][:, 3 - h:3 - h + 180].copy() for h in (1, 2, 3))
# Add back the mean that was subtracted for each within-block position.
for i in range(mat_hat1.shape[1]):
    for mat_hat in (mat_hat1, mat_hat2, mat_hat3):
        mat_hat[:, i] += data_mean[:, i % 36]

# Scores are computed against the original (not mean-removed) data.
real_OD = data0[:, -180:]
real_flow = od2flow(real_OD, num_s=80)

print('Results of 1-step forecasting:')
predict_flow1 = od2flow(mat_hat1, num_s=80)
get_score(real_OD, mat_hat1, real_flow, predict_flow1)

print('Results of 2-step forecasting:')
predict_flow2 = od2flow(mat_hat2, num_s=80)
get_score(real_OD, mat_hat2, real_flow, predict_flow2)

print('Results of 3-step forecasting:')
predict_flow3 = od2flow(mat_hat3, num_s=80)
get_score(real_OD, mat_hat3, real_flow, predict_flow3)

# Persist one compressed archive per horizon under the key 'data'.
np.savez_compressed('..//data//Hangzhou_OD_TRMF_step1.npz', data=mat_hat1)
np.savez_compressed('..//data//Hangzhou_OD_TRMF_step2.npz', data=mat_hat2)
np.savez_compressed('..//data//Hangzhou_OD_TRMF_step3.npz', data=mat_hat3)

Time step: 100 time 121.41348910331726
Time step: 200 time 244.27593517303467
Time step: 300 time 368.3721342086792
Time step: 40, time 372.0929684638977
Time step: 80, time 374.3268518447876
Time step: 120, time 376.6232120990753
Time step: 160, time 378.82584285736084
3.7955850349910967
4.282109640092683
4.850035345958773
Results of 1-step forecasting:
RMSE of OD: 3.7955850349910967
WMAPE of OD: 0.3425860171650234
SMAPE of OD: 1.0086288815059803
MAE of OD: 1.8406459745940205
r2 of OD: 0.9158151772082446


RMSE of flow: 70.19953155517578
WMAPE of flow: 0.09900061786174774
SMAPE of flow: 0.04952661693096161
MAE of flow: 42.55283737182617
r2 of flow: 0.9792202572747797
Results of 2-step forecasting:
RMSE of OD: 4.282109640092683
WMAPE of OD: 0.3802070686893558
SMAPE of OD: 0.824451725123523
MAE of OD: 2.0427763406296555
r2 of OD: 0.8928500638751655


RMSE of flow: 105.82966613769531
WMAPE of flow: 0.14904947578907013
SMAPE of flow: 0.17815051972866058
MAE of flow: 64.06503295898438
r2 o