Temporal regularized matrix factorization (TRMF) for metro OD forecasting. Code is adapted from [https://github.com/xinychen/transdim](https://github.com/xinychen/transdim)

Original paper for TRMF:
- Hsiang-Fu Yu, Nikhil Rao, Inderjit S. Dhillon, 2016. Temporal regularized matrix factorization for high-dimensional time series prediction. 30th Conference on Neural Information Processing Systems (NIPS 2016),

# Define functions

In [1]:
from functions import *
from numpy.linalg import inv as inv
import random
import time


def reset_random_seeds(n=1):
    os.environ['PYTHONHASHSEED'] = str(n)
    np.random.seed(n)
    random.seed(n)

def ar4cast(theta, X, time_lags, multi_step):
    dim, rank = X.shape
    d = time_lags.shape[0]
    X_new = np.append(X, np.zeros((multi_step, rank)), axis = 0)
    for t in range(multi_step):
        X_new[dim + t, :] = np.einsum('kr, kr -> r', theta, X_new[dim + t - time_lags, :])
    return X_new

def TRMF(dense_mat, sparse_mat, init_para, init_hyper, time_lags, maxiter):
    """Temporal Regularized Matrix Factorization, TRMF."""

    ## Initialize parameters
    W = init_para["W"]
    X = init_para["X"]
    theta = init_para["theta"]

    ## Set hyperparameters
    lambda_w = init_hyper["lambda_w"]
    lambda_x = init_hyper["lambda_x"]
    lambda_theta = init_hyper["lambda_theta"]
    eta = init_hyper["eta"]

    dim1, dim2 = sparse_mat.shape
    pos_train = np.where(sparse_mat != 0)
    pos_test = np.where((dense_mat != 0) & (sparse_mat == 0))
    binary_mat = sparse_mat.copy()
    binary_mat[pos_train] = 1
    d, rank = theta.shape

    for it in range(maxiter):
        ## Update spatial matrix W
        for i in range(dim1):
            pos0 = np.where(sparse_mat[i, :] != 0)
            Xt = X[pos0[0], :]
            vec0 = Xt.T @ sparse_mat[i, pos0[0]]
            mat0 = inv(Xt.T @ Xt + lambda_w * np.eye(rank))
            W[i, :] = mat0 @ vec0
        ## Update temporal matrix X
        for t in range(dim2):
            pos0 = np.where(sparse_mat[:, t] != 0)
            Wt = W[pos0[0], :]
            Mt = np.zeros((rank, rank))
            Nt = np.zeros(rank)
            if t < np.max(time_lags):
                Pt = np.zeros((rank, rank))
                Qt = np.zeros(rank)
            else:
                Pt = np.eye(rank)
                Qt = np.einsum('ij, ij -> j', theta, X[t - time_lags, :])
            if t < dim2 - np.min(time_lags):
                if t >= np.max(time_lags) and t < dim2 - np.max(time_lags):
                    index = list(range(0, d))
                else:
                    index = list(np.where((t + time_lags >= np.max(time_lags)) & (t + time_lags < dim2)))[0]
                for k in index:
                    Ak = theta[k, :]
                    Mt += np.diag(Ak ** 2)
                    theta0 = theta.copy()
                    theta0[k, :] = 0
                    Nt += np.multiply(Ak, X[t + time_lags[k], :]
                                      - np.einsum('ij, ij -> j', theta0, X[t + time_lags[k] - time_lags, :]))
            vec0 = Wt.T @ sparse_mat[pos0[0], t] + lambda_x * Nt + lambda_x * Qt
            mat0 = inv(Wt.T @ Wt + lambda_x * Mt + lambda_x * Pt + lambda_x * eta * np.eye(rank))
            X[t, :] = mat0 @ vec0
        ## Update AR coefficients theta
        for k in range(d):
            theta0 = theta.copy()
            theta0[k, :] = 0
            mat0 = np.zeros((dim2 - np.max(time_lags), rank))
            for L in range(d):
                mat0 += X[np.max(time_lags) - time_lags[L] : dim2 - time_lags[L] , :] @ np.diag(theta0[L, :])
            VarPi = X[np.max(time_lags) : dim2, :] - mat0
            var1 = np.zeros((rank, rank))
            var2 = np.zeros(rank)
            for t in range(np.max(time_lags), dim2):
                B = X[t - time_lags[k], :]
                var1 += np.diag(np.multiply(B, B))
                var2 += np.diag(B) @ VarPi[t - np.max(time_lags), :]
            theta[k, :] = inv(var1 + lambda_theta * np.eye(rank) / lambda_x) @ var2

        X_new = ar4cast(theta, X, time_lags, multi_step)
        mat_new = W @ X_new[- multi_step :, :].T
        mat_hat = W @ X.T
    mat_hat = np.append(mat_hat, mat_new, axis = 1)

    return mat_hat, W, X_new, theta


def update_x_partial(sparse_mat, W, X, theta, lambda_x, eta, time_lags, back_step):
    d = time_lags.shape[0]
    dim2, rank = X.shape
    tmax = np.max(time_lags)
    for t in range(dim2 - back_step, dim2):
        pos0 = np.where(sparse_mat[:, t] != 0)
        Wt = W[pos0[0], :]
        Mt = np.zeros((rank, rank))
        Nt = np.zeros(rank)
        if t < tmax:
            Pt = np.zeros((rank, rank))
            Qt = np.zeros(rank)
        else:
            Pt = np.eye(rank)
            Qt = np.einsum('ij, ij -> j', theta, X[t - time_lags, :])
        if t < dim2 - np.min(time_lags):
            if t >= tmax and t < dim2 - tmax:
                index = list(range(0, d))
            else:
                index = list(np.where((t + time_lags >= tmax) & (t + time_lags < dim2)))[0]
            for k in index:
                Ak = theta[k, :]
                Mt += np.diag(Ak ** 2)
                theta0 = theta.copy()
                theta0[k, :] = 0
                Nt += np.multiply(Ak, X[t + time_lags[k], :]
                                  - np.einsum('ij, ij -> j', theta0, X[t + time_lags[k] - time_lags, :]))
        vec0 = Wt.T @ sparse_mat[pos0[0], t] + lambda_x * Nt + lambda_x * Qt
        mat0 = inv(Wt.T @ Wt + lambda_x * Mt + lambda_x * Pt + lambda_x * eta * np.eye(rank))
        X[t, :] = mat0 @ vec0
    return X


def TRMF_partial(dense_mat, sparse_mat, init_para, init_hyper, time_lags, maxiter):
    ## Initialize parameters
    W = init_para["W"]
    X = init_para["X"]
    theta = init_para["theta"]
    ## Set hyperparameters
    lambda_x = init_hyper["lambda_x"]
    eta = init_hyper["eta"]
    back_step = 10 * multi_step
    for it in range(maxiter):
        X = update_x_partial(sparse_mat, W, X, theta, lambda_x, eta, time_lags, back_step)
    X_new = ar4cast(theta, X, time_lags, multi_step)
    mat_hat = W @ X_new[- multi_step :, :].T
    mat_hat[mat_hat < 0] = 0

    return mat_hat, W, X_new, theta


def TRMF_forecast(dense_mat, sparse_mat, init_hyper, pred_step, multi_step, rank, time_lags, maxiter, maxiter2=10):
    dim1, T = dense_mat.shape
    d = time_lags.shape[0]
    start_time = T - pred_step
    results = {step + 1: np.zeros((dim1, pred_time_steps)) for step in range(multi_step)}
    for t in range(pred_time_steps):
        if t == 0:
            init_para = {"W": 0.1 * np.random.randn(dim1, rank),
                         "X": 0.1 * np.random.randn(start_time, rank),
                         "theta": 0.1 * np.random.randn(d, rank)}
            mat, W, X_new, theta = TRMF(dense_mat[:, 0 : start_time], sparse_mat[:, 0 : start_time],
                                        init_para, init_hyper, time_lags, maxiter)
            X_new = X_new[0: (start_time + t), :]
        else:
            init_para = {"W": W, "X": X_new, "theta": theta}
            mat, W, X_new, theta = TRMF_partial(dense_mat[:, 0 : start_time + t],
                                                sparse_mat[:, 0 : start_time + t],
                                                init_para, init_hyper, time_lags, maxiter2)
            X_new = X_new[0: (start_time + t), :]
        for step in range(multi_step):
            results[step+1][:, t] = mat[:, -multi_step+step]

        if (t + 1) % 36 == 0:
            print('Time step: {}'.format(t + 1))

    return results

# Import data

In [2]:
data0 = loadmat('..//data//Hangzhou_OD.mat')
data0 = data0['OD']
data0 = remove_weekends(data0, start=1)

train_idx = np.arange(0, 36 * 14)
test_idx = np.arange(36 * 14, 36 * 19)
num_s = 80

# Subtract the mean in the training set
data = data0.astype(np.float64)
data_mean = data[:, train_idx].reshape([num_s * num_s, 36, -1], order='F')
data_mean = data_mean.mean(axis=2)
for i in range(19):
    data[:, i * 36:(i + 1) * 36] = data[:, i * 36:(i + 1) * 36] - data_mean

# Parameter tuning
## Tune weights

In [3]:
multi_step = 1
pred_time_steps = 36 * 4 + (multi_step - 1)
train_data = data[:, train_idx]
time_lags = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
d = time_lags.shape[0]
maxiter = 200
eta = 0.03
rank = 40
rmse_list = []
weights = [100, 300, 500, 1000, 1500, 2000]
start = time.time()
reset_random_seeds(1)
for weight in weights:
    init_hyper = {"lambda_w": weight, "lambda_x": weight, "lambda_theta": weight, "eta": eta}
    results = TRMF_forecast(train_data, train_data, init_hyper, pred_time_steps, multi_step, rank, time_lags, maxiter, maxiter2=10)
    rmse_list.append(RMSE(train_data[:, -36 * 4:], results[1]))
    print('weight={}, time={}'.format(weight, time.time()-start))
    print(rmse_list)

best_weight = weights[np.argmin(rmse_list)]
print('best_weight is {}'.format(best_weight))  # was 3000

Time step: 36
Time step: 72
Time step: 108
Time step: 144
weight=100, time=284.8777244091034
[2.9905920867598628]
Time step: 36
Time step: 72
Time step: 108
Time step: 144
weight=300, time=562.4224574565887
[2.9905920867598628, 2.9726288993595706]
Time step: 36
Time step: 72
Time step: 108
Time step: 144
weight=500, time=839.296044588089
[2.9905920867598628, 2.9726288993595706, 2.9655605801566196]
Time step: 36
Time step: 72
Time step: 108
Time step: 144
weight=1000, time=1116.9267058372498
[2.9905920867598628, 2.9726288993595706, 2.9655605801566196, 2.984341106994974]
Time step: 36
Time step: 72
Time step: 108
Time step: 144
weight=1500, time=1395.1043837070465
[2.9905920867598628, 2.9726288993595706, 2.9655605801566196, 2.984341106994974, 3.002234674724591]
Time step: 36
Time step: 72
Time step: 108
Time step: 144
weight=2000, time=1672.5508267879486
[2.9905920867598628, 2.9726288993595706, 2.9655605801566196, 2.984341106994974, 3.002234674724591, 3.0323108519909576]
best_weight is 5

# Tune rank

In [4]:
init_hyper = {"lambda_w": best_weight, "lambda_x": best_weight, "lambda_theta": best_weight, "eta": eta}
rmse_list = []
ranks = range(30, 150, 10)
reset_random_seeds(1)
for rank in ranks:
    results = TRMF_forecast(train_data, train_data, init_hyper, pred_time_steps, multi_step, rank, time_lags, maxiter, maxiter2=10)
    rmse_list.append(RMSE(train_data[:, -36 * 4:], results[1]))
    print('weight={}, time={}'.format(rank, time.time()-start))
    print(rmse_list)

best_rank = ranks[np.argmin(rmse_list)]
print("best_rank is {}".format(best_rank))

Time step: 36
Time step: 72
Time step: 108
Time step: 144
weight=30, time=1905.2663311958313
[2.9710703583901883]
Time step: 36
Time step: 72
Time step: 108
Time step: 144
weight=40, time=2181.7428998947144
[2.9710703583901883, 2.9737112651901327]
Time step: 36
Time step: 72
Time step: 108
Time step: 144
weight=50, time=2527.1248364448547
[2.9710703583901883, 2.9737112651901327, 2.958622635906084]
Time step: 36
Time step: 72
Time step: 108
Time step: 144
weight=60, time=2909.6129755973816
[2.9710703583901883, 2.9737112651901327, 2.958622635906084, 2.963583845229799]
Time step: 36
Time step: 72
Time step: 108
Time step: 144
weight=70, time=3364.78337430954
[2.9710703583901883, 2.9737112651901327, 2.958622635906084, 2.963583845229799, 2.959423582170035]
Time step: 36
Time step: 72
Time step: 108
Time step: 144
weight=80, time=3865.000997066498
[2.9710703583901883, 2.9737112651901327, 2.958622635906084, 2.963583845229799, 2.959423582170035, 2.9607592797422684]
Time step: 36
Time step: 72


# Forcast and save results

In [5]:
best_weight = best_weight
best_rank = best_rank
multi_step = 3
pred_time_steps = 36 * 5 + (multi_step - 1)
train_data = data
time_lags = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
rank = best_rank
lambda_w = best_weight
lambda_x = best_weight
lambda_theta = best_weight
eta = 0.03
maxiter = 200
init_hyper = {"lambda_w": lambda_w, "lambda_x": lambda_x, "lambda_theta": lambda_theta, "eta": eta}
reset_random_seeds(1)

# def TRMF_forecast(dense_mat, sparse_mat, init_hyper, pred_step, multi_step, rank, time_lags, maxiter):
results = TRMF_forecast(train_data, train_data, init_hyper, pred_time_steps, multi_step, best_rank, time_lags, maxiter, maxiter2=10)

for step in range(3):
    print(RMSE(data[:, -180:], results[step + 1][:, 2 - step:2 - step + 180]))

mat_hat1 = results[1][:, 2:2 + 180].copy()
mat_hat2 = results[2][:, 1:1 + 180].copy()
mat_hat3 = results[3][:, 0:0 + 180].copy()
for i in range(mat_hat1.shape[1]):
    mat_hat1[:, i] += data_mean[:, i % 36]
    mat_hat2[:, i] += data_mean[:, i % 36]
    mat_hat3[:, i] += data_mean[:, i % 36]

real_OD = data0[:, -180:]
real_flow = od2flow(real_OD, num_s=80)
print('Results of 1-step forecasting:')
predict_flow1 = od2flow(mat_hat1, num_s=80)
get_score(real_OD, mat_hat1, real_flow, predict_flow1)

print('Results of 2-step forecasting:')
predict_flow2 = od2flow(mat_hat2, num_s=80)
get_score(real_OD, mat_hat2, real_flow, predict_flow2)

print('Results of 3-step forecasting:')
predict_flow3 = od2flow(mat_hat3, num_s=80)
get_score(real_OD, mat_hat3, real_flow, predict_flow3)

Time step: 36
Time step: 72
Time step: 108
Time step: 144
Time step: 180
3.801086821059394
3.886189916865785
3.9638353646522995
Results of 1-step forecasting:
RMSE of OD: 3.8010868210593944
WMAPE of OD: 0.34021980655924294
SMAPE of OD: 0.9612498400012086
MAE of OD: 1.8279327994836847
r2 of OD: 0.9155709447295383


RMSE of flow: 77.69996643066406
WMAPE of flow: 0.0999877080321312
SMAPE of flow: 0.16383419930934906
MAE of flow: 42.97711181640625
r2 of flow: 0.974542640705724
Results of 2-step forecasting:
RMSE of OD: 3.886189916865785
WMAPE of OD: 0.344813842786603
SMAPE of OD: 0.9622279913999333
MAE of OD: 1.8526156349333174
r2 of OD: 0.9117480333971929


RMSE of flow: 81.19125366210938
WMAPE of flow: 0.10545614361763
SMAPE of flow: 0.17583689093589783
MAE of flow: 45.327579498291016
r2 of flow: 0.9722034937739886
Results of 3-step forecasting:
RMSE of OD: 3.963835364652299
WMAPE of OD: 0.34824376961485337
SMAPE of OD: 0.9626348763133519
MAE of OD: 1.8710439440097208
r2 of OD: 0.9081862

In [6]:
np.savez_compressed('..//data//Hangzhou_OD_TRMF_step1.npz', data=mat_hat1)
np.savez_compressed('..//data//Hangzhou_OD_TRMF_step2.npz', data=mat_hat2)
np.savez_compressed('..//data//Hangzhou_OD_TRMF_step3.npz', data=mat_hat3)