# Load Data

In [1]:
import numpy as np
# Hand np.load the path rather than an open() handle, so that no file object
# is left unclosed in the kernel.
data = np.load('./TaxiBJ21.npy')  # the data file is in the same folder

In [2]:
# Check the loaded array's dimensions. Axis 0 is the one the windowing code
# slices along; the remaining axes are presumably (channel, height, width) — TODO confirm.
data.shape

(4272, 2, 32, 32)

Here we simply use the historical data as the input features. External factors, e.g., weather, calendar info, are not used yet. 

In [3]:
def generate_data(data, history_length, predict_length):
    """Cut a frame sequence into sliding (history, target) window pairs.

    Args:
        data: Array indexed along axis 0 by timestep.
        history_length: Number of consecutive timesteps in each history window.
        predict_length: Prediction horizon, i.e. the number of timesteps in
            each target window, which directly follows its history window.

    Returns:
        A ``(history_data, predict_data)`` tuple of stacked windows with
        shapes ``(n, history_length, ...)`` and ``(n, predict_length, ...)``,
        where ``n = data.shape[0] - history_length - predict_length``.

    Raises:
        ValueError: Raised by ``np.stack`` when ``data`` is too short to
            yield a single window pair.

    NOTE(review): the loop bound below is exclusive, so the very last frame of
    ``data`` never appears in any target window. Confirm whether that is
    intended before changing it, since it affects the sample count.
    """
    window = history_length + predict_length
    history_windows, predict_windows = [], []
    # `stop` is the exclusive end index of the target window.
    for stop in range(window, data.shape[0]):
        split = stop - predict_length  # boundary between history and target
        history_windows.append(data[split - history_length:split])
        predict_windows.append(data[split:stop])
    return np.stack(history_windows), np.stack(predict_windows)

In [4]:
# One frame covers 30 minutes, so 48 frames make up a full day of history.
history_length = 2 * 24  # look back over the last 24 hours
predict_length = 1  # forecast a single frame, i.e. the next 30 minutes
history_data, predict_data = generate_data(
    data, history_length=history_length, predict_length=predict_length
)
# Show both window-array shapes, one per line.
print(history_data.shape, predict_data.shape, sep="\n")

(4223, 48, 2, 32, 32)
(4223, 1, 2, 32, 32)


Split the data into train:valid:test ≈ 8:1:1. For simplicity, the validation and test sets each cover 9 days; the remaining data are used as the training set.

In [5]:
# Chronological split: the last 2 * HOLDOUT_SIZE samples are held out, the
# first half of them for validation and the second half for testing; all
# earlier samples form the training set.
FRAMES_PER_DAY = 48  # 30-minute frames
HOLDOUT_DAYS = 9  # span of the validation set and of the test set
HOLDOUT_SIZE = HOLDOUT_DAYS * FRAMES_PER_DAY

history_data_train = history_data[:-2 * HOLDOUT_SIZE]
predict_data_train = predict_data[:-2 * HOLDOUT_SIZE]

history_data_valid = history_data[-2 * HOLDOUT_SIZE:-HOLDOUT_SIZE]
predict_data_valid = predict_data[-2 * HOLDOUT_SIZE:-HOLDOUT_SIZE]

history_data_test = history_data[-HOLDOUT_SIZE:]
predict_data_test = predict_data[-HOLDOUT_SIZE:]

# The three subsets must partition the samples with nothing lost or duplicated.
assert (
    len(history_data_train) + len(history_data_valid) + len(history_data_test)
    == len(history_data)
), "train/valid/test split does not cover every sample exactly once"

# Simple history model 1: Use the last value

In [9]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [10]:
# Persistence baseline: forecast each target with the newest frame (index -1
# on the window axis) of its history window.
simple_predict_results1 = history_data_test[:, -1]
simple_predict_results1.shape

(432, 2, 32, 32)

In [11]:
# The targets keep a length-1 prediction axis (axis 1) that the baseline
# predictions do not have; both are flattened before scoring, so the element
# counts still line up.
predict_data_test.shape

(432, 1, 2, 32, 32)

Evaluate RMSE:

In [13]:
# RMSE = sqrt(MSE). Arguments follow sklearn's (y_true, y_pred) order; the
# metric is symmetric, so the value is unaffected by the order.
np.sqrt(mean_squared_error(predict_data_test.ravel(), simple_predict_results1.ravel()))

0.02703318006102326

Evaluate MAE:

In [14]:
# Arguments follow sklearn's (y_true, y_pred) order; MAE is symmetric, so the
# value is unaffected by the order.
mean_absolute_error(predict_data_test.ravel(), simple_predict_results1.ravel())

0.012395074262274939

# Simple history model 2: Use the value a day before

In [15]:
# Day-before baseline: forecast each target with the oldest frame (index 0 on
# the window axis) of its history window.
simple_predict_results2 = history_data_test[:, 0]
simple_predict_results2.shape

(432, 2, 32, 32)

Evaluate RMSE:

In [16]:
# RMSE = sqrt(MSE). Arguments follow sklearn's (y_true, y_pred) order; the
# metric is symmetric, so the value is unaffected by the order.
np.sqrt(mean_squared_error(predict_data_test.ravel(), simple_predict_results2.ravel()))

0.09044357590690776

Evaluate MAE:

In [17]:
# Arguments follow sklearn's (y_true, y_pred) order; MAE is symmetric, so the
# value is unaffected by the order.
mean_absolute_error(predict_data_test.ravel(), simple_predict_results2.ravel())

0.04845272860485724