A simple online seasonal autoregressive model for time series prediction 

In [1]:
%matplotlib inline
import numpy as np
import tensorflow as tf
from collections import deque
from matplotlib import pyplot as plt
import pandas as pd
from tqdm import tqdm_notebook as tqdm

# Constants

In [2]:
# Number of non-seasonal autoregressive coefficients (length of `gamma`).
ar_order = 3 # 3
# Number of seasonal autoregressive coefficients (length of `s_gamma`).
sar_order = 2 # 2
# Seasonal period in samples; passed as `s` when the seasonal polynomial is applied.
period = 250 # 250
# Step size handed to the gradient-descent optimizer.
learning_rate = 0.005 # 0.005

# Graph

In [3]:
# tf graph inputs
# Past observations; the training loop feeds them newest-first (it fills a deque with appendleft).
X = tf.placeholder(tf.float32, shape=[ar_order + sar_order*period], name='history') # 1-D array
# Current observation X_t.
Y = tf.placeholder(tf.float32, shape=[1], name='current') # one-element vector (shape [1]), not a rank-0 scalar

In [4]:
# tf graph variables
# Trainable non-seasonal AR coefficients [g_1, ..., g_p], drawn from a truncated normal.
gamma = tf.Variable(initial_value=tf.random.truncated_normal(shape=[ar_order]), 
                    name='ar_params', dtype=tf.float32, shape=[ar_order])
# Trainable seasonal AR coefficients [gs_1, ..., gs_P], drawn from a truncated normal.
s_gamma = tf.Variable(initial_value=tf.random.truncated_normal(shape=[sar_order]), 
                      name='sar_params', dtype=tf.float32, shape=[sar_order])

In [5]:
def compute_backshift(series, ar_params, s):
    '''
    Apply the backshift polynomial (1 - g(B^s)) to every position of a series
    that has enough history behind it.

     params:
    series: A tensor, shape (n,), newest value first - [X_t, X_{t-1}, ..., X_{t-(n-1)}]
    ar_params: A tensor, shape (p,) - [g_1, g_2, ..., g_p]
    s: lag step (seasonal period), an int or int tensor; use s=1 for the
       non-seasonal polynomial

     returns:
    A tensor of shape (n - p*s,) whose k-th entry is
        X_{t-k} - g_1 * X_{t-k-s} - g_2 * X_{t-k-2s} - ... - g_p * X_{t-k-ps}
    i.e. (1 - g(B^s)) X_{t-k} with
        g(B^s) = g_1 * B^s + g_2 * B^(2s) + ... + g_p * B^(ps)
    '''
    # constraint: n > p*s, otherwise no position has a full window
    series_len = tf.reshape(tf.shape(series), [])
    order = tf.reshape(tf.shape(ar_params), [])
    span = order * s  # distance between a value and its oldest required lag

    def lagged_window(start):
        # [X_{t-start}, X_{t-start-s}, ..., X_{t-start-p*s}], shape (p+1,)
        return series[start:start + span + 1:s]

    # one row per output position, shape (n-p*s, p+1)
    windows = tf.map_fn(lagged_window, tf.range(series_len - span), dtype=tf.float32)

    # polynomial coefficients [1, -g_1, ..., -g_p] as a column, shape (p+1, 1)
    coefficients = tf.concat([[1], -ar_params], axis=0)
    coefficient_column = tf.expand_dims(coefficients, axis=-1)

    filtered = tf.matmul(windows, coefficient_column)  # shape (n-p*s, 1)
    return tf.squeeze(filtered, axis=-1)  # shape (n-p*s,)

In [6]:
def compute_epsilon(current, history, ar_params, sar_params, s):
    '''
    Compute the model residual epsilon_t = (1 - gs(B^s))(1 - g(B)) X_t

     params:
    current: a tensor, shape (1,) - [X_t]
    history: a tensor, shape (ar_order + sar_order*period,) - [X_{t-1}, X_{t-2}, ..., X_{t-(p+Ps)}]
    ar_params: A tensor, shape (ar_order,) - [g_1, g_2, ..., g_p]
    sar_params: A tensor, shape (sar_order,) - [gs_1, gs_2, ..., gs_P]
    s: seasonal period, an int or int scalar tensor

     returns:
    a scalar tensor of type float
    '''
    # newest-first series [X_t, X_{t-1}, ...], shape (ar_order+sar_order*period+1,)
    full_series = tf.concat([current, history], axis=0)
    # non-seasonal polynomial uses lag step 1, leaving shape (sar_order*period+1,)
    ar_filtered = compute_backshift(full_series, ar_params, s=1)
    # seasonal polynomial uses lag step s; with the history length above this leaves shape (1,)
    residual = compute_backshift(ar_filtered, sar_params, s=s)
    return residual[0]  # first (only) entry as a scalar

In [7]:
# Residual of the model on the observation currently fed through Y.
epsilon = compute_epsilon(current=Y, history=X, ar_params=gamma, sar_params=s_gamma, s=period)
# One-step forecast: the residual is linear in X_t with coefficient 1, so evaluating it
# at X_t = 0 and flipping the sign leaves exactly the part explained by the history.
prediction = tf.negative(
    compute_epsilon(current=[0], history=X, ar_params=gamma, sar_params=s_gamma, s=period)
)

In [8]:
# Online objective: squared residual of the single sample currently fed.
loss = tf.square(epsilon)
# One plain gradient-descent step on that loss per run of train_op.
train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss) # best lr=0.005
# Alternative optimizer, left disabled:
# train_op = tf.train.AdagradOptimizer(learning_rate).minimize(loss)

In [9]:
# Op that initializes the variables; the training loop runs it at the start of every
# session, so each hourly model starts from freshly drawn coefficients.
init = tf.global_variables_initializer()

# Train

In [10]:
# Load only the 24 columns named '1'..'24'; the training loop treats each one as a separate series.
hour_columns = [str(h) for h in range(1, 25)]
df = pd.read_csv('data/Dữ liệu phụ tải workingday miền Bắc năm 2015-082019.csv',
                 usecols=hour_columns, engine='python')
# Collects one MAPE per column, appended by the training loop.
MAPEs = []

In [11]:
# For every hourly column: fit the SAR model online on the normalized first
# differences, record the one-step-ahead forecasts made along the way, and score
# the forecasts that fall in the last 20% of the series with MAPE.
history_len = ar_order + sar_order*period

for i in tqdm(range(1,25)):
    hour = str(i)
    series = df[hour]
    # Model the day-to-day change rather than the level.
    series_diff = series.diff().dropna()
    
    # normalize data
    mean = series_diff.mean()
    std = series_diff.std()
    series_diff_normalized = (series_diff - mean) / std
    
    # Newest-first window of past values; starts as all zeros until real data fills it.
    history = deque([0] * history_len, maxlen=history_len)
    preds_diff_normalized = []
    
    # train
    with tf.Session() as sess:
        sess.run(init)
        for x in tqdm(series_diff_normalized):
            # Feed the placeholder objects rather than the names 'history:0'/'current:0':
            # the names silently point at stale tensors if the graph cell is re-executed.
            feed = {X: np.array(history), Y: np.array([x])}
            # Forecast BEFORE the update, in its own run() call. When both are fetched
            # in a single run() nothing orders the weight read against the update, so
            # the forecast could be made with weights already trained on `x`.
            pred = sess.run(prediction, feed_dict=feed)
            sess.run(train_op, feed_dict=feed)
            history.appendleft(x)
            preds_diff_normalized.append(pred)
        
    split_point = round(len(series) * 0.8)

    # forecast
    # Undo the normalization, then add each predicted change to the previous level.
    preds_diff = np.array(preds_diff_normalized) * std + mean
    # The first observation has no predecessor, so its slot is filled with a 0 placeholder.
    preds = np.r_[[0], series[:-1].values + preds_diff]
    forecast = pd.Series(preds, index=series.index)[split_point:]

    testing = series[split_point:]

    # Mean absolute percentage error, kept as a fraction (not multiplied by 100).
    MAPEs.append(np.mean(np.abs((testing - forecast) / testing)))

HBox(children=(IntProgress(value=0, max=1216), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1216), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1216), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1216), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1216), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1216), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1216), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1216), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1216), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1216), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1216), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1216), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1216), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1216), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1216), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1216), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1216), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1216), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1216), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1216), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1216), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1216), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1216), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1216), HTML(value='')))




In [12]:
# Per-column MAPE values (as fractions, in column order '1'..'24'), then their average.
print(MAPEs)
print(np.mean(MAPEs))

[0.056209972481650376, 0.054653217181037356, 0.05230257478800533, 0.049944721676719726, 0.04669711297513028, 0.03940839541744851, 0.034317649235530316, 0.036300467800945944, 0.03967716023603523, 0.043991586050964974, 0.04733506973808972, 0.05406628322434684, 0.05789953843217802, 0.056210368017053675, 0.0496805339136774, 0.04060688471174475, 0.03280607181767226, 0.02848624732360101, 0.034056309437989346, 0.03723644638530487, 0.04368772200750985, 0.05190537902735574, 0.055389847637416584, 0.054123389911962326]
0.0457080395595571
