LSTM method using this method as a source https://machinelearningmastery.com/time-series-forecasting-long-short-term-memory-network-python/

tldr: Transform dataset into supervised method, make it stationary, transform to scale, LSTM

In [1]:
%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm, tnrange
import os

from common import mase

from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

Using TensorFlow backend.


Model per time series approach

In [2]:
# frame a sequence as a supervised learning problem
def timeseries_to_supervised(data, lag=1):
    df = pd.DataFrame(data)
    columns = [df.shift(i) for i in range(1, lag+1)]
    columns.append(df)
    df = pd.concat(columns, axis=1)
    df.fillna(0, inplace=True)
    return df

# create a differenced series
def difference(dataset, interval=1):
    diff = list()
    for i in range(interval, len(dataset)):
        value = dataset[i] - dataset[i - interval]
        diff.append(value)
    return pd.Series(diff)

# invert differenced value
def inverse_difference(history, yhat, interval=1):
    return yhat + history[-interval]

# scale train and test data to [-1, 1]
def scale(train, test):
    # fit scaler
    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaler = scaler.fit(train)
    # transform train
    train = train.reshape(train.shape[0], train.shape[1])
    train_scaled = scaler.transform(train)
    # transform test
    test = test.reshape(test.shape[0], test.shape[1])
    test_scaled = scaler.transform(test)
    return scaler, train_scaled, test_scaled

# inverse scaling for a forecasted value
def invert_scale(scaler, X, value):
    new_row = [x for x in X] + [value]
    array = np.array(new_row)
    array = array.reshape(1, len(array))
    inverted = scaler.inverse_transform(array)
    return inverted[0, -1]

# fit a network to training data
def fit_seq(train, batch_size, nb_epoch, neurons):
    X, y = train[:, 0:-1], train[:, -1]
    print(X.shape[1])
    print(batch_size)
    assert False
    
    model = Sequential()
    model.add(Dense(neurons, activation='relu', input_dim=X.shape[1]))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    
    model.fit(X, y, epochs=nb_epoch, batch_size=batch_size, verbose=0, shuffle=False)
    return model

# make a one-step forecast
def forecast_seq(model, batch_size, X):
    X = X.reshape(1, len(X))
    yhat = model.predict(X, batch_size=batch_size)
    return yhat[0, 0]

Alternative prediction using the last predicted value as the input. This is necessary because we want to predict more than one time unit ahead.

In [3]:
def scale2(train_data):
    # fit scaler
    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaler = scaler.fit(train_data)
    # transform train
    train = train_data.reshape(train_data.shape[0], train_data.shape[1])
    train_scaled = scaler.transform(train_data)
    return scaler, train_scaled

def make_predictions(model, train_scaled, scaler, raw_values, number_of_predictions):
    train_reshaped = train_scaled[:, 0].reshape(len(train_scaled), 1)
    model.predict(train_reshaped, batch_size=1)

    scaled_predictions = list()
    predictions = list()
    X = train_scaled[-1,1:2]

    yhat = forecast_seq(model, 1, X)
    scaled_predictions.append(yhat)
    
    yhat = invert_scale(scaler, X, yhat)
    yhat = yhat + raw_values[-1]
    predictions.append(yhat)
    
    # Predict N steps into the FUTURE!
    for i in range(1, number_of_predictions):
        X = np.array([scaled_predictions[-1]])
        yhat = forecast_seq(model, 1, X)
        scaled_predictions.append(yhat)
        
        yhat = invert_scale(scaler, X, yhat)
        yhat = yhat + predictions[-1]
        predictions.append(yhat)
    
    # removed for mass predictions
    #print(scaled_predictions)
    #print(predictions)
    return predictions
    

def create_model_and_predict(train_series, number_of_predictions):
    # transform data to be stationary
    raw_values = train_series.values
    diff_values = difference(raw_values, 1)

    # transform data to be supervised learning
    supervised = timeseries_to_supervised(diff_values, 1)
    supervised_values = supervised.values

    # transform the scale of the data
    scaler, train_scaled = scale2(supervised_values)
    
    # fit the model
    model = fit_seq(train_scaled, 4, 10, 4)
    
    predictions = make_predictions(model, train_scaled, scaler, raw_values, number_of_predictions)
    return predictions

Single model per category approach

In [56]:
def create_model(window_length, neurons=6):
    model = Sequential()
    model.add(Dense(neurons, activation='relu', input_shape=(window_length,)))
    model.add(Dense(10))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    
    return model


def fit_model(model, train, batch_size, nb_epoch):
    X, y = train[:, 0:-1], train[:, -1]
    model.fit(X, y, epochs=nb_epoch, batch_size=batch_size, verbose=0, shuffle=False)

    
def sliding_train_fit_model(train_scaled, model, window_length, batch_size, nb_epoch):
    if train_scaled.shape[0] < window_length:
        train_scaled = np.pad(train_scaled, (window_length - train_scaled.shape[0], 0), 'constant')

    for i in range(train_scaled.shape[0] - window_length + 1):
        fit_model(model, train_scaled[i:i + window_length, 0].reshape((1, window_length)), batch_size, nb_epoch)

        
def scale_series_and_fit(train_series, model, window_length=5, batch_size=4, nb_epoch=2):
    # transform data to be stationary
    raw_values = train_series.values
    diff_values = difference(raw_values, 1)

    # transform data to be supervised learning
    supervised = timeseries_to_supervised(diff_values, 1)
    supervised_values = supervised.values

    # transform the scale of the data
    scaler, train_scaled = scale2(supervised_values)

    # fit the model
    sliding_train_fit_model(train_scaled, model, window_length, batch_size, nb_epoch)

Failsafe single prediction

Prerequisite dataframes, dictionaries, and lists for training and predicting processes. 

In [57]:
# Prediction lengths for different scopes
horizon_lengths = {"H" : 48, "D" : 14, "W" : 13, "M" : 18, "Q" : 8, "Y" : 6}

# Scope + category as key for models, i.e daily_finance or w/e
models = {}

# All filenames for different scopes
filenames = os.listdir("./data/cut/1000/train/")
#filenames = ["Weekly.csv"]

# Results
results_frame = pd.DataFrame()

# Test series frame for plotting or w/e
test_frame = pd.DataFrame()

# M4 info for information about categories
m4_info = pd.read_csv('./data/M4-info.csv')

### Loopy training

In [58]:
for file in filenames:
    train_set = pd.read_csv('./data/cut/1000/train/' + file)
    test_set = pd.read_csv('./data/cut/1000/test/' + file)
    
    # Go through all the time series in the scope 
    for i in tnrange(1, train_set.shape[0], desc=file):
        series_name = train_set.iloc[i, 1]
        series_info = m4_info.loc[m4_info['M4id'] == series_name]
        
        category_name = series_info['category'].values[0].lower()
        scope_name = series_info['SP'].values[0].lower()
        model_key = scope_name + "_" + category_name 
        
        if model_key in models:
            current_model = models[model_key]
        else:
            current_model = create_model(window_length=5)
            models[model_key] = current_model
        
        series = train_set.iloc[i, 2:].dropna()
        print(series.shape)
        scale_series_and_fit(series, current_model, window_length=6, batch_size=4, nb_epoch=2)
        
        assert False
    #assert False

(621,)
[[ -1.81818182e-02  -4.18240000e-01  -3.81818182e-01   2.90909091e-05
    1.27272727e-01   7.27563636e-02]]
(1, 5)
(1,)
[[ -4.18240000e-01  -3.81818182e-01   2.90909091e-05   1.27272727e-01
    7.27563636e-02  -2.90909091e-05]]
(1, 5)
(1,)
[[ -3.81818182e-01   2.90909091e-05   1.27272727e-01   7.27563636e-02
   -2.90909091e-05   6.18210909e-01]]
(1, 5)
(1,)
[[  2.90909091e-05   1.27272727e-01   7.27563636e-02  -2.90909091e-05
    6.18210909e-01   1.81236364e-02]]
(1, 5)
(1,)
[[  1.27272727e-01   7.27563636e-02  -2.90909091e-05   6.18210909e-01
    1.81236364e-02   3.63927273e-02]]
(1, 5)
(1,)
[[  7.27563636e-02  -2.90909091e-05   6.18210909e-01   1.81236364e-02
    3.63927273e-02  -1.09061818e-01]]
(1, 5)
(1,)
[[ -2.90909091e-05   6.18210909e-01   1.81236364e-02   3.63927273e-02
   -1.09061818e-01  -7.63665455e-01]]
(1, 5)
(1,)
[[ 0.61821091  0.01812364  0.03639273 -0.10906182 -0.76366545 -0.16363636]]
(1, 5)
(1,)
[[ 0.01812364  0.03639273 -0.10906182 -0.76366545 -0.16363636  0.

[[ 0.38181818  0.03633455 -0.19994182 -0.10912     0.01818182 -0.16363636]]
(1, 5)
(1,)
[[ 0.03633455 -0.19994182 -0.10912     0.01818182 -0.16363636  0.03639273]]
(1, 5)
(1,)
[[-0.19994182 -0.10912     0.01818182 -0.16363636  0.03639273  0.05454545]]
(1, 5)
(1,)
[[ -1.09120000e-01   1.81818182e-02  -1.63636364e-01   3.63927273e-02
    5.45454545e-02  -2.90909091e-05]]
(1, 5)
(1,)
[[  1.81818182e-02  -1.63636364e-01   3.63927273e-02   5.45454545e-02
   -2.90909091e-05  -2.36363636e-01]]
(1, 5)
(1,)
[[ -1.63636364e-01   3.63927273e-02   5.45454545e-02  -2.90909091e-05
   -2.36363636e-01   3.63927273e-02]]
(1, 5)
(1,)
[[  3.63927273e-02   5.45454545e-02  -2.90909091e-05  -2.36363636e-01
    3.63927273e-02  -2.18210909e-01]]
(1, 5)
(1,)
[[  5.45454545e-02  -2.90909091e-05  -2.36363636e-01   3.63927273e-02
   -2.18210909e-01  -6.54574545e-01]]
(1, 5)
(1,)
[[ -2.90909091e-05  -2.36363636e-01   3.63927273e-02  -2.18210909e-01
   -6.54574545e-01   2.90909091e-05]]
(1, 5)
(1,)
[[ -2.36363636e-

[[ 0.09090909 -0.01818182 -0.36366545  0.09096727 -0.6        -0.18184727]]
(1, 5)
(1,)
[[-0.01818182 -0.36366545  0.09096727 -0.6        -0.18184727 -0.09090909]]
(1, 5)
(1,)
[[-0.36366545  0.09096727 -0.6        -0.18184727 -0.09090909 -0.47275636]]
(1, 5)
(1,)
[[ 0.09096727 -0.6        -0.18184727 -0.09090909 -0.47275636 -0.14542545]]
(1, 5)
(1,)
[[-0.6        -0.18184727 -0.09090909 -0.47275636 -0.14542545 -0.29088   ]]
(1, 5)
(1,)
[[-0.18184727 -0.09090909 -0.47275636 -0.14542545 -0.29088     0.47269818]]
(1, 5)
(1,)
[[-0.09090909 -0.47275636 -0.14542545 -0.29088     0.47269818 -0.09090909]]
(1, 5)
(1,)
[[-0.47275636 -0.14542545 -0.29088     0.47269818 -0.09090909 -0.21815273]]
(1, 5)
(1,)
[[-0.14542545 -0.29088     0.47269818 -0.09090909 -0.21815273 -0.01818182]]
(1, 5)
(1,)
[[-0.29088     0.47269818 -0.09090909 -0.21815273 -0.01818182 -0.69093818]]
(1, 5)
(1,)
[[ 0.47269818 -0.09090909 -0.21815273 -0.01818182 -0.69093818  0.16363636]]
(1, 5)
(1,)
[[ -9.09090909e-02  -2.18152727e

[[-0.41818182 -0.23636364  0.23636364  0.25451636  0.16369455 -0.07275636]]
(1, 5)
(1,)
[[-0.23636364  0.23636364  0.25451636  0.16369455 -0.07275636 -0.27272727]]
(1, 5)
(1,)
[[ 0.23636364  0.25451636  0.16369455 -0.07275636 -0.27272727  0.85454545]]
(1, 5)
(1,)
[[ 0.25451636  0.16369455 -0.07275636 -0.27272727  0.85454545  0.01818182]]
(1, 5)
(1,)
[[ 0.16369455 -0.07275636 -0.27272727  0.85454545  0.01818182 -0.12727273]]
(1, 5)
(1,)
[[-0.07275636 -0.27272727  0.85454545  0.01818182 -0.12727273 -0.01818182]]
(1, 5)
(1,)
[[-0.27272727  0.85454545  0.01818182 -0.12727273 -0.01818182 -0.07269818]]
(1, 5)
(1,)
[[ 0.85454545  0.01818182 -0.12727273 -0.01818182 -0.07269818  0.09085091]]
(1, 5)
(1,)
[[ 0.01818182 -0.12727273 -0.01818182 -0.07269818  0.09085091 -0.10906182]]
(1, 5)
(1,)
[[ -1.27272727e-01  -1.81818182e-02  -7.26981818e-02   9.08509091e-02
   -1.09061818e-01  -2.90909091e-05]]
(1, 5)
(1,)
[[ -1.81818182e-02  -7.26981818e-02   9.08509091e-02  -1.09061818e-01
   -2.90909091e-05

   -9.09090909e-02  -3.63927273e-02]]
(1, 5)
(1,)
[[  3.63927273e-02  -2.00000000e-01   2.90909091e-05  -9.09090909e-02
   -3.63927273e-02   3.63345455e-02]]
(1, 5)
(1,)
[[ -2.00000000e-01   2.90909091e-05  -9.09090909e-02  -3.63927273e-02
    3.63345455e-02  -1.09061818e-01]]
(1, 5)
(1,)
[[  2.90909091e-05  -9.09090909e-02  -3.63927273e-02   3.63345455e-02
   -1.09061818e-01  -1.81818182e-02]]
(1, 5)
(1,)
[[-0.09090909 -0.03639273  0.03633455 -0.10906182 -0.01818182  0.05454545]]
(1, 5)
(1,)
[[-0.03639273  0.03633455 -0.10906182 -0.01818182  0.05454545 -0.14548364]]
(1, 5)
(1,)
[[ 0.03633455 -0.10906182 -0.01818182  0.05454545 -0.14548364 -0.07269818]]
(1, 5)
(1,)
[[-0.10906182 -0.01818182  0.05454545 -0.14548364 -0.07269818 -0.05454545]]
(1, 5)
(1,)
[[-0.01818182  0.05454545 -0.14548364 -0.07269818 -0.05454545  0.16363636]]
(1, 5)
(1,)
[[ 0.05454545 -0.14548364 -0.07269818 -0.05454545  0.16363636  0.03633455]]
(1, 5)
(1,)
[[-0.14548364 -0.07269818 -0.05454545  0.16363636  0.03633455 

[[ 0.16363636 -0.23636364  0.01818182 -0.72730182 -0.14542545 -0.07269818]]
(1, 5)
(1,)
[[-0.23636364  0.01818182 -0.72730182 -0.14542545 -0.07269818 -0.52727273]]
(1, 5)
(1,)
[[ 0.01818182 -0.72730182 -0.14542545 -0.07269818 -0.52727273  0.32724364]]
(1, 5)
(1,)
[[-0.72730182 -0.14542545 -0.07269818 -0.52727273  0.32724364  0.23636364]]
(1, 5)
(1,)
[[-0.14542545 -0.07269818 -0.52727273  0.32724364  0.23636364  0.72724364]]
(1, 5)
(1,)
[[-0.07269818 -0.52727273  0.32724364  0.23636364  0.72724364  0.05454545]]
(1, 5)
(1,)
[[-0.52727273  0.32724364  0.23636364  0.72724364  0.05454545 -0.18178909]]
(1, 5)
(1,)
[[ 0.32724364  0.23636364  0.72724364  0.05454545 -0.18178909 -0.34545455]]
(1, 5)
(1,)
[[ 0.23636364  0.72724364  0.05454545 -0.18178909 -0.34545455  0.12727273]]
(1, 5)
(1,)
[[ 0.72724364  0.05454545 -0.18178909 -0.34545455  0.12727273 -0.09090909]]
(1, 5)
(1,)
[[ 0.05454545 -0.18178909 -0.34545455  0.12727273 -0.09090909 -0.05454545]]
(1, 5)
(1,)
[[-0.18178909 -0.34545455  0.127

(1, 5)
(1,)
[[-0.21815273 -0.25457455 -0.07269818 -0.01818182 -0.05454545  0.16363636]]
(1, 5)
(1,)
[[-0.25457455 -0.07269818 -0.01818182 -0.05454545  0.16363636  0.39997091]]
(1, 5)
(1,)
[[-0.07269818 -0.01818182 -0.05454545  0.16363636  0.39997091 -0.03633455]]
(1, 5)
(1,)
[[-0.01818182 -0.05454545  0.16363636  0.39997091 -0.03633455 -0.14548364]]
(1, 5)
(1,)
[[-0.05454545  0.16363636  0.39997091 -0.03633455 -0.14548364  0.03639273]]
(1, 5)
(1,)
[[ 0.16363636  0.39997091 -0.03633455 -0.14548364  0.03639273  0.23636364]]
(1, 5)
(1,)
[[ 0.39997091 -0.03633455 -0.14548364  0.03639273  0.23636364 -0.2       ]]
(1, 5)
(1,)
[[-0.03633455 -0.14548364  0.03639273  0.23636364 -0.2        -0.03633455]]
(1, 5)
(1,)
[[-0.14548364  0.03639273  0.23636364 -0.2        -0.03633455  0.09085091]]
(1, 5)
(1,)
[[ 0.03639273  0.23636364 -0.2        -0.03633455  0.09085091  0.40002909]]
(1, 5)
(1,)
[[ 0.23636364 -0.2        -0.03633455  0.09085091  0.40002909  0.72724364]]
(1, 5)
(1,)
[[-0.2        -0.036

AssertionError: 

### Loopy predictions

In [None]:
for file in filenames:
    train_set = pd.read_csv('./data/cut/10000/train/' + file)
    test_set = pd.read_csv('./data/cut/10000/test/' + file)
    
    # Go through all the time series in the scope 
    for i in tnrange(1, train_set.shape[0], desc=file):
        series_name = train_set.iloc[i, 1]
        series_info = m4_info.loc[m4_info['M4id'] == series_name]
        
        category_name = series_info['category'].values[0].lower()
        scope_name = series_info['SP'].values[0].lower()
        model_key = scope_name + "_" + category_name 
        
        n_predictons = horizon_lengths[series_name[0].upper()]
        
        series = train_set.iloc[i, 2:].dropna()
        test_series = test_set.iloc[i, 1:].tolist()
        test_series.insert(0, series_name)
        
        pred = create_model_and_predict(series, n_predictons)
        pred.insert(0, series_name)
        
        results_frame = results_frame.append(pd.Series(pred), ignore_index=True)
        test_frame = test_frame.append(pd.Series(test_series), ignore_index=True)   
        #assert False
    #assert False

Write the resulting data frame to .csv

In [None]:
results_frame.to_csv("./d_h_results_seq.csv", index=False)

Printing out a single prediction, if one so chooses

In [None]:
rmse = np.sqrt(mean_squared_error(test_frame.iloc[0, 1:], results_frame.iloc[0, 1:]))
print('Test RMSE: %.3f' % rmse)
# line plot of observed vs predicted
plt.plot(test_frame.iloc[0, 1:], label="Test data")
plt.plot(results_frame.iloc[0, 1:], label="Predictions")
plt.legend()
plt.show()

In [None]:
res_mase = mase(results_frame.iloc[0, 1:], test_frame.iloc[0, 1:], None)