In [1]:
from pandas import DataFrame
from pandas import Series
from pandas import concat
from pandas import read_csv
from pandas import datetime
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from math import sqrt
import matplotlib
# be able to save images on server
matplotlib.use('Agg')
from matplotlib import pyplot
import numpy as np

Using TensorFlow backend.


In [6]:
# Synthetic data: slope-1 ramps with additive Gaussian noise (standard deviation 2).
# The three draws are kept in the same order so the global NumPy random stream
# is consumed exactly as before: train, out-of-range test, in-range test.
trend_train = np.random.normal(scale=2, size=100) + np.arange(11, 111, dtype='float64')
trend_test_outofrange = np.random.normal(scale=2, size=20) + np.arange(111, 131, dtype='float64')
trend_test_inrange = np.random.normal(scale=2, size=20) + np.arange(31, 51, dtype='float64')

In [23]:
# Full series: the training ramp followed by the out-of-range test ramp.
series = np.concatenate((trend_train, trend_test_outofrange))

In [24]:
def difference(dataset, interval=1):
    """Return the lag-`interval` differences of `dataset` as a pandas Series.

    Element k of the result is dataset[k + interval] - dataset[k].
    """
    deltas = [dataset[pos] - dataset[pos - interval]
              for pos in range(interval, len(dataset))]
    return Series(deltas)

def inverse_difference(history, yhat, interval=1):
    """Undo a differencing step for a single value.

    Adds `yhat` to the observation located `interval` positions from the end
    of `history` (interval=1 is the last item, interval=2 the one before it).
    """
    reference = history[-interval]
    return yhat + reference

In [25]:
# First-order differences of the synthetic series.
d = difference(series, interval=1)

In [26]:
def timeseries_to_supervised(data, lag=1):
    """Frame a sequence as a supervised-learning table.

    The returned DataFrame holds the input shifted by 1, 2, ..., `lag` steps
    (in that column order) followed by the unshifted data as the last
    column(s). No rows are dropped here, so the shifted columns start with
    missing values.
    """
    frame = DataFrame(data)
    pieces = []
    for step in range(1, lag + 1):
        pieces.append(frame.shift(step))
    pieces.append(frame)
    return concat(pieces, axis=1)

In [43]:
# Build the lagged table, then skip the first `num_timesteps` rows (the ones
# whose shifted columns are not fully populated).
num_timesteps = 4
s = timeseries_to_supervised(d, lag=num_timesteps).values[num_timesteps:]
s.shape

(115, 5)

In [45]:
# Hold out the final 15 rows as the test set; everything before them is training data.
test = s[-15:]
train = s[:-15]
train.shape, test.shape

((100, 5), (15, 5))

In [51]:
# Last five training rows (all columns).
train[-5:]

array([[-0.82541707,  5.09347574,  0.1895919 , -0.04403569,  2.22115985],
       [ 2.22115985, -0.82541707,  5.09347574,  0.1895919 , -0.51425204],
       [-0.51425204,  2.22115985, -0.82541707,  5.09347574, -2.63571041],
       [-2.63571041, -0.51425204,  2.22115985, -0.82541707,  5.227649  ],
       [ 5.227649  , -2.63571041, -0.51425204,  2.22115985, -3.43660829]])

In [52]:
# First five test rows (all columns).
test[:5]

array([[-3.43660829,  5.227649  , -2.63571041, -0.51425204,  5.80392659],
       [ 5.80392659, -3.43660829,  5.227649  , -2.63571041, -1.13601804],
       [-1.13601804,  5.80392659, -3.43660829,  5.227649  , -1.04734216],
       [-1.04734216, -1.13601804,  5.80392659, -3.43660829,  3.52003862],
       [ 3.52003862, -1.04734216, -1.13601804,  5.80392659, -1.26087508]])

In [46]:
def scale(train, test):
    """Scale both arrays with a MinMaxScaler (feature range -1..1) fitted on `train` only.

    Returns a tuple ``(scaler, train_scaled, test_scaled)`` where `scaler` is
    the fitted MinMaxScaler, so the transformation can be inverted later.
    """
    scaler = MinMaxScaler(feature_range=(-1, 1)).fit(train)

    n_rows, n_cols = train.shape[0], train.shape[1]
    train_scaled = scaler.transform(train.reshape(n_rows, n_cols))

    n_rows, n_cols = test.shape[0], test.shape[1]
    test_scaled = scaler.transform(test.reshape(n_rows, n_cols))

    return scaler, train_scaled, test_scaled

def invert_scale(scaler, X, yhat):
    """Map one scaled prediction back to the scaler's original units.

    The prediction is appended to its input values to rebuild a single row
    (inputs followed by the target, the layout `scale` fits the scaler on),
    which is then passed through ``scaler.inverse_transform``.

    Args:
        scaler: Fitted object exposing ``inverse_transform`` for 2-D arrays.
        X: Iterable with the scaled input values of one sample.
        yhat: Scaled prediction for that sample.

    Returns:
        The last element of the inverse-transformed row, i.e. the unscaled
        counterpart of `yhat`.
    """
    # The notebook imports numpy as ``np``; the bare name ``numpy`` was never
    # bound, so the previous ``numpy.array`` call raised NameError.
    row = np.array(list(X) + [yhat])
    row = row.reshape(1, len(row))
    inverted = scaler.inverse_transform(row)
    return inverted[0, -1]


In [None]:
def fit_lstm(train, n_batch, nb_epoch, n_neurons):
    """Train a stateful single-layer LSTM regressor on a supervised array.

    `train` is split column-wise: every column except the last is input, the
    last column is the target. Inputs are reshaped to (samples, 1, features).
    The network is an LSTM with `n_neurons` units followed by Dense(1),
    compiled with mean squared error and the 'adam' optimizer. Training runs
    `nb_epoch` single-epoch passes without shuffling, resetting the model
    state after each pass. Returns the fitted model.
    """
    inputs = train[:, 0:-1]
    targets = train[:, -1]
    inputs = inputs.reshape(inputs.shape[0], 1, inputs.shape[1])
    input_spec = (n_batch, inputs.shape[1], inputs.shape[2])

    model = Sequential()
    model.add(LSTM(n_neurons, batch_input_shape=input_spec, stateful=True))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')

    # One pass per iteration so the recurrent state can be cleared between epochs.
    for _ in range(nb_epoch):
        model.fit(inputs, targets, epochs=1, batch_size=n_batch, verbose=0, shuffle=False)
        model.reset_states()
    return model

In [None]:
def experiment(series, n_lag, n_repeats, n_epochs, n_batch, n_neurons, n_test=12):
    """Repeatedly fit an LSTM on `series` and collect the test RMSE of each run.

    Pipeline: first-order differencing -> lagged supervised table -> split off
    the last `n_test` rows as the test set -> scale with a scaler fitted on
    the training rows -> for each repeat, fit a fresh model, predict the test
    rows, undo scaling and differencing, and score against the raw values.

    Args:
        series: 1-D sequence of observations (array-like or pandas Series).
        n_lag: Number of lagged inputs per sample.
        n_repeats: How many times to fit and evaluate a fresh model.
        n_epochs: Training epochs per fit.
        n_batch: Batch size used for both fitting and prediction.
        n_neurons: Number of LSTM units.
        n_test: Number of trailing rows held out for testing (default 12,
            the value that used to be hard-coded).

    Returns:
        List with one RMSE (float) per repeat.

    Raises:
        ValueError: If `n_test` is smaller than 1.

    Note:
        Stateful Keras models built with a fixed batch size expect the number
        of samples to be a multiple of that batch size; the training rows are
        trimmed accordingly below. `n_test` should likewise be a multiple of
        `n_batch` -- this is not enforced here.
    """
    if n_test < 1:
        raise ValueError('n_test must be at least 1')

    # Plain array of observations; previously `raw_values` was referenced
    # without ever being defined.
    raw_values = np.asarray(series)

    diff_values = difference(raw_values, 1)
    supervised = timeseries_to_supervised(diff_values, n_lag)
    # Skip the first n_lag rows, whose shifted columns are incomplete.
    supervised_values = supervised.values[n_lag:, :]
    train, test = supervised_values[:-n_test], supervised_values[-n_test:]
    scaler, train_scaled, test_scaled = scale(train, test)

    # Drop leading rows so the training sample count is a multiple of n_batch
    # (replaces the fixed 2-row trim, which only fit one particular setup).
    surplus = len(train_scaled) % n_batch
    train_trimmed = train_scaled[surplus:, :]

    # Same (samples, 1, features) layout that fit_lstm trains on; the former
    # reshape to (samples, 1, 1) failed whenever n_lag > 1.
    test_inputs = test_scaled[:, 0:-1]
    test_reshaped = test_inputs.reshape(len(test_inputs), 1, test_inputs.shape[1])

    error_scores = list()
    for r in range(n_repeats):
        lstm_model = fit_lstm(train_trimmed, n_batch, n_epochs, n_neurons)
        output = lstm_model.predict(test_reshaped, batch_size=n_batch)
        predictions = list()
        for i in range(len(output)):
            yhat = invert_scale(scaler, test_inputs[i], output[i, 0])
            # Prediction i is a difference relative to the observation just
            # before test row i, i.e. (n_test + 1 - i) positions from the end.
            yhat = inverse_difference(raw_values, yhat, len(test_scaled) + 1 - i)
            predictions.append(yhat)
        rmse = sqrt(mean_squared_error(raw_values[-n_test:], predictions))
        print('%d) Test RMSE: %.3f' % (r+1, rmse))
        error_scores.append(rmse)
    return error_scores

In [None]:
def _parse_shampoo_month(label):
    """Turn a 'Y-MM' label such as '1-01' into a datetime by prefixing '190'.

    NOTE(review): assumes the first CSV column uses the '<year digit>-<month>'
    labels of the public shampoo-sales dataset -- confirm against the file.
    """
    # Local stdlib import: does not rely on the `datetime` name re-exported by pandas.
    from datetime import datetime
    return datetime.strptime('190' + str(label), '%Y-%m')


def run():
    """Run the baseline LSTM experiment on 'shampoo-sales.csv'.

    Loads the CSV (first column as index, first data column as the series),
    runs `experiment` with the fixed configuration below, prints summary
    statistics of the per-repeat RMSE values and saves a box plot to
    'experiment_baseline.png'. Returns None.
    """
    # The original call passed an undefined `parser` as date_parser. The index
    # is converted explicitly instead, which also avoids the `squeeze` and
    # `date_parser` arguments that recent pandas versions deprecated/removed.
    frame = read_csv('shampoo-sales.csv', header=0, index_col=0)
    frame.index = [_parse_shampoo_month(label) for label in frame.index]
    series = frame.iloc[:, 0]

    n_lag = 4
    n_repeats = 30
    n_epochs = 1000
    n_batch = 4
    n_neurons = 3
    results = DataFrame()
    results['results'] = experiment(series, n_lag, n_repeats, n_epochs, n_batch, n_neurons)
    print(results.describe())
    results.boxplot()
    pyplot.savefig('experiment_baseline.png')



In [None]:
# Execute the experiment: reads 'shampoo-sales.csv', prints the RMSE summary
# and writes 'experiment_baseline.png'.
run()