In [6]:
from math import sqrt
from numpy import array
from numpy import mean
from pandas import DataFrame
from pandas import concat
import pandas as pd
from sklearn.metrics import mean_squared_error
from tensorflow.keras import Sequential
from keras.layers import Dense

In [7]:
def train_test_split(data, n_test):
    """Split an ordered series into a leading train part and a trailing test part.

    Args:
        data: Sliceable, sized sequence (list, NumPy array, ...) in time order.
        n_test: Number of trailing observations to hold out for testing.

    Returns:
        A ``(train, test)`` tuple: ``test`` is the last ``n_test`` observations
        and ``train`` is everything before them. If ``n_test`` exceeds the
        length of ``data``, ``train`` is empty and ``test`` is all of ``data``.

    Raises:
        ValueError: If ``n_test`` is negative.
    """
    if n_test < 0:
        raise ValueError('n_test must be non-negative, got %r' % (n_test,))
    # Slice on an explicit index instead of ``-n_test``: when n_test == 0,
    # ``data[:-0]`` is ``data[:0]`` and would return an empty train set while
    # placing the whole series in the test set.
    split_at = max(len(data) - n_test, 0)
    return data[:split_at], data[split_at:]

In [8]:
def series_to_supervised(data, n_in=1, n_out=1):
    """Frame a series as a table of lagged inputs followed by outputs.

    Each row holds the ``n_in`` previous observations (oldest first) followed
    by ``n_out`` observations starting at the current step. Rows containing
    any NaN (such as those created at the edges by shifting) are dropped.

    Args:
        data: Anything accepted by ``pandas.DataFrame``.
        n_in: Number of lagged steps used as inputs.
        n_out: Number of steps (from the current one onward) used as outputs.

    Returns:
        The supervised table as a NumPy array.
    """
    frame = DataFrame(data)
    # lagged copies: t-n_in, ..., t-1
    lagged = [frame.shift(lag) for lag in range(n_in, 0, -1)]
    # forward copies: t, t+1, ..., t+n_out-1
    leading = [frame.shift(-step) for step in range(0, n_out)]
    # line the shifted copies up side by side and keep only complete rows
    table = concat(lagged + leading, axis=1).dropna()
    return table.values

In [10]:
def measure_rmse(actual, predicted):
    """Return the root of the mean squared error between two sets of values.

    Both arguments are passed straight to ``mean_squared_error``.
    """
    mse = mean_squared_error(actual, predicted)
    return sqrt(mse)

In [11]:
def difference(data, order):
    """Return the lag-``order`` differences of ``data`` as a list.

    Element ``k`` of the result is ``data[order + k] - data[k]``, so the
    result has ``len(data) - order`` entries (none if that is not positive).
    """
    deltas = []
    for idx in range(order, len(data)):
        deltas.append(data[idx] - data[idx - order])
    return deltas

In [12]:
def model_fit(train, config):
    """Build and train a single-hidden-layer dense network on ``train``.

    Args:
        train: Training series.
        config: Five-item sequence
            ``(n_input, n_nodes, n_epochs, n_batch, n_diff)``.

    Returns:
        The fitted ``Sequential`` model.
    """
    window, hidden_units, epochs, batch_size, diff_order = config
    # optionally replace the series by its lag-``diff_order`` differences
    series = difference(train, diff_order) if diff_order > 0 else train
    # sliding windows: ``window`` lagged values per row, current value last
    supervised = series_to_supervised(series, n_in=window)
    inputs = supervised[:, :-1]
    targets = supervised[:, -1]
    # hidden ReLU layer followed by a single linear output
    network = Sequential()
    network.add(Dense(hidden_units, activation='relu', input_dim=window))
    network.add(Dense(1))
    network.compile(loss='mse', optimizer='adam')
    # train silently
    network.fit(inputs, targets, epochs=epochs, batch_size=batch_size, verbose=0)
    return network

In [13]:
def model_predict(model, history, config):
    """Forecast the next step from the most recent observations.

    Args:
        model: Object exposing ``predict(x, verbose=0)``.
        history: Observations seen so far, in time order.
        config: Five-item sequence; only the first (``n_input``) and last
            (``n_diff``) entries are used here.

    Returns:
        The first row of the model output, plus ``history[-n_diff]`` when
        differencing is enabled (``n_diff > 0``).
    """
    window, _nodes, _epochs, _batch, diff_order = config
    if diff_order > 0:
        # remember the value needed to undo the differencing afterwards
        offset = history[-diff_order]
        history = difference(history, diff_order)
    else:
        offset = 0.0
    # the last ``window`` values form a single input row
    model_input = array(history[-window:]).reshape((1, window))
    forecast = model.predict(model_input, verbose=0)
    # add the offset back (zero when no differencing was applied)
    return offset + forecast[0]

In [33]:
def walk_forward_validation(data, n_test, cfg):
    """Fit once on the train split, then forecast the test split step by step.

    The model is trained a single time. For every test step a one-step
    forecast is made from the running history, after which the true
    observation for that step is appended to the history.

    Args:
        data: Full series.
        n_test: Number of trailing observations held out for testing.
        cfg: Model configuration passed to ``model_fit`` / ``model_predict``.

    Returns:
        The RMSE between the test observations and the forecasts (also printed).
    """
    train, test = train_test_split(data, n_test)
    model = model_fit(train, cfg)
    # the running history starts out as a copy of the training observations
    history = list(train)
    predictions = []
    for step in range(len(test)):
        # forecast first, then reveal the actual value for this step
        predictions.append(model_predict(model, history, cfg))
        history.append(test[step])
    error = measure_rmse(test, predictions)
    print(' > %.3f' % error)
    return error

In [15]:
def repeat_evaluate(data, config, n_test, n_repeats=10):
    """Run walk-forward validation ``n_repeats`` times and average the error.

    Args:
        data: Full series.
        config: Model configuration to evaluate.
        n_test: Number of trailing observations held out for testing.
        n_repeats: How many independent fit/evaluate runs to average.

    Returns:
        ``(key, result)`` where ``key`` is ``str(config)`` and ``result`` is
        the mean of the per-run errors (also printed).
    """
    # string form of the configuration, used to label the result
    key = str(config)
    scores = []
    for _ in range(n_repeats):
        scores.append(walk_forward_validation(data, n_test, config))
    result = mean(scores)
    print('> Model[%s] %.3f' % (key, result))
    return (key, result)

In [31]:
def grid_search(data, cfg_list, n_test):
    """Evaluate every configuration and rank them by mean error.

    Args:
        data: Full series.
        cfg_list: Iterable of model configurations.
        n_test: Number of trailing observations held out for testing.

    Returns:
        List of ``(key, mean_error)`` tuples sorted by ascending error.
    """
    results = []
    for cfg in cfg_list:
        results.append(repeat_evaluate(data, cfg, n_test))
    # lowest error first
    return sorted(results, key=lambda entry: entry[1])

In [17]:
def model_configs():
    """Enumerate the hyperparameter grid as a list of configurations.

    Each configuration is a list
    ``[n_input, n_nodes, n_epochs, n_batch, n_diff]``. The rightmost
    parameter varies fastest. The number of configurations is printed.

    Returns:
        The list of configurations.
    """
    # candidate values for each hyperparameter
    input_sizes = [12]
    node_counts = [50, 100]
    epoch_counts = [100]
    batch_sizes = [1, 150]
    diff_orders = [0, 12]
    # full cartesian product, in the same order as nested loops would give
    configs = [
        [window, nodes, epochs, batch, diff]
        for window in input_sizes
        for nodes in node_counts
        for epochs in epoch_counts
        for batch in batch_sizes
        for diff in diff_orders
    ]
    print('Total configs: %d' % len(configs))
    return configs

In [23]:
# Load the series: the first row is the header and the first column is the index.
# NOTE(review): hard-coded absolute path — this only resolves where that
# drive/folder exists; consider a configurable data directory.
df = pd.read_csv('D://DS//Stater_Projects//airline.txt', header=0, index_col=0)
# Keep only the underlying values array; the index is not used downstream.
data = df.values

In [24]:
# Number of trailing observations held out as the test set.
n_test = 12

In [25]:
# Build the list of hyperparameter configurations to evaluate.
cfg_list = model_configs()

Total configs: 8


In [34]:
# Evaluate every configuration; the result is a list of (config key, mean error)
# tuples sorted by ascending error.
scores = grid_search(data, cfg_list, n_test)

 > 16.692
 > 17.959
 > 21.005
 > 40.335
 > 12.658
 > 20.464
 > 16.589
 > 20.778
 > 17.528
 > 16.204
> Model[[12, 50, 100, 1, 0]] 20.021
 > 18.816
 > 21.978
 > 21.652
 > 20.532
 > 19.150
 > 20.720
 > 20.622
 > 20.127
 > 20.961
 > 18.520
> Model[[12, 50, 100, 1, 12]] 20.308
 > 72.672
 > 69.982
 > 45.788
 > 82.830
 > 54.900
 > 59.806
 > 42.790
 > 44.759
 > 82.812
 > 64.039
> Model[[12, 50, 100, 150, 0]] 62.038
 > 17.468
 > 20.897
 > 18.815
 > 18.693
 > 18.313
 > 18.156
 > 22.825
 > 21.353
 > 20.133
 > 20.221
> Model[[12, 50, 100, 150, 12]] 19.687
 > 18.642
 > 19.663
 > 19.898
 > 18.663
 > 38.229
 > 17.645
 > 18.435
 > 21.978
 > 33.934
 > 17.981
> Model[[12, 100, 100, 1, 0]] 22.507
 > 18.588
 > 19.228
 > 20.216
 > 17.751
 > 17.499
 > 18.551
 > 20.703
 > 19.094
 > 19.951
 > 17.620
> Model[[12, 100, 100, 1, 12]] 18.920
 > 48.063
 > 61.554
 > 41.824
 > 88.276
 > 67.295
 > 59.411
 > 85.916
 > 47.176
 > 38.721
 > 56.863
> Model[[12, 100, 100, 150, 0]] 59.510
 > 18.942
 > 19.815
 > 18.363
 > 19.

In [35]:
# Show the three entries with the lowest mean error.
for cfg, error in scores[:3]:
    print(cfg, error)

[12, 100, 100, 1, 12] 18.920142185918188
[12, 100, 100, 150, 12] 19.203679430825296
[12, 50, 100, 150, 12] 19.687393086402743
