In [6]:
from math import sqrt
from numpy import array
from numpy import mean
from pandas import DataFrame
from pandas import concat
from pandas import read_csv
from sklearn.metrics import mean_squared_error
from tensorflow.keras import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from tensorflow.keras.layers import LSTM

In [7]:
def train_test_split(data, n_test):
    """Split an ordered series into a leading train part and a trailing test part.

    Args:
        data: Sliceable sequence (list, NumPy array, ...) in time order.
        n_test: Number of trailing observations to hold out; must be >= 0.

    Returns:
        Tuple ``(train, test)`` where ``test`` holds the last ``n_test`` items
        and ``train`` everything before them.

    Raises:
        ValueError: If ``n_test`` is negative.
    """
    if n_test < 0:
        raise ValueError('n_test must be non-negative, got %r' % (n_test,))
    # Slice from an explicit index: ``data[:-0]`` is empty, so the negative-index
    # form would return an empty train set (and everything as test) for n_test == 0.
    split_at = max(len(data) - n_test, 0)
    return data[:split_at], data[split_at:]

In [8]:
def series_to_supervised(data, n_in=1, n_out=1):
    """Frame a series as a supervised-learning table of lagged and lead columns.

    Args:
        data: Series values; anything ``DataFrame`` accepts.
        n_in: Number of lag steps (t-n_in ... t-1) placed first in each row.
        n_out: Number of forecast steps (t ... t+n_out-1) placed after the lags.

    Returns:
        NumPy array of the combined columns; rows containing NaN are discarded.
    """
    frame = DataFrame(data)
    # oldest lag first: shift(n_in), ..., shift(1)
    lagged = [frame.shift(step) for step in range(n_in, 0, -1)]
    # current step onward: shift(0), shift(-1), ..., shift(-(n_out - 1))
    leading = [frame.shift(-step) for step in range(n_out)]
    table = concat(lagged + leading, axis=1)
    # shifting pads the edges with NaN; keep only complete rows
    return table.dropna().values

In [9]:
def measure_rmse(actual, predicted):
    """Return the root mean squared error between observed and forecast values.

    Args:
        actual: Observed values.
        predicted: Forecast values aligned with ``actual``.

    Returns:
        Square root of ``mean_squared_error(actual, predicted)``.
    """
    mse = mean_squared_error(actual, predicted)
    return sqrt(mse)

In [10]:
def difference(data, order):
    """Return the lag-``order`` differences of a sequence.

    Args:
        data: Indexable sequence of values.
        order: Lag between the two elements subtracted.

    Returns:
        List whose entries are ``data[i] - data[i - order]`` for every ``i``
        from ``order`` up to ``len(data) - 1``.
    """
    diffs = []
    for idx in range(order, len(data)):
        diffs.append(data[idx] - data[idx - order])
    return diffs

In [11]:
def model_fit(train, config):
    """Build and train an LSTM forecaster on the training series.

    Args:
        train: Training observations in time order.
        config: Sequence ``[n_input, n_nodes, n_epochs, n_batch, n_diff]``.

    Returns:
        The fitted ``Sequential`` model.
    """
    n_input, n_nodes, n_epochs, n_batch, n_diff = config
    n_features = 1  # one feature per time step
    # optionally difference the series at lag n_diff before framing it
    series = difference(train, n_diff) if n_diff > 0 else train
    # supervised rows: every column but the last is input, the last is the target
    supervised = series_to_supervised(series, n_in=n_input)
    inputs, targets = supervised[:, :-1], supervised[:, -1]
    # reshape inputs into [samples, timesteps, features]
    n_samples, n_steps = inputs.shape
    inputs = inputs.reshape((n_samples, n_steps, n_features))
    # network: LSTM -> Dense(relu) -> single-unit output
    model = Sequential()
    model.add(LSTM(n_nodes, activation='relu', input_shape=(n_input, n_features)))
    model.add(Dense(n_nodes, activation='relu'))
    model.add(Dense(1))
    model.compile(loss='mse', optimizer='adam')
    model.fit(inputs, targets, epochs=n_epochs, batch_size=n_batch, verbose=0)
    return model

In [12]:
def model_predict(model, history, config):
    """Forecast the next value from the most recent observations in ``history``.

    Args:
        model: Fitted model exposing ``predict``.
        history: Observations seen so far, in time order.
        config: Sequence ``[n_input, n_nodes, n_epochs, n_batch, n_diff]``; only
            ``n_input`` and ``n_diff`` are used here.

    Returns:
        First row of the model output plus the differencing correction
        (``0.0`` when no differencing is applied).
    """
    n_input, _n_nodes, _n_epochs, _n_batch, n_diff = config
    if n_diff > 0:
        # the observation n_diff steps back is added to the forecast to undo
        # the differencing applied to the model input
        correction = history[-n_diff]
        history = difference(history, n_diff)
    else:
        correction = 0.0
    # last n_input values shaped as [samples, timesteps, features]
    window = array(history[-n_input:])
    x_input = window.reshape((1, n_input, 1))
    forecast = model.predict(x_input, verbose=0)
    return correction + forecast[0]

In [13]:
def walk_forward_validation(data, n_test, cfg):
    """Score one fitted model with walk-forward validation on the held-out tail.

    The model is fitted once on the training split. Each test step is then
    forecast from the history so far, after which the true observation is
    appended to the history for the next step.

    Args:
        data: Full series in time order.
        n_test: Number of trailing observations used for testing.
        cfg: Model configuration passed to ``model_fit`` and ``model_predict``.

    Returns:
        RMSE between the test observations and the forecasts; also printed.
    """
    train, test = train_test_split(data, n_test)
    model = model_fit(train, cfg)
    # history starts as a copy of the training data and grows with each step
    history = list(train)
    predictions = []
    for observed in test:
        predictions.append(model_predict(model, history, cfg))
        # reveal the real value only after forecasting it
        history.append(observed)
    error = measure_rmse(test, predictions)
    print(' > %.3f' % error)
    return error

In [14]:
def repeat_evaluate(data, config, n_test, n_repeats=10):
    """Fit and score one configuration several times and average the errors.

    Args:
        data: Full series in time order.
        config: Model configuration to evaluate.
        n_test: Number of trailing observations used for testing.
        n_repeats: Number of independent walk-forward evaluations to run.

    Returns:
        Tuple ``(key, result)``: ``str(config)`` and the mean of the scores.
    """
    key = str(config)
    scores = []
    for _ in range(n_repeats):
        scores.append(walk_forward_validation(data, n_test, config))
    result = mean(scores)
    print('> Model[%s] %.3f' % (key, result))
    return (key, result)

In [15]:
def grid_search(data, cfg_list, n_test):
    """Evaluate every configuration and rank them by ascending error.

    Args:
        data: Full series in time order.
        cfg_list: Iterable of model configurations.
        n_test: Number of trailing observations used for testing.

    Returns:
        List of ``repeat_evaluate`` results sorted by their second element
        (the error), lowest first.
    """
    results = []
    for cfg in cfg_list:
        results.append(repeat_evaluate(data, cfg, n_test))
    return sorted(results, key=lambda item: item[1])

In [16]:
def model_configs():
    """Enumerate the hyperparameter grid as a list of configurations.

    Returns:
        List of ``[n_input, n_nodes, n_epochs, n_batch, n_diff]`` lists, one per
        combination of the candidate values defined below. The total count is
        also printed.
    """
    # candidate values for each hyperparameter
    n_input = [12]
    n_nodes = [100]
    n_epochs = [50]
    n_batch = [1, 150]
    n_diff = [12]
    # every combination, with the rightmost hyperparameter varying fastest
    configs = [
        [steps, nodes, epochs, batch, diff]
        for steps in n_input
        for nodes in n_nodes
        for epochs in n_epochs
        for batch in n_batch
        for diff in n_diff
    ]
    print('Total configs: %d' % len(configs))
    return configs

In [17]:
# Load the series from CSV: first row is the header, first column the index.
# NOTE(review): absolute, machine-specific Windows path; this cell fails on any
# machine without this exact file. Consider a configurable data directory.
df = read_csv('D://DS//Stater_Projects//airline.txt', header=0, index_col=0)
# Raw values as a NumPy array (the index column is not part of the values).
data = df.values

In [18]:
# Number of trailing observations held out for walk-forward testing.
n_test = 12
# Build the list of hyperparameter configurations to evaluate.
cfg_list = model_configs()

Total configs: 2


In [19]:
# Evaluate every configuration (each one is fitted and scored repeatedly, using
# the repeat_evaluate default of 10 runs) and collect (config key, mean RMSE)
# pairs sorted by ascending error.
scores = grid_search(data, cfg_list, n_test)
print('done')

 > 23.314
 > 22.618
 > 25.185
 > 19.491
 > 27.243
 > 22.778
 > 21.318
 > 21.386
 > 20.025
 > 25.807
> Model[[12, 100, 50, 1, 12]] 22.916
 > 21.215
 > 21.673
 > 19.653
 > 22.615
 > 21.014
 > 21.219
 > 17.287
 > 21.262
 > 20.875
 > 21.073
> Model[[12, 100, 50, 150, 12]] 20.789
done


In [20]:
# Show up to three best configurations (lowest mean RMSE first).
for cfg, error in scores[:3]:
    print(cfg, error)

[12, 100, 50, 150, 12] 20.788647251287948
[12, 100, 50, 1, 12] 22.916472358701085
