In [2]:
from math import sqrt
from numpy import array
from numpy import mean
from pandas import DataFrame
from pandas import concat
from pandas import read_csv
from sklearn.metrics import mean_squared_error
from tensorflow.keras import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D

In [3]:
def train_test_split(data, n_test):
    """Split a sequence into a leading train part and a trailing test part.

    Args:
        data: Any sliceable sequence supporting ``len()`` (list, NumPy array, ...).
        n_test: Number of trailing items to place in the test part.

    Returns:
        A ``(train, test)`` tuple where ``test`` holds the last ``n_test``
        items and ``train`` holds everything before them.

    Raises:
        ValueError: If ``n_test`` is negative.
    """
    if n_test < 0:
        raise ValueError('n_test must be non-negative, got %r' % (n_test,))
    # Use an explicit split index instead of negative slicing: data[:-0] is
    # empty and data[-0:] is the whole sequence, which swapped the two parts
    # when n_test was 0. Clamping keeps the old result when n_test > len(data).
    split_at = max(len(data) - n_test, 0)
    return data[:split_at], data[split_at:]

In [4]:
def series_to_supervised(data, n_in=1, n_out=1):
    """Frame a series as a supervised-learning table of lagged values.

    Args:
        data: Sequence of observations accepted by ``DataFrame``.
        n_in: Number of lagged steps (t-n_in ... t-1) used as leading columns.
        n_out: Number of forward steps (t ... t+n_out-1) used as trailing columns.

    Returns:
        A NumPy array with one row per time step for which every lagged and
        forward value exists; rows containing NaN from shifting are removed.
    """
    frame = DataFrame(data)
    # lagged copies, oldest lag first: (t-n_in, ..., t-1)
    lags = [frame.shift(step) for step in range(n_in, 0, -1)]
    # current and future copies: (t, t+1, ..., t+n_out-1)
    leads = [frame.shift(-step) for step in range(n_out)]
    combined = concat(lags + leads, axis=1)
    # shifting introduces NaN at the edges; keep complete rows only
    return combined.dropna().values

In [5]:
def measure_rmse(actual, predicted):
    """Return the root mean squared error between actual and predicted values."""
    mse = mean_squared_error(actual, predicted)
    return sqrt(mse)

In [6]:
def difference(data, order):
    """Return the lag-``order`` differences of a sequence.

    Element ``k`` of the result is ``data[k + order] - data[k]``, so the
    result is ``order`` items shorter than ``data``.
    """
    diffs = []
    for idx in range(order, len(data)):
        current = data[idx]
        earlier = data[idx - order]
        diffs.append(current - earlier)
    return diffs

In [7]:
def model_fit(train, config):
    """Train a 1-D convolutional network on a univariate series.

    Args:
        train: Training observations.
        config: Six-item sequence
            ``(n_input, n_filters, n_kernel, n_epochs, n_batch, n_diff)``.
            ``n_diff > 0`` differences the series at that lag before fitting.

    Returns:
        The fitted Keras ``Sequential`` model.
    """
    n_input, n_filters, n_kernel, n_epochs, n_batch, n_diff = config
    # optionally difference the series at lag n_diff
    series = difference(train, n_diff) if n_diff > 0 else train
    # rows of n_input lagged values followed by the value to predict
    supervised = series_to_supervised(series, n_in=n_input)
    inputs = supervised[:, :-1]
    targets = supervised[:, -1]
    # Conv1D expects [samples, timesteps, features]
    n_features = 1
    n_samples, n_steps = inputs.shape
    inputs = inputs.reshape((n_samples, n_steps, n_features))
    # conv -> pool -> flatten -> single-value regression head
    layers = (
        Conv1D(
            filters=n_filters,
            kernel_size=n_kernel,
            activation='relu',
            input_shape=(n_input, n_features),
        ),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(1),
    )
    model = Sequential()
    for layer in layers:
        model.add(layer)
    model.compile(loss='mse', optimizer='adam')
    model.fit(inputs, targets, epochs=n_epochs, batch_size=n_batch, verbose=0)
    return model

In [8]:
def model_predict(model, history, config):
    """Forecast the next value of the series from its most recent window.

    Args:
        model: Fitted model exposing ``predict(x, verbose=0)``.
        history: Observations seen so far, oldest first.
        config: Six-item sequence; only the first item (``n_input``) and the
            last item (``n_diff``) are used here.

    Returns:
        The first row of the model output, plus the observation ``n_diff``
        steps back when differencing is enabled (to undo the differencing).
    """
    n_input, _filters, _kernel, _epochs, _batch, n_diff = config
    if n_diff > 0:
        # value that the differenced forecast must be added back onto
        offset = history[-n_diff]
        series = difference(history, n_diff)
    else:
        offset = 0.0
        series = history
    # most recent n_input values shaped as [1 sample, n_input steps, 1 feature]
    window = array(series[-n_input:]).reshape((1, n_input, 1))
    forecast = model.predict(window, verbose=0)
    return offset + forecast[0]

In [9]:
def walk_forward_validation(data, n_test, cfg):
    """Score one configuration with walk-forward validation.

    The model is fitted once on the training part; each test step is then
    forecast from the history so far, after which the true observation is
    appended to the history.

    Args:
        data: Full series of observations.
        n_test: Number of trailing observations used as the test set.
        cfg: Model configuration passed to ``model_fit`` / ``model_predict``.

    Returns:
        RMSE of the forecasts over the test set (also printed).
    """
    train, test = train_test_split(data, n_test)
    model = model_fit(train, cfg)
    # history starts as a copy of the training observations
    history = list(train)
    predictions = []
    for step in range(len(test)):
        # one-step forecast from everything observed so far
        predictions.append(model_predict(model, history, cfg))
        # reveal the true value before forecasting the next step
        history.append(test[step])
    error = measure_rmse(test, predictions)
    print(' > %.3f' % error)
    return error

In [10]:
def repeat_evaluate(data, config, n_test, n_repeats=10):
    """Evaluate a configuration several times and average the error.

    Args:
        data: Full series of observations.
        config: Model configuration to evaluate.
        n_test: Number of trailing observations used as the test set.
        n_repeats: How many times to fit and score the model.

    Returns:
        ``(key, result)`` where ``key`` is ``str(config)`` and ``result`` is
        the mean of the per-run errors (also printed).
    """
    key = str(config)
    scores = []
    for _ in range(n_repeats):
        scores.append(walk_forward_validation(data, n_test, config))
    result = mean(scores)
    print('> Model[%s] %.3f' % (key, result))
    return key, result

In [11]:
def grid_search(data, cfg_list, n_test):
    """Evaluate every configuration and rank them by mean error.

    Args:
        data: Full series of observations.
        cfg_list: Iterable of model configurations.
        n_test: Number of trailing observations used as the test set.

    Returns:
        List of ``(key, mean_error)`` tuples sorted by ascending error.
    """
    results = []
    for cfg in cfg_list:
        results.append(repeat_evaluate(data, cfg, n_test))
    # lowest error first
    return sorted(results, key=lambda item: item[1])

In [12]:
def model_configs(n_input=(12,), n_filters=(64,), n_kernels=(3, 5),
                  n_epochs=(100,), n_batch=(1, 150), n_diff=(0, 12)):
    """Build the grid of model configurations to search.

    Every argument is an iterable of candidate values for one hyperparameter;
    the defaults reproduce the grid that was previously hard-coded, so calling
    ``model_configs()`` with no arguments behaves as before.

    Args:
        n_input: Candidate numbers of lagged inputs.
        n_filters: Candidate numbers of convolution filters.
        n_kernels: Candidate convolution kernel sizes.
        n_epochs: Candidate numbers of training epochs.
        n_batch: Candidate batch sizes.
        n_diff: Candidate differencing lags (0 means no differencing).

    Returns:
        List of ``[n_input, n_filters, n_kernel, n_epochs, n_batch, n_diff]``
        lists, one per combination, with the last hyperparameter varying
        fastest. The number of configs is also printed.
    """
    # Materialise each axis so one-shot iterables (e.g. generators) can be
    # re-iterated by the inner levels of the product below.
    axes = [list(values) for values in
            (n_input, n_filters, n_kernels, n_epochs, n_batch, n_diff)]
    inputs, filters, kernels, epochs, batches, diffs = axes
    configs = [
        [a, b, c, d, e, f]
        for a in inputs
        for b in filters
        for c in kernels
        for d in epochs
        for e in batches
        for f in diffs
    ]
    print('Total configs: %d' % len(configs))
    return configs

In [13]:
# Load the series from CSV: row 0 is the header and column 0 becomes the
# index, so only the remaining column(s) end up in `data`.
# NOTE(review): absolute, machine-specific path - the notebook will not run
# elsewhere unless this is changed; consider a configurable data directory.
df = read_csv('D://DS//Stater_Projects//airline.txt', header=0, index_col=0)
# Raw NumPy values of the frame; this is what gets passed to grid_search.
data = df.values

In [14]:
# Number of trailing observations held out as the test set.
n_test = 12

In [15]:
# Build the list of hyperparameter configurations to evaluate.
cfg_list = model_configs()

Total configs: 8


In [16]:
# Evaluate every configuration (each one is fitted and scored repeatedly) and
# collect (config key, mean RMSE) pairs sorted by ascending error.
scores = grid_search(data, cfg_list, n_test)
print('done')

 > 21.642
 > 23.442
 > 19.370
 > 16.805
 > 33.781
 > 25.504
 > 34.962
 > 23.424
 > 17.355
 > 17.011
> Model[[12, 64, 3, 100, 1, 0]] 23.330
 > 19.625
 > 21.455
 > 18.918
 > 21.282
 > 19.570
 > 20.137
 > 20.803
 > 19.538
 > 19.744
 > 20.508
> Model[[12, 64, 3, 100, 1, 12]] 20.158
 > 71.585
 > 89.248
 > 72.131
 > 81.099
 > 83.620
 > 85.819
 > 75.208
 > 90.935
 > 88.231
 > 75.329
> Model[[12, 64, 3, 100, 150, 0]] 81.320
 > 19.595
 > 18.710
 > 19.621
 > 17.770
 > 19.077
 > 19.795
 > 19.236
 > 19.097
 > 19.631
 > 18.989
> Model[[12, 64, 3, 100, 150, 12]] 19.152
 > 19.544
 > 19.816
 > 17.860
 > 18.844
 > 19.166
 > 18.053
 > 22.558
 > 19.277
 > 27.210
 > 24.222
> Model[[12, 64, 5, 100, 1, 0]] 20.655
 > 18.506
 > 18.679
 > 19.372
 > 17.961
 > 19.887
 > 19.081
 > 18.220
 > 20.080
 > 18.057
 > 19.439
> Model[[12, 64, 5, 100, 1, 12]] 18.928
 > 90.061
 > 85.753
 > 75.292
 > 80.534
 > 81.164
 > 91.941
 > 79.503
 > 80.677
 > 73.495
 > 78.975
> Model[[12, 64, 5, 100, 150, 0]] 81.739
 > 20.933
 > 20.47

In [17]:
# Show the three configurations with the lowest mean RMSE.
for cfg, error in scores[:3]:
    print(cfg, error)

[12, 64, 5, 100, 1, 12] 18.928364215822757
[12, 64, 3, 100, 150, 12] 19.152169626631554
[12, 64, 5, 100, 150, 12] 19.599537876931855
