In [2]:
import pickle
import numpy as np

from sklearn.preprocessing import MinMaxScaler

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.callbacks import EarlyStopping

In [3]:
# convert an array of values into a data_set matrix
def create_data_set(_data_set, _look_back, _look_forward):
    data_x, data_y = [], []
    for i in range(len(_data_set) - _look_back - _look_forward):
        a = _data_set[i:(i + _look_back)]
        data_x.append(a)
        data_y.append([_data_set[i + j + _look_back] for j in range(_look_forward)])
    return np.array(data_x), np.array(data_y) 

In [30]:
with open ('../Datapreprocessing/Data/Smoothing/Train_x_LB:10_s', 'rb') as fp:
    train_lb10 = pickle.load(fp)
with open ('../Datapreprocessing/Data/Smoothing/Val_x_LB:10_s', 'rb') as fp:
    val_lb10 = pickle.load(fp)
with open ('../Datapreprocessing/Data/Smoothing/Test_x_LB:10_s', 'rb') as fp:
    test_lb10 = pickle.load(fp)

In [5]:
with open ('../Datapreprocessing/Data/Smoothing/Train_x_LB:100_25%_s', 'rb') as fp:
    train_lb100 = pickle.load(fp)
with open ('../Datapreprocessing/Data/Smoothing/Val_x_LB:100_25%_s', 'rb') as fp:
    val_lb100 = pickle.load(fp)
with open ('../Datapreprocessing/Data/Smoothing/Test_x_LB:100_25%_s', 'rb') as fp:
    test_lb100 = pickle.load(fp)

In [6]:
with open ('../Datapreprocessing/Data/Smoothing/Train_x_LB:200_25%_s', 'rb') as fp:
    train_lb200 = pickle.load(fp)
with open ('../Datapreprocessing/Data/Smoothing/Val_x_LB:200_25%_s', 'rb') as fp:
    val_lb200 = pickle.load(fp)
with open ('../Datapreprocessing/Data/Smoothing/Test_x_LB:200_25%_s', 'rb') as fp:
    test_lb200 = pickle.load(fp)

In [31]:
# normalize the train data_set
scaler = MinMaxScaler(feature_range=(0, 1))

train_lb10 = [scaler.fit_transform(np.array(l).reshape(-1,1)) for l in train_lb10]
val_lb10 = [scaler.fit_transform(np.array(l).reshape(-1,1)) for l in val_lb10]
test_lb10 = [scaler.fit_transform(np.array(l).reshape(-1,1)) for l in test_lb10]

train_lb100 = [scaler.fit_transform(np.array(l).reshape(-1,1)) for l in train_lb100]
val_lb100 = [scaler.fit_transform(np.array(l).reshape(-1,1)) for l in val_lb100]
test_lb100 = [scaler.fit_transform(np.array(l).reshape(-1,1)) for l in test_lb100]

train_lb200 = [scaler.fit_transform(np.array(l).reshape(-1,1)) for l in train_lb200]
val_lb200 = [scaler.fit_transform(np.array(l).reshape(-1,1)) for l in val_lb200]
test_lb200 = [scaler.fit_transform(np.array(l).reshape(-1,1)) for l in test_lb200]

In [32]:
look_back = 10
look_forward = 10
units = 100

In [33]:
# add look back
train_x = []
train_y = []
# reshape into X=t and Y=t+n
for sentence in train_lb10:
    tmp_x, tmp_y = create_data_set(sentence, look_back, look_forward)
    train_x.append(tmp_x)
    train_y.append(tmp_y)

train_x_10 = np.array([n for m in train_x for n in m])   
train_y_10 = np.array([n for m in train_y for n in m])

In [34]:
val_x = []
val_y = []
# reshape into X=t and Y=t+n
for sentence in val_lb10:
    tmp_x, tmp_y = create_data_set(sentence, look_back, look_forward)
    val_x.append(tmp_x)
    val_y.append(tmp_y)
    
val_x_10 = np.array([n for m in val_x for n in m])
val_y_10 = np.array([n for m in val_y for n in m])

In [35]:
test_x = []
test_y = []
# reshape into X=t and Y=t+n
for sentence in test_lb10:
    tmp_x, tmp_y = create_data_set(sentence, look_back, look_forward)
    test_x.append(tmp_x)
    test_y.append(tmp_y)
    
test_x_10 = np.array([n for m in test_x for n in m])
test_y_10 = np.array([n for m in test_y for n in m])

In [36]:
# reshape input to be [samples, time steps, features]
train_y_10 = np.reshape(train_y_10, (train_y_10.shape[0], look_forward))
val_y_10 = np.reshape(val_y_10, (val_y_10.shape[0], look_forward))
test_y_10 = np.reshape(test_y_10, (test_y_10.shape[0], look_forward))

In [37]:
# create and fit the LSTM network
callback = [EarlyStopping(monitor='val_loss', min_delta=0, patience=5, verbose=0, mode='auto')]

model = Sequential()
model.add(LSTM(units, input_shape=(look_back, 1)))
model.add(Dense(look_forward))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(train_x_10, train_y_10, epochs=50, batch_size=256, verbose=1, callbacks = callback, validation_data=(val_x_10, val_y_10))
model.save('lb{}_lf{}_u{}_full_s.h5'.format(look_back, look_forward, units))

Train on 679481 samples, validate on 84439 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50


In [14]:
look_back = 100

In [15]:
# add look back
train_x = []
train_y = []
# reshape into X=t and Y=t+n
for sentence in train_lb100:
    tmp_x, tmp_y = create_data_set(sentence, look_back, look_forward)
    train_x.append(tmp_x)
    train_y.append(tmp_y)

train_x_100 = np.array([n for m in train_x for n in m])   
train_y_100 = np.array([n for m in train_y for n in m])

In [16]:
val_x = []
val_y = []
# reshape into X=t and Y=t+n
for sentence in val_lb100:
    tmp_x, tmp_y = create_data_set(sentence, look_back, look_forward)
    val_x.append(tmp_x)
    val_y.append(tmp_y)
    
val_x_100 = np.array([n for m in val_x for n in m])
val_y_100 = np.array([n for m in val_y for n in m])

In [17]:
test_x = []
test_y = []
# reshape into X=t and Y=t+n
for sentence in test_lb100:
    tmp_x, tmp_y = create_data_set(sentence, look_back, look_forward)
    test_x.append(tmp_x)
    test_y.append(tmp_y)
    
test_x_100 = np.array([n for m in test_x for n in m])
test_y_100 = np.array([n for m in test_y for n in m])

In [18]:
# reshape input to be [samples, time steps, features]
train_y_100 = np.reshape(train_y_100, (train_y_100.shape[0], look_forward))
val_y_100 = np.reshape(val_y_100, (val_y_100.shape[0], look_forward))
test_y_100 = np.reshape(test_y_100, (test_y_100.shape[0], look_forward))

In [19]:
# create and fit the LSTM network
callback = [EarlyStopping(monitor='val_loss', min_delta=0, patience=5, verbose=0, mode='auto')]

model = Sequential()
model.add(LSTM(units, input_shape=(look_back, 1)))
model.add(Dense(look_forward))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(train_x_100, train_y_100, epochs=50, batch_size=256, verbose=1, callbacks = callback, validation_data=(val_x_100, val_y_100))
model.save('lb{}_lf{}_u{}_s.h5'.format(look_back, look_forward, units))

Train on 113258 samples, validate on 18598 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50


In [20]:
look_back = 200

In [21]:
# add look back
train_x = []
train_y = []
# reshape into X=t and Y=t+n
for sentence in train_lb200:
    tmp_x, tmp_y = create_data_set(sentence, look_back, look_forward)
    train_x.append(tmp_x)
    train_y.append(tmp_y)

In [22]:
train_x_200 = np.array([n for m in train_x for n in m])   
train_y_200 = np.array([n for m in train_y for n in m])

In [23]:
val_x = []
val_y = []
# reshape into X=t and Y=t+n
for sentence in val_lb200:
    tmp_x, tmp_y = create_data_set(sentence, look_back, look_forward)
    val_x.append(tmp_x)
    val_y.append(tmp_y)
    
val_x_200 = np.array([n for m in val_x for n in m])
val_y_200 = np.array([n for m in val_y for n in m])

In [24]:
test_x = []
test_y = []
# reshape into X=t and Y=t+n
for sentence in test_lb200:
    tmp_x, tmp_y = create_data_set(sentence, look_back, look_forward)
    test_x.append(tmp_x)
    test_y.append(tmp_y)
    
test_x_200 = np.array([n for m in test_x for n in m])
test_y_200 = np.array([n for m in test_y for n in m])

In [25]:
# reshape input to be [samples, time steps, features]
train_y_200 = np.reshape(train_y_200, (train_y_200.shape[0], look_forward))
val_y_200 = np.reshape(val_y_200, (val_y_200.shape[0], look_forward))
test_y_200 = np.reshape(test_y_200, (test_y_200.shape[0], look_forward))

In [29]:
# create and fit the LSTM network
callback = [EarlyStopping(monitor='val_loss', min_delta=0, patience=5, verbose=0, mode='auto')]

model = Sequential()
model.add(LSTM(units, input_shape=(look_back, 1)))
model.add(Dense(look_forward))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(train_x_200, train_y_200, epochs=50, batch_size=256, verbose=1, callbacks = callback,validation_data=(val_x_200, val_y_200))
model.save('lb{}_lf{}_u{}_s.h5'.format(look_back, look_forward, units))

Train on 67821 samples, validate on 18598 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
