In [1]:
import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline

Using TensorFlow backend.


In [2]:
# read the train batches from files created by the Preprocessing_NN_Data notebook
def read_batches(filepath):
    """Load consecutively numbered CSV batch files into a list of DataFrames.

    Files are looked up as ``<filepath>_0.csv``, ``<filepath>_1.csv``, ... and
    reading stops at the first number for which no file exists.

    Parameters
    ----------
    filepath : str
        Path prefix shared by all batch files (without the ``_<n>.csv`` suffix).

    Returns
    -------
    list of pandas.DataFrame
        One frame per file, in file-number order; an empty list when
        ``<filepath>_0.csv`` does not exist. The first CSV column becomes the
        index and is parsed as dates.
    """
    frames = []
    file_number = 0
    while True:
        csv_name = filepath + '_' + str(file_number) + '.csv'
        try:
            frames.append(pd.read_csv(csv_name, index_col=0, parse_dates=True))
        except FileNotFoundError:
            # the first missing file number marks the end of the sequence
            return frames
        file_number += 1
            
# load every training batch stored as data/train/batch_<n>.csv (n = 0, 1, 2, ...)
train_batches = read_batches('data/train/batch')

In [3]:
# convert the dataframes in train_batches to input and output arrays of the shape Keras expects
def convert_to_keras_input(batches):
    """Split each batch DataFrame into a model input array and a target array.

    Parameters
    ----------
    batches : list of pandas.DataFrame
        Each frame must contain a 'cl_kadij_out' column (the target). All
        columns except the first one are used as input features.

    Returns
    -------
    tuple (input_batches, output_batches)
        input_batches : list of numpy arrays of shape (rows, 1, n_features),
            i.e. every row becomes a sequence of length one.
        output_batches : list of 1-D numpy arrays of shape (rows,) holding the
            'cl_kadij_out' values.
        Both lists are empty when `batches` is empty.
    """
    input_batches = []
    output_batches = []
    for batch in batches:
        # every column except the first is an input feature
        # NOTE(review): this presumes 'cl_kadij_out' is the first column -- confirm
        # against the preprocessing output.
        # `.values` replaces `as_matrix()`, which was removed in pandas 1.0.
        features = batch.iloc[:, 1:].values
        # insert a length-1 middle axis: (rows, features) -> (rows, 1, features)
        input_batches.append(features[:, np.newaxis, :])
        output_batches.append(batch['cl_kadij_out'].values)
    return (input_batches, output_batches)

# per-batch model inputs and matching 'cl_kadij_out' targets for training
(input_train_batches, output_train_batches) = convert_to_keras_input(train_batches)

In [4]:
# create a Keras model of the NN: two stacked LSTM layers followed by one Dense output unit
n_cells = 11
# per-sample input shape: the shape of the first training batch without its leading (sample) axis
input_shape = input_train_batches[0].shape[1:]

model = keras.models.Sequential([
    # return_sequences=True keeps the time axis so the second LSTM receives a sequence
    keras.layers.LSTM(n_cells, input_shape=input_shape, return_sequences=True),
    keras.layers.LSTM(n_cells),
    keras.layers.Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 1, 11)             1012      
_________________________________________________________________
lstm_2 (LSTM)                (None, 11)                1012      
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 12        
Total params: 2,036
Trainable params: 2,036
Non-trainable params: 0
_________________________________________________________________


In [5]:
%%time
# train the model (this will take hours!)

n_epochs = 1000
report_at = 25  # every `report_at` epochs the loss on the first batch is printed

for epoch in range(n_epochs):
    report = epoch % report_at == 0
    if report:
        print('At epoch', epoch)
    for batch_number, (batch_input, batch_output) in enumerate(
            zip(input_train_batches, output_train_batches)):
        # Each stored batch is fitted for one epoch with batch_size equal to its
        # full length. Only the first batch of a reporting epoch is fitted
        # verbosely (verbose=2); all other fits are silent.
        verbosity = 2 if (report and batch_number == 0) else 0
        model.fit(batch_input, batch_output, epochs=1,
                  batch_size=len(batch_output), verbose=verbosity)

At epoch 0
Epoch 1/1
2s - loss: 0.1595
At epoch 25
Epoch 1/1
0s - loss: 2.5819e-04
At epoch 50
Epoch 1/1
0s - loss: 2.4322e-04
At epoch 75
Epoch 1/1
0s - loss: 2.3475e-04
At epoch 100
Epoch 1/1
0s - loss: 2.3374e-04
At epoch 125
Epoch 1/1
0s - loss: 2.3379e-04
At epoch 150
Epoch 1/1
0s - loss: 2.3515e-04
At epoch 175
Epoch 1/1
0s - loss: 2.3808e-04
At epoch 200
Epoch 1/1
0s - loss: 2.3662e-04
At epoch 225
Epoch 1/1
0s - loss: 2.4280e-04
At epoch 250
Epoch 1/1
0s - loss: 2.3692e-04
At epoch 275
Epoch 1/1
0s - loss: 2.3962e-04
At epoch 300
Epoch 1/1
0s - loss: 2.3696e-04
At epoch 325
Epoch 1/1
0s - loss: 2.3575e-04
At epoch 350
Epoch 1/1
0s - loss: 2.3438e-04
At epoch 375
Epoch 1/1
0s - loss: 2.3350e-04
At epoch 400
Epoch 1/1
0s - loss: 2.3555e-04
At epoch 425
Epoch 1/1
0s - loss: 2.3838e-04
At epoch 450
Epoch 1/1
0s - loss: 2.3643e-04
At epoch 475
Epoch 1/1
0s - loss: 2.3436e-04
At epoch 500
Epoch 1/1
0s - loss: 2.3413e-04
At epoch 525
Epoch 1/1
0s - loss: 2.3331e-04
At epoch 550
Epoch 

In [6]:
# write the model to a file, don't forget to change the output filename otherwise it won't write!
output_filename = 'models/2layer_lstm_11cells'


from pathlib import Path
output_file = Path(output_filename + '.h5')
if output_file.is_file():
    # never overwrite a previously saved model
    print('Please choose a different filename, this one already exists!')
else:
    # create the target directory first so saving cannot fail on a missing folder
    output_file.parent.mkdir(parents=True, exist_ok=True)
    model.save(str(output_file))
    # store the training settings and the layer summary next to the model;
    # the context manager closes the file even if writing the summary raises
    with open(output_filename + '_info.txt', 'w') as info_file:
        info_file.write('number of epochs: ' + str(n_epochs) + '\n')
        info_file.write('number of cells per layer: ' + str(n_cells) + '\n\n')
        model.summary(print_fn=lambda line: info_file.write(line + '\n'))