In [1]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Flatten
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


from machine_learning.configurationGR import input_output_dict
from machine_learning.data_wrangling import preprocessing
from machine_learning.utils import read_config

In [2]:
# Names of the input and output columns come from the project configuration.
input_name, output_name = input_output_dict['INPUT'], input_output_dict['OUTPUT']

# Load the cleaned-up CSV and separate it into model inputs and the target.
file_loc = 'input/cleaned_up/25509696_cleaned_up.csv'
df = pd.read_csv(file_loc)
data_in, data_out = preprocessing.input_output_split(df, input_name, output_name)

# Number of input columns; used further down to size the network input.
INPUT_DIM = data_in.shape[1]
print(data_in.shape, data_out.shape)

(14804, 7) (14804, 1)


In [3]:
# Split by row position: the first 70 % of the rows are used for training,
# the following 10 % for validation and the remainder for testing.
train_fraction, validation_fraction = 0.7, 0.1
test_fraction = 1 - train_fraction - validation_fraction
n_samples = data_in.shape[0]
train_max_idx = int(n_samples * train_fraction)
validation_max_idx = int(train_max_idx + n_samples * validation_fraction)

# Standardise inputs and output together. The scaler statistics are fitted on
# the training rows only and then applied to every row.
dataset = preprocessing.input_output_concatenate(data_in, data_out)
scaler = StandardScaler().fit(dataset[:train_max_idx])
dataset = scaler.transform(dataset)

# Undo the concatenation: the last column is the output, everything before it
# is input. The output keeps an explicit column axis, i.e. shape (n, 1).
data_in, data_out = dataset[:, :-1], dataset[:, -1:]

In [4]:
# Shape check of the scaled input and output arrays.
print(*(array.shape for array in (data_in, data_out)))

(14804, 7) (14804, 1)


In [5]:


# Keyword arguments that are identical for the three generators below.
# NOTE(review): lookback=2 presumably is the window length in time steps and
# has to agree with the first dimension of the model's input_shape -- confirm
# against preprocessing.timeseries_generator.
window_options = dict(lookback=2, delay=0, include_out=True, shuffle=False, step=1)

# Training batches are drawn from the index range 0 .. train_max_idx.
train_generator = preprocessing.timeseries_generator(
    data_in, data_out,
    min_index=0, max_index=train_max_idx,
    batch_size=200, **window_options)

# Validation batches are drawn from train_max_idx .. validation_max_idx.
validation_generator = preprocessing.timeseries_generator(
    data_in, data_out,
    min_index=train_max_idx, max_index=validation_max_idx,
    batch_size=128, **window_options)

# Test batches start at validation_max_idx. No max_index is passed, so the
# helper's default applies -- presumably "up to the end of the data"; confirm.
test_generator = preprocessing.timeseries_generator(
    data_in, data_out,
    min_index=validation_max_idx,
    batch_size=128, **window_options)

In [6]:
# Stacked-LSTM regressor, assembled layer by layer. Every sample is declared as
# 2 time steps with INPUT_DIM + 1 values per step (the extra value presumably
# being the output column fed back in via include_out=True -- confirm).
# NOTE(review): the second LSTM keeps return_sequences=True, so the Dense head
# runs on every time step and the network emits one value per step. Confirm
# that this matches the targets yielded by the generators.
model = Sequential()
model.add(LSTM(32, return_sequences=True, activation='relu', input_shape=(2, INPUT_DIM + 1)))
model.add(LSTM(64, return_sequences=True, activation='relu'))
model.add(Dense(64))  # no activation argument, i.e. a linear layer
model.add(Dense(1))

# Train on mean squared error with RMSprop; MSE is additionally reported as a metric.
model.compile(loss='mse', optimizer='rmsprop', metrics=['mse'])

In [7]:
# Number of validation batches to draw after every epoch. The validation slice
# spans (validation_max_idx - train_max_idx) rows and validation_generator was
# created with lookback=2 and batch_size=128.
# NOTE(review): this assumes preprocessing.timeseries_generator keeps yielding
# batches without ever terminating. Keras then needs an explicit
# validation_steps; without it the validation pass at the end of the first
# epoch never finishes. Confirm against the helper's implementation.
validation_steps = max(1, (validation_max_idx - train_max_idx - 2) // 128)

# steps_per_epoch=10 means each epoch consumes 10 training batches.
model.fit(train_generator, steps_per_epoch=10, epochs=100,
          validation_data=validation_generator, validation_steps=validation_steps)

Train for 10 steps
Epoch 1/100


KeyboardInterrupt: 

In [None]:
# Per-epoch curves recorded by the most recent fit() call on the model.
fit_log = model.history.history
mse, val_mse = fit_log['mse'], fit_log['val_mse']
loss, val_loss = fit_log['loss'], fit_log['val_loss']

# 1-based epoch numbers for the x-axis.
epochs = range(1, len(mse) + 1)

# Blue dots: training curve; blue line: validation curve.
for curve, marker, legend_label in ((mse, 'bo', 'Training mse'),
                                    (val_mse, 'b', 'Validation mse')):
    plt.plot(epochs, curve, marker, label=legend_label)
plt.title('Training and validation mse')
plt.legend()





In [None]:
# Display the raw per-epoch history dictionary of the most recent fit() call
# (it holds the 'loss', 'val_loss', 'mse' and 'val_mse' series plotted above).
model.history.history

In [None]:
# Training vs. validation loss per epoch, drawn on its own figure.
# Uses `epochs`, `loss` and `val_loss` computed in the metrics cell above.
# The figure is created *before* plotting; the previous trailing plt.figure()
# only opened an additional, empty figure after the curves were drawn.
fig, ax = plt.subplots()
ax.plot(epochs, loss, 'bo', label='Training loss')
ax.plot(epochs, val_loss, 'b', label='Validation loss')
ax.set_title('Training and validation loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()

plt.show()