In [None]:
import pandas as pd
import plotly.graph_objs as go
import plotly.plotly as py
import plotly.offline as offline
import numpy as np
from pandas import DataFrame
from pandas import read_csv
from keras.models import Sequential
from keras.layers import LSTM, GRU
from keras.layers import Dense
from matplotlib import pyplot
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

# --- Run configuration -------------------------------------------------------
pumpNumber = "4110"   # pump identifier; part of the input CSV name and the output HTML names
dset = "dry"          # dataset variant; "wet" additionally keeps the VOLUME column
predict = "flow"      # column the models are trained to forecast
period = 1440         # number of trailing rows held out as the test set
timewindow = 5        # number of rows the target is shifted ahead of the inputs

# Input file is named "<pumpNumber>set<dset>.csv"; the first column becomes the index.
csv_path = "{}set{}.csv".format(pumpNumber, dset)
dataset = read_csv(csv_path, header=0, index_col=0).iloc[::-1]  # rows reversed on load

In [None]:
# Order the rows by index, ascending (the default direction of sort_index).
dataset = dataset.sort_index()

In [None]:
# Optional: uncomment the next line to truncate the dataset at a cutoff timestamp.
#dataset = dataset.loc[:'2018-06-08 17:00']

In [None]:
# Keep only the columns the models use; the "wet" variant also carries VOLUME
# (appended last, so the column order matches the previous explicit lists).
feature_columns = ["flow", "level", "WeekNumber", "DayOfWeek"]
if dset == "wet":
    feature_columns.append("VOLUME")

# .copy() yields an independent frame: the scaling step later assigns into
# `dataset` in place, which on a column-selected slice can raise pandas'
# SettingWithCopyWarning.
dataset = dataset[feature_columns].copy()

In [None]:
# Statistics of the raw `flow` column, captured BEFORE scaling so predictions can
# be mapped back to original units later (value * flowstd + flowmean).
# ddof=0 is required: StandardScaler divides by the population standard
# deviation, whereas pandas' .std() defaults to the sample estimate (ddof=1),
# which would make the denormalisation slightly inconsistent with the scaling.
# NOTE(review): these are flow statistics; they only invert the scaling
# correctly when the predicted column (`predict`) is "flow".
flowstd = dataset["flow"].std(ddof=0)
flowmean = dataset["flow"].mean()

# Despite the names, these are per-column extrema over the whole frame
# (one value per column), not just for `flow`.
flowmax = dataset.max()
flowmin = dataset.min()

### Plot original data:

In [None]:
# Interactive Plotly chart of the `flow` column against the dataset index,
# written to a standalone HTML file.
realdata = go.Scatter(x=dataset.index, y=dataset['flow'], name='real')
data = [realdata]

# Range-selector shortcuts: 1 day, 7 days and 1 month back, plus "all".
range_buttons = [
    dict(count=count, label=label, step=step, stepmode='backward')
    for count, label, step in ((1, '1d', 'day'), (7, '1w', 'day'), (1, '1m', 'month'))
]
range_buttons.append(dict(step='all'))

layout = go.Layout(
    title='Original data',
    xaxis={
        'rangeselector': {'buttons': range_buttons},
        'rangeslider': {'visible': True},
        'type': 'date',
    },
    yaxis={'title': 'Real'},
)
fig = {'data': data, 'layout': layout}
offline.plot(fig, filename=pumpNumber + "-" + predict + "-" + dset + "-original.html")

### Normalizing with Standard Scaler

In [None]:
# Standardise every column and write the scaled values back into the same
# frame, keeping the index and column labels.
# NOTE(review): the scaler is fitted on the full dataset, including the rows
# later held out for testing — confirm this is intended.
scaled_values = StandardScaler().fit_transform(dataset)
dataset[dataset.columns] = scaled_values

### Shifting Dataset

In [None]:
# Targets are the same frame moved `timewindow` rows earlier, so row i of
# `dataset_shifted` holds the values found `timewindow` rows after row i of
# `dataset`. The last `timewindow` rows have no such future values, so they are
# dropped from both frames; this keeps inputs and targets the same length
# (otherwise Keras rejects them with "Input arrays should have the same number
# of samples as target arrays").
dataset_shifted = dataset.shift(-timewindow).iloc[:-timewindow]
dataset = dataset.iloc[:-timewindow]

### Dividing the Dataset for Training and Testing

In [None]:
# The last `period` rows form the test set; everything before them is used for
# training. Inputs and shifted targets are cut at the same position.
train_dataset, test_dataset = dataset.iloc[:-period], dataset.iloc[-period:]
train_dataset_shifted, test_dataset_shifted = (
    dataset_shifted.iloc[:-period],
    dataset_shifted.iloc[-period:],
)

### Training LSTM

In [None]:
# Training inputs reshaped to (rows, 1, columns): one time step per sample.
n_train_rows, n_train_columns = train_dataset.shape
lstm_train_inputs = train_dataset.values.reshape((n_train_rows, 1, n_train_columns))

# One LSTM unit followed by a single dense output, trained on mean absolute
# error with Adam. fit() is called with Keras' default training settings.
model_lstm = Sequential([
    LSTM(1, input_shape=(1, dataset.shape[1])),
    Dense(1),
])
model_lstm.compile(loss='mae', optimizer='adam')

model_lstm.fit(lstm_train_inputs, train_dataset_shifted[predict])

### Training GRU

In [None]:
# Training inputs reshaped to (rows, 1, columns): one time step per sample.
n_train_rows, n_train_columns = train_dataset.shape
gru_train_inputs = train_dataset.values.reshape((n_train_rows, 1, n_train_columns))

# Same architecture as the LSTM model, with a GRU as the recurrent layer:
# one GRU unit, one dense output, MAE loss, Adam optimiser.
model_gru = Sequential([
    GRU(1, input_shape=(1, dataset.shape[1])),
    Dense(1),
])
model_gru.compile(loss='mae', optimizer='adam')

model_gru.fit(gru_train_inputs, train_dataset_shifted[predict])

### Predicting Values

In [None]:
# Both models receive the same hold-out inputs, reshaped to (rows, 1, columns)
# exactly as the training inputs were.
n_test_rows, n_test_columns = test_dataset.shape
test_inputs = test_dataset.values.reshape((n_test_rows, 1, n_test_columns))

test_dataset_predicted_lstm = model_lstm.predict(test_inputs)
test_dataset_predicted_gru = model_gru.predict(test_inputs)

### Calculate Error:

In [None]:
# MAE of the LSTM predictions on the hold-out set (values are still standardised).
mean_absolute_error(y_true=test_dataset_shifted[predict], y_pred=test_dataset_predicted_lstm)

In [None]:
# MAE of the GRU predictions on the hold-out set (values are still standardised).
# Arguments follow sklearn's (y_true, y_pred) order: ground truth first. MAE is
# symmetric, so the value is the same as with the arguments reversed.
mean_absolute_error(y_true=test_dataset_shifted[predict], y_pred=test_dataset_predicted_gru)

### De-Shift:

In [None]:
# Hold-out targets, indexed like the test set.
result = pd.DataFrame()
result['real'] = test_dataset_shifted[predict]

# Each prediction row holds a single value (element 0), so take that column
# directly. The frames are built in one shot rather than by concatenating one
# single-row DataFrame per sample; they keep a 0..n-1 integer index as before.
lstm = pd.DataFrame({'lstm': np.asarray(test_dataset_predicted_lstm)[:, 0]})
gru = pd.DataFrame({'gru': np.asarray(test_dataset_predicted_gru)[:, 0]})

# De-shift is currently disabled; kept for reference:
#lstm=lstm.iloc[timewindow:]
#gru=gru.iloc[timewindow:]
#result = result.iloc[:-timewindow]

### Recalculate error with deshifting:

In [None]:
# MAE of the LSTM column against the real column (standardised values).
# The de-shift slicing is commented out, so no rows have been removed here.
mean_absolute_error(y_true=result['real'], y_pred=lstm["lstm"])


In [None]:
# MAE of the GRU column against the real column (standardised values).
# Arguments follow sklearn's (y_true, y_pred) order: ground truth first. MAE is
# symmetric, so the value is the same as with the arguments reversed.
mean_absolute_error(y_true=result['real'], y_pred=gru["gru"])


### Plot with normalized values:

In [None]:
# Overlay the standardised hold-out targets and both models' predictions in one
# interactive Plotly chart, written to a standalone HTML file.
time_axis = result.index

realdata = go.Scatter(x=time_axis, y=result['real'], name='real')
# Both prediction traces are drawn against the secondary axis 'y2'.
lstmdata = go.Scatter(x=time_axis, y=lstm["lstm"], name='lstm', yaxis='y2')
grudata = go.Scatter(x=time_axis, y=gru["gru"], name='gru', yaxis='y2')
data = [realdata, lstmdata, grudata]

# Range-selector shortcuts: 1 day, 7 days and 1 month back, plus "all".
range_buttons = [
    dict(count=count, label=label, step=step, stepmode='backward')
    for count, label, step in ((1, '1d', 'day'), (7, '1w', 'day'), (1, '1m', 'month'))
]
range_buttons.append(dict(step='all'))

# NOTE(review): yaxis3 ('Gru') is declared but no trace references 'y3'; the
# GRU trace shares 'y2', whose title is 'Lstm'. Confirm which is intended.
right_overlay = dict(overlaying='y', side='right')
layout = go.Layout(
    title='Prediction error normalized',
    xaxis={
        'rangeselector': {'buttons': range_buttons},
        'rangeslider': {'visible': True},
        'type': 'date',
    },
    yaxis={'title': 'Real'},
    yaxis2=dict(title='Lstm', **right_overlay),
    yaxis3=dict(title='Gru', **right_overlay),
)
fig = {'data': data, 'layout': layout}
offline.plot(fig, filename=pumpNumber + "-" + predict + "-" + dset + "-normalized.html")

### Denormalize:

In [None]:
# Map the standardised values back to original units with the flow statistics
# captured before scaling (value * flowstd + flowmean), then replace any
# negative result with 0.
for frame, column in ((result, "real"), (lstm, "lstm"), (gru, "gru")):
    restored = frame[column] * flowstd + flowmean
    frame[column] = np.where(restored < 0, 0, restored)

### Recalculate error with de-normalization:

In [None]:
# MAE of the LSTM column against the real column, now in denormalised units.
mean_absolute_error(y_true=result['real'], y_pred=lstm["lstm"])

In [None]:
# MAE of the GRU column against the real column, now in denormalised units.
# Arguments follow sklearn's (y_true, y_pred) order: ground truth first. MAE is
# symmetric, so the value is the same as with the arguments reversed.
mean_absolute_error(y_true=result['real'], y_pred=gru["gru"])

### Plot values:

In [None]:
# Same overlay chart as the normalised one, drawn from the current contents of
# `result`, `lstm` and `gru`, and written to its own HTML file.
time_axis = result.index

realdata = go.Scatter(x=time_axis, y=result['real'], name='real')
# Both prediction traces are drawn against the secondary axis 'y2'.
lstmdata = go.Scatter(x=time_axis, y=lstm["lstm"], name='lstm', yaxis='y2')
grudata = go.Scatter(x=time_axis, y=gru["gru"], name='gru', yaxis='y2')
data = [realdata, lstmdata, grudata]

# Range-selector shortcuts: 1 day, 7 days and 1 month back, plus "all".
range_buttons = [
    dict(count=count, label=label, step=step, stepmode='backward')
    for count, label, step in ((1, '1d', 'day'), (7, '1w', 'day'), (1, '1m', 'month'))
]
range_buttons.append(dict(step='all'))

# NOTE(review): yaxis3 ('Gru') is declared but no trace references 'y3'; the
# GRU trace shares 'y2', whose title is 'Lstm'. Confirm which is intended.
right_overlay = dict(overlaying='y', side='right')
layout = go.Layout(
    title='Prediction error (real values)',
    xaxis={
        'rangeselector': {'buttons': range_buttons},
        'rangeslider': {'visible': True},
        'type': 'date',
    },
    yaxis={'title': 'Real'},
    yaxis2=dict(title='Lstm', **right_overlay),
    yaxis3=dict(title='Gru', **right_overlay),
)
fig = {'data': data, 'layout': layout}
offline.plot(fig, filename=pumpNumber + "-" + predict + "-" + dset + "-real.html")

### Extra: Finding Optimal Parameters

In [None]:
from sklearn.model_selection import GridSearchCV
# KerasClassifier is no longer used below; the import is kept so the name stays available.
from keras.wrappers.scikit_learn import KerasClassifier, KerasRegressor
import copy

def fit_lstm_tuning(activation, recurrent_activation, loss, optimizer):
    """Build and compile the one-unit LSTM regressor for one hyper-parameter combination.

    Used as the `build_fn` of the scikit-learn wrapper, which calls it once per
    candidate and then fits the returned model itself.

    Parameters:
        activation: activation name passed to the LSTM layer.
        recurrent_activation: recurrent activation name passed to the LSTM layer.
        loss: Keras loss name used for training.
        optimizer: Keras optimizer name.

    Returns:
        The compiled (untrained) Sequential model.
    """
    model_lstm = Sequential()
    model_lstm.add(LSTM(1, input_shape=(1, dataset.shape[1]), activation = activation, recurrent_activation = recurrent_activation))
    model_lstm.add(Dense(1))
    # The target is continuous, so "accuracy" carries no information; track MAE instead.
    model_lstm.compile(loss=loss, optimizer=optimizer, metrics=["mae"])
    return model_lstm

activations = ['softmax', 'elu', 'selu', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear']
recurrent_activations = ['softmax', 'elu', 'selu', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear']

optimizers = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
# Only regression losses: the classification losses from the previous grid
# (hinge variants, cross-entropies, KL divergence, ...) are not meaningful for a
# single continuous output.
losses = ['mean_squared_error', 'mean_absolute_error', 'mean_absolute_percentage_error', 'mean_squared_logarithmic_error', 'logcosh']

# Same inputs/targets as the main training cells, copied so the search works on its own data.
X = copy.deepcopy(train_dataset.values.reshape((train_dataset.shape[0], 1, train_dataset.shape[1])))
Y = copy.deepcopy(train_dataset_shifted[predict])

# Regression wrapper: KerasClassifier would treat every distinct target value as a class label.
model = KerasRegressor(build_fn=fit_lstm_tuning)

param_grid = dict(activation = activations, recurrent_activation = recurrent_activations, optimizer = optimizers, loss = losses)

# Score every candidate with the same metric. Without `scoring`, each model is
# scored by its own (negated) training loss, so candidates trained with
# different losses would not be comparable.
grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring='neg_mean_absolute_error', n_jobs=1)
grid_result = grid.fit(X,Y)

# best_score_ is the negated MAE (closer to 0 is better).
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))