In [25]:
#Model imports
import tensorflow as tf
import pandas as pd
import plotly.graph_objects as go
import numpy as np


In [2]:
tf.config.list_physical_devices()


[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [3]:
# load data
data = pd.read_csv("training-dataset.csv")
data.drop(['datetime','datetime.1','3570'], axis=1,inplace=True)
data.dtypes

3307          float64
3616          float64
3576          float64
4031_level    float64
4046_level    float64
4058_level    float64
4122_level    float64
4008_level    float64
Day sin       float64
Day cos       float64
Year sin      float64
Year cos      float64
dtype: object

In [4]:
#split test train data
train_data = data.iloc[:int(0.8*len(data))]
test_data = data.iloc[int(0.8*len(data)):]


In [5]:
# extract input and output columns
#assumes there are 6 columns of input values with labels in the 7th column

X_train = train_data.iloc[:,:6]
y_train = train_data.iloc[:,6]
X_test = test_data.iloc[:,:6]
y_test = test_data.iloc[:,6]

In [6]:
# create LSTM model
model = tf.keras.Sequential()
model.add(tf.keras.layers.LSTM(256, input_shape=(X_train.shape[1], 1)))
model.add(tf.keras.layers.Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')

In [7]:
# plot the model
from tensorflow.keras.utils import plot_model
plot_model(model, to_file='model.png', show_shapes=True, show_layer_names=True)


('You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) ', 'for plot_model/model_to_dot to work.')


In [8]:
# reshape data for LSTM
X_train = X_train.values.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.values.reshape(X_test.shape[0], X_test.shape[1], 1)


In [9]:
X_train.shape
X_test.shape


(8813, 6, 1)

In [10]:
# train model
history = model.fit(X_train, y_train, epochs=50, batch_size=200, validation_data=(X_test, y_test))


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [15]:
# make predictions
train_predictions = model.predict(X_train)
test_predictions = model.predict(X_test)


In [44]:
#save model to file
x_test = np.arange(len(test_data))
y_test_pred =pd.Series(test_predictions.flatten(order="C"))


In [45]:
# plot train and test predictions
fig = go.Figure()
fig.add_trace(go.Scatter(x=x, y=y_test, name='Train'))
fig.add_trace(go.Scatter(x=x, y=y_test_pred, name='Test'))
fig.show()

In [14]:

# to determine the most important input features using the SHAP library

import shap

# load the model and the data
model = model
#X_train = pd.read_csv("X_train.csv").values

# explain the model's predictions using SHAP values
explainer = shap.Explainer(model, X_train)
shap_values = explainer(X_test)

# summarize the importance of each feature
shap.summary_plot(shap_values, feature_names=X_test.columns)

ModuleNotFoundError: No module named 'shap'