## Long Short-Term Memory (LSTMs)

In [9]:
import pickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM
from sklearn.metrics import mean_squared_error

In [10]:
# Load your data
raw_df = pickle.load(open("../common/pima.p", "rb"))
feature_cols_with_timestamp = pickle.load(open("../common/feature_cols_with_timestamp.p", "rb"))
split_size = pickle.load(open("../common/split_size.p", "rb"))

# Select the columns you're interested in
df = raw_df[feature_cols_with_timestamp]

In [11]:
# Convert series to supervised learning
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg

# Convert 'timestamp' to datetime and set as index
df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index('timestamp', inplace=True)

# Drop rows with missing values
df = df.dropna()

# Ensure all data is float
values = df.values.astype('float32')

# Normalize features
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(values)

# Frame as supervised learning
reframed = series_to_supervised(scaled, 1, 1)

# Drop columns we don't want to predict
num_features = len(df.columns)  # Assuming 'df' is your original DataFrame
indices_to_drop = list(range(num_features, 2*num_features))  # Indices of 't' columns for each variable
reframed.drop(reframed.columns[indices_to_drop], axis=1, inplace=True)

# Split into train and test sets
values = reframed.values
n_train_time = 365*24
train = values[:n_train_time, :]
test = values[n_train_time:, :]

# Split into input and outputs
x_train, y_train = train[:, :-1], train[:, -1]
x_test, y_test = test[:, :-1], test[:, -1]

# Reshape input to be 3D [samples, timesteps, features]
x_train = x_train.reshape((x_train.shape[0], 1, x_train.shape[1]))
x_test = x_test.reshape((x_test.shape[0], 1, x_test.shape[1]))

# LSTM Network
model = Sequential()
model.add(LSTM(100, input_shape=(x_train.shape[1], x_train.shape[2])))
model.add(Dense(1))
model.compile(loss='mae', optimizer='adam', run_eagerly=True)

# Fit
history = model.fit(x_train, y_train, epochs=50, batch_size=72, validation_data=(x_test, y_test), verbose=2, shuffle=False)

# Predict
yhat = model.predict(x_test)
x_test = x_test.reshape((x_test.shape[0], x_test.shape[2]))

# Invert scaling for forecast
inv_yhat = np.concatenate((yhat, x_test[:, 1:]), axis=1)
inv_yhat = scaler.inverse_transform(inv_yhat)
inv_yhat = inv_yhat[:,0]

# Invert scaling for actual
y_test = y_test.reshape((len(y_test), 1))
inv_y = np.concatenate((y_test, x_test[:, 1:]), axis=1)
inv_y = scaler.inverse_transform(inv_y)
inv_y = inv_y[:,0]

# Plot actual vs predicted
plt.plot(inv_y, label="Actual")
plt.plot(inv_yhat, label="Predicted")
plt.legend()
plt.show()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['timestamp'] = pd.to_datetime(df['timestamp'])


Epoch 1/50
30/30 - 1s - loss: 0.1425 - 1s/epoch - 41ms/step
Epoch 2/50
30/30 - 1s - loss: 0.0987 - 942ms/epoch - 31ms/step
Epoch 3/50
30/30 - 1s - loss: 0.0952 - 923ms/epoch - 31ms/step
Epoch 4/50
30/30 - 1s - loss: 0.0927 - 913ms/epoch - 30ms/step
Epoch 5/50
30/30 - 1s - loss: 0.0906 - 991ms/epoch - 33ms/step
Epoch 6/50
30/30 - 1s - loss: 0.0889 - 977ms/epoch - 33ms/step
Epoch 7/50
30/30 - 1s - loss: 0.0875 - 926ms/epoch - 31ms/step
Epoch 8/50
30/30 - 1s - loss: 0.0863 - 1s/epoch - 39ms/step
Epoch 9/50
30/30 - 1s - loss: 0.0854 - 996ms/epoch - 33ms/step
Epoch 10/50
30/30 - 1s - loss: 0.0845 - 937ms/epoch - 31ms/step
Epoch 11/50
30/30 - 1s - loss: 0.0839 - 943ms/epoch - 31ms/step
Epoch 12/50
30/30 - 1s - loss: 0.0833 - 1s/epoch - 36ms/step
Epoch 13/50
30/30 - 1s - loss: 0.0828 - 1s/epoch - 34ms/step
Epoch 14/50
30/30 - 1s - loss: 0.0825 - 1s/epoch - 38ms/step
Epoch 15/50
30/30 - 1s - loss: 0.0820 - 1s/epoch - 37ms/step
Epoch 16/50
30/30 - 1s - loss: 0.0817 - 1s/epoch - 45ms/step
Epoch 

ValueError: Unexpected result of `predict_function` (Empty batch_outputs). Please use `Model.compile(..., run_eagerly=True)`, or `tf.config.run_functions_eagerly(True)` for more information of where went wrong, or file a issue/bug to `tf.keras`.