In [24]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.metrics import mean_squared_error

In [25]:
#load data set
data = pd.read_csv('PLAY.csv')
data.head() 

Unnamed: 0,collection_week,Total staffed,NO OF MASKS
0,4/21/2024,273,39984
1,4/14/2024,275,43659
2,4/7/2024,275,43659
3,3/31/2024,273,39543
4,3/24/2024,273,36309


In [26]:
# Convert 'collection_week' to datetime and set as index
data['collection_week'] = pd.to_datetime(data['collection_week'])
data.set_index('collection_week', inplace=True)

In [28]:
data['Time'] = data.groupby(data.index).ngroup() + 1

In [29]:
# Step 3: Normalize the data using MinMaxScaler
scaler = MinMaxScaler()

# Scale all features (No_of_masks, Staffed_beds, and Time)
scaled_data = scaler.fit_transform(data[['No of masks', 'Staffed beds', 'Time']])

# Convert the scaled data back to a DataFrame for easier handling
scaled_data = pd.DataFrame(scaled_data, columns=['No of masks', 'Staffed beds', 'Time'], index=data.index)

In [30]:
# Step 4: Function to create sequences of data for LSTM
def create_sequences(data, time_steps=1):
    X, y = [], []
    for i in range(len(data) - time_steps):
        X.append(data.iloc[i:i + time_steps, 1:].values)  # Exogenous variables (Staffed_beds and Time)
        y.append(data.iloc[i + time_steps, 0])  # Target variable (No_of_masks)
    return np.array(X), np.array(y)

# Set time_steps (look-back window) for LSTM (e.g., 3 time steps)
time_steps = 3
X, y = create_sequences(scaled_data, time_steps)

# Train-test split (80% train, 20% test)
train_size = int(0.8 * len(X))
X_train, y_train = X[:train_size], y[:train_size]
X_test, y_test = X[train_size:], y[train_size:]

In [35]:
y_pred = model.predict(X_test)



In [31]:
# Step 5: Build the LSTM model
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(LSTM(units=50))
model.add(Dense(1))  # Output layer predicting No_of_masks



In [32]:
#Compile the model
model.compile(optimizer='adam', loss='mse')

# Train the model
model.fit(X_train, y_train, epochs=20, batch_size=16, verbose=1)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1e45adf96a0>

In [39]:
# Inverse the scaling for the y_test and y_pred separately
# Only inverse transform the 'No_of_masks' (the first column in scaled_data)
y_test_unscaled = scaler.inverse_transform(np.concatenate((y_test.reshape(-1, 1), np.zeros((len(y_test), 2))), axis=1))[:, 0]
y_pred_unscaled = scaler.inverse_transform(np.concatenate((y_pred, np.zeros((len(y_pred), 2))), axis=1))[:, 0]


In [40]:
# Calculate RMSE for the LSTM model
rmse = np.sqrt(mean_squared_error(y_test_unscaled, y_pred_unscaled))
print(f"LSTM RMSE: {rmse}")

LSTM RMSE: 2550.0447672883342
