In [31]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, Conv1D, MaxPooling1D, Flatten, Input
from keras.optimizers import Adam
from keras.metrics import RootMeanSquaredError
import tensorflow as tf
import matplotlib.pyplot as plt

In [32]:
# Load the data
df = pd.read_csv('food_supplements.csv')

In [33]:
# Convert 'Sold_date' to datetime format and set as index
df['Sold_date'] = pd.to_datetime(df['Sold_date'], format='%m/%d/%y')
df.set_index('Sold_date', inplace=True)

In [34]:
# Aggregate sales data on a weekly basis for each product
weekly_data = df.pivot_table(values='Sold_quantity', index='Sold_date', 
                             columns='Product_details', aggfunc='sum').resample('W').sum()

In [35]:
# Normalize the sales data
scaler = MinMaxScaler()
weekly_data_scaled = scaler.fit_transform(weekly_data)
weekly_data_scaled = pd.DataFrame(weekly_data_scaled, index=weekly_data.index, columns=weekly_data.columns)


In [36]:
# Prepare data for LSTM
def create_sequences(data, sequence_length):
    x = []
    y = []
    for i in range(len(data) - sequence_length):
        x.append(data.iloc[i:i + sequence_length].values)
        y.append(data.iloc[i + sequence_length].values)
    return np.array(x), np.array(y)


In [37]:
sequence_length = 2  # Number of weeks used to predict the next week
x_lstm, y_lstm = create_sequences(weekly_data_scaled, sequence_length)

In [38]:
# Define the LSTM model to extract features
lstm_model = Sequential([
    Input(shape=(sequence_length, len(weekly_data_scaled.columns))),
    LSTM(92, activation='tanh', recurrent_activation='sigmoid', return_sequences=False),
])

In [39]:

# Compile the LSTM model
lstm_model.compile(optimizer='adam', loss='mean_squared_error')

In [40]:
# Fit the LSTM model
lstm_model.fit(x_lstm, y_lstm, epochs=100, batch_size=32, validation_split=0.2)


Epoch 1/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 0.0437 - val_loss: 0.0615
Epoch 2/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0399 - val_loss: 0.0568
Epoch 3/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0358 - val_loss: 0.0530
Epoch 4/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0333 - val_loss: 0.0499
Epoch 5/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0312 - val_loss: 0.0476
Epoch 6/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0292 - val_loss: 0.0459
Epoch 7/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0272 - val_loss: 0.0450
Epoch 8/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0278 - val_loss: 0.0442
Epoch 9/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0

<keras.src.callbacks.history.History at 0x28a3b78d0>

In [41]:
# Extract features using LSTM
features = lstm_model.predict(x_lstm)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 


In [42]:
# Define the CNN model for forecasting
input_shape = (features.shape[1], features.shape[2]) if len(features.shape) == 3 else (features.shape[1], 1)  # Adjust input shape
cnn_model = Sequential([
    Input(shape=input_shape),
    Conv1D(64, 3, activation='relu'),
    MaxPooling1D(2),
    Conv1D(64, 3, activation='relu'),
    MaxPooling1D(2),
    Flatten(),
    Dense(len(weekly_data.columns))
])

In [43]:
# Compile the CNN model
cnn_model.compile(optimizer=Adam(), loss='mean_squared_error', metrics=[RootMeanSquaredError()])


In [44]:
# Model summary
cnn_model.summary()

In [45]:
# Fit the CNN model
history = cnn_model.fit(features, y_lstm, epochs=100, batch_size=32, validation_split=0.2)


Epoch 1/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 0.0429 - root_mean_squared_error: 0.2068 - val_loss: 0.0438 - val_root_mean_squared_error: 0.2094
Epoch 2/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0276 - root_mean_squared_error: 0.1660 - val_loss: 0.0454 - val_root_mean_squared_error: 0.2130
Epoch 3/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0258 - root_mean_squared_error: 0.1606 - val_loss: 0.0440 - val_root_mean_squared_error: 0.2097
Epoch 4/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0249 - root_mean_squared_error: 0.1578 - val_loss: 0.0434 - val_root_mean_squared_error: 0.2083
Epoch 5/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0255 - root_mean_squared_error: 0.1598 - val_loss: 0.0422 - val_root_mean_squared_error: 0.2055
Epoch 6/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0203 - root_mean_squared_error: 0.1425 - val_loss: 0.0386 - val_root_mean_squared_error: 0.1964
Epoch 45/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0202 - root_mean_squared_error: 0.1420 - val_loss: 0.0382 - val_root_mean_squared_error: 0.1955
Epoch 46/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0197 - root_mean_squared_error: 0.1402 - val_loss: 0.0384 - val_root_mean_squared_error: 0.1959
Epoch 47/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0195 - root_mean_squared_error: 0.1396 - val_loss: 0.0388 - val_root_mean_squared_error: 0.1969
Epoch 48/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0201 - root_mean_squared_error: 0.1419 - val_loss: 0.0387 - val_root_mean_squared_error: 0.1968
Epoch 49/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

In [29]:
# Display RMSE
print("Root Mean Squared Error (RMSE):")
print(history.history['root_mean_squared_error'])

Root Mean Squared Error (RMSE):
[0.20009659230709076, 0.16626320779323578, 0.16228410601615906, 0.15747173130512238, 0.15705691277980804, 0.1548990160226822, 0.1535947024822235, 0.1521092653274536, 0.1505124419927597, 0.1488979309797287, 0.1477975845336914, 0.14643271267414093, 0.14611579477787018, 0.14557215571403503, 0.14424476027488708, 0.14166046679019928, 0.14103074371814728, 0.14163675904273987, 0.14078916609287262, 0.1397809088230133, 0.13968199491500854, 0.13757920265197754, 0.1359274685382843, 0.13477741181850433, 0.1333957314491272, 0.13255728781223297, 0.13271069526672363, 0.13168855011463165, 0.1311829388141632, 0.1304095983505249, 0.12892606854438782, 0.12896393239498138, 0.1278044581413269, 0.12882471084594727, 0.12687474489212036, 0.12618423998355865, 0.12650588154792786, 0.12565390765666962, 0.12461841106414795, 0.12619125843048096, 0.12432552129030228, 0.12371416389942169, 0.12102316319942474, 0.12035020440816879, 0.12050174176692963, 0.11760443449020386, 0.11679359525

In [30]:
# Calculate the average RMSE across all epochs
average_rmse = np.mean(rmse_values)
print("Average RMSE:", average_rmse)


NameError: name 'rmse_values' is not defined