In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, Conv1D, MaxPooling1D, Flatten, Input, Dropout
from keras.optimizers import Adam
from keras.metrics import RootMeanSquaredError
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from math import sqrt  # Import sqrt function

# Read the raw sales records.
df = pd.read_csv('Vitamins.csv')

# Parse 'Sold_date' (month/day/two-digit-year strings) and promote it to the index.
df['Sold_date'] = pd.to_datetime(df['Sold_date'], format='%m/%d/%y')
df = df.set_index('Sold_date')

# One row per sale date, one column per product, summing the quantities sold.
sales_by_date = df.pivot_table(
    index='Sold_date',
    columns='Product_details',
    values='Sold_quantity',
    aggfunc='sum',
)

# Collapse the dated rows into weekly totals for each product.
weekly_data = sales_by_date.resample('W').sum()

# Rescale every product column with MinMaxScaler; `scaler` is kept so that
# model outputs can be mapped back to sold quantities later on.
scaler = MinMaxScaler()
weekly_data_scaled = pd.DataFrame(
    scaler.fit_transform(weekly_data),
    index=weekly_data.index,
    columns=weekly_data.columns,
)

# Prepare data for LSTM
def create_sequences(data, sequence_length):
    """Build sliding-window samples for next-step prediction.

    Each sample ``x[i]`` holds ``sequence_length`` consecutive rows of ``data``
    starting at row ``i``; the matching target ``y[i]`` is the row that
    immediately follows that window.

    Parameters
    ----------
    data : pandas.DataFrame or array-like
        Time-ordered observations, one row per time step.
    sequence_length : int
        Number of consecutive rows in each input window. Must be >= 0.

    Returns
    -------
    x : numpy.ndarray
        Shape ``(n_samples, sequence_length, *row_shape)``.
    y : numpy.ndarray
        Shape ``(n_samples, *row_shape)``.
        ``n_samples`` is ``max(len(data) - sequence_length, 0)``.

    Raises
    ------
    ValueError
        If ``sequence_length`` is negative.
    """
    if sequence_length < 0:
        raise ValueError("sequence_length must be non-negative")

    # Convert once instead of doing a positional lookup per window.
    values = np.asarray(data)
    row_shape = values.shape[1:]
    n_samples = len(values) - sequence_length

    if n_samples <= 0:
        # Too few rows for even one (window, target) pair: return empty arrays
        # that still carry the window/feature dimensions, so downstream shape
        # checks fail clearly rather than on a bare (0,) array.
        empty_x = np.empty((0, sequence_length) + row_shape, dtype=values.dtype)
        empty_y = np.empty((0,) + row_shape, dtype=values.dtype)
        return empty_x, empty_y

    x = np.stack([values[start:start + sequence_length] for start in range(n_samples)])
    # The target of window `start` is row `start + sequence_length`; copy so the
    # result never aliases the caller's data.
    y = values[sequence_length:].copy()
    return x, y

# Window length: how many consecutive weeks feed each next-week prediction.
sequence_length = 2
x_lstm, y_lstm = create_sequences(data=weekly_data_scaled, sequence_length=sequence_length)

# Show the sample/window/feature dimensions before building any model.
print("x_lstm shape:", np.shape(x_lstm))
print("y_lstm shape:", np.shape(y_lstm))

x_lstm shape: (162, 2, 196)
y_lstm shape: (162, 196)


In [3]:
# Seed Python, NumPy and TensorFlow so that Restart & Run All reproduces the
# same weights, predictions and exported ranking.
tf.keras.utils.set_random_seed(42)

# One column per product. Both networks must emit exactly this many values per
# sample, so the width is derived from the data instead of being hard-coded.
n_products = weekly_data_scaled.shape[1]

# Stage 1: LSTM fitted to map `sequence_length` weeks onto the following week.
# The LSTM layer is also the output layer here, so its unit count has to equal
# the number of target columns in y_lstm.
lstm_model = Sequential([
    Input(shape=(sequence_length, n_products)),
    LSTM(n_products, activation='tanh', recurrent_activation='sigmoid', return_sequences=False),
])
lstm_model.compile(optimizer='adam', loss='mean_squared_error')

# Fit the LSTM model
history = lstm_model.fit(x_lstm, y_lstm, epochs=100, batch_size=32, validation_split=0.2, verbose=1)

# Use the LSTM outputs as the feature vector for the second stage
features = lstm_model.predict(x_lstm)

# Stage 2: 1-D CNN that treats each feature vector as a single-channel sequence
input_shape = (features.shape[1], 1)
cnn_model_enhanced = Sequential([
    Input(shape=input_shape),
    Conv1D(128, 3, activation='relu'),
    MaxPooling1D(2),
    Dropout(0.2),
    Conv1D(128, 3, activation='relu'),
    MaxPooling1D(2),
    Flatten(),
    Dense(100, activation='relu'),
    Dropout(0.2),
    Dense(n_products)
])
cnn_model_enhanced.compile(optimizer=Adam(), loss='mean_squared_error', metrics=[tf.keras.metrics.RootMeanSquaredError()])

# Fit the CNN model
history_cnn = cnn_model_enhanced.fit(features, y_lstm, epochs=100, batch_size=32, validation_split=0.2)

# Predictions made by the CNN model (still in the scaler's normalised units)
scaled_predictions = cnn_model_enhanced.predict(features)

# Map the predictions back to sold quantities
actual_predictions = scaler.inverse_transform(scaled_predictions)

# The first `sequence_length` weeks have no prediction (they only ever appear
# as model input), so the predictions line up with the index from that offset.
forecast_df = pd.DataFrame(actual_predictions, index=weekly_data.index[sequence_length:], columns=weekly_data.columns)

# Re-express the index as an explicit weekly range over the predicted span
forecast_df = forecast_df.reindex(pd.date_range(start=weekly_data.index[sequence_length], end=weekly_data.index[-1], freq='W'))

# resample('W') labels every bin with its week-ending Sunday, so the row
# labelled 2024-02-04 holds the week Monday 29 Jan - Sunday 4 Feb 2024.
# NOTE(review): this row lies inside the data the models were fitted on, so it
# is an in-sample prediction rather than an out-of-sample forecast.
forecast_week = forecast_df.loc['2024-02-04']
sorted_forecast_week = forecast_week.sort_values(ascending=False)  # Highest predicted quantity first

# Convert to DataFrame and save the sorted forecast to Excel
sorted_forecast_week_df = sorted_forecast_week.to_frame(name='Sold_quantity')
sorted_forecast_week_df.index.name = 'Product_details'
sorted_forecast_week_df.reset_index(inplace=True)
sorted_forecast_week_df.to_excel('vitamins_sorted_by_quantity.xlsx', index=False)

# Per-epoch training RMSE of the CNN, in normalised units
print("Root Mean Squared Error (RMSE):", history_cnn.history['root_mean_squared_error'])

# RMSE in sold-quantity units. The targets must be mapped back through the
# scaler as well; comparing scaled targets with unscaled predictions would mix
# two different units.
actual_targets = scaler.inverse_transform(y_lstm)
rmse_original = sqrt(mean_squared_error(actual_targets, actual_predictions))
print("RMSE on original scale:", rmse_original)

Epoch 1/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.0389 - val_loss: 0.0476
Epoch 2/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0345 - val_loss: 0.0452
Epoch 3/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0322 - val_loss: 0.0434
Epoch 4/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0301 - val_loss: 0.0422
Epoch 5/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0280 - val_loss: 0.0413
Epoch 6/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0275 - val_loss: 0.0409
Epoch 7/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0259 - val_loss: 0.0407
Epoch 8/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0248 - val_loss: 0.0407
Epoch 9/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0091 - val_loss: 0.0476
Epoch 71/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0086 - val_loss: 0.0478
Epoch 72/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0086 - val_loss: 0.0481
Epoch 73/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0084 - val_loss: 0.0483
Epoch 74/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0083 - val_loss: 0.0484
Epoch 75/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0082 - val_loss: 0.0486
Epoch 76/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0080 - val_loss: 0.0487
Epoch 77/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0078 - val_loss: 0.0489
Epoch 78/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0227 - root_mean_squared_error: 0.1506 - val_loss: 0.0406 - val_root_mean_squared_error: 0.2014
Epoch 25/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0229 - root_mean_squared_error: 0.1514 - val_loss: 0.0408 - val_root_mean_squared_error: 0.2019
Epoch 26/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0224 - root_mean_squared_error: 0.1498 - val_loss: 0.0402 - val_root_mean_squared_error: 0.2005
Epoch 27/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0225 - root_mean_squared_error: 0.1501 - val_loss: 0.0405 - val_root_mean_squared_error: 0.2013
Epoch 28/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0219 - root_mean_squared_error: 0.1479 - val_loss: 0.0403 - val_root_mean_squared_error: 0.2009
Epoch 29/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.0177 - root_mean_squared_error: 0.1331 - val_loss: 0.0413 - val_root_mean_squared_error: 0.2033
Epoch 68/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.0174 - root_mean_squared_error: 0.1321 - val_loss: 0.0413 - val_root_mean_squared_error: 0.2031
Epoch 69/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.0174 - root_mean_squared_error: 0.1319 - val_loss: 0.0412 - val_root_mean_squared_error: 0.2030
Epoch 70/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.0170 - root_mean_squared_error: 0.1305 - val_loss: 0.0413 - val_root_mean_squared_error: 0.2033
Epoch 71/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.0167 - root_mean_squared_error: 0.1293 - val_loss: 0.0413 - val_root_mean_squared_error: 0.2032
Epoch 72/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37