In [8]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, Conv1D, MaxPooling1D, Flatten, Input, Dropout
from keras.optimizers import Adam
from keras.metrics import RootMeanSquaredError
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from math import sqrt  # Import sqrt function

# Read the raw sales records
df = pd.read_csv('antiasthma.csv')

# Parse 'Sold_date' (month/day/two-digit-year) and use it as the row index
df['Sold_date'] = pd.to_datetime(df['Sold_date'], format='%m/%d/%y')
df = df.set_index('Sold_date')

# One column per product: sum quantities per sale date, then roll the dates up into weekly totals
weekly_data = (
    df.pivot_table(
        index='Sold_date',
        columns='Product_details',
        values='Sold_quantity',
        aggfunc='sum',
    )
    .resample('W')
    .sum()
)

# Min-max scale every product column, keeping the weekly index and product labels
scaler = MinMaxScaler()
weekly_data_scaled = pd.DataFrame(
    scaler.fit_transform(weekly_data),
    index=weekly_data.index,
    columns=weekly_data.columns,
)

# Prepare data for LSTM
def create_sequences(data, sequence_length):
    """Slice a time-ordered frame into sliding input windows and next-step targets.

    Each sample pairs `sequence_length` consecutive rows (the window) with the
    single row that immediately follows that window (the target).

    Args:
        data: pandas DataFrame accessed positionally via `.iloc`, one row per time step.
        sequence_length: number of consecutive rows in each input window.

    Returns:
        Tuple `(x, y)` of NumPy arrays: `x` stacks the windows and `y` stacks the
        matching target rows.
    """
    window_starts = range(len(data) - sequence_length)
    windows = [data.iloc[start:start + sequence_length].values for start in window_starts]
    targets = [data.iloc[start + sequence_length].values for start in window_starts]
    return np.array(windows), np.array(targets)

# Window size: how many past weeks feed the prediction of the following week
sequence_length = 2
x_lstm, y_lstm = create_sequences(data=weekly_data_scaled, sequence_length=sequence_length)

# Sanity-check the array dimensions before modelling
print("x_lstm shape:", np.shape(x_lstm))
print("y_lstm shape:", np.shape(y_lstm))

x_lstm shape: (161, 2, 45)
y_lstm shape: (161, 45)


In [9]:
# Make the run repeatable: seeds Python, NumPy and the backend RNG, which drive
# weight initialisation, dropout masks and batch shuffling.
tf.keras.utils.set_random_seed(42)

# Number of product columns; both networks emit one value per product.
n_products = len(weekly_data_scaled.columns)

# Stage 1: LSTM mapping each window of `sequence_length` weeks to one value per product.
# The layer is fit directly against y_lstm, so its width must equal the product count
# (previously hard-coded as 45).
lstm_model = Sequential([
    Input(shape=(sequence_length, n_products)),
    LSTM(n_products, activation='tanh', recurrent_activation='sigmoid', return_sequences=False),
])
lstm_model.compile(optimizer='adam', loss='mean_squared_error')

# Fit the LSTM model
history = lstm_model.fit(x_lstm, y_lstm, epochs=100, batch_size=32, validation_split=0.2, verbose=1)

# Extract features using LSTM (one row per sample, one column per LSTM unit)
features = lstm_model.predict(x_lstm)

# Conv1D consumes (samples, steps, channels): add the single channel axis explicitly
# instead of relying on Keras to expand the rank of a 2-D input.
cnn_inputs = features[..., np.newaxis]

# Define and compile the CNN model
input_shape = (features.shape[1], 1)
cnn_model_enhanced = Sequential([
    Input(shape=input_shape),
    Conv1D(128, 3, activation='relu'),
    MaxPooling1D(2),
    Dropout(0.2),
    Conv1D(128, 3, activation='relu'),
    MaxPooling1D(2),
    Flatten(),
    Dense(100, activation='relu'),
    Dropout(0.2),
    Dense(len(weekly_data.columns))
])
cnn_model_enhanced.compile(optimizer=Adam(), loss='mean_squared_error', metrics=[tf.keras.metrics.RootMeanSquaredError()])

# Fit the CNN model
history_cnn = cnn_model_enhanced.fit(cnn_inputs, y_lstm, epochs=100, batch_size=32, validation_split=0.2)

# Predictions made by the CNN model (in-sample: same windows the models were fit on)
scaled_predictions = cnn_model_enhanced.predict(cnn_inputs)

# Inverse transform the scaled predictions to original scale
actual_predictions = scaler.inverse_transform(scaled_predictions)

# Create a DataFrame for the predicted data; the first `sequence_length` weeks have no
# prediction because they only serve as input to the first window
forecast_df = pd.DataFrame(actual_predictions, index=weekly_data.index[sequence_length:], columns=weekly_data.columns)

# Reindex the forecast DataFrame to ensure it includes the required date range
forecast_df = forecast_df.reindex(pd.date_range(start=weekly_data.index[sequence_length], end=weekly_data.index[-1], freq='W'))

# Select the prediction for the weekly bin labelled 2024-02-04.
# NOTE(review): weekly_data is built with resample('W'), whose bins are anchored on Sunday
# and labelled by their last day, so this row is the week ENDING Sunday 2024-02-04
# (Jan 29 - Feb 4), not Feb 5 - 11. A forecast for Feb 5 - 11 would need a prediction made
# from the last `sequence_length` weeks of data -- confirm which week is intended.
forecast_week = forecast_df.loc['2024-02-04']
sorted_forecast_week = forecast_week.sort_values(ascending=False)  # Sort values from highest to lowest

# Convert to DataFrame and save the sorted forecast to Excel
sorted_forecast_week_df = sorted_forecast_week.to_frame(name='Sold_quantity')
sorted_forecast_week_df.index.name = 'Product_details'
sorted_forecast_week_df.reset_index(inplace=True)
sorted_forecast_week_df.to_excel('antiasthma_sorted_by_quantity.xlsx', index=False)

# Display RMSE (per-epoch training values, measured on the min-max-scaled data)
print("Root Mean Squared Error (RMSE):", history_cnn.history['root_mean_squared_error'])

# Calculate and print RMSE in original units. The targets in y_lstm are min-max scaled,
# so they must be mapped back to sold quantities before being compared with
# actual_predictions; comparing scaled targets to unscaled predictions is meaningless.
actual_targets = scaler.inverse_transform(y_lstm)
rmse_original = sqrt(mean_squared_error(actual_targets, actual_predictions))
print("RMSE on original scale:", rmse_original)

Epoch 1/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - loss: 0.0391 - val_loss: 0.0558
Epoch 2/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0405 - val_loss: 0.0538
Epoch 3/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0382 - val_loss: 0.0521
Epoch 4/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0348 - val_loss: 0.0505
Epoch 5/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0364 - val_loss: 0.0491
Epoch 6/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0358 - val_loss: 0.0478
Epoch 7/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0346 - val_loss: 0.0466
Epoch 8/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0323 - val_loss: 0.0455
Epoch 9/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0163 - val_loss: 0.0407
Epoch 71/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0157 - val_loss: 0.0407
Epoch 72/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0166 - val_loss: 0.0407
Epoch 73/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0162 - val_loss: 0.0408
Epoch 74/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0162 - val_loss: 0.0408
Epoch 75/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0167 - val_loss: 0.0409
Epoch 76/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0163 - val_loss: 0.0409
Epoch 77/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0157 - val_loss: 0.0409
Epoch 78/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0198 - root_mean_squared_error: 0.1407 - val_loss: 0.0400 - val_root_mean_squared_error: 0.2001
Epoch 25/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0195 - root_mean_squared_error: 0.1395 - val_loss: 0.0393 - val_root_mean_squared_error: 0.1982
Epoch 26/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0192 - root_mean_squared_error: 0.1385 - val_loss: 0.0399 - val_root_mean_squared_error: 0.1998
Epoch 27/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0186 - root_mean_squared_error: 0.1364 - val_loss: 0.0397 - val_root_mean_squared_error: 0.1993
Epoch 28/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0191 - root_mean_squared_error: 0.1380 - val_loss: 0.0399 - val_root_mean_squared_error: 0.1999
Epoch 29/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0132 - root_mean_squared_error: 0.1149 - val_loss: 0.0418 - val_root_mean_squared_error: 0.2045
Epoch 68/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0136 - root_mean_squared_error: 0.1166 - val_loss: 0.0419 - val_root_mean_squared_error: 0.2048
Epoch 69/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0135 - root_mean_squared_error: 0.1162 - val_loss: 0.0419 - val_root_mean_squared_error: 0.2047
Epoch 70/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0135 - root_mean_squared_error: 0.1163 - val_loss: 0.0417 - val_root_mean_squared_error: 0.2042
Epoch 71/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0131 - root_mean_squared_error: 0.1144 - val_loss: 0.0425 - val_root_mean_squared_error: 0.2061
Epoch 72/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m