In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, Conv1D, MaxPooling1D, Flatten, Input, Dropout
from keras.optimizers import Adam
from keras.metrics import RootMeanSquaredError
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from math import sqrt  # Import sqrt function

# Read the raw sales records from disk
df = pd.read_csv('cardiovascular.csv')

# Parse 'Sold_date' (month/day/two-digit year) and use it as the frame's index
df['Sold_date'] = pd.to_datetime(df['Sold_date'], format='%m/%d/%y')
df = df.set_index('Sold_date')

# Build one column per product with quantities summed per date,
# then re-bucket those daily totals into weekly totals
weekly_data = (
    df.pivot_table(
        index='Sold_date',
        columns='Product_details',
        values='Sold_quantity',
        aggfunc='sum',
    )
    .resample('W')
    .sum()
)

# Rescale each product column with MinMaxScaler and wrap the result back into a
# DataFrame so the weekly index and product labels are preserved
scaler = MinMaxScaler()
weekly_data_scaled = pd.DataFrame(
    scaler.fit_transform(weekly_data),
    index=weekly_data.index,
    columns=weekly_data.columns,
)

# Prepare data for LSTM
def create_sequences(data, sequence_length):
    """Build sliding-window samples for next-step prediction.

    Each sample consists of `sequence_length` consecutive rows of `data`
    (the input window) paired with the row that immediately follows the
    window (the target).

    Parameters
    ----------
    data : pandas.DataFrame or array-like
        Time-ordered observations, one row per time step.
    sequence_length : int
        Number of consecutive rows in each input window; must be >= 1.

    Returns
    -------
    tuple of numpy.ndarray
        `(x, y)` where `x` has shape `(n, sequence_length, *row_shape)` and
        `y` has shape `(n, *row_shape)`, with
        `n = max(len(data) - sequence_length, 0)`.

    Raises
    ------
    ValueError
        If `sequence_length` is smaller than 1.
    """
    if sequence_length < 1:
        raise ValueError("sequence_length must be a positive integer")

    # Convert once up front instead of materialising `.values` on every iteration;
    # this also lets plain NumPy arrays / nested lists be passed in.
    values = data.to_numpy() if hasattr(data, "to_numpy") else np.asarray(data)
    row_shape = values.shape[1:]
    n_samples = len(values) - sequence_length

    if n_samples <= 0:
        # Not enough rows for a single window: return correctly shaped empty
        # arrays so that downstream `.shape[1]` lookups keep working.
        empty_x = np.empty((0, sequence_length) + row_shape, dtype=values.dtype)
        empty_y = np.empty((0,) + row_shape, dtype=values.dtype)
        return empty_x, empty_y

    x = np.stack([values[start:start + sequence_length] for start in range(n_samples)])
    # The target of window `start` is row `start + sequence_length`; copy so the
    # result does not alias the caller's data.
    y = values[sequence_length:].copy()
    return x, y

# Window size: how many consecutive weeks feed the prediction of the following week
sequence_length = 2
x_lstm, y_lstm = create_sequences(data=weekly_data_scaled, sequence_length=sequence_length)

# Report the resulting array dimensions as a sanity check
print("x_lstm shape:", np.shape(x_lstm))
print("y_lstm shape:", np.shape(y_lstm))

x_lstm shape: (162, 2, 50)
y_lstm shape: (162, 50)


In [2]:
# Define the LSTM model to extract features.
# The model is fitted directly against y_lstm, so the LSTM output width must equal
# the number of product columns. Deriving the unit count from the data (instead of
# hard-coding 50) keeps the model trainable when the product set changes.
n_products = len(weekly_data_scaled.columns)
lstm_model = Sequential([
    Input(shape=(sequence_length, n_products)),
    LSTM(n_products, activation='tanh', recurrent_activation='sigmoid', return_sequences=False),
])

# Compile the LSTM model
lstm_model.compile(optimizer='adam', loss='mean_squared_error')

# Fit the LSTM model; the last 20% of the samples are held out for validation
history = lstm_model.fit(x_lstm, y_lstm, epochs=100, batch_size=32, validation_split=0.2, verbose=1)

# Extract features using LSTM: one vector of length n_products per input window
features = lstm_model.predict(x_lstm)

# Print the features of the first three samples to see what they look like
print("Features of the first three samples:\n", features[:3])

# Enhanced CNN model with additional complexity and dropout for regularization.
# Conv1D consumes 3-D input (samples, steps, channels). The LSTM features are 2-D,
# so add the trailing channel axis explicitly instead of declaring it only in
# `input_shape` and relying on Keras to adjust the input rank implicitly.
cnn_features = features if features.ndim == 3 else features[..., np.newaxis]
input_shape = cnn_features.shape[1:]
cnn_model_enhanced = Sequential([
    Input(shape=input_shape),
    Conv1D(128, 3, activation='relu'),
    MaxPooling1D(2),
    Dropout(0.2),
    Conv1D(128, 3, activation='relu'),
    MaxPooling1D(2),
    Flatten(),
    Dense(100, activation='relu'),
    Dropout(0.2),
    # One output per product column
    Dense(len(weekly_data.columns))
])

# Use the RootMeanSquaredError already imported from keras.metrics so the metric
# comes from the same Keras package as the layers and optimizer above.
cnn_model_enhanced.compile(optimizer=Adam(), loss='mean_squared_error', metrics=[RootMeanSquaredError()])

# Model summary
cnn_model_enhanced.summary()

# Fit the CNN model; the last 20% of the samples are held out for validation
history = cnn_model_enhanced.fit(cnn_features, y_lstm, epochs=100, batch_size=32, validation_split=0.2)

# Predictions made by the CNN model (on every sample, training and validation alike)
scaled_predictions = cnn_model_enhanced.predict(cnn_features)

# Display the per-epoch training RMSE (still in scaled units)
print("Root Mean Squared Error (RMSE):")
print(history.history['root_mean_squared_error'])

# Map predictions and targets back to original sales units. `scaler` was fitted on
# one column per product, so both arrays must have that same number of columns.
if scaled_predictions.shape[1] != y_lstm.shape[1]:
    # Fail fast with the offending shapes. Merely printing a message here would let
    # execution continue to the RMSE line and die with a NameError, because
    # `actual_targets` / `actual_predictions` would never have been assigned.
    raise ValueError(
        "Mismatch in dimensions between predictions and actuals: "
        f"{scaled_predictions.shape} vs {y_lstm.shape}"
    )

actual_predictions = scaler.inverse_transform(scaled_predictions)
actual_targets = scaler.inverse_transform(y_lstm)

# Calculate RMSE in original units
rmse_original = sqrt(mean_squared_error(actual_targets, actual_predictions))
print("RMSE on original scale:", rmse_original)


Epoch 1/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - loss: 0.0324 - val_loss: 0.0399
Epoch 2/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0308 - val_loss: 0.0389
Epoch 3/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0292 - val_loss: 0.0381
Epoch 4/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0288 - val_loss: 0.0373
Epoch 5/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0291 - val_loss: 0.0365
Epoch 6/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0277 - val_loss: 0.0359
Epoch 7/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0257 - val_loss: 0.0353
Epoch 8/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0261 - val_loss: 0.0347
Epoch 9/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0172 - val_loss: 0.0301
Epoch 71/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0163 - val_loss: 0.0301
Epoch 72/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0167 - val_loss: 0.0301
Epoch 73/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0157 - val_loss: 0.0301
Epoch 74/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0165 - val_loss: 0.0301
Epoch 75/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0154 - val_loss: 0.0302
Epoch 76/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0165 - val_loss: 0.0302
Epoch 77/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0153 - val_loss: 0.0302
Epoch 78/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0

Epoch 1/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 0.0297 - root_mean_squared_error: 0.1723 - val_loss: 0.0350 - val_root_mean_squared_error: 0.1871
Epoch 2/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0261 - root_mean_squared_error: 0.1615 - val_loss: 0.0337 - val_root_mean_squared_error: 0.1835
Epoch 3/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0246 - root_mean_squared_error: 0.1567 - val_loss: 0.0326 - val_root_mean_squared_error: 0.1806
Epoch 4/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0233 - root_mean_squared_error: 0.1526 - val_loss: 0.0320 - val_root_mean_squared_error: 0.1790
Epoch 5/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0233 - root_mean_squared_error: 0.1526 - val_loss: 0.0314 - val_root_mean_squared_error: 0.1773
Epoch 6/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0181 - root_mean_squared_error: 0.1346 - val_loss: 0.0290 - val_root_mean_squared_error: 0.1702
Epoch 45/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0178 - root_mean_squared_error: 0.1332 - val_loss: 0.0292 - val_root_mean_squared_error: 0.1708
Epoch 46/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0190 - root_mean_squared_error: 0.1379 - val_loss: 0.0288 - val_root_mean_squared_error: 0.1697
Epoch 47/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0187 - root_mean_squared_error: 0.1368 - val_loss: 0.0287 - val_root_mean_squared_error: 0.1695
Epoch 48/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0179 - root_mean_squared_error: 0.1337 - val_loss: 0.0288 - val_root_mean_squared_error: 0.1697
Epoch 49/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0160 - root_mean_squared_error: 0.1265 - val_loss: 0.0301 - val_root_mean_squared_error: 0.1734
Epoch 88/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0175 - root_mean_squared_error: 0.1321 - val_loss: 0.0298 - val_root_mean_squared_error: 0.1727
Epoch 89/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0166 - root_mean_squared_error: 0.1289 - val_loss: 0.0291 - val_root_mean_squared_error: 0.1705
Epoch 90/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0160 - root_mean_squared_error: 0.1263 - val_loss: 0.0290 - val_root_mean_squared_error: 0.1701
Epoch 91/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0157 - root_mean_squared_error: 0.1253 - val_loss: 0.0289 - val_root_mean_squared_error: 0.1701
Epoch 92/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m