In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, Conv1D, MaxPooling1D, Flatten, Input
from keras.optimizers import Adam
from keras.metrics import RootMeanSquaredError
import tensorflow as tf
import matplotlib.pyplot as plt

In [3]:
# Load the data
df = pd.read_csv('food_supplements.csv')
#print(df.head(10))

# Convert 'Sold_date' to datetime format and set as index
df['Sold_date'] = pd.to_datetime(df['Sold_date'], format='%m/%d/%y')
df.set_index('Sold_date', inplace=True)
#print(df.head(35))



In [4]:
# Aggregate sales data on a weekly basis for each product
weekly_data = df.pivot_table(values='Sold_quantity', index='Sold_date', 
                             columns='Product_details', aggfunc='sum').resample('W').sum()
print(weekly_data.head(10))
# Extract data for the specific product "Panyawan Plus SCED 450mg 100's"
#product_name = "MX3 Capsule"
#product_data = weekly_data[[product_name]]

# Display the first 10 rows of the specific product's sales data as text
#print(product_data.head(10))
#import matplotlib.pyplot as plt

# Plot the original data
#plt.figure(figsize=(10, 6))
#plt.plot(df.index, df['Sold_quantity'], label='Sold Quantity')
#plt.title('Original Sales Data')
#plt.xlabel('Date')
#plt.ylabel('Sold Quantity')
#plt.legend()
#plt.show()


Product_details  A1 Dietary Supplement Cap 30&#039;s  Ampalaya Plus 550mg Cap  \
Sold_date                                                                       
2020-12-20                                       0.0                      0.0   
2020-12-27                                       0.0                      0.0   
2021-01-03                                       0.0                      0.0   
2021-01-10                                      10.0                      0.0   
2021-01-17                                       0.0                      0.0   
2021-01-24                                      11.0                      0.0   
2021-01-31                                       2.0                      0.0   
2021-02-07                                       0.0                      0.0   
2021-02-14                                       0.0                      0.0   
2021-02-21                                       0.0                      0.0   

Product_details  Apple Cide

In [5]:
# Normalize the sales data
scaler = MinMaxScaler()
weekly_data_scaled = scaler.fit_transform(weekly_data)
weekly_data_scaled = pd.DataFrame(weekly_data_scaled, index=weekly_data.index, columns=weekly_data.columns)

product_name = 'MX3 Capsule'  # Replace 'Your_Product_Name' with the name of the product you're interested in
scaled_product_data = weekly_data_scaled[product_name]
print(scaled_product_data)





Sold_date
2020-12-20    0.000000
2020-12-27    0.000000
2021-01-03    0.228261
2021-01-10    0.413043
2021-01-17    0.271739
                ...   
2024-01-07    0.173913
2024-01-14    0.347826
2024-01-21    0.021739
2024-01-28    0.239130
2024-02-04    0.076087
Freq: W-SUN, Name: MX3 Capsule, Length: 164, dtype: float64


In [6]:
# Prepare data for LSTM
def create_sequences(data, sequence_length):
    x = []
    y = []
    for i in range(len(data) - sequence_length):
        x.append(data.iloc[i:i + sequence_length].values)
        y.append(data.iloc[i + sequence_length].values)
    return np.array(x), np.array(y)




In [7]:
sequence_length = 2  # Number of weeks used to predict the next week
x_lstm, y_lstm = create_sequences(weekly_data_scaled, sequence_length)


In [8]:
# Define the LSTM model to extract features
lstm_model = Sequential([
    LSTM(20, activation='tanh', recurrent_activation='sigmoid'),
])

In [9]:
# Extract features using LSTM
features = lstm_model.predict(x_lstm)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step  


In [10]:
# Define the CNN model for forecasting
input_shape = (features.shape[1], features.shape[2]) if len(features.shape) == 3 else (features.shape[1], 1)  # Adjust input shape
cnn_model = Sequential([
    Input(shape=input_shape),
    Conv1D(64, 3, activation='relu'),
    MaxPooling1D(2),
    Conv1D(64, 3, activation='relu'),
    MaxPooling1D(2),
    Flatten(),
    Dense(len(weekly_data.columns))
])

In [11]:
# Compile the CNN model
cnn_model.compile(optimizer=Adam(), loss='mean_squared_error', metrics=[RootMeanSquaredError()])


In [12]:
# Model summary
cnn_model.summary()


In [13]:
# Fit the CNN model
history = cnn_model.fit(features, y_lstm, epochs=100, batch_size=32, validation_split=0.2)


Epoch 1/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 0.0450 - root_mean_squared_error: 0.2122 - val_loss: 0.0594 - val_root_mean_squared_error: 0.2438
Epoch 2/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0403 - root_mean_squared_error: 0.2008 - val_loss: 0.0527 - val_root_mean_squared_error: 0.2297
Epoch 3/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0330 - root_mean_squared_error: 0.1818 - val_loss: 0.0455 - val_root_mean_squared_error: 0.2134
Epoch 4/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0294 - root_mean_squared_error: 0.1714 - val_loss: 0.0412 - val_root_mean_squared_error: 0.2029
Epoch 5/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0274 - root_mean_squared_error: 0.1654 - val_loss: 0.0414 - val_root_mean_squared_error: 0.2036
Epoch 6/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0232 - root_mean_squared_error: 0.1523 - val_loss: 0.0416 - val_root_mean_squared_error: 0.2039
Epoch 45/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0241 - root_mean_squared_error: 0.1552 - val_loss: 0.0419 - val_root_mean_squared_error: 0.2047
Epoch 46/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0234 - root_mean_squared_error: 0.1530 - val_loss: 0.0421 - val_root_mean_squared_error: 0.2052
Epoch 47/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0234 - root_mean_squared_error: 0.1529 - val_loss: 0.0414 - val_root_mean_squared_error: 0.2035
Epoch 48/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0237 - root_mean_squared_error: 0.1540 - val_loss: 0.0417 - val_root_mean_squared_error: 0.2042
Epoch 49/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0225 - root_mean_squared_error: 0.1501 - val_loss: 0.0411 - val_root_mean_squared_error: 0.2027
Epoch 88/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0218 - root_mean_squared_error: 0.1475 - val_loss: 0.0416 - val_root_mean_squared_error: 0.2039
Epoch 89/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0218 - root_mean_squared_error: 0.1476 - val_loss: 0.0411 - val_root_mean_squared_error: 0.2027
Epoch 90/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0224 - root_mean_squared_error: 0.1495 - val_loss: 0.0413 - val_root_mean_squared_error: 0.2032
Epoch 91/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0219 - root_mean_squared_error: 0.1478 - val_loss: 0.0418 - val_root_mean_squared_error: 0.2044
Epoch 92/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

In [14]:
# Display RMSE
print("Root Mean Squared Error (RMSE):")
print(history.history['root_mean_squared_error'])

Root Mean Squared Error (RMSE):
[0.21015258133411407, 0.1974484920501709, 0.18307463824748993, 0.1696324497461319, 0.16351532936096191, 0.16081549227237701, 0.15935295820236206, 0.1593688726425171, 0.15948256850242615, 0.15907588601112366, 0.1588820368051529, 0.1587647944688797, 0.15835531055927277, 0.1579224318265915, 0.1575523167848587, 0.1574152708053589, 0.1575595587491989, 0.15737885236740112, 0.15721046924591064, 0.15717828273773193, 0.1571837216615677, 0.15703554451465607, 0.1570475846529007, 0.1567768156528473, 0.1563262641429901, 0.15599703788757324, 0.1562013328075409, 0.156462624669075, 0.1561870574951172, 0.15626657009124756, 0.15639406442642212, 0.1559915691614151, 0.1555059403181076, 0.15544088184833527, 0.15534618496894836, 0.1550048142671585, 0.15523554384708405, 0.15502262115478516, 0.15425065159797668, 0.15461213886737823, 0.15484896302223206, 0.154578298330307, 0.15433728694915771, 0.15476319193840027, 0.15486527979373932, 0.15392857789993286, 0.15347258746623993, 0.

In [20]:
# Predict using the trained CNN model
predicted_values = cnn_model.predict(features)

# Inverse transform the predicted values and the actual values
predicted_values = scaler.inverse_transform(predicted_values)
y_lstm_original = scaler.inverse_transform(y_lstm)

# Calculate RMSE in original units
mse = np.mean((predicted_values - y_lstm_original)**2)
rmse = np.sqrt(mse)
print("Root Mean Squared Error (RMSE) in original units:", rmse)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
Root Mean Squared Error (RMSE) in original units: 14.949146403338714
