# ***Forecasting on Historical Sales Data***

In [None]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import plotly.express as px

df_retail_sales_train = pd.read_csv('/content/drive/MyDrive/DeepForecast/DataCollected/Retail_Store_Inventory.csv')


def describeCollectedData(df):
    print(f'DataFrame Shape : ',df.shape)
    print(f'DataFrame Info')
    print(df.info())
    df.dropna()
    df.dropna(axis=1)
    print(f'DataFrame Shape after droping null values : ',df.shape)
    return df

data_df = describeCollectedData(df_retail_sales_train)
def preprocessingStoreData(df):
    df = df.drop(['Units Ordered','Inventory Level','Holiday/Promotion','Competitor Pricing'], axis=1)
    df.rename(columns={'Units Sold':'Sales','Product ID':'Product',
                       'Store ID':'Store',
                       'Demand Forecast':'Demand',
                       "Weather Condition":'Weather_Condition'},inplace=True)
    return df

preprocessedRetailStoreDf = preprocessingStoreData(data_df)


def prepareDataWithSpecificCategory(df, category):
    return df[(df['Category'] == category)]

def prepareDataWithSpecificProduct(df, product):
    return df[(df['Product'] == product)]

df_to_predict = preprocessedRetailStoreDf.sort_values('Date').groupby(['Date'], as_index=False)
df_to_predict = df_to_predict.agg({'Sales':['mean']})
df_to_predict.columns = ['Date','Sales']

print(df_to_predict.shape)
print(df_to_predict.head())

def series_to_supervised(data, window=1, lag=1, dropnan=True):
    cols, names = list(), list()
    # Input sequence (t-n, ... t-1)
    for i in range(window, 0, -1):
        cols.append(data.shift(i))
        names += [('%s(t-%d)' % (col, i)) for col in data.columns]
    # Current timestep (t=0)
    cols.append(data)
    names += [('%s(t)' % (col)) for col in data.columns]
    # Target timestep (t=lag)
    cols.append(data.shift(-lag))
    names += [('%s(t+%d)' % (col, lag)) for col in data.columns]
    # Put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names

    # Drop rows with NaN values
    if dropnan:
        #agg.dropna(inplace=True)
        #print(agg.isnull().sum())
        agg.fillna(agg.mean(),inplace=True)

    return agg

df_train = df_to_predict.copy()

window = 179
future_span = 30
series = series_to_supervised(df_train.drop('Date', axis=1), window=window, lag=future_span)

print(series.shape)

DataFrame Shape :  (73100, 15)
DataFrame Info
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 73100 entries, 0 to 73099
Data columns (total 15 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Date                73100 non-null  object 
 1   Store ID            73100 non-null  object 
 2   Product ID          73100 non-null  object 
 3   Category            73100 non-null  object 
 4   Region              73100 non-null  object 
 5   Inventory Level     73100 non-null  int64  
 6   Units Sold          73100 non-null  int64  
 7   Units Ordered       73100 non-null  int64  
 8   Demand Forecast     73100 non-null  float64
 9   Price               73100 non-null  float64
 10  Discount            73100 non-null  int64  
 11  Weather Condition   73100 non-null  object 
 12  Holiday/Promotion   73100 non-null  int64  
 13  Competitor Pricing  73100 non-null  float64
 14  Seasonality         73100 non-null  object 
dtypes: floa

# ***Extract the predictors (x sequences) and the label (future prediction)***

In [None]:
#!pip install tensorflow

import tensorflow as tf
from tensorflow import keras
from keras.layers import Conv1D, MaxPooling1D, Dense, LSTM, RepeatVector, TimeDistributed, Flatten, Dropout

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler

from keras.callbacks import EarlyStopping

import numpy as np

# Label
labels_col = 'Sales(t+%d)' % future_span
labels = series[labels_col]
series = series.drop(labels_col, axis=1)

scaler = StandardScaler()
series = scaler.fit_transform(series)

X_train, X_valid, Y_train, Y_valid = train_test_split(series, labels.values, test_size=0.4, random_state=0)
X_train_series = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_valid_series = X_valid.reshape((X_valid.shape[0], X_valid.shape[1], 1))

print('Train set shape', X_train_series.shape)
print('Validation set shape', X_valid_series.shape)

Train set shape (438, 180, 1)
Validation set shape (293, 180, 1)


# ***CNN Model for Forecasting***

In [None]:
lr = 0.0003
adam = tf.keras.optimizers.Adam(lr)

model_cnn = keras.Sequential()
model_cnn.add(Conv1D(filters=128, kernel_size=2, activation='relu', input_shape=(X_train_series.shape[1], X_train_series.shape[2])))
model_cnn.add(MaxPooling1D(pool_size=8))
model_cnn.add(Flatten())
model_cnn.add(Dense(50, activation='relu'))
model_cnn.add(Dense(50, activation='relu'))
model_cnn.add(Dropout(0.2))
model_cnn.add(Dense(32, activation='relu'))
model_cnn.add(Dropout(0.2))
model_cnn.add(Dense(1))
model_cnn.compile(loss='mse', optimizer=adam)
model_cnn.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
epochs = 700
batch = 10
monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=50,
        verbose=1, mode='auto', restore_best_weights=True)

cnn_history = model_cnn.fit(X_train_series, Y_train,
    validation_data=(X_valid_series, Y_valid), epochs=epochs, verbose=2)

Epoch 1/700
14/14 - 4s - 257ms/step - loss: 18315.4902 - val_loss: 17779.7734
Epoch 2/700
14/14 - 1s - 47ms/step - loss: 17079.7148 - val_loss: 16024.5664
Epoch 3/700
14/14 - 0s - 22ms/step - loss: 14811.9912 - val_loss: 12961.3262
Epoch 4/700
14/14 - 0s - 20ms/step - loss: 11153.2627 - val_loss: 8544.6475
Epoch 5/700
14/14 - 0s - 19ms/step - loss: 6741.3940 - val_loss: 3948.4429
Epoch 6/700
14/14 - 0s - 22ms/step - loss: 3165.4082 - val_loss: 1514.6267
Epoch 7/700
14/14 - 0s - 23ms/step - loss: 2518.8835 - val_loss: 1308.1688
Epoch 8/700
14/14 - 0s - 21ms/step - loss: 2335.8806 - val_loss: 1306.4967
Epoch 9/700
14/14 - 0s - 22ms/step - loss: 2234.5256 - val_loss: 1306.2095
Epoch 10/700
14/14 - 0s - 20ms/step - loss: 2161.4287 - val_loss: 1275.6626
Epoch 11/700
14/14 - 0s - 21ms/step - loss: 1997.9727 - val_loss: 1181.2726
Epoch 12/700
14/14 - 0s - 23ms/step - loss: 2063.7810 - val_loss: 1179.4945
Epoch 13/700
14/14 - 0s - 22ms/step - loss: 1998.5533 - val_loss: 1115.2153
Epoch 14/700


In [None]:
cnn_train_pred = model_cnn.predict(X_train_series)
cnn_valid_pred = model_cnn.predict(X_valid_series)

print('Train rmse:', np.sqrt(mean_squared_error(Y_train, cnn_train_pred)))
print('Validation rmse:', np.sqrt(mean_squared_error(Y_valid, cnn_valid_pred)))

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
Train rmse: 14.052730647606959
Validation rmse: 15.859091992516902


# ***LSTM Model for Forecasting***

In [None]:

from keras.layers import LSTM,Dense,Dropout,Input

epochs = 500
batch = 10
lr = 0.0003
adam = tf.keras.optimizers.Adam(lr)

model_lstm=keras.Sequential()
model_lstm.add(Input(shape=(X_train_series.shape[1], X_train_series.shape[2])))
model_lstm.add(LSTM(units=50))
model_lstm.add(Dropout(0.2))
model_lstm.add(Dense(units=50,activation='relu'))
model_lstm.add(Dropout(0.2))
model_lstm.add(Dense(1))
model_lstm.compile(loss = 'mse',optimizer = 'adam', metrics = ['mean_squared_error'])
model_lstm.summary()

In [None]:

#callbacks=[monitor]
lstm_history = model_lstm.fit(X_train_series, Y_train,
    validation_data=(X_valid_series, Y_valid), epochs=epochs, verbose=2)

Epoch 1/500
14/14 - 2s - 169ms/step - loss: 17827.8516 - mean_squared_error: 17827.8516 - val_loss: 17065.6035 - val_mean_squared_error: 17065.6035
Epoch 2/500
14/14 - 2s - 153ms/step - loss: 16304.9893 - mean_squared_error: 16304.9893 - val_loss: 15363.4453 - val_mean_squared_error: 15363.4453
Epoch 3/500
14/14 - 1s - 78ms/step - loss: 14321.3096 - mean_squared_error: 14321.3096 - val_loss: 13166.7578 - val_mean_squared_error: 13166.7578
Epoch 4/500
14/14 - 1s - 79ms/step - loss: 12179.0244 - mean_squared_error: 12179.0244 - val_loss: 10931.3516 - val_mean_squared_error: 10931.3516
Epoch 5/500
14/14 - 1s - 84ms/step - loss: 9891.2109 - mean_squared_error: 9891.2109 - val_loss: 8775.0234 - val_mean_squared_error: 8775.0234
Epoch 6/500
14/14 - 1s - 83ms/step - loss: 7841.5947 - mean_squared_error: 7841.5947 - val_loss: 6763.2485 - val_mean_squared_error: 6763.2485
Epoch 7/500
14/14 - 1s - 96ms/step - loss: 5942.2432 - mean_squared_error: 5942.2432 - val_loss: 4932.6909 - val_mean_square

In [None]:
lstm_train_pred = model_lstm.predict(X_train_series)
lstm_valid_pred = model_lstm.predict(X_valid_series)
print('Train rmse:', np.sqrt(mean_squared_error(Y_train, lstm_train_pred)))
print('Validation rmse:', np.sqrt(mean_squared_error(Y_valid, lstm_valid_pred)))

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
Train rmse: 10.490646542260498
Validation rmse: 9.445612131111643


# ***CNN ~ LSTM Hybrid Model for Forecasting***

In [None]:
subsequences = 2
timesteps = X_train_series.shape[1]//subsequences
X_train_series_sub = X_train_series.reshape((X_train_series.shape[0], subsequences, timesteps, 1))
X_valid_series_sub = X_valid_series.reshape((X_valid_series.shape[0], subsequences, timesteps, 1))
print('Train set shape', X_train_series_sub.shape)
print('Validation set shape', X_valid_series_sub.shape)

Train set shape (438, 2, 90, 1)
Validation set shape (293, 2, 90, 1)


In [None]:

epochs = 500
batch = 10
lr = 0.0003
adam = tf.keras.optimizers.Adam(lr)

model_cnn_lstm = keras.Sequential()
model_cnn_lstm.add(TimeDistributed(Conv1D(filters=64, kernel_size=1, activation='relu'), input_shape=(None, X_train_series_sub.shape[2], X_train_series_sub.shape[3])))
model_cnn_lstm.add(TimeDistributed(MaxPooling1D(pool_size=2)))
model_cnn_lstm.add(TimeDistributed(Dropout(0.2)))
model_cnn_lstm.add(TimeDistributed(MaxPooling1D(pool_size=2)))
model_cnn_lstm.add(TimeDistributed(Flatten()))
model_cnn_lstm.add(LSTM(50, activation='relu'))
model_cnn_lstm.add(Dense(30, activation='relu'))
model_cnn_lstm.add(Dropout(0.2))
model_cnn_lstm.add(Dense(1))
model_cnn_lstm.compile(loss='mse', optimizer=adam)

model_cnn_lstm.summary()


Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



In [None]:
cnn_lstm_history = model_cnn_lstm.fit(X_train_series_sub, Y_train, validation_data=(X_valid_series_sub, Y_valid), epochs=epochs, verbose=2)

Epoch 1/500
14/14 - 6s - 412ms/step - loss: 18505.5840 - val_loss: 18174.0879
Epoch 2/500
14/14 - 1s - 46ms/step - loss: 17425.5977 - val_loss: 16424.6328
Epoch 3/500
14/14 - 1s - 89ms/step - loss: 14953.5137 - val_loss: 13424.7549
Epoch 4/500
14/14 - 1s - 91ms/step - loss: 11419.3701 - val_loss: 9507.9160
Epoch 5/500
14/14 - 1s - 40ms/step - loss: 7270.6162 - val_loss: 5347.7617
Epoch 6/500
14/14 - 1s - 40ms/step - loss: 3883.4338 - val_loss: 2452.0649
Epoch 7/500
14/14 - 1s - 40ms/step - loss: 2417.2009 - val_loss: 1744.0037
Epoch 8/500
14/14 - 1s - 46ms/step - loss: 2394.6997 - val_loss: 1767.0442
Epoch 9/500
14/14 - 1s - 40ms/step - loss: 2406.8157 - val_loss: 1837.7917
Epoch 10/500
14/14 - 1s - 41ms/step - loss: 2386.3167 - val_loss: 1719.0068
Epoch 11/500
14/14 - 1s - 44ms/step - loss: 2105.8516 - val_loss: 1700.7682
Epoch 12/500
14/14 - 1s - 40ms/step - loss: 2250.1245 - val_loss: 1676.2517
Epoch 13/500
14/14 - 1s - 44ms/step - loss: 2012.1201 - val_loss: 1559.8893
Epoch 14/500


In [None]:
cnn_lstm_train_pred = model_cnn_lstm.predict(X_train_series_sub)
cnn_lstm_valid_pred = model_cnn_lstm.predict(X_valid_series_sub)
print('Train rmse:', np.sqrt(mean_squared_error(Y_train, cnn_lstm_train_pred)))
print('Validation rmse:', np.sqrt(mean_squared_error(Y_valid, cnn_lstm_valid_pred)))

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 56ms/step
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
Train rmse: 19.239990503145215
Validation rmse: 21.179013455735053
