# ***Forecasting on Historical Sales Data***

In [1]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler

import numpy as np

#!pip install tensorflow

import tensorflow as tf
from tensorflow import keras
from keras.layers import Conv1D, MaxPooling1D, Dense, LSTM, RepeatVector, TimeDistributed, Flatten, Dropout
from keras.callbacks import EarlyStopping
from keras.layers import LSTM,Dense,Dropout,Input


In [2]:
df_retail_sales_train = pd.read_csv('/content/drive/MyDrive/DeepForecast/DataCollected/Retail_Store_Inventory.csv')

dataFrameForViz = pd.DataFrame()


def describeCollectedData(df):
    print(f'DataFrame Shape : ',df.shape)
    print(f'DataFrame Info')
    print(df.info())
    df.dropna()
    df.dropna(axis=1)
    print(f'DataFrame Shape after droping null values : ',df.shape)
    return df

data_df = describeCollectedData(df_retail_sales_train)
def preprocessingStoreData(df):
    df = df.drop(['Units Ordered','Inventory Level','Holiday/Promotion','Competitor Pricing'], axis=1)
    df.rename(columns={'Units Sold':'Sales','Product ID':'Product',
                       'Store ID':'Store',
                       'Demand Forecast':'Demand',
                       "Weather Condition":'Weather_Condition'},inplace=True)
    return df

preprocessedRetailStoreDf = preprocessingStoreData(data_df)

DataFrame Shape :  (73100, 15)
DataFrame Info
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 73100 entries, 0 to 73099
Data columns (total 15 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Date                73100 non-null  object 
 1   Store ID            73100 non-null  object 
 2   Product ID          73100 non-null  object 
 3   Category            73100 non-null  object 
 4   Region              73100 non-null  object 
 5   Inventory Level     73100 non-null  int64  
 6   Units Sold          73100 non-null  int64  
 7   Units Ordered       73100 non-null  int64  
 8   Demand Forecast     73100 non-null  float64
 9   Price               73100 non-null  float64
 10  Discount            73100 non-null  int64  
 11  Weather Condition   73100 non-null  object 
 12  Holiday/Promotion   73100 non-null  int64  
 13  Competitor Pricing  73100 non-null  float64
 14  Seasonality         73100 non-null  object 
dtypes: floa

In [14]:

def prepareDataWithSpecificCategory(df, category):
    return df[(df['Category'] == category)]

def prepareDataWithSpecificProduct(df, product):
    return df[(df['Product'] == product)]

def series_to_supervised(data, window=1, lag=1, dropnan=True):
    cols, names = list(), list()
    # Input sequence (t-n, ... t-1)
    for i in range(window, 0, -1):
        cols.append(data.shift(i))
        names += [('%s(t-%d)' % (col, i)) for col in data.columns]
    # Current timestep (t=0)
    cols.append(data)
    names += [('%s(t)' % (col)) for col in data.columns]
    # Target timestep (t=lag)
    cols.append(data.shift(-lag))
    names += [('%s(t+%d)' % (col, lag)) for col in data.columns]
    # Put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names

    # Drop rows with NaN values
    if dropnan:
        #agg.dropna(inplace=True)
        #print(agg.isnull().sum())
        agg.fillna(agg.mean(),inplace=True)

    return agg

def prepareForecast(preprocessedRetailStoreDf, category):
    df_poductCategory = prepareDataWithSpecificCategory(preprocessedRetailStoreDf,'Groceries')
    df_to_predict = df_poductCategory.sort_values('Date').groupby(['Date'], as_index=False)
    df_to_predict = df_to_predict.agg({'Sales':['mean']})
    df_to_predict.columns = ['Date','Sales']

    df_train = df_to_predict

    window = 179
    future_span = 30
    series = series_to_supervised(df_train.drop('Date', axis=1), window=window, lag=future_span)

    # Label
    labels_col = 'Sales(t+%d)' % future_span
    labels = series[labels_col]
    series = series.drop(labels_col, axis=1)

    scaler = StandardScaler()
    series = scaler.fit_transform(series)

    X_train, X_valid, Y_train, Y_valid = train_test_split(series, labels.values, test_size=0.4, random_state=0)
    X_train_series = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
    X_valid_series = X_valid.reshape((X_valid.shape[0], X_valid.shape[1], 1))
    print('Train set shape', X_train_series.shape)
    print('Validation set shape', X_valid_series.shape)
    dataFrameForViz['dayOfYear']=range(0,X_train_series.shape[0])

    executeModel(X_train_series, X_valid_series, Y_train, Y_valid,category)
    return dataFrameForViz


# ***CNN for Demand Forecasting***

In [6]:

def executeModel(X_train_series, X_valid_series, Y_train, Y_valid,Category):
  lr = 0.0003
  adam = tf.keras.optimizers.Adam(lr)

  model_cnn = keras.Sequential()
  model_cnn.add(Conv1D(filters=128, kernel_size=2, activation='relu', input_shape=(X_train_series.shape[1], X_train_series.shape[2])))
  model_cnn.add(MaxPooling1D(pool_size=8))
  model_cnn.add(Flatten())
  model_cnn.add(Dense(50, activation='relu'))
  model_cnn.add(Dense(50, activation='relu'))
  model_cnn.add(Dropout(0.2))
  model_cnn.add(Dense(32, activation='relu'))
  model_cnn.add(Dropout(0.2))
  model_cnn.add(Dense(1))
  model_cnn.compile(loss='mse', optimizer=adam)
  model_cnn.summary()

  epochs = 700
  batch = 10
  monitor = EarlyStopping(monitor='val_loss', min_delta=1e-3, patience=50,
        verbose=1, mode='auto', restore_best_weights=True)
  cnn_history = model_cnn.fit(X_train_series, Y_train,
    validation_data=(X_valid_series, Y_valid), epochs=epochs, verbose=2)

  train_pred = model_cnn.predict(X_train_series)
  valid_pred = model_cnn.predict(X_valid_series)

  dataFrameForViz[Category] = train_pred.flatten() # Flatten the train_pred array before assigning


In [7]:
for cat in preprocessedRetailStoreDf['Category'].unique():
  prepareForecast(preprocessedRetailStoreDf,cat)

#Exporting the Forecasted Data
dataFrameForViz.to_csv('/content/drive/MyDrive/DeepForecast/Output/RetailOnDemandForecast.csv', encoding='utf-8', index=False, header=True)


Train set shape (438, 180, 1)
Validation set shape (293, 180, 1)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/700
14/14 - 2s - 170ms/step - loss: 19175.2715 - val_loss: 18225.6289
Epoch 2/700
14/14 - 0s - 30ms/step - loss: 17765.6777 - val_loss: 16280.5684
Epoch 3/700
14/14 - 0s - 26ms/step - loss: 15415.4609 - val_loss: 13051.3477
Epoch 4/700
14/14 - 1s - 38ms/step - loss: 11507.0771 - val_loss: 8508.3740
Epoch 5/700
14/14 - 0s - 21ms/step - loss: 6770.9561 - val_loss: 3864.7449
Epoch 6/700
14/14 - 0s - 23ms/step - loss: 3135.7439 - val_loss: 1982.7098
Epoch 7/700
14/14 - 0s - 21ms/step - loss: 2642.7974 - val_loss: 1983.4252
Epoch 8/700
14/14 - 0s - 21ms/step - loss: 2538.9414 - val_loss: 1905.3080
Epoch 9/700
14/14 - 0s - 21ms/step - loss: 2397.3337 - val_loss: 1874.8839
Epoch 10/700
14/14 - 0s - 22ms/step - loss: 2425.3176 - val_loss: 1824.8451
Epoch 11/700
14/14 - 0s - 21ms/step - loss: 2342.0552 - val_loss: 1771.6857
Epoch 12/700
14/14 - 0s - 28ms/step - loss: 2216.5518 - val_loss: 1724.5156
Epoch 13/700
14/14 - 0s - 20ms/step - loss: 2190.4346 - val_loss: 1681.0403
Epoch 14/700


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/700
14/14 - 3s - 216ms/step - loss: 19485.1836 - val_loss: 18936.5059
Epoch 2/700
14/14 - 0s - 31ms/step - loss: 19075.0000 - val_loss: 18294.4863
Epoch 3/700
14/14 - 0s - 21ms/step - loss: 18121.1250 - val_loss: 16781.0449
Epoch 4/700
14/14 - 0s - 21ms/step - loss: 16080.3564 - val_loss: 13982.0049
Epoch 5/700
14/14 - 0s - 29ms/step - loss: 12747.8164 - val_loss: 9775.6523
Epoch 6/700
14/14 - 1s - 38ms/step - loss: 8283.7715 - val_loss: 4993.7593
Epoch 7/700
14/14 - 0s - 22ms/step - loss: 4070.0605 - val_loss: 2128.7664
Epoch 8/700
14/14 - 0s - 24ms/step - loss: 2553.6409 - val_loss: 1905.0907
Epoch 9/700
14/14 - 1s - 41ms/step - loss: 2624.0083 - val_loss: 1854.4749
Epoch 10/700
14/14 - 0s - 21ms/step - loss: 2689.0776 - val_loss: 1817.8965
Epoch 11/700
14/14 - 0s - 20ms/step - loss: 2449.6321 - val_loss: 1781.1760
Epoch 12/700
14/14 - 0s - 22ms/step - loss: 2585.9290 - val_loss: 1770.6093
Epoch 13/700
14/14 - 0s - 21ms/step - loss: 2444.6797 - val_loss: 1720.2225
Epoch 14/70

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/700
14/14 - 3s - 199ms/step - loss: 19274.6660 - val_loss: 18489.5996
Epoch 2/700
14/14 - 0s - 21ms/step - loss: 18358.5234 - val_loss: 17133.2188
Epoch 3/700
14/14 - 0s - 21ms/step - loss: 16686.4082 - val_loss: 14839.6895
Epoch 4/700
14/14 - 0s - 22ms/step - loss: 13772.4062 - val_loss: 11346.7334
Epoch 5/700
14/14 - 0s - 20ms/step - loss: 9908.2432 - val_loss: 6920.5732
Epoch 6/700
14/14 - 0s - 21ms/step - loss: 5627.9912 - val_loss: 3072.0691
Epoch 7/700
14/14 - 0s - 21ms/step - loss: 2939.8970 - val_loss: 1926.3357
Epoch 8/700
14/14 - 0s - 21ms/step - loss: 2933.9438 - val_loss: 1943.7129
Epoch 9/700
14/14 - 0s - 22ms/step - loss: 2554.2061 - val_loss: 1863.9861
Epoch 10/700
14/14 - 0s - 21ms/step - loss: 2485.4692 - val_loss: 1851.3795
Epoch 11/700
14/14 - 0s - 21ms/step - loss: 2642.3462 - val_loss: 1793.5967
Epoch 12/700
14/14 - 0s - 20ms/step - loss: 2536.4343 - val_loss: 1765.0035
Epoch 13/700
14/14 - 0s - 21ms/step - loss: 2438.2234 - val_loss: 1752.2198
Epoch 14/700

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/700
14/14 - 2s - 177ms/step - loss: 19196.3242 - val_loss: 18230.3945
Epoch 2/700
14/14 - 0s - 28ms/step - loss: 17869.2559 - val_loss: 16231.9609
Epoch 3/700
14/14 - 1s - 37ms/step - loss: 15331.7422 - val_loss: 12784.2354
Epoch 4/700
14/14 - 0s - 20ms/step - loss: 11299.4746 - val_loss: 7979.5488
Epoch 5/700
14/14 - 0s - 20ms/step - loss: 6407.0840 - val_loss: 3393.9805
Epoch 6/700
14/14 - 0s - 20ms/step - loss: 3113.2510 - val_loss: 1963.1207
Epoch 7/700
14/14 - 0s - 20ms/step - loss: 2865.7708 - val_loss: 1971.6617
Epoch 8/700
14/14 - 0s - 22ms/step - loss: 2567.3342 - val_loss: 1902.1316
Epoch 9/700
14/14 - 0s - 20ms/step - loss: 2536.5166 - val_loss: 1892.9126
Epoch 10/700
14/14 - 0s - 20ms/step - loss: 2395.6721 - val_loss: 1852.2397
Epoch 11/700
14/14 - 0s - 21ms/step - loss: 2359.2166 - val_loss: 1764.3037
Epoch 12/700
14/14 - 0s - 21ms/step - loss: 2190.5662 - val_loss: 1745.2238
Epoch 13/700
14/14 - 0s - 20ms/step - loss: 2356.2019 - val_loss: 1706.8473
Epoch 14/700


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/700
14/14 - 2s - 168ms/step - loss: 19306.9473 - val_loss: 18492.7227
Epoch 2/700
14/14 - 0s - 32ms/step - loss: 18201.6797 - val_loss: 16883.2109
Epoch 3/700
14/14 - 0s - 21ms/step - loss: 16268.0215 - val_loss: 14198.2627
Epoch 4/700
14/14 - 0s - 22ms/step - loss: 13004.5400 - val_loss: 10106.5645
Epoch 5/700
14/14 - 0s - 28ms/step - loss: 8545.6289 - val_loss: 5371.2041
Epoch 6/700
14/14 - 0s - 20ms/step - loss: 4454.5142 - val_loss: 2285.2390
Epoch 7/700
14/14 - 0s - 20ms/step - loss: 2837.1323 - val_loss: 2008.3744
Epoch 8/700
14/14 - 0s - 21ms/step - loss: 2697.1938 - val_loss: 1931.5679
Epoch 9/700
14/14 - 0s - 33ms/step - loss: 2530.4885 - val_loss: 1868.9484
Epoch 10/700
14/14 - 1s - 40ms/step - loss: 2510.5237 - val_loss: 1833.7993
Epoch 11/700
14/14 - 1s - 46ms/step - loss: 2533.8267 - val_loss: 1801.3436
Epoch 12/700
14/14 - 1s - 47ms/step - loss: 2480.9070 - val_loss: 1761.4937
Epoch 13/700
14/14 - 1s - 45ms/step - loss: 2255.8040 - val_loss: 1723.5359
Epoch 14/700