# Notebook to work with demand forecasting query
---
---

### 

# Imports

#### Standard library imports

In [1]:
import sys
sys.path.append("../")

import os

#### Third party imports

In [25]:
import mysql.connector
import pandas as pd
from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.stattools import adfuller
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook
import numpy as np
import pandas as pd
from itertools import product
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

#### Local application imports

In [3]:
%load_ext autoreload
%autoreload 2

from pkg_dir.config import *
from pkg_dir.src.utils import *
from pkg_dir.src.functions import *
from pkg_dir.src.parameters import *

### 

# Getting data

##### Query data from MySQL and turn it into dataframe

In [14]:
## Function parameters
db_crds = 'mysql_dp'
sql_files_path = '../pkg_dir/sql/queries/'
sql_script = 'demand_forecast_data.sql'
sql_params = sql_params_workhrs

In [15]:
## Function execution
dfx = sql_to_df(db_crds, sql_files_path, sql_script, sql_params)



##### Formatting some fields

In [16]:
dfx['Purchase_Date'] = pd.to_datetime(dfx['Purchase_Date'])

### 

# Exploration

##### Generate grouped dataframe

In [79]:
## Monthly sales per category
dfp = dfx.groupby(
    [
        dfx['Purchase_Date'].rename('Year').dt.year,
        dfx['Purchase_Date'].rename('Month').dt.month,
        'Category'
    ],
).agg(
    count=('Prod_ID', 'sum'),
    revenue=('List_Price', 'sum'),
)

## Resetting index
dfp.reset_index(inplace=True)

## Creating date column with same day
dfp['Date'] = pd.to_datetime(dfp['Year'].astype('str') + '-' + dfp['Month'].astype('str'))
dfp.drop(['Year', 'Month'], axis=1, inplace=True)

In [80]:
dfp

Unnamed: 0,Category,count,revenue,Date
0,dress,8,140,2022-04-01
1,dress,7,75,2022-05-01
2,coat,7,180,2022-09-01
3,coat,4,130,2022-10-01
4,coat,2,40,2022-12-01
5,dress,6,45,2023-03-01
6,dress,6,10,2023-04-01


### 

# SARIMA model

In [85]:
def optimize_SARIMA(parameters_list, d, D, s, exog):
    """
        Return dataframe with parameters, corresponding AIC and SSE
        
        parameters_list - list with (p, q, P, Q) tuples
        d - integration order
        D - seasonal integration order
        s - length of season
        exog - the exogenous variable
    """
    
    results = []
    
    for param in tqdm_notebook(parameters_list):
        try: 
            model = SARIMAX(exog, order=(param[0], d, param[1]), seasonal_order=(param[2], D, param[3], s)).fit(disp=-1)
        except:
            continue
            
        aic = model.aic
        results.append([param, aic])
        
    result_df = pd.DataFrame(results)
    result_df.columns = ['(p,q)x(P,Q)', 'AIC']
    #Sort in ascending order, lower AIC is better
    result_df = result_df.sort_values(by='AIC', ascending=True).reset_index(drop=True)
    
    return result_df

In [86]:
p = range(0, 4, 1)
d = 1
q = range(0, 4, 1)
P = range(0, 4, 1)
D = 1
Q = range(0, 4, 1)
s = 4

parameters = product(p, q, P, Q)
parameters_list = list(parameters)
print(len(parameters_list))

256


In [87]:
result_df = optimize_SARIMA(parameters_list, 1, 1, 4, dfp['count'])
result_dff

ImportError: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html

In [None]:
best_model = SARIMAX(dfp['count'], order=(0, 1, 2), seasonal_order=(0, 1, 2, 4)).fit(dis=-1)
print(best_model.summary())

In [None]:
data['arima_model'] = best_model.fittedvalues
data['arima_model'][:4+1] = np.NaN
forecast = best_model.predict(start=data.shape[0], end=data.shape[0] + 8)
forecast = data['arima_model'].append(forecast)
plt.figure(figsize=(15, 7.5))
plt.plot(forecast, color='r', label='model')
plt.axvspan(data.index[-1], forecast.index[-1], alpha=0.5, color='lightgrey')
plt.plot(dfp['count'], label='actual')
plt.legend()
plt.show()

### 

# Header

### 

# *Notes*

##### References
- https://towardsdatascience.com/time-series-forecasting-with-sarima-in-python-cda5b793977b

---

---