# AR, ARIMA and Seasonal ARIMA by Day and by Month for Time Series Store forecast 
* By Alex Dance https://www.linkedin.com/in/alex-dance/
* This notebook is one of several notebooks for a project to improve store and product forecasts
1.	EDA – Exploratory Data Analysis – includes working with annual forecasts
2.	Main Modelling
3.	XG Boost modelling by Month
4.	Weighted average
5.	ARIMA – Month and Other Modelling
6.	Deep Learning

* There are a total of 6 models: AR, ARIMA and Seasonal ARIMA, each fitted by Day and by Month

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime


In [None]:
from pylab import rcParams
import statsmodels.api as sm
import warnings
import itertools
from statsmodels.tsa.ar_model import AR
from statsmodels.tsa.arima_model import ARIMA

In [None]:
# Load the training data; the cells below rely on its 'date' and 'sales' columns.
df = pd.read_csv("../input/demand-forecasting-kernels-only/train.csv")
df.head()

In [None]:
# Parse the date column so it can be used as a DatetimeIndex.
df['date'] =  pd.to_datetime(df['date'])

# Set Date as Index

In [None]:
# A date index is required for the resample() calls below.
df = df.set_index('date')

In [None]:
df.head()

In [None]:
# Grand total of sales over every row (sanity check).
df.sales.sum()

In [None]:
# Aggregate all rows into one sales total per calendar month.
salesbymonth = df.sales.resample('M').sum()

In [None]:
salesbymonth.head()

In [None]:
# Boundary between the training period and the hold-out (test) period.
split = "2017-01-01"

In [None]:
# Monthly totals whose index label is not later than the split date.
salesbymonth_train= salesbymonth[:split]

In [None]:
salesbymonth_train.head()

In [None]:
# Monthly totals whose index label is not earlier than the split date.
salesbymonth_test= salesbymonth[split:]

In [None]:
salesbymonth_test_final=salesbymonth_test.copy() # Working copy used to compare all the monthly predictions

In [None]:
# Promote the Series to a DataFrame so each model can add its own column.
salesbymonth_test_final = pd.DataFrame(salesbymonth_test_final)

In [None]:
salesbymonth_test_final.head()

In [None]:
salesbymonth_test_final.info()

In [None]:
# Random rows from the full monthly series for a quick look.
salesbymonth.sample(5)

# Sales by Day

In [None]:
salesbyday = df.sales.resample('D').sum()

In [None]:
salesbyday_train= salesbyday[:split]

In [None]:
salesbyday_test= salesbyday[split:]

In [None]:
salesbyday_test_final=salesbyday_test.copy() # This file is used to compare all the daily forecasts

In [None]:
salesbyday_test_final = pd.DataFrame(salesbyday_test_final)

In [None]:
salesbyday_test_final.head()

# Seasonal ARIMA by Month

In [None]:
# Default figure size for every plot drawn after this cell.
rcParams['figure.figsize'] = 18, 8

In [None]:
# Additive seasonal decomposition of the monthly training series; plot()
# draws the components the decomposition result exposes.
decomposition = sm.tsa.seasonal_decompose(salesbymonth_train, model='additive')
fig = decomposition.plot()
plt.show()

In [None]:
# Candidate values (0 and 1) shared by the p, d and q orders of the grid search.
p = d = q = range(0, 2)

In [None]:
# Every (p, d, q) combination of the candidate values: 2 * 2 * 2 = 8 tuples.
pdqa = list(itertools.product(p, d, q))

In [None]:
# Seasonal orders for the grid search: each (P, D, Q) combination with a
# seasonal period of 12 appended as the fourth element.
seasonal_pdq = [(*order, 12) for order in itertools.product(p, d, q)]

In [None]:
# Grid search over every (order, seasonal_order) pair on the monthly training
# series, printing the AIC of each fitted model so the best one can be chosen.
# Fix: the original called `modl.fit()` (undefined name); the bare `except`
# hid the NameError, so no AIC was ever printed.
for param in pdqa:
    for param_seasonal in seasonal_pdq:
        try:
            mod = sm.tsa.statespace.SARIMAX(
                salesbymonth_train,
                order=param,
                seasonal_order=param_seasonal,
                enforce_stationarity=False,
                enforce_invertibility=False,
            )
            results = mod.fit()
        except Exception as exc:
            # Best-effort search: report the failing combination and move on.
            # `Exception` (unlike a bare except) lets KeyboardInterrupt through.
            print('ARIMA{}x{}12 - skipped: {}'.format(param, param_seasonal, exc))
            continue
        print('ARIMA{}x{}12 - AIC:{}'.format(param, param_seasonal, results.aic))


In [None]:
SARIMAMonth = sm.tsa.statespace.SARIMAX(salesbymonth, order=(1, 1, 1), seasonal_order=(1, 1, 1, 12) ,enforce_stationarity=False,enforce_invertibility=False)

In [None]:
SARIMA_results_month = SARIMAMonth.fit()

In [None]:
print(SARIMA_results_month.summary().tables[1])

In [None]:
SARIMA_results_month.plot_diagnostics(figsize=(16, 8))
plt.show()

In [None]:
#SARIMA_predict_month_1 = SARIMA_results_month.predict(start=1461,end=1825) # this is from 1 Jan 2017 to 31 Dec 2017

In [None]:
# Predict the 12 positions 48..59 of the monthly series; per the original
# note these are Jan 2017 to Dec 2017 (assumes the data starts in Jan 2013 --
# TODO confirm). Fix: the original used end=60, which requests 13 values, one
# more than the window it describes. Model orders are fixed when the model is
# built, so predict() takes only the range.
SARIMA_predict_month_1 = SARIMA_results_month.predict(start=48, end=59)
print(SARIMA_predict_month_1)

In [None]:
# Add the seasonal ARIMA predictions as a new column; pandas matches the
# values to rows by index label.
salesbymonth_test_final['SeasonalARIMA'] = SARIMA_predict_month_1

In [None]:
salesbymonth_test_final.head()

In [None]:
# Root-mean-square error of the monthly seasonal ARIMA predictions.
# Fix: the original took sqrt() of each squared error before averaging, which
# is the mean absolute error; RMSE averages the squared errors first and takes
# the square root once at the end.
RMSE_Month_Seasonal_ARIMA = np.sqrt(((salesbymonth_test_final['SeasonalARIMA'] - salesbymonth_test_final['sales']) ** 2).mean())
print(RMSE_Month_Seasonal_ARIMA)

# AR by Month

In [None]:
# Autoregressive model (statsmodels AR) on the monthly training series.
model_ar_month = AR (salesbymonth_train)

In [None]:
# fit() is called with no arguments, so the library defaults decide the lag order.
model_ar_month_fit = model_ar_month.fit()

In [None]:
# Predict positions 48..59 (12 values) counted from the start of the training series.
predictions_month_1 = model_ar_month_fit.predict(start=48,end=59)

In [None]:
# Wrap the predictions in a one-column frame named 'AR'.
# NOTE(review): if predict() returns a Series whose name is set and is not
# 'AR', this constructor selects by column name and yields NaN -- confirm.
AR_month_predictions=pd.DataFrame(predictions_month_1, columns =['AR'])

In [None]:
AR_month_predictions.head()

In [None]:
salesbymonth_test.head(3)

In [None]:
# Actual monthly sales (default colour) against the AR predictions (red).
plt.plot(salesbymonth_test)
plt.plot(AR_month_predictions['AR'], color = 'red' )

In [None]:
salesbymonth_test_final['sales']

In [None]:
# Add the AR predictions to the comparison frame (matched by index label).
salesbymonth_test_final['AR'] = AR_month_predictions['AR']

In [None]:
# Root-mean-square error of the monthly AR predictions.
# Fix: the original took sqrt() of each squared error before averaging, which
# is the mean absolute error; RMSE averages the squared errors first and takes
# the square root once at the end.
RMSE_Month_AR = np.sqrt(((salesbymonth_test_final['AR'] - salesbymonth_test_final['sales']) ** 2).mean())
print(RMSE_Month_AR)

In [None]:
# Signed AR error (prediction minus actual) and the same error as a fraction
# of actual sales.
salesbymonth_test_final['AR_error'] = salesbymonth_test_final['AR'] - salesbymonth_test_final['sales']
salesbymonth_test_final['AR_error_percent'] = salesbymonth_test_final['AR_error'] / salesbymonth_test_final['sales']

In [None]:
salesbymonth_test_final.sample(10)

In [None]:
# Column totals; note that summing the 'AR_error_percent' column adds up
# per-month ratios rather than giving an overall percentage.
salesbymonth_test_final.sum()

In [None]:
salesbymonth_test_final.head()

In [None]:
salesbymonth_train.head()

# SARIMA by day

In [None]:
# Additive seasonal decomposition of the daily training series.
decomposition_day = sm.tsa.seasonal_decompose(salesbyday_train, model='additive')

In [None]:
fig = decomposition_day.plot()
plt.show()

In [None]:
# Candidate values (0 and 1) shared by the p, d and q orders of the grid search.
p = d = q = range(0, 2)

In [None]:
# Non-seasonal and seasonal order combinations for the daily grid search.
# NOTE(review): the seasonal period is 12, the same value used for the
# monthly model; confirm that 12 is intended for daily data.
pdqb = list(itertools.product(p, d, q))
seasonal_pdq_day = [(x[0], x[1], x[2], 12) for x in list(itertools.product(p, d, q))]

In [None]:
# Grid search over every (order, seasonal_order) pair on the daily training
# series, printing the AIC of each fitted model.
# Fix: the original passed `param_seasonal` -- a leftover variable from the
# monthly grid search -- as the seasonal order, so the inner loop variable
# `param_seasonal_day` was printed but never used for fitting.
for param in pdqb:
    for param_seasonal_day in seasonal_pdq_day:
        try:
            mod = sm.tsa.statespace.SARIMAX(
                salesbyday_train,
                order=param,
                seasonal_order=param_seasonal_day,
                enforce_stationarity=False,
                enforce_invertibility=False,
            )
            results = mod.fit()
        except Exception as exc:
            # Best-effort search: report the failing combination and move on.
            # `Exception` (unlike a bare except) lets KeyboardInterrupt through.
            print('ARIMA{}x{}12 - skipped: {}'.format(param, param_seasonal_day, exc))
            continue
        print('ARIMA{}x{}12 - AIC:{}'.format(param, param_seasonal_day, results.aic))

In [None]:
SARIMADay = sm.tsa.statespace.SARIMAX(salesbyday, order=(0, 1, 1), seasonal_order=(0, 1, 1, 12) ,enforce_stationarity=False,enforce_invertibility=False)

In [None]:
SARIMA_results_day = SARIMADay.fit()

In [None]:
print(SARIMA_results_day.summary().tables[1])

In [None]:
SARIMA_results_day.plot_diagnostics(figsize=(16, 8))
plt.show()

In [None]:
SARIMA_predict_day_1 = SARIMA_results_day.predict(start=1461,end=1825) # this is from 1 Jan 2017 to 31 Dec 2017
print(SARIMA_predict_day_1)


In [None]:
salesbyday_test_final['SeasonalARIMA'] = SARIMA_predict_day_1

In [None]:
# Root-mean-square error of the daily seasonal ARIMA predictions.
# Fix: the original took sqrt() of each squared error before averaging, which
# is the mean absolute error; RMSE averages the squared errors first and takes
# the square root once at the end.
RMSE_Day_SeasonalARIMA = np.sqrt(((salesbyday_test_final['SeasonalARIMA'] - salesbyday_test_final['sales']) ** 2).mean())
print(RMSE_Day_SeasonalARIMA)

# ARIMA by Month

In [None]:
# ARIMA with order (7, 1, 0) on the monthly training series.
model_arima_month = ARIMA(salesbymonth_train, order = (7,1,0))

In [None]:
# Last 12 training months, for reference.
salesbymonth_train.tail(12)

In [None]:
model_arima_month_fit = model_arima_month.fit()

In [None]:
# Forecast 12 steps past the end of the training series. [0] keeps the first
# element of what forecast() returns -- presumably the point forecasts in the
# legacy statsmodels ARIMA API; confirm for the installed version.
arima_predictions_month = model_arima_month_fit.forecast(steps=12)[0]

In [None]:
print(arima_predictions_month)

In [None]:
# One-column frame named 'ARIMA'; no index is passed, so rows are numbered 0..n-1.
ARIMA_month_predictions=pd.DataFrame(arima_predictions_month, columns =['ARIMA'])

In [None]:
ARIMA_month_predictions['ARIMA']

In [None]:
# Move the date index into a column so the comparison frame is numbered
# 0..n-1 like ARIMA_month_predictions; the assignment below matches rows by
# index label.
salesbymonth_test_final =salesbymonth_test_final.reset_index()

In [None]:
salesbymonth_test_final.head()

In [None]:
salesbymonth_test_final['ARIMA'] =ARIMA_month_predictions['ARIMA']

In [None]:
salesbymonth_test_final.tail(14)

In [None]:
# Actual monthly sales (thick dashed line) against the ARIMA forecast (red).
plt.plot(salesbymonth_test_final['sales'],linestyle='dashed',linewidth=5)
plt.plot(salesbymonth_test_final['ARIMA'], color = 'red' )

In [None]:
# Root-mean-square error of the monthly ARIMA forecast.
# Fix: the original took sqrt() of each squared error before averaging, which
# is the mean absolute error; RMSE averages the squared errors first and takes
# the square root once at the end.
RMSE_Month_ARIMA = np.sqrt(((salesbymonth_test_final['ARIMA'] - salesbymonth_test_final['sales']) ** 2).mean())
print(RMSE_Month_ARIMA)

In [None]:
# Candidate values 0..7 shared by the p, d and q orders of the ARIMA search.
p=d=q =range(0,8)

In [None]:
# Every (p, d, q) combination of the candidate values: 8 * 8 * 8 = 512 tuples.
pdqmontha = list(itertools.product(p,d,q))

In [None]:
# Grid search: fit an ARIMA on the monthly training series for every
# candidate (p, d, q) and print its AIC.
# Fix: the original passed the whole candidate list (`order = pdqmontha`)
# instead of the current tuple, so every iteration failed and the bare
# `except` hid it -- nothing was ever printed.
# Candidates are bound to their own names so that the search does not replace
# the (7, 1, 0) model held in model_arima_month / model_arima_month_fit.
for param in pdqmontha:
    try:
        candidate_arima_month = ARIMA(salesbymonth_train, order=param)
        candidate_arima_month_fit = candidate_arima_month.fit()
    except Exception as exc:
        # Best-effort search: report the failing order and move on.
        # `Exception` (unlike a bare except) lets KeyboardInterrupt through.
        print(param, 'skipped:', exc)
        continue
    print(param, candidate_arima_month_fit.aic)

# ARIMA per day - as per above

In [None]:
# ARIMA with order (2, 1, 0) on the daily training series.
model_arima_day = ARIMA(salesbyday_train, order = (2,1,0))

In [None]:
model_arima_day_fit = model_arima_day.fit()

In [None]:
# Forecast 365 steps past the end of the training series. [0] keeps the first
# element of what forecast() returns -- presumably the point forecasts in the
# legacy statsmodels ARIMA API; confirm for the installed version.
arima_predictions_day = model_arima_day_fit.forecast(steps=365)[0]

In [None]:
# One-column frame named 'ARIMA'; no index is passed, so rows are numbered 0..n-1.
ARIMA_day_predictions=pd.DataFrame(arima_predictions_day, columns =['ARIMA'])

In [None]:
ARIMA_day_predictions['ARIMA']

In [None]:
# Move the date index into a column so the comparison frame is numbered
# 0..n-1 like ARIMA_day_predictions; the assignment below matches rows by
# index label.
# NOTE(review): row i of the forecast is the (i+1)-th day after the last
# training day -- verify that this lines up with row i of the test frame.
salesbyday_test_final =salesbyday_test_final.reset_index()

In [None]:
salesbyday_test_final['ARIMA'] = ARIMA_day_predictions['ARIMA']

In [None]:
salesbyday_test_final.head()

In [None]:
# Actual daily sales (thick dashed line) against the ARIMA forecast (red).
plt.plot(salesbyday_test_final['sales'],linestyle='dashed',linewidth=5)
plt.plot(salesbyday_test_final['ARIMA'], color = 'red' )

In [None]:
# Candidate values 0..4 shared by the p, d and q orders of the ARIMA search.
p=d=q =range(0,5)

In [None]:
# Every (p, d, q) combination of the candidate values: 5 * 5 * 5 = 125 tuples.
pdqday = list(itertools.product(p,d,q))

In [None]:
# Grid search: fit an ARIMA on the DAILY training series for every candidate
# (p, d, q) in pdqday and print its AIC.
# Fix: this cell sits in the per-day section and iterates the daily candidate
# list, but the original fitted `salesbymonth_train` and overwrote the monthly
# model variables (copy-paste from the monthly search). It now fits the daily
# series and keeps candidates in their own names.
warnings.filterwarnings('ignore')  # silence convergence warnings during the search
for param in pdqday:
    try:
        candidate_arima_day = ARIMA(salesbyday_train, order=param)
        candidate_arima_day_fit = candidate_arima_day.fit()
    except Exception as exc:
        # Best-effort search: report the failing order and move on.
        # `Exception` (unlike a bare except) lets KeyboardInterrupt through.
        print(param, 'skipped:', exc)
        continue
    print(param, candidate_arima_day_fit.aic)

In [None]:
ARIMA_day_predictions.tail()

In [None]:
RMSE_Day_ARIMA  = np.mean(np.sqrt((salesbyday_test_final['ARIMA'] - salesbyday_test_final['sales']) ** 2))
print(RMSE_Day_ARIMA)

In [None]:
salesbyday_test_final.shape

# AR by Day 

In [None]:
# Autoregressive model (statsmodels AR) on the daily training series.
model_ar_day = AR (salesbyday_train)

In [None]:
# fit() is called with no arguments, so the library defaults decide the lag order.
model_ar_day_fit = model_ar_day.fit()

In [None]:
# Predict positions 1461..1825 (365 values) counted from the start of the
# training series.
predictions_day_1 = model_ar_day_fit.predict(start=1461,end=1825)

In [None]:
predictions_day_1.head()

In [None]:
# Wrap the predictions in a one-column frame named 'AR'.
# NOTE(review): if predict() returns a Series whose name is set and is not
# 'AR', this constructor selects by column name and yields NaN -- confirm.
AR_day_predictions=pd.DataFrame(predictions_day_1, columns =['AR'])

In [None]:
AR_day_predictions.shape

In [None]:
AR_day_predictions.head()

In [None]:
salesbyday_test_final['sales']

In [None]:
salesbyday_test_final.head()

In [None]:
AR_day_predictions.shape

In [None]:
salesbyday_test_final['AR'] = AR_day_predictions['AR']

In [None]:
salesbyday_test_final.head()

In [None]:
# Root-mean-square error of the daily AR predictions.
# Fix: the original took sqrt() of each squared error before averaging, which
# is the mean absolute error; RMSE averages the squared errors first and takes
# the square root once at the end.
RMSE_Day_AR = np.sqrt(((salesbyday_test_final['AR'] - salesbyday_test_final['sales']) ** 2).mean())
print(RMSE_Day_AR)

In [None]:
# Signed AR error per day (prediction minus actual).
salesbyday_test_final['AR_error'] = salesbyday_test_final['AR'] - salesbyday_test_final['sales']

In [None]:
# The same error as a fraction of actual sales.
salesbyday_test_final['AR_error_percent'] = salesbyday_test_final['AR_error'] / salesbyday_test_final['sales']

In [None]:
salesbyday_test_final.head(12)

In [None]:
# Column totals of the daily comparison frame.
salesbyday_test_final.sum()

In [None]:
# Column totals of the monthly comparison frame.
salesbymonth_test_final.sum()

In [None]:
# Display only: the result is not assigned, so salesbyday_test_final itself
# is left unchanged.
salesbyday_test_final.reset_index()

In [None]:
# Daily actual sales (thick dashed line) against the ARIMA forecast (red).
plt.plot(salesbyday_test_final['sales'],linestyle='dashed',linewidth=5)
plt.plot(salesbyday_test_final['ARIMA'], color = 'red' )

In [None]:
# Monthly actual sales (thick dashed line) against all three models:
# ARIMA (red), AR (blue) and seasonal ARIMA (orange).
plt.plot(salesbymonth_test_final['sales'],linestyle='dashed',linewidth=5)
plt.plot(salesbymonth_test_final['ARIMA'], color = 'red' )
plt.plot(salesbymonth_test_final['AR'], color = 'blue' )
plt.plot(salesbymonth_test_final['SeasonalARIMA'], color = 'orange' )