In [None]:
# !pip install pystan==2.19.1.1
!pip install prophet

**Importing Dependencies**

In [None]:
import warnings
from sklearn import preprocessing
from prophet.diagnostics import cross_validation
from prophet import Prophet
import pandas as pd
import numpy as np 
import matplotlib as plt
import matplotlib.pyplot as plt
import seaborn as sns
import itertools
from statsmodels.tsa.stattools import adfuller
import statsmodels.api as sm
import matplotlib.pyplot as plt
from prophet.diagnostics import performance_metrics
import holidays
# Apply the 'fivethirtyeight' matplotlib style to the figures drawn below.
plt.style.use('fivethirtyeight')
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Load the Superstore workbook into a DataFrame.
# NOTE(review): absolute Colab-style path; adjust when running elsewhere.
superstore_xls = '/content/Sample - Superstore.xls'
df = pd.read_excel(superstore_xls)

In [None]:
# Preview the first rows of the raw data.
df.head()

In [None]:
# Preview the last rows of the raw data.
df.tail()

### Exploring and analyzing the dataset, and checking for NaN values (to impute or drop them if any are found)

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# (number of rows, number of columns)
df.shape

In [None]:
# Data type of each column as parsed by pandas.
df.dtypes

In [None]:
# Distinct values of the Category and Region columns.
# A cell only echoes its last expression, so both arrays are returned together
# in one dict; otherwise the Category result would be computed and discarded.
{column: df[column].unique() for column in ('Category', 'Region')}

In [None]:
# Distinct values of the Segment column.
df.Segment.unique()

In [None]:
# Summary statistics from describe(), transposed so each described column is a row.
df.describe().T

In [None]:
# Total sales per category.
df.groupby('Category')['Sales'].sum()

In [None]:
# Total sales per sub-category.
df.groupby('Sub-Category')['Sales'].sum()

In [None]:
# Number of rows for each (Category, Sub-Category) pair.
df.groupby(['Category','Sub-Category'])['Sub-Category'].agg(['count'])

In [None]:
# Number of rows per category. 'Category' holds labels, not numbers, so the
# labels are counted and drawn as one bar each; a 10-bin histogram over a
# handful of labels produces thin, oddly placed bars.
category_counts = df['Category'].value_counts()

ax = plt.gca()
category_counts.plot(kind='bar', alpha=0.5, rot=0, ax=ax)
#Adding the aesthetics
plt.title('Most Ordered Category')
plt.xlabel('Category')
plt.ylabel('Frequency')
#Show the plot
plt.show()

In [None]:
# Total sales per sub-category, largest first.
# sns.barplot aggregates with the mean by default, which would show the average
# sale per row rather than what sold the most, so the totals are computed
# explicitly and plotted one bar per sub-category.
sales_by_sub_category = (
    df.groupby('Sub-Category', as_index=False)['Sales']
      .sum()
      .sort_values('Sales', ascending=False)
)

sns.barplot(x='Sales', y='Sub-Category', data=sales_by_sub_category)
plt.title('Most Sold Sub-Category')
plt.xlabel('Sales')
plt.ylabel('Sub-Category')
plt.show()

In [None]:
# Mean profit for every Region (rows) x Category (columns) combination.
# The string 'mean' is used instead of the np.mean callable, which recent
# pandas versions warn about.
df_pivot = df.pivot_table(values='Profit', index='Region', columns='Category', aggfunc='mean')

# One group of bars per region, one bar per category.
ax = df_pivot.plot(kind='bar', alpha=0.5)
plt.title('Profit by Region')
plt.xlabel('Region')
plt.ylabel('Profit')
# The plot already labels each bar series with its category name; passing the
# DataFrame itself as legend labels only worked by accident.
ax.legend(title='Category', loc='upper left')
plt.show()

In [None]:
# Keep only the rows whose Category is 'Furniture'.
is_furniture = df['Category'] == 'Furniture'
df_sales = df[is_furniture]

In [None]:
# Narrow the furniture rows down to the two columns used for forecasting.
df_sales = df_sales.loc[:, ['Order Date', 'Sales']]

Setting Order Date as the index; Sales is the target column.

In [None]:
# Move 'Order Date' from a column to the index so the series can be resampled by date.
# NOTE(review): df_sales is reassigned in place, so re-running this cell (or the
# column-selection cell above) without re-running the cells before it fails,
# because 'Order Date' is no longer a column.
df_sales=df_sales.set_index('Order Date')

In [None]:
# Preview the date-indexed furniture sales.
df_sales.head()

Resampling the dataset to monthly frequency, labelling each month by its start date (Month Start) and taking the mean of the sales within each month

In [None]:
# Mean of 'Sales' within each calendar month; 'MS' labels every bin with the
# first day of its month.
# NOTE(review): assumes the 'Order Date' index was parsed as datetimes — confirm.
y = df_sales['Sales'].resample('MS').mean()

In [None]:
# Preview the first monthly averages.
y.head()

In [None]:
# Line plot of the monthly mean furniture sales. Title and axis labels are set
# so the figure can be read on its own, and plt.show() keeps the Axes repr out
# of the cell output.
ax = y.plot(figsize=(16,6))
ax.set_title('Monthly Mean Furniture Sales')
ax.set_xlabel('Order Date')
ax.set_ylabel('Sales')
plt.show()

In [None]:
# Candidate values for each of p, d and q: range(0, 2), i.e. 0 and 1.
p = d = q = range(0, 2)

# Every (p, d, q) combination for the non-seasonal part of the model.
pdq = list(itertools.product(p, d, q))

# The same combinations for the seasonal (P, D, Q) part, each extended with a
# seasonal period of 12.
seasonal_pdq = [(*triplet, 12) for triplet in itertools.product(p, d, q)]

print('Examples of parameter combinations for Seasonal ARIMA...')
# (index into pdq, index into seasonal_pdq) pairs printed as examples.
for order_idx, seasonal_idx in ((1, 1), (1, 2), (2, 3), (2, 4)):
    print('SARIMAX: {} x {}'.format(pdq[order_idx], seasonal_pdq[seasonal_idx]))

In [None]:
# Silence library warnings for the rest of the session; the grid search below
# fits many small models and would otherwise flood the output.
warnings.filterwarnings("ignore")

# Grid search: fit a SARIMAX model for every (order, seasonal_order) pair,
# print its AIC and remember the specification with the lowest AIC.
best_aic = float('inf')
best_order = None
best_seasonal_order = None

for param in pdq:
    for param_seasonal in seasonal_pdq:
        try:
            mod = sm.tsa.statespace.SARIMAX(y,
                                            order=param,
                                            seasonal_order=param_seasonal,
                                            enforce_stationarity=False,
                                            enforce_invertibility=False)

            # disp=False suppresses the optimizer's per-fit progress printout.
            results = mod.fit(disp=False)
        except Exception as err:
            # Best effort: a specification that cannot be fitted is reported and
            # skipped instead of being swallowed silently. Catching Exception
            # (not a bare except) lets KeyboardInterrupt stop the search.
            print('Skipping ARIMA{}x{}: {}'.format(param, param_seasonal, err))
            continue

        print('ARIMA{}x{}12 - AIC:{}'.format(param, param_seasonal, results.aic))

        # A NaN AIC compares False here and is therefore never selected.
        if results.aic < best_aic:
            best_aic = results.aic
            best_order = param
            best_seasonal_order = param_seasonal

if best_order is None:
    print('No SARIMAX specification could be fitted.')
else:
    print('Lowest AIC: ARIMA{}x{} - AIC:{}'.format(best_order, best_seasonal_order, best_aic))

# Fitting ARIMA MODEL 

The output above suggests using these values: SARIMAX(1, 1, 1)x(1, 1, 0, 12), with AIC ≈ 263.94.

In [None]:
# Specification chosen from the grid search: non-seasonal order (1, 1, 1) and
# seasonal order (1, 1, 0) with period 12.
final_spec = dict(
    order=(1, 1, 1),
    seasonal_order=(1, 1, 0, 12),
    enforce_stationarity=False,
    enforce_invertibility=False,
)
mod = sm.tsa.statespace.SARIMAX(y, **final_spec)

results = mod.fit()

# The second table of the summary (index 1) is the one printed here.
coefficient_table = results.summary().tables[1]
print(coefficient_table)

### The P>|z| column informs us of the significance of each feature weight. 

In [None]:
# Diagnostic plots for the fitted model's residuals.
diagnostics_fig = results.plot_diagnostics(figsize=(15, 12))
plt.show()

One key insight here: the residual distribution lies close to the standard normal N(0, 1), which suggests the residuals are approximately normally distributed.

In [None]:
# Predictions from 2017-01-01 onward, with dynamic=False, and their confidence interval.
forecast_start = pd.to_datetime('2017-01-01')
pred = results.get_prediction(start=forecast_start, dynamic=False)
pred_ci = pred.conf_int()
print(pred_ci)

# First and second columns of the interval frame bound the shaded band.
band_low = pred_ci.iloc[:, 0]
band_high = pred_ci.iloc[:, 1]

# Observed series from 2014 on, overlaid with the predictions and their band.
ax = y['2014':].plot(label='observed')
pred.predicted_mean.plot(ax=ax, label='One-step ahead Forecast', alpha=.7, figsize=(14, 7))
ax.fill_between(pred_ci.index, band_low, band_high, color='k', alpha=.2)
ax.set_xlabel('Date')
ax.set_ylabel('Furniture Sales')
plt.legend()
plt.show()

In [None]:
# Predictions from 2017-01-01 onward (dynamic=False) and their confidence
# interval, for use in the error calculation.
evaluation_start = pd.to_datetime('2017-01-01')
pred = results.get_prediction(start=evaluation_start, dynamic=False)
pred_ci = pred.conf_int()

In [None]:
# Mean squared error between the predicted means and the observed values
# from 2017-01-01 onward (the two series are aligned on their date index).
y_predicted = pred.predicted_mean
y_true = y['2017-01-01':]
squared_errors = (y_predicted - y_true).pow(2)
mse = squared_errors.mean()
print('Mean Square Error is:', round(mse, 4))

In [None]:
# Out-of-sample forecast for the next 50 steps and its confidence interval.
pred_uc = results.get_forecast(steps=50)
pred_ci = pred_uc.conf_int()

# First and second columns of the interval frame bound the shaded band.
band_low = pred_ci.iloc[:, 0]
band_high = pred_ci.iloc[:, 1]

# Full observed history followed by the forecast and its band.
ax = y.plot(label='observed', figsize=(14, 6))
pred_uc.predicted_mean.plot(ax=ax, label='Forecast')
ax.fill_between(pred_ci.index, band_low, band_high, color='k', alpha=.25)
ax.set_xlabel('Date')
ax.set_ylabel('Furniture Sales')
plt.legend()
plt.show()

# Building a Prophet model to forecast 3 years ahead

In [None]:
# Prophet model with multiplicative seasonality and the built-in US holidays.
model = Prophet(seasonality_mode='multiplicative')
model.add_country_holidays(country_name='US')


In [None]:
# Build the training frame Prophet expects: a 'ds' column with the dates and a
# 'y' column with the values. It is derived from the monthly furniture series
# `y`; without this step `df_furniture_sales` is undefined on a fresh kernel
# and the cell fails with a NameError.
df_furniture_sales = (
    y.rename('y')        # value column -> 'y'
     .rename_axis('ds')  # date index -> 'ds'
     .reset_index()
)

# NOTE(review): a Prophet object can only be fitted once; re-create `model`
# before re-running this cell.
model.fit(df_furniture_sales)

In [None]:
# Extend the history by 44 monthly periods and predict over history + future.
# freq='MS' (month start) is used because the monthly series in this notebook is
# resampled with 'MS'; the previous "M" generates month-end dates, which do not
# line up with month-start history.
# NOTE(review): assumes the model was fitted on that month-start series — confirm.
future = model.make_future_dataframe(periods=44, freq="MS", include_history=True)
forecast = model.predict(future)

In [None]:
# Preview the first rows of the Prophet forecast frame.
forecast.head()

In [None]:
# Plot the Prophet forecast with its uncertainty interval.
forecast_fig = model.plot(forecast, uncertainty=True)
plt.show()

In [None]:
# Plot the individual components of the forecast; the result is bound to `f`
# so it is not echoed as cell output.
f = model.plot_components(forecast)

In [None]:
# Cross-validation windows: 730 days of initial training data, a new cutoff
# every 180 days, and a 365-day forecast horizon after each cutoff.
cv_windows = dict(initial='730 days', period='180 days', horizon='365 days')
df_cv = cross_validation(model, **cv_windows)

In [None]:
# Preview the first rows of the cross-validation results.
df_cv.head()

In [None]:
# Compute performance metrics from the cross-validation results and preview them.
df_p = performance_metrics(df_cv)
df_p.head()