# Without outlier detection

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.seasonal import seasonal_decompose 
from statsmodels.tsa.holtwinters import SimpleExpSmoothing
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
%matplotlib inline

In [None]:
# --- Load the raw series (machine-specific path) and index it by date ---
df = pd.read_excel(r'C:\Users\Shantanu Datta\Downloads\Spline Data.xlsx', parse_dates=True)
df.head()
df.info()
# parse_dates=True is documented as acting on the index only, and no index
# column has been chosen yet, so convert 'Date' explicitly. This mirrors the
# CSV-based section and guarantees the date-based predict() call below
# receives a DatetimeIndex. It is a no-op when the column is already datetime.
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
df.head()

# --- Augmented Dickey-Fuller stationarity test on the 'Sales' column ---
from statsmodels.tsa.stattools import adfuller
print("Observations of Dickey-fuller test")
dftest = adfuller(df['Sales'], autolag='AIC')
# The first four entries of the result are labelled below; entry 4 is a
# mapping of critical values that is appended one row per level.
dfoutput = pd.Series(
    dftest[0:4],
    index=['Test Statistic', 'p-value', '#lags used', 'number of observations used'],
)
for key, value in dftest[4].items():
    dfoutput['critical value (%s)' % key] = value
print(dfoutput)

# --- Train/test split: first 759 rows train, the remainder test ---
# NOTE(review): column 0 is assumed to be 'Sales' (the column tested above and
# named on the plot axis) -- confirm against the spreadsheet layout.
train, test = df.iloc[:759, 0], df.iloc[759:, 0]

# --- Holt-Winters: additive damped trend, additive seasonality, period 52 ---
# NOTE(review): seasonal_periods=52 presumably means weekly data with a yearly
# cycle -- confirm against the data frequency.
model = ExponentialSmoothing(train, trend='add', seasonal='add', seasonal_periods=52, damped_trend=True)
hw_model = model.fit(optimized=True, remove_bias=False)
# Predict over exactly the date span covered by the held-out test set.
pred = hw_model.predict(start=test.index[0], end=test.index[-1])

# --- Plot training data, held-out data and the forecast together ---
plt.figure(figsize=(12, 8))
plt.plot(train.index, train, label='Train')
plt.plot(test.index, test, label='Test')
plt.plot(pred.index, pred, label='Holt-Winters')
plt.ylabel('Sales')
plt.legend(loc='best')

hw_model.summary()

# --- Forecast accuracy on the test span ---
# scikit-learn returns MAPE as a fraction (0.1 == 10 %), not a percentage.
from sklearn.metrics import mean_absolute_percentage_error
mape = mean_absolute_percentage_error(test, pred)
print('MAPE = ' + str(mape))

# With outlier detection

In [None]:
# --- Load the outlier-trimmed series (machine-specific path) ---
df = pd.read_csv(
    r'C:\Users\Shantanu Datta\Downloads\trimmed_spline.csv',
    parse_dates=True,
)
df.head()
df.info()
# Convert 'Date' to datetime explicitly, then use it as the index.
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)

# --- Augmented Dickey-Fuller test on the 'Sales' column ---
from statsmodels.tsa.stattools import adfuller
print("Observations of Dickey-fuller test")
dftest = adfuller(df['Sales'], autolag='AIC')
# Label the first four result entries, then append one row per critical value.
dfoutput = pd.Series(
    dftest[:4],
    index=[
        'Test Statistic',
        'p-value',
        '#lags used',
        'number of observations used',
    ],
)
for key, value in dftest[4].items():
    dfoutput['critical value (%s)' % key] = value
print(dfoutput)

# --- Split: first 759 rows of column 0 for training, the rest for testing ---
train = df.iloc[:759, 0]
test = df.iloc[759:, 0]

# --- Holt-Winters with additive damped trend and additive seasonality ---
model = ExponentialSmoothing(
    train,
    trend='add',
    seasonal='add',
    seasonal_periods=52,
    damped_trend=True,
)
hw_model = model.fit(optimized=True, remove_bias=False)
# Forecast across the date range spanned by the test set.
pred = hw_model.predict(start=test.index[0], end=test.index[-1])

# --- Overlay train, test and forecast on a single figure ---
plt.figure(figsize=(12, 8))
for series, label in ((train, 'Train'), (test, 'Test'), (pred, 'Holt-Winters')):
    plt.plot(series.index, series, label=label)
plt.legend(loc='best')
plt.ylabel('Sales')

hw_model.summary()

# --- Mean absolute percentage error of the forecast against the test set ---
from sklearn.metrics import mean_absolute_percentage_error
mape = mean_absolute_percentage_error(test, pred)
print('MAPE = ' + str(mape))

# Online 

In [None]:
# --- Load the online series (machine-specific path) and index it by date ---
df = pd.read_excel(r'C:\Users\Shantanu Datta\Downloads\online_spline.xlsx', parse_dates=True)
df.head()
df.info()
# parse_dates=True is documented as acting on the index only, and no index
# column has been chosen yet, so convert 'Date' explicitly. This mirrors the
# CSV-based section and guarantees the date-based predict() call below
# receives a DatetimeIndex. It is a no-op when the column is already datetime.
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)

# --- Augmented Dickey-Fuller stationarity test on the 'Sales' column ---
from statsmodels.tsa.stattools import adfuller
print("Observations of Dickey-fuller test")
dftest = adfuller(df['Sales'], autolag='AIC')
# The first four entries of the result are labelled below; entry 4 is a
# mapping of critical values that is appended one row per level.
dfoutput = pd.Series(
    dftest[0:4],
    index=['Test Statistic', 'p-value', '#lags used', 'number of observations used'],
)
for key, value in dftest[4].items():
    dfoutput['critical value (%s)' % key] = value
print(dfoutput)

# --- Train/test split: first 442 rows train, the remainder test ---
# NOTE(review): column 0 is assumed to be 'Sales' (the column tested above and
# named on the plot axis) -- confirm against the spreadsheet layout.
train, test = df.iloc[:442, 0], df.iloc[442:, 0]

# --- Holt-Winters: additive damped trend, additive seasonality, period 52 ---
# NOTE(review): seasonal_periods=52 presumably means weekly data with a yearly
# cycle -- confirm against the data frequency.
model = ExponentialSmoothing(train, trend='add', seasonal='add', seasonal_periods=52, damped_trend=True)
hw_model = model.fit(optimized=True, remove_bias=False)
# Predict over exactly the date span covered by the held-out test set.
pred = hw_model.predict(start=test.index[0], end=test.index[-1])

# --- Plot training data, held-out data and the forecast together ---
plt.figure(figsize=(12, 8))
plt.plot(train.index, train, label='Train')
plt.plot(test.index, test, label='Test')
plt.plot(pred.index, pred, label='Holt-Winters')
plt.legend(loc='best')
plt.ylabel('Sales')

hw_model.summary()

# --- Forecast accuracy on the test span ---
# scikit-learn returns MAPE as a fraction (0.1 == 10 %), not a percentage.
from sklearn.metrics import mean_absolute_percentage_error
mape = mean_absolute_percentage_error(test, pred)
print('MAPE = ' + str(mape))