## TIME SERIES ANALYSIS:

### FORECASTING CHAMPAGNE SALES

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels as sm
sm.__version__

'0.12.2'

In [2]:
df = pd.read_csv('champagne.csv')

In [3]:
df.head()

Unnamed: 0,Month,Sales
0,1964-01,2815
1,1964-02,2672
2,1964-03,2755
3,1964-04,2721
4,1964-05,2946


In [4]:
df.tail()

Unnamed: 0,Month,Sales
100,1972-05,4618
101,1972-06,5312
102,1972-07,4298
103,1972-08,1413
104,1972-09,5877


In [5]:
df.drop([106,105],axis=0,inplace=True)

KeyError: ignored

In [None]:
df.columns=["Month","Sales"]
df.head()

In [None]:
df['Month']=pd.to_datetime(df['Month'])

In [None]:
df.set_index('Month',inplace=True)

In [None]:
df.head()

In [None]:
df.describe()

In [None]:
df.plot()

**Although its visually clear that the series is not stationary, stilll we need a robust test to check this. This Testing of Stationarity in the time series is done using Dicky fuller test**

In [None]:
from statsmodels.tsa.stattools import adfuller

In [None]:
def adfuller_test(sales):
    result = adfuller(sales)
    labels = ['ADF Test Statistic','p-value','#Lags Used','#Observation Used']
    for value,label in zip(result,labels):
        print(label  + ': ' + str(value))
    if result[1]<=0.05:
        print('Strong evidence against the null hypothesis, Hence REJECT Ho. and The series is Stationary')
    else:
        print('week evidence against null hypothesis, Hence ACCEPT Ho. that the series is not stationary.')

In [None]:
adfuller_test(df['Sales'])

<br>

**To bring stationarity in the series, Differencing is used as below:**

### Differencing

In [None]:
df['Sales First Difference'] = df['Sales'] - df['Sales'].shift(1)

In [None]:
df['Sales'].shift(1).head()

In [None]:
df['Seasonal First Difference']=df['Sales']-df['Sales'].shift(12)

In [None]:
df.head(15)

In [None]:
adfuller_test(df['Seasonal First Difference'].dropna())

In [None]:
df['Seasonal First Difference'].plot()

In [None]:
from pandas.plotting import autocorrelation_plot
autocorrelation_plot(df['Sales'])
plt.show()

In [None]:
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf

In [None]:
fig = plt.figure(figsize=(12,8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsaplots.plot_acf(df['Seasonal First Difference'].iloc[13:],lags=40,ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsaplots.plot_pacf(df['Seasonal First Difference'].iloc[13:],lags=40,ax=ax2)

<br>

### ARIMA

In [None]:
# For non-seasonal data
# p=1, d=1, q=0 or 1
from statsmodels.tsa.arima.model import ARIMA
model_A = ARIMA(df['Sales'],order=(1,1,1))
model_fit_A = model_A.fit()
model_fit_A.summary()

In [None]:
df['forecast']=model_fit_A.predict(start=90,end=103,dynamic=True)
df[['Sales','forecast']].plot(figsize=(12,8))

### SARIMA

In [None]:
model=sm.tsa.statespace.sarimax.SARIMAX(df['Sales'],order=(1, 1, 1),seasonal_order=(1,1,1,12))
results=model.fit()

In [None]:
df['forecast']=results.predict(start=90,end=103,dynamic=True)
df[['Sales','forecast']].plot(figsize=(12,8))

In [None]:
results.summary()