In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
pwd

In [None]:
df=pd.read_csv('../input/air-passengers/AirPassengers.csv')

In [None]:
df.head()

In [None]:
df.tail()

In [None]:
 df.shape

In [None]:
df.info()

In [None]:
df.isnull().sum()

# DATASET LOADING

In [None]:
df=pd.read_csv('../input/air-passengers/AirPassengers.csv',index_col='Month',parse_dates=True)

In [None]:
df.head()

In [None]:
df.index

In [None]:
df.index.freq='MS'

In [None]:
df.index

In [None]:
df.plot(figsize=(8,5))

### TIME RESAMPLING

In [None]:
df['#Passengers'].resample('Y').mean().plot(kind='bar')

In [None]:
df['#Passengers'].iloc[:12].mean()

### ROLLING WINDOWS

In [None]:
df['#Passengers'].plot(figsize=(8,5),legend=True)
df['#Passengers'].rolling(window=7).mean().plot(legend=True)

In [None]:
df['#Passengers'].plot(figsize=(8,5),legend=True)
df['#Passengers'].rolling(window=14).mean().plot(legend=True)

### EXPANDING WINDOWS

In [None]:
df['#Passengers'].plot(figsize=(8,5),legend=True)
df['#Passengers'].expanding().mean().plot(legend=True)

 ### Hodrick-Prescott Filter

In [None]:
from statsmodels.tsa.filters.hp_filter import hpfilter

In [None]:
pas_cycle,pas_trend=hpfilter(df['#Passengers'],lamb=1600)

In [None]:
df1=df.copy()

In [None]:
df1['trend']=pas_trend

In [None]:
df1[['#Passengers','trend']].plot(figsize=(12,10))

### ETS DECOMPOSITION

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose
from pylab import rcParams
rcParams['figure.figsize']=12,5

In [None]:
result=seasonal_decompose(df['#Passengers'],model='additive')

In [None]:
result.plot();

In [None]:
result=seasonal_decompose(df['#Passengers'],model='multiplicative')

In [None]:
result.plot();

### EWMA MODELS

In [None]:
# Simple (SMA) and exponentially weighted (EWMA) moving averages of the
# passenger counts over 6- and 12-month windows, stored as new df1 columns.
passengers=df1['#Passengers']
for months in (6,12):
    df1[f'{months} month-SMA']=passengers.rolling(window=months).mean()
for months in (6,12):
    df1[f'EWMA-{months}']=passengers.ewm(span=months,adjust=False).mean()


In [None]:
df1[['#Passengers','6 month-SMA','12 month-SMA','EWMA-6','EWMA-12']]['1959-01-01':'1961-01-01'].plot(figsize=(12,10))

### HOLT-WINTERS METHOD

#### Simple Exponential Smoothing

In [None]:
from statsmodels.tsa.holtwinters import SimpleExpSmoothing

In [None]:
span=12
alpha=2/(span+1)

#### method 1

In [None]:
df1['EWMA12']=df1['#Passengers'].ewm(alpha=alpha,adjust=False).mean()
df1.head()

##### method 2 :using statsmodels

In [None]:
# Fit simple exponential smoothing with the smoothing level pinned to `alpha`
# (optimized=False asks statsmodels to use the supplied value as-is).
model=SimpleExpSmoothing(df['#Passengers']).fit(smoothing_level=alpha,optimized=False)
# Store the fitted values moved one position earlier in time.
# NOTE(review): presumably shift(-1) is meant to line the fitted values up with the
# EWMA12 column - confirm against the installed statsmodels version.
df1['SES12']=model.fittedvalues.shift(-1)

In [None]:
df1.head()

### DoubleExpSmoothing

In [None]:
from statsmodels.tsa.holtwinters import ExponentialSmoothing

In [None]:
df1['DES_add_12']=ExponentialSmoothing(df1['#Passengers'],trend='add').fit().fittedvalues.shift(-1)

In [None]:
df1[['#Passengers','SES12','DES_add_12']].plot(figsize=(12,10))

### Triple Exponential Smoothing

In [None]:
df1['TES_mul_12']=ExponentialSmoothing(df1['#Passengers'],trend='mul',seasonal='mul',seasonal_periods=12).fit().fittedvalues

In [None]:
df1[['#Passengers','SES12','TES_mul_12']].plot(figsize=(12,10))

In [None]:
# DES is performing better

## GENERAL FORECASTING METHODS:

## using holt-winters method to forecast for the future 

 ### TES method

In [None]:
# Hold out everything from position 108 onward for testing and train on the rows
# before it. Both slices must use the same split point: with iloc[:109] row 108
# ended up in both sets, and the 36-step forecasts made from `train` started one
# month after `test` did, so the error metrics compared misaligned months.
train=df.iloc[:108]
test=df.iloc[108:]


In [None]:
from statsmodels.tsa.holtwinters import ExponentialSmoothing

In [None]:
fitted_model=ExponentialSmoothing(train['#Passengers'],trend='mul',seasonal='mul').fit()

In [None]:
test_pred=fitted_model.forecast(36)

In [None]:
test_pred.tail()

In [None]:
train['#Passengers'].plot(legend=True,figsize=(12,10),label='TRAIN')
test['#Passengers'].plot(legend=True,label='TEST')
test_pred.plot(legend=True,label='TES Predictions')
plt.show()

In [None]:
#using holt-winters (DES) method to forecast for the future 

In [None]:
# NOTE(review): trend='add' together with seasonal='add' is an additive Holt-Winters
# (triple exponential smoothing) fit, not a double one, even though the surrounding
# cells label it "DES" - confirm which model was intended.
fitted_modeld=ExponentialSmoothing(train['#Passengers'],trend='add',seasonal='add').fit() #.fittedvalues.shift(-1)

In [None]:
test_predd=fitted_modeld.forecast(36)

In [None]:
train['#Passengers'].plot(legend=True,figsize=(12,10),label='TRAIN')
test['#Passengers'].plot(legend=True,label='TEST')
test_predd.plot(legend=True,label='DES Predictions')
plt.show()

In [None]:
# EVALUATION 

In [None]:
from sklearn.metrics import mean_squared_error

In [None]:
mean_squared_error(test['#Passengers'],test_pred) #TES

In [None]:
mean_squared_error(test['#Passengers'],test_predd) #DES

In [None]:
# DES method worked well!

In [None]:
# Now forecasting into future

In [None]:
final_modeld=ExponentialSmoothing(df['#Passengers'],trend='add',seasonal='add').fit() #.fittedvalues.shift(-1)

In [None]:
final_predd=final_modeld.forecast(36)

In [None]:
train['#Passengers'].plot(legend=True,figsize=(12,10),label='TRAIN')
test['#Passengers'].plot(legend=True,label='TEST')
final_predd.plot(legend=True,label='DES Predictions')
plt.show()

## Checking for stationarity :

In [None]:
# Time series data is stationary when there is no trend and seasonality
#method 1: by differencing
#method 2: by dickey fuller test
    

In [None]:
from statsmodels.tsa.statespace.tools import diff

In [None]:
df1['d2']=diff(df['#Passengers'],k_diff=2) #.plot()

In [None]:
from statsmodels.tsa.stattools import adfuller

In [None]:
def adf_test(series,title=''):
    """Run an Augmented Dickey-Fuller test on ``series`` and print a report.

    Parameters
    ----------
    series : pandas.Series
        Observations to test; missing values are dropped before testing.
    title : str, optional
        Label appended to the report header.

    Returns
    -------
    None
        The report is printed: test statistic, p-value, number of lags used,
        number of observations and the critical values, followed by a verdict
        at the 5% significance level.
    """
    print(f'Augmented Dickey Fuller Test : {title}')
    result=adfuller(series.dropna(),autolag='AIC')
    labels=['ADF test statistic','p-value','# lags used','# observations']
    out=pd.Series(result[0:4],index=labels)
    # result[4] maps each significance level to its critical value.
    for key,val in result[4].items():
        out[f'critical value({key})']=val
    print(out.to_string())

    # result[1] is the p-value; the null hypothesis is that a unit root is present.
    if result[1]<=0.05:
        print('strong evidence against the null hypothesis')
        print('reject the null hypothesis')
        print('data has no unit roots and is stationary')

    else:
        print('weak evidence against the null hypothesis')
        print('fail to reject the null hypothesis')
        print('data has a unit root and is non-stationary')

        

In [None]:
adf_test(df1['#Passengers'])

In [None]:
adf_test(df1['d2'])

## ACF & PACF 

In [None]:
# ACF PLOT:

In [None]:
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf

In [None]:
title='Autocorrelation: No.of Air Passengers'
lags=40
plot_acf(df['#Passengers'],title=title,lags=lags);

In [None]:
# This plot indicates non-stationary data, as a large number of lags pass before the ACF values drop off.

In [None]:
# PACF plot:

In [None]:
# PACF works best with stationary data. Hence apply differencing to make the data stationary.

In [None]:
df1['d1']=diff(df1['#Passengers'],k_diff=1)

In [None]:
# Give the PACF plot its own caption: `title` holds the "Autocorrelation: ..."
# text that was written for the ACF plot, which would mislabel this figure.
plot_pacf(df1['d1'].dropna(),title='Partial Autocorrelation: No.of Air Passengers (first difference)',lags=np.arange(lags));

## AUTOREGRESSION with Statsmodels

In [None]:
# NOTE(review): statsmodels deprecated the AR class in 0.11 and removed it in 0.13 in favour of
# statsmodels.tsa.ar_model.AutoReg - confirm the installed statsmodels version still provides it.
from statsmodels.tsa.ar_model import AR,ARResults

In [None]:
model=AR(train['#Passengers'])
AR1fit=model.fit(maxlag=1)

In [None]:
AR1fit.params

In [None]:
start=len(train)
end=len(train)+len(test)-1


In [None]:
pred1=AR1fit.predict(start=start,end=end).rename('AR1 Predictions')

In [None]:
test.plot(figsize=(8,5),legend=True)
pred1.plot(legend=True)

In [None]:
# Fit on the training window only (as the AR(1) model does) so that scoring the
# predictions against `test` is a genuine out-of-sample comparison.
model=AR(train['#Passengers'])
AR2fit=model.fit(maxlag=2)
pred2=AR2fit.predict(start,end).rename('AR2 Predictions')


In [None]:
test.plot(figsize=(8,5),legend=True)
pred2.plot(legend=True)

In [None]:
# NOTE(review): this fit uses the full series (df1), which includes the test window,
# so scoring its predictions against `test` is in-sample - confirm this is intended.
model=AR(df1['#Passengers'])
ARfit=model.fit(ic='t-stat')  # ic='t-stat' lets statsmodels choose the lag order p

In [None]:
ARfit.params  # 13 lags

In [None]:
pred13=ARfit.predict(start,end).rename('AR13 Predictions')

In [None]:
test.plot(figsize=(8,5),legend=True)
pred1.plot(legend=True)
pred2.plot(legend=True)
pred13.plot(legend=True)

In [None]:
labels=['AR1','AR2','AR13']
preds=[pred1,pred2,pred13]


In [None]:
# Report the root-mean-squared error of each AR model against the held-out data.
# The value printed is sqrt(MSE), so it is labelled RMSE rather than MSE.
for label,pred in zip(labels,preds):
    error=np.sqrt(mean_squared_error(test['#Passengers'],pred))
    print(f'{label} RMSE was :{error}')

In [None]:
# AR13 performed well; let's build the final model and forecast.

In [None]:
# Refit on the full series and forecast beyond its last observation.
model=AR(df1['#Passengers'])
ARfit=model.fit(maxlag=None)
# `end` is inclusive, so start+35 yields exactly 36 monthly forecasts - the same
# horizon as the Holt-Winters forecast(36) calls (start+36 produced 37).
forecasted_values=ARfit.predict(start=len(df1),end=len(df1)+35).rename('Forecast')


In [None]:
df1['#Passengers'].plot(legend=True,figsize=(12,10))
forecasted_values.plot(legend=True)

## AUTO-ARIMA

In [None]:
#!pip install pmdarima

In [None]:
from pmdarima import auto_arima
import warnings
warnings.filterwarnings('ignore')

In [None]:
stepwise_fit=auto_arima(df1['#Passengers'],start_p=0,start_q=0,max_p=6,max_q=3,seasonal=True,trace=True,m=12)

In [None]:
stepwise_fit.summary()

In [None]:
# building model:
# NOTE(review): statsmodels.tsa.arima_model was deprecated in 0.12 and removed in 0.13
# (replaced by statsmodels.tsa.arima.model.ARIMA) - confirm the installed version still ships it.
from statsmodels.tsa.arima_model import ARMA,ARIMA,ARMAResults,ARIMAResults
# ARIMA(p=1, d=1, q=1) fitted on the training window only.
model=ARIMA(train['#Passengers'],order=(1,1,1))
results=model.fit()
results.summary()

In [None]:
# predictions
predictions=results.predict(start,end,typ='levels').rename('ARIMA(1,1,1) Predictions')

In [None]:
test['#Passengers'].plot(legend=True)
predictions.plot(legend=True)

In [None]:
from statsmodels.tools.eval_measures import rmse


In [None]:
error=rmse(test['#Passengers'],predictions)

In [None]:
error

In [None]:
test['#Passengers'].mean()

In [None]:
# Forecast into future: refit ARIMA(1,1,1) on the full series.
model=ARIMA(df1['#Passengers'],order=(1,1,1))
results=model.fit()
# `end` is inclusive, so start+35 yields exactly 36 monthly forecasts
# (start+36 produced 37).
fcast=results.predict(start=len(df1),end=len(df1)+35,typ='levels').rename('ARIMA(1,1,1) forecast')

In [None]:
df1['#Passengers'].plot(legend=True,figsize=(8,5))
fcast.plot(legend=True)

# SARIMA

In [None]:
from statsmodels.tsa.statespace.sarimax import SARIMAX

In [None]:
model=SARIMAX(train['#Passengers'],order=(1,1,1),seasonal_order=(1,0,0,12))
results=model.fit()
results.summary()


In [None]:
predictions1=results.predict(start,end,typ='levels').rename('SARIMA Predictions')


In [None]:
test['#Passengers'].plot(legend=True,figsize=(8,5))
predictions1.plot(legend=True)

In [None]:
error1=rmse(test['#Passengers'],predictions1)
error1

In [None]:
# Forecast into future: refit the seasonal model on the full series.
model=SARIMAX(df1['#Passengers'],order=(1,1,1),seasonal_order=(1,0,0,12))
results=model.fit()
# `end` is inclusive, so start+35 yields exactly 36 monthly forecasts
# (start+36 produced 37).
fcast=results.predict(start=len(df1),end=len(df1)+35,typ='levels').rename('SARIMA Forecast')

In [None]:
df1['#Passengers'].plot(legend=True,figsize=(8,5))
fcast.plot(legend=True)