In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [None]:
# Load the AirPassengers CSV into a DataFrame and display it.
csv_path = "../input/air-passenger-data-for-time-series-analysis/AirPassengers.csv"
df = pd.read_csv(csv_path)
df

In [None]:
# Show the column labels of the loaded frame.
df.columns

In [None]:
# Parse the "Month" strings into pandas datetimes so the column can later
# serve as a time index, then display the frame.
month_as_datetime = pd.to_datetime(df["Month"])
df["Month"] = month_as_datetime
df

In [None]:
# Show the dtype of every column in the frame.
df.dtypes

In [None]:
# Use the "Month" column as the row index so the frame is indexed by time.
# Rebinding `df` to the returned frame replaces `inplace=True`, which offers no
# benefit and hides the mutation from readers of earlier cells.
df = df.set_index("Month")
df

In [None]:
# Line plot of the raw passenger counts against the frame's index.
passenger_series = df["#Passengers"]
plt.plot(passenger_series)

In [None]:
from statsmodels.tsa.stattools import adfuller

# Augmented Dickey-Fuller unit-root test on the passenger counts.
# The column is passed explicitly: adfuller expects a single 1-D series, and
# handing it the whole frame only works while `df` has exactly one column, so
# the cell would fail when re-run after extra columns are added to `df`.
# Only `pvalue` is inspected below; the remaining results are kept for reference.
adf, pvalue, usedlag_, nobs_, critical_values_, icbest_ = adfuller(df["#Passengers"])

In [None]:
# The ADF null hypothesis is that the series has a unit root (is non-stationary).
print(pvalue)  # p-value > 0.05: cannot reject the null, so treat the series as non-stationary

In [None]:
# Calendar year of each observation, read from the datetime index with the
# vectorised `.year` accessor instead of a Python-level loop over timestamps.
df["year"] = df.index.year
df

In [None]:
# Abbreviated month name ('Jan', 'Feb', ...) of each observation, formatted in
# one vectorised call on the datetime index instead of a per-timestamp loop.
df["month"] = df.index.strftime('%b')
df

In [None]:
# Distinct values of the 'year' column, in order of first appearance.
year_column = df['year']
years = year_column.unique()
years

In [None]:
# Distribution of passenger counts within each year.
sns.boxplot(
    data=df,
    x='year',
    y='#Passengers',
)

In [None]:
# Distribution of passenger counts for each month label across all years.
sns.boxplot(
    data=df,
    x='month',
    y='#Passengers',
)

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

# Additive decomposition of the passenger series into trend, seasonal and
# residual components.
passengers_for_decomposition = df["#Passengers"]
decomposed = seasonal_decompose(passengers_for_decomposition, model='additive')

In [None]:
# Pull the three component series out of the decomposition result.
trend, sesonal, residual = (
    decomposed.trend,
    decomposed.seasonal,
    decomposed.resid,
)

In [None]:
# Display the trend component taken from the decomposition.
trend

In [None]:
# Display the seasonal component taken from the decomposition.
sesonal

In [None]:
# Display the residual component taken from the decomposition.
residual

In [None]:
# Four stacked panels: the observed series followed by its trend, seasonal and
# residual components. Each panel is drawn the same way, so iterate over
# (subplot position, series, legend label) instead of repeating the calls.
plt.figure(figsize=(12, 8))
panels = (
    (411, df["#Passengers"], "Orginal"),
    (412, trend, "Trend"),
    (413, sesonal, "Sesonal"),
    (414, residual, "Residual"),
)
for position, series, caption in panels:
    plt.subplot(position)
    plt.plot(series, label=caption, color='red')
    plt.legend(loc='upper left')
plt.show()

In [None]:
!pip install pmdarima
from pmdarima.arima import auto_arima

In [None]:
# Stepwise search for a seasonal ARIMA specification of the passenger series
# with a seasonal period of 12. The options are collected in a dict so the
# search configuration can be read at a glance.
search_options = {
    # non-seasonal part
    "start_p": 1,
    "d": 1,
    "start_q": 1,
    "max_p": 5,
    "max_d": 5,
    "max_q": 5,
    # seasonal part
    "seasonal": True,
    "m": 12,
    "start_P": 0,
    "D": 1,
    "start_Q": 0,
    "max_P": 5,
    "max_D": 5,
    "max_Q": 5,
    # search behaviour and reporting
    "stepwise": True,
    "n_fits": 50,
    "trace": True,
    "error_action": "ignore",
    "suppress_warnings": True,
}
arima_model = auto_arima(df["#Passengers"], **search_options)

In [None]:
# Display the summary of the model returned by auto_arima.
arima_model.summary()

In [None]:
# Chronological split: the first 66% of the rows are used for training and
# the remaining rows are held out for testing.
size = int(0.66 * len(df))
X_train = df[:size]
X_test = df[size:]

In [None]:
# (rows, columns) of the training slice.
X_train.shape

In [None]:
# (rows, columns) of the test slice.
X_test.shape

In [None]:
from statsmodels.tsa.statespace.sarimax import SARIMAX

In [None]:
# Fit a SARIMA(0,1,1)x(2,1,1,12) model on the training passengers only and
# display the fit summary.
sarima_spec = {
    "order": (0, 1, 1),
    "seasonal_order": (2, 1, 1, 12),
}
model = SARIMAX(X_train["#Passengers"], **sarima_spec)
result = model.fit()
result.summary()

In [None]:
# In-sample predictions covering every training observation (positions
# 0 .. len(X_train) - 1 inclusive).
end_index = len(X_train) - 1
start_index = 0
train_predction = result.predict(start=start_index, end=end_index)
train_predction

In [None]:
# Out-of-sample predictions for the held-out period: from the first position
# after the training data through the last row of `df` (inclusive).
ed_index = len(df) - 1
st_index = len(X_train)
predction = result.predict(start=st_index, end=ed_index)
predction

In [None]:
# Overlay the model's hold-out predictions and the actual test values on the
# same axes, each with a legend entry.
actual_test = X_test["#Passengers"]
predction.plot(legend=True)
actual_test.plot(legend=True)

In [None]:
import math
from sklearn.metrics import mean_squared_error

In [None]:
# Root-mean-squared error of the model on the training data and on the
# held-out test data.
train_mse = mean_squared_error(X_train['#Passengers'], train_predction)
test_mse = mean_squared_error(X_test["#Passengers"], predction)
trainScore = math.sqrt(train_mse)
testScore = math.sqrt(test_mse)
trainScore, testScore

In [None]:
# Forecast the 36 months (3 years) that follow the last row of `df`.
# `typ="levels"` has been dropped: it is an option of the legacy ARIMA results
# API, not a documented parameter of SARIMAX results, so at best it was being
# silently ignored here.
# NOTE(review): `result` was fitted on X_train only, so this forecast
# extrapolates across the whole test period as well; consider refitting on the
# full series before forecasting beyond the data.
forcast = result.predict(
    start=len(df),
    end=(len(df) - 1) + 3 * 12,
).rename('Forecust')

In [None]:
# Training data, test data and the future forecast on one figure, each with
# its own colour and legend entry.
plt.figure(figsize=(12, 8))
curves = (
    (X_train["#Passengers"], "Training", 'green'),
    (X_test["#Passengers"], "Test", 'blue'),
    (forcast, "Forecast", "red"),
)
for values, caption, shade in curves:
    plt.plot(values, label=caption, color=shade)
plt.legend(loc="upper left")
