In [None]:
import numpy as np 
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import os
# Walk the Kaggle input directory and print the full path of every file in it.
for folder, _, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))

Importing and displaying the first five rows of the dataset.

In [None]:
# Read the AirPassengers CSV into a DataFrame.
df = pd.read_csv(
    '/kaggle/input/air-passengers/AirPassengers.csv',
)
# Preview the first five rows.
df.head(5)

* Seasonality - Patterns that repeat at a regular, fixed interval.
* Cyclical - Rises and falls that do not repeat at a fixed interval; there is no regular cycle.

In [None]:
# Line plot of the raw passenger counts; the trailing ';' hides the repr output.
ax = df['#Passengers'].plot(figsize=(12, 5))
ax.set_ylabel('Number of Passengers');

In [None]:
# Drop the leading '#' from the passenger column name (in place).
df.rename(columns={'#Passengers': 'Passengers'}, inplace=True)
# Convert the Month column to datetime values.
df['Month'] = pd.to_datetime(df.Month)
# Show column dtypes and non-null counts.
df.info()

The **Hodrick-Prescott filter** separates a time series into trend and cyclical components.
Link: https://www.statsmodels.org/stable/generated/statsmodels.tsa.filters.hp_filter.hpfilter.html#generated-statsmodels-tsa-filters-hp-filter-hpfilter--page-root

In [None]:
# Hodrick-Prescott decomposition of the passenger series.
# hpfilter returns the pair (cycle, trend). 129600 is the smoothing parameter
# (lamb) that the statsmodels documentation suggests for monthly data.
cycle, trend = sm.tsa.filters.hpfilter(df.Passengers, 129600)

# Build the plotting frame with .assign so the new columns are added to a
# fresh DataFrame. Assigning columns onto the df[['Passengers']] selection
# directly triggers pandas' SettingWithCopyWarning.
decomp = df[['Passengers']].assign(cycle=cycle, trend=trend)

# Create the axes at the target size and draw all three series on them.
fig, ax = plt.subplots(figsize=(12, 5))
decomp[["Passengers", "trend", "cycle"]].plot(ax=ax)
plt.show()

**Time Series Decomposition with Error Trend Seasonality Model**
* Trend Component shows the general growth/decline pattern.
* Seasonal Component shows any repetitive cycles in the time series. 
* Residual Component is the error or noise that cannot be represented as Trend or Seasonality.

**Additive Model** is used when the trend is more linear and the Trend and Seasonality Components are increasing/decreasing at a constant rate over time. 
**Multiplicative Model** is used when the Trend and Seasonality Components are increasing/decreasing at a non-linear rate over time.

In [None]:
# Remove rows with missing values, then make Month the index (both in place).
df.dropna(axis=0, inplace=True)
df.set_index(keys='Month', inplace=True)
df.head(5)

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

# Decompose the passenger series with a multiplicative model and plot the
# resulting components; the trailing ';' hides the figure repr.
results = seasonal_decompose(df['Passengers'], model='multiplicative')
results.plot();

Exponentially Weighted Moving Average Models - EWMA Models
* More recent observations are given more weight than older ones.

In [None]:
# Simple moving averages: 6- and 12-row rolling means of Passengers, added as
# new columns on a copy of df and plotted together with the original series.
df_sma = df.assign(**{
    '6-month-SMA': df['Passengers'].rolling(window=6).mean(),
    '12-month-SMA': df['Passengers'].rolling(window=12).mean(),
})
df_sma.plot();

In [None]:
# Exponentially weighted moving average with span=12 (adjust=False), added as
# a new column on a copy of df and plotted next to the original series.
ewma_12 = df['Passengers'].ewm(span=12, adjust=False).mean()
df_ewma = df.assign(EWMA12=ewma_12)
df_ewma.plot();

The Holt-Winters seasonal method comprises a forecast equation and three smoothing equations: one each for the level, trend, and seasonal components.
Single Exponential Smoothing models the level only.
Double Exponential Smoothing (Holt's method) adds a trend component.


In [None]:
# Tag the Month index with month-start ('MS') frequency, then display the
# index so the freq attribute can be checked.
df.index.freq = 'MS'
df.index

In [None]:
# Preview the first rows of the Month-indexed frame.
df.head()

In [None]:
from statsmodels.tsa.holtwinters import SimpleExpSmoothing

In [None]:
# Smoothing factor derived from a 12-period span: alpha = 2 / (span + 1).
span = 12
alpha = 2 / (1 + span)

In [None]:
# Same EWMA as before, but parameterised by alpha instead of span.
ewm_window = df['Passengers'].ewm(alpha=alpha, adjust=False)
df['EWMA12'] = ewm_window.mean()
df.head()

In [None]:
# Simple exponential smoothing with the smoothing level fixed at alpha
# (optimized=False, so the supplied value is used as-is).
model = SimpleExpSmoothing(df['Passengers'])
fitted_model = model.fit(smoothing_level=alpha, optimized=False)
# Store the fitted values moved one row earlier (shift of -1).
ses_fitted = fitted_model.fittedvalues
df['SES12'] = ses_fitted.shift(periods=-1)
df.head()

In [None]:
from statsmodels.tsa.holtwinters import ExponentialSmoothing

In [None]:
# Double exponential smoothing: additive trend, parameters chosen by fit().
model = ExponentialSmoothing(df['Passengers'], trend='add')
fitted_model = model.fit()
# Store the fitted values moved one row earlier (shift of -1).
des_fitted = fitted_model.fittedvalues
df['DES_add_12'] = des_fitted.shift(periods=-1)
df.head()

In [None]:
# Plot every column of df (the original series plus the smoothed columns added above).
df.plot(figsize = (12,5))

Forecasting Procedure:
* Choose a Model
* Split data into train and test sets
* Fit model on training set
* Evaluate model on test set
* Re-fit model on entire dataset
* Forecast for future data



The test set should ideally be at least as large as the maximum forecast horizon required, i.e. the maximum length of the future forecast. Keep in mind that the longer the forecast horizon, the less accurate the predictions are likely to be.

In [None]:
# Reload the dataset from scratch: Month is parsed as dates and used as the
# index, and the passenger column loses its leading '#'.
df = pd.read_csv(
    '/kaggle/input/air-passengers/AirPassengers.csv',
    index_col='Month',
    parse_dates=True,
).rename(columns={'#Passengers': 'Passengers'})
# Tag the index with month-start frequency.
df.index.freq = 'MS'
df.head()

In [None]:
# Summarise the reloaded frame: index type, column dtypes and non-null counts.
df.info()

In [None]:
# Chronological hold-out split: the first 108 rows train the model and the
# remaining rows form the test set.
# The previous iloc[:109] / iloc[108:] split put row 108 in BOTH sets, so a
# test observation leaked into training and the 36-step forecast started one
# row after the first test row. The error metrics were then computed on
# misaligned months.
# NOTE(review): the label-based equivalent is df.loc[:'1957-12-01'], assuming
# the series starts at 1949-01 -- confirm against the data.
train_date = df.iloc[:108]  # name kept as-is: later cells reference `train_date`
test_data = df.iloc[108:]

In [None]:
# Holt-Winters model on the training series: multiplicative trend and
# multiplicative seasonality with a 12-period season.
hw_model = ExponentialSmoothing(
    train_date['Passengers'],
    trend='mul',
    seasonal='mul',
    seasonal_periods=12,
)
fitted_model = hw_model.fit()

In [None]:
# Forecast 36 steps beyond the end of the training series.
test_predictions = fitted_model.forecast(36) # 3 years

In [None]:
# Draw the training series, the test series and the forecast on one set of axes.
ax = train_date['Passengers'].plot(figsize=(12, 5), label='Train', legend=True)
test_data['Passengers'].plot(ax=ax, label='Test', legend=True)
test_predictions.plot(ax=ax, label='Prediction', legend=True)

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [None]:
# Error metrics of the forecast against the test set. The MSE is computed once
# and reused for the RMSE.
mse = mean_squared_error(test_data, test_predictions)
mae = mean_absolute_error(test_data, test_predictions)
print('MSE: ', mse)
print('MAE', mae)
print('RMSE: ', np.sqrt(mse))

Stationary data does not exhibit trend or seasonality.

In [None]:
from statsmodels.tsa.statespace.tools import diff

In [None]:
# First-order difference by hand: each value minus the value one row earlier.
df['Passengers'].sub(df['Passengers'].shift(periods=1))

In [None]:
# The same first-order difference via the statsmodels helper, plotted.
first_diff = diff(df['Passengers'], k_diff=1)
first_diff.plot()

An autocorrelation plot (also known as a Correlogram) shows the correlation of the series with itself, lagged by x time units.
The y-axis is the correlation and the x-axis is the number of units of lag.
For example, if we compare the standard sales data against the sales data shifted by 1 time step (one day) then we are answering the question, "how correlated are today's sales to yesterday's sales?"
ACF describes the autocorrelation between an observation and another observation at a prior time step that included direct and indirect dependence information.
The PACF only describes the direct relationship between an observation and its lag.

**Autoregression with AR**

In [None]:
from statsmodels.tsa.ar_model import AR, ARResults

In [None]:
# Preview the frame that the autoregression cells below work on.
df.head()

In [None]:
# Plot all columns of df at a larger figure size.
df.plot(figsize = (12, 8))

In [None]:
# Chronological hold-out split: the first 108 rows train the model and the
# remaining rows form the test set.
# The previous iloc[:109] / iloc[108:] split put row 108 in BOTH sets. Later
# cells derive the prediction range from len(train_data), so the predictions
# started one row after the first test row and ran one step past the data,
# and the MSE comparison was made on misaligned months.
# NOTE(review): the label-based equivalent is df.loc[:'1957-12-01'], assuming
# the series starts at 1949-01 -- confirm against the data.
train_data = df.iloc[:108]
test_data = df.iloc[108:]

In [None]:
# The two commented-out lines below would silence a FutureWarning tied to
# statsmodels.tsa.ar_model.AR.
# NOTE(review): that warning suggests AR is deprecated; newer statsmodels
# releases presumably expect AutoReg instead -- confirm the installed version
# still provides AR.
# import warnings
# warnings.filterwarnings('ignore', 'statsmodels.tsa.ar_model.AR', FutureWarning)

# Fit an autoregressive model on the training series with maxlag=1.
model = AR(train_data['Passengers'])
AR1fit = model.fit(maxlag = 1)

In [None]:
# Print the AIC, the number of lags used (k_ar) and the fitted parameters, one per line.
for stat in (AR1fit.aic, AR1fit.k_ar, AR1fit.params):
    print(stat)

In [None]:
# Prediction range: the first position after the training rows, through as
# many steps as there are test rows (end is the last position, hence the -1).
start = len(train_data)
end = start + len(test_data) - 1
predictions = AR1fit.predict(start=start, end=end).rename('AR(1) Predictions')
predictions.head()

In [None]:
# Plot the test data and overlay the AR(1) predictions on the same axes.
ax = test_data.plot(figsize=(12, 8), legend=True)
predictions.plot(ax=ax, legend=True)

In [None]:
# Refit on the training series with maxlag=50 and predict over the same
# start/end range as before.
model = AR(train_data['Passengers'])
AR50fit = model.fit(maxlag=50)
predictions_2 = AR50fit.predict(start=start, end=end).rename('AR(50) Predictions')
predictions_2.head()

In [None]:
# Plot the test data and overlay both sets of AR predictions on the same axes.
ax = test_data.plot(figsize=(12, 8), legend=True)
for series in (predictions, predictions_2):
    series.plot(ax=ax, legend=True)
ax

In [None]:
# Refit on the training series, passing ic='t-stat' to fit() instead of a
# fixed maxlag, and predict over the same start/end range.
model = AR(train_data['Passengers'])
ARfit = model.fit(ic='t-stat')
predictions_3 = ARfit.predict(start=start, end=end).rename('AR Predictions')
predictions_3.head()

In [None]:
# Display the parameters of the model fitted with ic='t-stat'.
ARfit.params

In [None]:
# Plot the test data and overlay all three sets of AR predictions on the same axes.
ax = test_data.plot(figsize=(12, 8), legend=True)
for series in (predictions, predictions_2, predictions_3):
    series.plot(ax=ax, legend=True)
ax

In [None]:
from sklearn.metrics import mean_squared_error
# Mean squared error of each set of AR predictions against the test series,
# printed in the order AR(1), AR(50), AR (ic='t-stat').
for forecast in (predictions, predictions_2, predictions_3):
    print(mean_squared_error(test_data['Passengers'], forecast))

In [None]:
# Forecasting: refit on the full series and predict beyond its end.
model = AR(df['Passengers'])
ARfit = model.fit()

# `end` is the last position predicted (inclusive), as in the earlier
# `len(train_data) + len(test_data) - 1` computation. The previous
# `end = len(df) + 12` therefore produced 13 steps, not the intended 12.
horizon = 12            # months to forecast
first_step = len(df)    # first position after the observed data
forecasts = ARfit.predict(start=first_step,
                          end=first_step + horizon - 1
                         ).rename('Forecasts')

In [None]:
# Plot the observed series and append the forecasts on the same axes.
ax = df['Passengers'].plot(figsize=(12, 8), legend=True)
forecasts.plot(ax=ax, legend=True)