In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
import statsmodels.api as sm

In [None]:
# Read the CSV and rename its 'Month' column to 'Date' before inspecting the schema.
passengers_csv = './AirPassengers.csv'
df = pd.read_csv(passengers_csv)
df = df.rename(columns={'Month': 'Date'})
df.info()

In [None]:
# Convert the 'Date' column to datetimes and promote it to the index.
# The guard keeps the cell re-runnable: once 'Date' is the index it is no longer
# a column, and the unguarded version raised KeyError on a second execution.
if 'Date' in df.columns:
    df = df.assign(Date=pd.to_datetime(df['Date'])).set_index('Date')
df

## Time Series Plot

In [None]:
# Line plot of the whole series against its index.
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(df)
ax.set_title('Airline Passengers')
ax.set_xlabel('Date')
# Label spelling fixed (was '#Passangers') so it matches the split plot's '# Passengers'.
ax.set_ylabel('# Passengers')
plt.show()

## Train / Validation / Test Split

In [None]:
# Chronological split by year label: 1949-1957 for training, 1958-1959 for
# validation, and everything from 1960 onwards for testing.
train = df.loc['1949':'1957']
val = df.loc['1958':'1959']
test = df.loc['1960':]

In [None]:
# Draw the three splits on one axis so the split boundaries are visible.
fig, ax = plt.subplots(figsize=(12, 6))
for split_name, split in (('Train', train), ('Validation', val), ('Test', test)):
    ax.plot(split.index, split.values, label=split_name)
ax.set(title='Airline Passengers', xlabel='Date', ylabel='# Passengers')
ax.legend()
plt.show()

## Autocorrelation

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Reshape each split's '#Passengers' column into a single-column 2-D array,
# fit the scaler on the training split only, then apply it to all three splits.
train_col, val_col, test_col = (
    part['#Passengers'].values.reshape(-1, 1) for part in (train, val, test)
)
scaler = MinMaxScaler().fit(train_col)
train_norm = scaler.transform(train_col)
val_norm = scaler.transform(val_col)
test_norm = scaler.transform(test_col)

In [None]:
# Show the unscaled '#Passengers' values of the training split as an array.
train['#Passengers'].values

In [None]:
def acf_pacf(x, qtd_lag):
    """Plot the autocorrelation and partial autocorrelation of a series.

    Both panels are placed in the top row of a 2x2 grid on a 16x10 inch
    figure (the bottom row stays empty) and the figure is shown immediately.

    Args:
        x: Series values, passed unchanged to statsmodels' ``plot_acf`` and
            ``plot_pacf``.
        qtd_lag: Forwarded as the ``lags`` argument of both plotting calls.

    Returns:
        None.
    """
    figure = plt.figure(figsize=(16, 10))
    acf_axis = figure.add_subplot(2, 2, 1)
    pacf_axis = figure.add_subplot(2, 2, 2)
    sm.graphics.tsa.plot_acf(x, lags=qtd_lag, ax=acf_axis)
    sm.graphics.tsa.plot_pacf(x, lags=qtd_lag, ax=pacf_axis)
    plt.show()

In [None]:
# ACF and PACF of the scaled training series for lags up to 36.
acf_pacf(train_norm, 36)

## Sliding windows

In [None]:
def create_sliding_windows(series, window_size):
    """Cut a series into overlapping windows of ``window_size + 1`` consecutive items.

    Window ``i`` is ``series[i : i + window_size + 1]``, and one window is
    produced for each ``i`` in ``range(len(series) - window_size)``.

    Args:
        series: Sliceable sequence or array with a length.
        window_size: Number of leading items per window; each window carries
            one additional trailing item.

    Returns:
        A NumPy array reshaped to ``(number_of_windows, window_size + 1)``.
        When the series is not longer than ``window_size`` there are no windows
        and the result has zero rows.
    """
    span = window_size + 1
    n_windows = len(series) - window_size
    windows = [series[start:start + span] for start in range(n_windows)]
    # The explicit reshape flattens column-vector inputs of shape (n, 1) into rows.
    return np.array(windows).reshape(len(windows), span)

In [None]:
# Build windows of 8 inputs plus 1 trailing value for each scaled split.
# Each split is windowed on its own, so no window straddles a split boundary.
lookback = 8
train_windows, val_windows, test_windows = (
    create_sliding_windows(split_norm, lookback)
    for split_norm in (train_norm, val_norm, test_norm)
)

In [None]:
# Shape of the training window matrix: (number of windows, window_size + 1).
train_windows.shape

In [None]:
# In every window row, all columns but the last are the model inputs and the
# last column is the value to predict.
X_train, y_train = train_windows[:, :-1], train_windows[:, -1]
X_val, y_val = val_windows[:, :-1], val_windows[:, -1]
X_test, y_test = test_windows[:, :-1], test_windows[:, -1]

In [None]:
# Inspect the scaled test windows (each row: 8 inputs followed by the target).
test_windows

## Grid search

In [None]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error as MSE

In [None]:
# Exhaustive search over split criterion and maximum depth, scored by the MSE
# on the validation windows. The best pair ends up in `best_params`.
criterion = ['squared_error', 'friedman_mse', 'absolute_error']
max_depths = [5, 10, 15]
mse_best = np.inf
best_params = None
for c in criterion:
    for d in max_depths:
        # Fixed seed: scikit-learn permutes the features at every split, so
        # equally good splits could otherwise be resolved differently between
        # runs and change which configuration wins.
        modelo = DecisionTreeRegressor(criterion=c, max_depth=d, random_state=42)
        modelo.fit(X_train, y_train)
        prev_val = modelo.predict(X_val)

        mse_val = MSE(y_val, prev_val)
        # Strict '<' keeps the earliest combination when scores tie.
        if mse_val < mse_best:
            mse_best = mse_val
            best_params = (c, d)

In [None]:
# Winning (criterion, max_depth) pair from the grid search.
best_params

## Model training

In [None]:
# Merge the training and validation windows into one fitting set: feature rows
# are appended along axis 0 and the 1-D target vectors are joined end to end.
X_train_full = np.concatenate((X_train, X_val), axis=0)
y_train_full = np.concatenate((y_train, y_val))

In [None]:
# Rows = training windows + validation windows; columns = inputs per window.
X_train_full.shape

In [None]:
# Should have one target per row of X_train_full.
y_train_full.shape

In [None]:
# Refit the selected configuration on the combined train + validation windows.
# random_state pins scikit-learn's per-split feature permutation so the fit is repeatable.
model = DecisionTreeRegressor(criterion=best_params[0], max_depth=best_params[1], random_state=42)
model.fit(X_train_full, y_train_full)
# Predict with the refitted `model`. The previous code called `modelo`, the last
# grid-search candidate (fit on X_train only), so the in-sample plot and MSE
# described the wrong estimator.
prev_train = model.predict(X_train_full)

In [None]:
# Predictions versus targets on the train + validation windows (scaled units).
fig, ax = plt.subplots()
for curve, curve_label in ((prev_train, 'predict'), (y_train_full, 'target')):
    ax.plot(curve, label=curve_label)
ax.legend(loc='best')
plt.show()

In [None]:
# MSE between targets and predictions on the train + validation windows (scaled units).
MSE(y_train_full, prev_train)

In [None]:
# Map the scaled targets and predictions back through the fitted scaler.
# inverse_transform needs 2-D input, hence the single-column reshape.
y_real, predict_real = (
    scaler.inverse_transform(scaled.reshape(-1, 1))
    for scaled in (y_train_full, prev_train)
)

In [None]:
# MSE on the train + validation windows after undoing the scaling.
MSE(y_real, predict_real)

In [None]:
# Predict the test windows with the model refitted on train + validation.
prev_test = model.predict(X_test)

In [None]:
# Predictions versus targets on the test windows (scaled units).
fig, ax = plt.subplots()
for curve, curve_label in ((prev_test, 'predict'), (y_test, 'target')):
    ax.plot(curve, label=curve_label)
ax.legend(loc='best')
plt.show()

In [None]:
# MSE between targets and predictions on the test windows (scaled units).
MSE(y_test, prev_test)

In [None]:
# Undo the scaling for the test targets and predictions. This rebinds y_real and
# predict_real, replacing the train + validation values stored under those names.
y_real, predict_real = (
    scaler.inverse_transform(scaled.reshape(-1, 1))
    for scaled in (y_test, prev_test)
)

In [None]:
# MSE on the test windows after undoing the scaling.
MSE(y_real, predict_real)