In [None]:
# !pip install statsmodels=='0.13.2'

In [None]:
# Display the statsmodels release loaded by the kernel.
import statsmodels as sts
sts.__version__

'0.13.2'

In [None]:
# Core libraries used throughout the notebook
import pandas as pd
import numpy as np
# NOTE(review): this is the legacy ARIMA class. The notebook reports statsmodels
# 0.13.2, where this class is believed to raise NotImplementedError when
# instantiated — confirm, and consider statsmodels.tsa.arima.model.ARIMA.
from statsmodels.tsa.arima_model import ARIMA
import statsmodels.api as sm
import warnings
import itertools
warnings.filterwarnings("ignore") # suppresses every warning from here on, including deprecation notices

  import pandas.util.testing as tm


> Importando os dados


In [None]:
# Load the Macau dataset, switch decimal commas to dots, parse "Data" as
# datetime and use it as the index.
df_macau = (
    pd.read_csv('https://raw.githubusercontent.com/luk3mn/TCC-predicao-de-series-temporais/master/dataset/dataset_macau.csv')
    .replace(',', '.', regex=True)
    .assign(Data=lambda frame: pd.to_datetime(frame.Data))
    .set_index('Data')
)

# Columns still stored as text are converted to numbers; values that cannot
# be parsed become NaN (errors='coerce').
c = df_macau.select_dtypes(object).columns
df_macau[c] = df_macau[c].apply(lambda column: pd.to_numeric(column, errors='coerce'))
df_macau.head()

Unnamed: 0_level_0,Velocidade,Temperatura,Umidade,Pressão,Direção
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2004-01-01,6.33,29.13,67.67,1010.97,11.0
2004-01-02,4.63,29.47,70.33,1010.67,5.67
2004-01-03,5.47,29.0,73.0,1010.2,7.67
2004-01-04,7.97,30.0,70.0,1009.57,10.67
2004-01-05,6.73,29.6,67.67,1009.03,11.0


In [None]:
# Load the Petrolina dataset, switch decimal commas to dots, parse "Data" as
# datetime and use it as the index.
df_petrolina = (
    pd.read_csv('https://raw.githubusercontent.com/luk3mn/TCC-predicao-de-series-temporais/master/dataset/dataset_petrolina.csv')
    .replace(',', '.', regex=True)
    .assign(Data=lambda frame: pd.to_datetime(frame.Data))
    .set_index('Data')
)

# Columns still stored as text are converted to numbers; values that cannot
# be parsed become NaN (errors='coerce').
c = df_petrolina.select_dtypes(object).columns
df_petrolina[c] = df_petrolina[c].apply(lambda column: pd.to_numeric(column, errors='coerce'))
df_petrolina.head()

Unnamed: 0_level_0,Velocidade,Temperatura,Umidade,Pressão,Direção
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2004-01-01,6.5,26.46,62.32,969.02,147.89
2004-01-02,6.62,26.71,63.68,969.3,160.62
2004-01-03,8.47,27.23,62.08,969.26,127.97
2004-01-04,6.5,27.25,57.7,969.06,120.62
2004-01-05,8.11,27.57,60.93,968.4,110.7


## Otimização de hiperparâmetros para a base de dados da velocidade do vento

> Quando um algoritmo de aprendizado de máquina é ajustado a um problema específico — por exemplo, por meio de uma busca em grade (*grid search*) ou de uma busca aleatória —, o que se ajusta são os hiperparâmetros do modelo, a fim de descobrir a configuração que resulta nas previsões mais precisas.

In [None]:
def _fit_nonseasonal_arima(data, order):
    """Fit a non-seasonal ARIMA model of the given (p, d, q) order and return the fitted results.

    Prefers ``statsmodels.tsa.arima.model.ARIMA``. The legacy ``ARIMA`` class
    imported at the top of the notebook is used only when the newer module
    cannot be imported (older statsmodels installations).
    """
    try:
        # Imported locally so the function works without touching the import
        # cell; the legacy class refuses to instantiate on recent statsmodels.
        from statsmodels.tsa.arima.model import ARIMA as StateSpaceARIMA
    except ImportError:
        return ARIMA(data, order=order).fit(disp=0)
    return StateSpaceARIMA(data, order=order).fit()


def find_params(data, p_min, p_max, q_min, q_max, d_max, seasonal=False, seasonal_period=12):
    """Grid-search ARIMA / SARIMA orders and print the one with the lowest AIC.

    Every (p, d, q) combination with ``p_min <= p <= p_max``, ``0 <= d <= d_max``
    and ``q_min <= q <= q_max`` is fitted to ``data``. With ``seasonal=True`` each
    of those orders is additionally combined with every seasonal (P, D, Q)
    triplet drawn from the same ranges, and a SARIMAX model is fitted instead.

    Parameters
    ----------
    data : array-like
        Series passed unchanged to the statsmodels estimators.
    p_min, p_max : int
        Inclusive bounds for the autoregressive order.
    q_min, q_max : int
        Inclusive bounds for the moving-average order.
    d_max : int
        Inclusive upper bound for the differencing order (lower bound is 0).
    seasonal : bool, default False
        When True, search SARIMA models instead of plain ARIMA models.
    seasonal_period : int, default 12
        Seasonal period used for every seasonal order; only read when
        ``seasonal`` is True.

    Returns
    -------
    None
        Progress lines and the final summary are printed.

    Notes
    -----
    Combinations whose fit raises an exception are skipped and counted; the
    count is reported at the end. Fits with a non-finite AIC are printed but
    never selected as the best model.
    """
    p = range(p_min, p_max + 1)
    d = range(0, d_max + 1)
    q = range(q_min, q_max + 1)

    # All non-seasonal (p, d, q) triplets.
    pdq = list(itertools.product(p, d, q))

    if seasonal:
        # Seasonal (P, D, Q, s) orders reuse the same ranges as the non-seasonal part.
        seasonal_pdq = [(sp, sd, sq, seasonal_period) for sp, sd, sq in pdq]
        candidates = itertools.product(pdq, seasonal_pdq)
    else:
        candidates = ((order, None) for order in pdq)

    min_aic = None
    best_model = None
    best_model_seasonal = None
    skipped = 0

    for param, param_seasonal in candidates:
        try:
            if seasonal:
                mod = sm.tsa.statespace.SARIMAX(data,
                                                order=param,
                                                seasonal_order=param_seasonal,
                                                enforce_stationarity=False,
                                                enforce_invertibility=False)
                results = mod.fit()
            else:
                results = _fit_nonseasonal_arima(data, param)
            current_aic = results.aic
        except Exception:
            # Best-effort search: a failed fit must not abort the whole grid.
            # KeyboardInterrupt is deliberately not caught so the run can be stopped.
            skipped += 1
            continue

        if seasonal:
            # param_seasonal already carries the period, so it is not repeated in the label.
            print('ARIMA{}x{} - AIC:{}'.format(param, param_seasonal, current_aic))
        else:
            print('ARIMA{} - AIC:{}'.format(param, current_aic))

        # Track the running minimum; "<=" keeps the later model on exact ties,
        # and non-finite AIC values can never poison the comparison.
        if np.isfinite(current_aic) and (min_aic is None or current_aic <= min_aic):
            min_aic = current_aic
            best_model = param
            best_model_seasonal = param_seasonal

    if skipped:
        print('\n{} combinações ignoradas (falha no ajuste)'.format(skipped))

    if best_model is None:
        # Empty grid or every fit failed: report it instead of raising on unbound names.
        print("\n========================================")
        print("       NENHUM MODELO AJUSTADO           ")
        print("========================================")
        return

    if seasonal:
        print("\n========================================")
        print("       MELHOR MODELO (SAZONAL)          ")
        print("----------------------------------------")
        print('SARIMA{}x{} - AIC:{}'.format(best_model, best_model_seasonal, min_aic))
        print("========================================")
    else:
        print("\n========================================")
        print("             MELHOR MODELO               ")
        print("----------------------------------------")
        print('ARIMA{} - AIC:{}'.format(best_model, min_aic))
        print("========================================")

## Definição dos parâmetros para o modelo ARIMA - Macau

In [None]:
# Non-seasonal search on Macau wind speed: p and q from 0 to 9, d from 0 to 2.
find_params(df_macau['Velocidade'], p_min=0, p_max=9, q_min=0, q_max=9, d_max=2)

ARIMA(0, 0, 0) - AIC:17884.270141149675
ARIMA(0, 0, 1) - AIC:16310.435495647169
ARIMA(0, 0, 2) - AIC:15811.158042353827
ARIMA(0, 0, 3) - AIC:15475.940533266295
ARIMA(0, 0, 4) - AIC:15197.754331945098
ARIMA(0, 0, 5) - AIC:15110.75326392025
ARIMA(0, 0, 6) - AIC:14935.148526892477
ARIMA(0, 0, 7) - AIC:14920.2951836443
ARIMA(0, 0, 8) - AIC:14853.17721873613
ARIMA(0, 0, 9) - AIC:14794.998935068515
ARIMA(0, 1, 0) - AIC:16168.716471292257
ARIMA(0, 1, 1) - AIC:14468.587458670887
ARIMA(0, 1, 2) - AIC:14313.42541866954
ARIMA(0, 2, 0) - AIC:21185.13278515999
ARIMA(0, 2, 1) - AIC:16176.913151790091
ARIMA(1, 0, 0) - AIC:15223.887444801256
ARIMA(1, 0, 1) - AIC:14427.134759017785
ARIMA(1, 0, 2) - AIC:14293.133393358921
ARIMA(1, 0, 3) - AIC:14288.253822832776
ARIMA(1, 0, 4) - AIC:14279.486389665328
ARIMA(1, 0, 5) - AIC:14270.946466953283
ARIMA(1, 0, 6) - AIC:14265.688581414679
ARIMA(1, 0, 7) - AIC:14264.10486031395
ARIMA(1, 0, 8) - AIC:14231.06949320301
ARIMA(1, 0, 9) - AIC:14231.78370151267
ARIMA(1, 

## Definição dos parâmetros para o modelo SARIMA - Macau

In [None]:
# Seasonal search on Macau wind speed: p and q from 0 to 2, d from 0 to 2.
find_params(df_macau['Velocidade'], p_min=0, p_max=2, q_min=0, q_max=2, d_max=2, seasonal=True)

ARIMA(0, 0, 0)x(0, 0, 0, 12)12 - AIC:30390.99444124057
ARIMA(0, 0, 0)x(0, 0, 1, 12)12 - AIC:25770.397079529524
ARIMA(0, 0, 0)x(0, 0, 2, 12)12 - AIC:23199.50518422301
ARIMA(0, 0, 0)x(0, 1, 0, 12)12 - AIC:17666.545640652497
ARIMA(0, 0, 0)x(0, 1, 1, 12)12 - AIC:16217.374805178588
ARIMA(0, 0, 0)x(0, 1, 2, 12)12 - AIC:16176.882923467438
ARIMA(0, 0, 0)x(0, 2, 0, 12)12 - AIC:22757.675030143255
ARIMA(0, 0, 0)x(0, 2, 1, 12)12 - AIC:17656.067054499978
ARIMA(0, 0, 0)x(0, 2, 2, 12)12 - AIC:16208.732565517359
ARIMA(0, 0, 0)x(1, 0, 0, 12)12 - AIC:17584.564210366563
ARIMA(0, 0, 0)x(1, 0, 1, 12)12 - AIC:16267.181089132151
ARIMA(0, 0, 0)x(1, 0, 2, 12)12 - AIC:16210.874368846631
ARIMA(0, 0, 0)x(1, 1, 0, 12)12 - AIC:16587.72597972709
ARIMA(0, 0, 0)x(1, 1, 1, 12)12 - AIC:16216.866579120966
ARIMA(0, 0, 0)x(1, 1, 2, 12)12 - AIC:16174.636390686539
ARIMA(0, 0, 0)x(1, 2, 0, 12)12 - AIC:19986.361384568125
ARIMA(0, 0, 0)x(1, 2, 1, 12)12 - AIC:16629.533885796132
ARIMA(0, 0, 0)x(1, 2, 2, 12)12 - AIC:16207.59493862