In [1]:
import os
import warnings

import pandas as pd
from statsmodels.tsa.arima.model import ARIMA

In [4]:
# Forecast horizon: number of steps passed to `model.forecast` when timing predictions.
test_size = 7

# One entry per benchmarked series: a short name (also used to locate its
# grid-search results file) and the path of the CSV holding the raw series.
datasets = [
    {'name': name, 'path': path}
    for name, path in (
        ('casos_confirmados', 'data/covid/casos_confirmados.csv'),
        ('sinteticos', 'data/sinteticos/sinteticos.csv'),
        ('temperaturas', 'data/temperatura_minima_diaria/temperaturas.csv'),
    )
]

# Column names in the grid-search results used to select the best configuration.
metrics = [
    'rmse',
    'mape',
    'mae',
    'mpe',
]

In [6]:
warnings.filterwarnings('ignore')
for dataset in datasets:
    ts = pd.read_csv(dataset['path'], index_col=0, sep=';').values
    grid_results_df = pd.read_csv(os.path.join('results/arima', f"{dataset['name']}_grid_results.csv"), sep=';')
    for metric in metrics:
        print(f"Dataset: {dataset['name']} | Optimization Metric: {metric.upper()}")

        best = grid_results_df[grid_results_df[metric].abs().eq(grid_results_df[metric].abs().min())].iloc[0]
        order = (best.p, best.d, best.q)
        print(f"Parameters: Order={order}")
        
        %timeit -n10 ARIMA(ts, order=order, enforce_stationarity=False, enforce_invertibility=False).fit()
        model = ARIMA(ts, order=order, enforce_stationarity=False, enforce_invertibility=False).fit()
        %timeit -n100 forecast = model.forecast(steps=test_size)

Dataset: casos_confirmados | Optimization Metric: RMSE
Parameters: Order=(0.0, 4.0, 4.0)
779 ms ± 2.4 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
960 µs ± 2.51 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
Dataset: casos_confirmados | Optimization Metric: MAPE
Parameters: Order=(1.0, 4.0, 6.0)
1.34 s ± 2.11 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
970 µs ± 1.83 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
Dataset: casos_confirmados | Optimization Metric: MAE
Parameters: Order=(0.0, 4.0, 4.0)
779 ms ± 1.2 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
958 µs ± 2.81 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
Dataset: casos_confirmados | Optimization Metric: MPE
Parameters: Order=(0.0, 4.0, 8.0)
1.61 s ± 1.33 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
959 µs ± 5.72 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
Dataset: sinteticos | Optimization Metric: RMSE
Parameters: Order=(0.0, 0.0, 0.0)
38