In [1]:
import itertools
import warnings

import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error
from statsmodels.tsa.arima.model import ARIMA

In [2]:

def grid_search_arima(data, p_values, d_values, q_values):
    """Pick the ARIMA order with the lowest hold-out MAE from a (p, d, q) grid.

    The first 80% of ``data`` (by position) is used for fitting and the
    remaining part for scoring. Every combination of the supplied values is
    fitted on the training part, forecast over the length of the test part,
    and scored with mean absolute error.

    Parameters
    ----------
    data : sliceable sequence of observations
        Must support ``len()`` and positional slicing (``data[:n]``).
    p_values, d_values, q_values : iterable of int
        Candidate values for each component of the ARIMA ``order`` tuple.

    Returns
    -------
    tuple
        ``(best_cfg, best_score)``: the winning ``(p, d, q)`` tuple and its
        MAE. On ties the first candidate encountered wins. If no candidate
        could be fitted and scored, ``(None, float("inf"))`` is returned and a
        ``RuntimeWarning`` is emitted.

    Notes
    -----
    Candidates that raise an ``Exception`` while fitting, forecasting or
    scoring are skipped (best effort). ``KeyboardInterrupt`` and
    ``SystemExit`` are deliberately *not* swallowed, so a long-running search
    can still be interrupted.
    """
    best_score, best_cfg = float("inf"), None
    train_size = int(len(data) * 0.8)
    train, test = data[:train_size], data[train_size:]
    horizon = len(test)  # loop-invariant forecast length

    for order in itertools.product(p_values, d_values, q_values):
        try:
            model_fit = ARIMA(train, order=order).fit()
            predictions = model_fit.forecast(steps=horizon)
            mae = mean_absolute_error(test, predictions)
            if mae < best_score:
                best_score, best_cfg = mae, order
        except Exception:
            # Skip candidates that cannot be fitted or scored; unlike a bare
            # ``except``, this lets Ctrl-C / kernel interrupt through.
            continue

    if best_cfg is None:
        # Surface a total failure instead of silently handing back None.
        warnings.warn(
            "grid_search_arima: no (p, d, q) candidate could be fitted and scored",
            RuntimeWarning,
            stacklevel=2,
        )
    return best_cfg, best_score

def evaluate_arima(data, best_cfg):
    """Score one ARIMA order on the 80/20 positional hold-out split.

    An ARIMA model with ``order=best_cfg`` is fitted on the first 80% of
    ``data``; it then forecasts as many steps as there are remaining
    observations, and the mean absolute error against those observations is
    returned.

    Parameters
    ----------
    data : sliceable sequence of observations
        Must support ``len()`` and positional slicing.
    best_cfg : tuple
        The ``(p, d, q)`` order passed to ``ARIMA``.

    Returns
    -------
    The value produced by ``mean_absolute_error`` for the held-out part.
    """
    split_at = int(len(data) * 0.8)
    history = data[:split_at]
    holdout = data[split_at:]

    fitted = ARIMA(history, order=best_cfg).fit()
    forecast = fitted.forecast(steps=len(holdout))
    return mean_absolute_error(holdout, forecast)

In [None]:
# Read the candy-production CSV: the first column becomes the index and
# pandas is asked to parse it as dates.
csv_path = '/workspaces/benchmark_ts_model_test/datasets/candy_production.csv'
data = pd.read_csv(csv_path, index_col=0, parse_dates=True)

# Quick look at the first rows to confirm the file loaded as expected.
print(data.head())

In [18]:
# Candidate values for each ARIMA order component; the dict keys match the
# keyword parameters of grid_search_arima so it can be splatted with **.
# Full grid: p in 0..11, d in 0..3, q in 0..11.
# A much smaller grid for quick runs:
# arima_params = {'p_values': [0, 1, 2], 'd_values': [0, 1], 'q_values': [0, 1, 2]}
arima_params = {
    'p_values': [*range(12)],
    'd_values': [*range(4)],
    'q_values': [*range(12)],
}


In [None]:
# ARIMA
# Search the (p, d, q) grid on the hold-out split. The search's own best score
# is kept under a real name: binding `_` at notebook top level would shadow
# IPython's last-output variable.
best_cfg, grid_search_mae = grid_search_arima(data, **arima_params)
if best_cfg is None:
    # Every candidate failed to fit; stop here with a clear message instead of
    # passing order=None on to ARIMA inside evaluate_arima.
    raise ValueError("ARIMA grid search found no (p, d, q) order that could be fitted")

# Re-fit the selected order and compute its hold-out MAE.
arima_mae = evaluate_arima(data, best_cfg)


In [None]:
# Report the hold-out MAE first, then the selected (p, d, q) order, one per line.
print(arima_mae, best_cfg, sep="\n")


In [11]:
# Scratch check of what itertools.product yields for a 3x3x3 grid.
# Materialised into a list: a bare itertools.product object is a one-shot
# iterator, so any cell looping over it would print nothing when re-run.
res = list(itertools.product([0, 1, 2], repeat=3))

In [None]:
# Print each combination held in `res`, one tuple per line.
# NOTE: if `res` is a one-shot iterator it is consumed by this loop, and
# re-running the cell without rebuilding `res` prints nothing.
for comb in res: 
    print(comb)

In [None]:
# Scratch check: expand range(12) into a list to see the values it covers (0..11).
range_res = [*range(12)]
print(range_res)