In [1]:
import numpy as np
import pandas as pd
import optuna
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error


In [2]:
df = pd.read_csv('../../train_belgrade.csv')

df['Timestamp'] = pd.to_datetime(df['Timestamp'])

In [3]:
def objective(trial):
    # Define the parameter space
    p = trial.suggest_int('p', 0, 3)
    d = trial.suggest_int('d', 0, 2)
    q = trial.suggest_int('q', 0, 3)
    P = trial.suggest_int('P', 0, 2)
    D = trial.suggest_int('D', 0, 1)
    Q = trial.suggest_int('Q', 0, 2)
    s = trial.suggest_categorical('s', [288, 2016])

    train_size = int(len(df) * 0.8)
    train, test = df.iloc[:train_size], df.iloc[train_size:]

    try:
        model = SARIMAX(train['Detections'], order=(p, d, q), seasonal_order=(P, D, Q, s), enforce_stationarity=False, enforce_invertibility=False)
        model_fit = model.fit(disp=False)

        forecast = model_fit.forecast(steps=len(test))

        mse = mean_squared_error(test['Detections'], forecast)
    except Exception as e:
        print(f"Error: {e}")
        mse = np.inf

    return mse


In [4]:

# Set up Optuna
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)


[I 2024-02-18 22:13:22,522] A new study created in memory with name: no-name-8d461151-2397-4c96-ade9-179311a452ca
[I 2024-02-18 22:14:58,303] Trial 0 finished with value: 45.72819537112374 and parameters: {'p': 3, 'd': 0, 'q': 3}. Best is trial 0 with value: 45.72819537112374.
[I 2024-02-18 22:15:27,761] Trial 1 finished with value: 803.6991788531298 and parameters: {'p': 0, 'd': 2, 'q': 1}. Best is trial 0 with value: 45.72819537112374.
[I 2024-02-18 22:16:07,970] Trial 2 finished with value: 159.0571566205344 and parameters: {'p': 3, 'd': 1, 'q': 1}. Best is trial 0 with value: 45.72819537112374.
[I 2024-02-18 22:16:20,923] Trial 3 finished with value: 302.7603611714143 and parameters: {'p': 0, 'd': 1, 'q': 0}. Best is trial 0 with value: 45.72819537112374.
[I 2024-02-18 22:16:26,634] Trial 4 finished with value: 141.2332756810953 and parameters: {'p': 0, 'd': 0, 'q': 0}. Best is trial 0 with value: 45.72819537112374.
[I 2024-02-18 22:17:55,820] Trial 5 finished with value: 45.728195

In [None]:

best_params = study.best_params
best_mse = study.best_value

print(f"Best Parameters: {best_params}")
print(f"Best MSE: {best_mse}")


Best Parameters: {'p': 1, 'd': 0, 'q': 3, 's': 2016}
Best MSE: 113.71186630078755


In [None]:

best_order = (best_params['p'], best_params['d'], best_params['q'])
best_seasonal_order = (best_params['P'], best_params['D'], best_params['Q'], best_params['s'])

model = SARIMAX(df['Detections'], order=best_order, seasonal_order=best_seasonal_order, enforce_stationarity=False, enforce_invertibility=False)
model_fit = model.fit()

KeyError: 'P'