In [100]:
import numpy as np
import pandas as pd
import optuna
from sklearn.metrics import mean_squared_error


In [101]:
df = pd.read_csv('../../train_belgrade.csv')

df['Timestamp'] = pd.to_datetime(df['Timestamp'])

In [102]:

def snaive_forecasting(train_series, seasonal_lag):
    forecast = train_series.shift(seasonal_lag)
    return forecast


In [103]:

def objective(trial):
    seasonal_lag = trial.suggest_int('seasonal_lag', 1, 288*7)

    train_size = int(len(df) * 0.9)
    train, test = df.iloc[:train_size], df.iloc[train_size:]

    forecast = snaive_forecasting(train['Detections'], seasonal_lag)[-len(test):]

    mse = mean_squared_error(test['Detections'], forecast)
    return mse

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=200)


[I 2024-02-18 21:56:33,684] A new study created in memory with name: no-name-6eaade51-c340-4220-9f72-eaafdb8e6979
[I 2024-02-18 21:56:33,689] Trial 0 finished with value: 89.67281420765028 and parameters: {'seasonal_lag': 779}. Best is trial 0 with value: 89.67281420765028.
[I 2024-02-18 21:56:33,693] Trial 1 finished with value: 81.15095628415301 and parameters: {'seasonal_lag': 1463}. Best is trial 1 with value: 81.15095628415301.
[I 2024-02-18 21:56:33,696] Trial 2 finished with value: 96.14207650273224 and parameters: {'seasonal_lag': 1642}. Best is trial 1 with value: 81.15095628415301.
[I 2024-02-18 21:56:33,700] Trial 3 finished with value: 90.89959016393442 and parameters: {'seasonal_lag': 510}. Best is trial 1 with value: 81.15095628415301.
[I 2024-02-18 21:56:33,703] Trial 4 finished with value: 83.35109289617486 and parameters: {'seasonal_lag': 1049}. Best is trial 1 with value: 81.15095628415301.
[I 2024-02-18 21:56:33,707] Trial 5 finished with value: 102.08333333333333 an

In [104]:

# Best parameters and MSE
best_lag = study.best_params['seasonal_lag']
best_mse = study.best_value

print(f"Best Seasonal Lag: {best_lag}")
print(f"Best MSE: {best_mse}")


Best Seasonal Lag: 1764
Best MSE: 71.93374316939891


In [105]:

train = df['Detections']
forecast = snaive_forecasting(train, best_lag)