In [1]:
import pandas as pd
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.metrics import mean_squared_error, mean_absolute_error
from statsmodels.tools.eval_measures import rmse
import optuna

In [2]:
df = pd.read_csv('../../train_belgrade.csv')

df['Timestamp'] = pd.to_datetime(df['Timestamp'])

In [3]:
train_size = int(len(df) * 0.9)
train, test = df[0:train_size], df[train_size:]

In [4]:
target_column = 'Detections'

print(train.columns)


Index(['Unnamed: 0', 'Timestamp', 'Detections', 'index', 'WeatherMain',
       'Temperature', 'WeatherDescription'],
      dtype='object')


In [5]:
def objective(trial):
    params = {
        'trend': trial.suggest_categorical('trend', [None, 'add']),
        'seasonal': trial.suggest_categorical('seasonal', [None, 'add']),
        'seasonal_periods': trial.suggest_categorical('seasonal_periods', [120, 288, 288*7])
    }

    model = ExponentialSmoothing(train[target_column], **params)
    fitted_model = model.fit(optimized=True)

    predictions = fitted_model.forecast(len(test))

    mse = mean_squared_error(test[target_column], predictions)

    return mse


In [6]:
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=10)


[I 2024-03-06 23:24:58,440] A new study created in memory with name: no-name-055f3661-8856-4628-b1e8-15c777342253
[I 2024-03-06 23:24:58,825] Trial 0 finished with value: 154.23292090633598 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': 2016}. Best is trial 0 with value: 154.23292090633598.
[I 2024-03-06 23:24:59,114] Trial 1 finished with value: 154.23292090633598 and parameters: {'trend': 'add', 'seasonal': None, 'seasonal_periods': 288}. Best is trial 0 with value: 154.23292090633598.
[I 2024-03-06 23:24:59,324] Trial 2 finished with value: 138.35301726821334 and parameters: {'trend': None, 'seasonal': 'add', 'seasonal_periods': 120}. Best is trial 2 with value: 138.35301726821334.
[I 2024-03-06 23:25:01,448] Trial 3 finished with value: 97.10898159081512 and parameters: {'trend': 'add', 'seasonal': 'add', 'seasonal_periods': 288}. Best is trial 3 with value: 97.10898159081512.
[I 2024-03-06 23:25:01,750] Trial 4 finished with value: 154.23292090633598 and pa

In [7]:
best_params = study.best_params
best_model = ExponentialSmoothing(train[target_column], trend=best_params['trend'], seasonal=best_params['seasonal'], seasonal_periods=best_params['seasonal_periods']).fit()
best_predictions = best_model.forecast(len(test))

best_mse = mean_squared_error(test[target_column], best_predictions)

print(f"Best MSE: {best_mse}")


Best MSE: 73.144880918099


In [8]:
# TODO: grafik, stacionarnost, analiza gresaka