In [None]:
from neuralprophet import NeuralProphet, set_log_level
import pandas as pd
import json
import matplotlib.pyplot as plt
import optuna
import numpy as np
from sklearn.metrics import mean_squared_error
from optuna.samplers import RandomSampler
from optuna.pruners import MedianPruner
import joblib
from msse import msse

In [None]:
# Load the yearly JSON exports and combine them into one frame that is
# ordered by timestamp.

datalist = []

file_list = [
    "year_2023.json",
    "year_2024.json",
    "year_2025.json"
]

for file in file_list:
    # Only the parsing needs the file handle; close it before reshaping.
    with open("data/" + file, encoding="utf-8") as f:
        converted = pd.DataFrame(json.load(f)["data"])

    # The 2024 export differs from the other files: its "main_load" and
    # "down" columns are dropped so the frames can be concatenated
    # (presumably the other years do not carry them).
    if "2024" in file:
        converted = converted.drop(columns=["main_load", "down"])

    datalist.append(converted)

df = pd.concat(datalist)

df['date'] = pd.to_datetime(df['date'], format="%Y/%m/%d %H:%M")

# Sorting returns a new frame, so the result has to be assigned; the previous
# bare `df.sort_values('date')` call left the data unsorted. A stable sort
# keeps rows that share a timestamp in their original file order, so a later
# "keep first duplicate" step still selects the same row.
# reset_index() turns "date" back into a regular (first) column.
df = df.set_index("date").sort_index(kind="mergesort").reset_index()

In [None]:
# Reshape the measurements into the layout NeuralProphet expects:
# a timestamp column "ds" and a target column "y".
df_gpu = (
    df.rename(columns={"date": "ds", "gpu_load": "y"})
    .copy()
    .drop(columns=["cpu_load"])
)

# Readings below the threshold are treated as anomalies and blanked out,
# so they get re-estimated by the interpolation further down.
is_anomaly = df_gpu["y"] < 10.58
df_gpu.loc[is_anomaly, "y"] = pd.NA

# Keep a single row (the first one) per timestamp.
df_gpu = df_gpu.drop_duplicates(subset=["ds"], keep="first")

# Put the series on a regular 15-minute grid and fill the missing values
# (gaps in the measurements and blanked anomalies) by time-based interpolation.
df_gpu = (
    df_gpu.set_index("ds")
    .asfreq("15min")
    .assign(y=lambda frame: frame["y"].interpolate(method="time"))
    .reset_index()
)

In [None]:
# Ask NeuralProphet to log at the "ERROR" level only.
# Author's note: anyway, this doesn't work... (the call does not seem to
# silence the output as intended).

set_log_level("ERROR")

In [None]:
"""
Cut-off dates at which the dataset is split into a training
sample (everything before the cut-off) and a validation sample
(the measurements that follow it). The last cut-off is
deliberately earlier than the final measurement, so that a
test sample is left over.
"""

validation_cutts = [
    pd.to_datetime(day)
    for day in ("2025-01-25", "2025-02-01", "2025-02-08", "2025-02-15")
]

In [None]:
# Number of steps the TaskShift model predicts ahead in one go.
# On the 15-minute measurement grid: 4 steps/hour * 24 hours * 2 days = 192.

TOTAL_FORECASTS = 4 * 24 * 2

In [None]:
"""
Hyperparameters are selected as follows: 
The following happens iteratively for each model: 
the sample is divided according to the ith section. 
The first part is a training sample, the following 
TOTAL_FORECASTS are a validation sample. Then the MSSE
is measured on the validation sample and saved. This 
happens in all sections. Then all the MSSES are averaged 
and this is the final one.
"""

def testing_pararms(df, cut_dates, n_lags, learning_rate=None, epochs=None, batch_size=None, seasonality_reg=0, n_changepoints=10, trend_reg=None):
    msse_list = []
    for cutoff in cut_dates:
        df_train = df[df["ds"] < cutoff].copy()
        df_test = df[(df["ds"] >= cutoff) & (df["ds"] < cutoff + pd.Timedelta(days=2))].copy()

        model = NeuralProphet(n_lags=n_lags,
                            n_forecasts=TOTAL_FORECASTS,
                            epochs=epochs,
                            learning_rate=learning_rate,
                            batch_size=batch_size,
                            seasonality_reg=seasonality_reg,
                            n_changepoints=n_changepoints,
                            trend_reg=trend_reg)

        model = model.add_country_holidays("RU")

        _ = model.fit(df_train)

        df_future = model.make_future_dataframe(df_train, periods=TOTAL_FORECASTS)
        forecast = model.predict(df_future)

        forecasts_list = model.get_latest_forecast(forecast)["origin-0"].astype(float)

        if len(forecasts_list) != df_test.shape[0]:
            print("Не сходятся количество значений")

        msse_list.append(msse(forecasts_list, df_test["y"]))

    return np.mean(msse_list)

In [None]:
def objective(trial):
    """Optuna objective: sample one hyperparameter set and score it.

    Every hyperparameter is drawn from ``trial`` under its own unique name
    and forwarded to ``testing_pararms`` together with ``df_gpu`` and
    ``validation_cutts``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyperparameters.

    Returns
    -------
    float
        The averaged validation MSSE returned by ``testing_pararms``
        (the study minimizes this value).
    """
    params = {
        "n_lags": trial.suggest_int("n_lags", 148, 192*3),
        # NOTE(review): sampled uniformly over 1e-4..10; consider log=True
        # if small learning rates should be explored more densely.
        "learning_rate": trial.suggest_float("learning_rate", 1e-4, 10),
        "epochs": trial.suggest_int("epochs", 5, 500),
        "batch_size": trial.suggest_int("batch_size", 8, 1024),
        "seasonality_reg": trial.suggest_float("seasonality_reg", 0.1, 10),
        "n_changepoints": trial.suggest_int("n_changepoints", 1, 15),
        # This used to be registered as "seasonality_reg" a second time.
        # Optuna returns the already-sampled value for a repeated name, so
        # trend_reg was never searched over its own 0.01..50 range.
        "trend_reg": trial.suggest_float("trend_reg", 0.01, 50),
    }

    # The result is returned directly; the former local variable `msse`
    # shadowed the imported `msse` function inside this scope.
    return testing_pararms(df_gpu, validation_cutts, **params)

In [None]:
# Create an Optuna study that minimizes the value returned by `objective`
# and run 60 trials of the hyperparameter search.
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=60)

# Persist the finished study object to "study.pkl" with joblib.
joblib.dump(study, "study.pkl")