In [1]:
import os
import sys

import torch
import pandas as pd
import dotenv
import mlflow
from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame
import plotly.graph_objects as go
from huggingface_hub import login

sys.path.append("..")

from utils import calculate_metrics, TrainingConfig

dotenv.load_dotenv("../../.env")

token = os.environ["HF_TOKEN"]
login(token=token)

mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.set_experiment("Time_Series_Forecasting");

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [2]:
data_dir = '../../data/panama-electricity-load-forecasting/processed/'

train_df = pd.read_parquet(os.path.join(data_dir, 'train.parquet'))
test_df = pd.read_parquet(os.path.join(data_dir, 'test.parquet'))

test_len = test_df['datetime'].max() - test_df['datetime'].min()
# val_df = train_df[train_df['datetime'] >= train_df['datetime'].max() - test_len]
# train_df = train_df[train_df['datetime'] < train_df['datetime'].max() - test_len]

In [3]:
past_covariates_names = [
    "T2M_toc",
    "QV2M_toc",
    "TQL_toc",
    "W2M_toc",
    "T2M_san",
    "QV2M_san",
    "TQL_san",
    "W2M_san",
    "T2M_dav",
    "QV2M_dav",
    "TQL_dav",
    "W2M_dav",
]

known_covariates_names = [
    "holiday",
    "school",
]

train_df = (
    train_df[["datetime", "nat_demand"] + past_covariates_names + known_covariates_names]
    .rename(columns={"datetime": "timestamp", "nat_demand": "target"})
    .copy()
)
train_df["item_id"] = 0

# val_df = val_df[['datetime', 'nat_demand'] + known_covariates_names].rename(columns={'datetime': 'timestamp', 'nat_demand': 'target'}).copy()
# val_df['item_id'] = 0

test_df = (
    test_df[["datetime", "nat_demand"] + past_covariates_names + known_covariates_names]
    .rename(columns={"datetime": "timestamp", "nat_demand": "target"})
    .copy()
)
test_df["item_id"] = 0

train_data = TimeSeriesDataFrame.from_data_frame(train_df, id_column='item_id', timestamp_column='timestamp')
# val_data = TimeSeriesDataFrame.from_data_frame(val_df, id_column='item_id', timestamp_column='timestamp')
test_data = TimeSeriesDataFrame.from_data_frame(test_df, id_column='item_id', timestamp_column='timestamp')

In [9]:
config = TrainingConfig(
    prediction_length=24 * 3,  # 3 дня
    artifact_path="../../models/auto_ml_all_data",
)

In [None]:
predictor = TimeSeriesPredictor(
    prediction_length=config.prediction_length,
    path=config.artifact_path,
    known_covariates_names=known_covariates_names,
).fit(
    train_data=train_data,
    verbosity=4,
    hyperparameters={
        "DirectTabular": {},
        "RecursiveTabular": {},
        "TemporalFusionTransformer": {},
        "Chronos": [
            {"model_path": "bolt_small", "ag_args": {"name_suffix": "ZeroShot"}},
            {"model_path": "bolt_small", "fine_tune": True, "ag_args": {"name_suffix": "FineTuned"}},
        ]
    },
    enable_ensemble=False,
    #presets="high_quality",
)

Beginning AutoGluon training...
AutoGluon will save models to '/home/nikita/projects/time_series_analysis/models/auto_ml_all_data'
AutoGluon Version:  1.2
Python Version:     3.12.7
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #59~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Wed Mar 19 17:07:41 UTC 2
CPU Count:          12
GPU Count:          1
Memory Avail:       19.40 GB / 30.95 GB (62.7%)
Disk Space Avail:   161.14 GB / 233.67 GB (69.0%)

Fitting with arguments:
{'enable_ensemble': False,
 'eval_metric': WQL,
 'hyperparameters': {'Chronos': [{'ag_args': {'name_suffix': 'ZeroShot'},
                                  'model_path': 'bolt_small'},
                                 {'ag_args': {'name_suffix': 'FineTuned'},
                                  'fine_tune': True,
                                  'model_path': 'bolt_small'}],
                     'DirectTabular': {},
                     'RecursiveTabular': {},
                     'TemporalFusionTransformer'

[1000]	valid_set's l1: 0.151291


	-0.1512	 = Validation score   (-mean_absolute_error)
	1.97s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	Ensemble Weights: {'LightGBM': 1.0}
	-0.1512	 = Validation score   (-mean_absolute_error)
	0.0s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 2.12s ... Best model: WeightedEnsemble_L2 | Estimated inference throughput: 51888.3 rows/s (72 batch size)
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("/home/nikita/projects/time_series_analysis/models/auto_ml_all_data/models/RecursiveTabular/W0/tabular_predictor")
Shortening all series to at most 1000096
		-0.0356      = Validation score (-WQL)
		2.245   s    = Training runtime
		0.682   s    = Prediction runtime
	-0.0356       = Validation score (-WQL)
	2.26    s     = Training runtime
	0.68    s     = Validation (prediction) runtime
Training timeseries model DirectTabular. 
	Window 0
Shortening all series to at most 100

In [10]:
predictor = TimeSeriesPredictor.load(config.artifact_path)

In [11]:
leaderboard = predictor.leaderboard(
    test_data,
    extra_metrics=['MASE', 'MAPE', 'MSE', 'MAE', 'SQL'],
)
leaderboard.rename(columns={'score_test': 'WQL_test', 'score_val': 'WQL_val'}, inplace=True)
leaderboard

Unnamed: 0,model,WQL_test,WQL_val,pred_time_test,pred_time_val,fit_time_marginal,fit_order,MASE,MAPE,MSE,MAE,SQL
0,ChronosFineTuned[bolt_small],-0.014154,-0.049029,0.113091,0.014652,130.81596,4,-0.218616,-0.016884,-646.796497,-20.635853,-0.190477
1,ChronosZeroShot[bolt_small],-0.015607,-0.045981,1.356877,1.524213,0.823531,3,-0.236617,-0.018132,-780.519475,-22.335009,-0.210039
2,RecursiveTabular,-0.032938,-0.035576,0.602367,0.681967,2.257937,1,-0.403684,-0.031579,-1830.629494,-38.104904,-0.443262
3,DirectTabular,-0.04074,-0.02218,0.113662,0.2191,24.962076,2,-0.695335,-0.054586,-4968.161087,-65.634775,-0.548263


In [11]:
from utils.mlflow_logging import log_leaderboard_to_mlflow

log_leaderboard_to_mlflow(leaderboard, 'AllData')

The git executable must be specified in one of the following ways:
    - be included in your $PATH
    - be set via $GIT_PYTHON_GIT_EXECUTABLE
    - explicitly set via git.refresh(<full-path-to-git-executable>)

All git commands will error until this is rectified.

This initial message can be silenced or aggravated in the future by setting the
$GIT_PYTHON_REFRESH environment variable. Use one of the following values:
    - quiet|q|silence|s|silent|none|n|0: for no message or exception
    - error|e|exception|raise|r|2: for a raised exception

Example:
    export GIT_PYTHON_REFRESH=quiet



🏃 View run ChronosFineTuned[bolt_small]_AllData at: http://127.0.0.1:5000/#/experiments/185045746886025740/runs/505308ffa2024e3f89acdb29d8c41454
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/185045746886025740
🏃 View run ChronosZeroShot[bolt_small]_AllData at: http://127.0.0.1:5000/#/experiments/185045746886025740/runs/7468135871e543de9c01dc4556f093b7
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/185045746886025740
🏃 View run RecursiveTabular_AllData at: http://127.0.0.1:5000/#/experiments/185045746886025740/runs/e4f1d0ab06ed4b419e8007babe6e14d2
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/185045746886025740
🏃 View run DirectTabular_AllData at: http://127.0.0.1:5000/#/experiments/185045746886025740/runs/be7abd0fdb59477693dbd2ec72c0d071
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/185045746886025740


In [12]:
k = 12

top_k_models = leaderboard.sort_values(['SQL'], ascending=False).head(k)['model'].tolist()
window_size = config.prediction_length
test_length = len(test_df)
max_iterations = (test_length + window_size - 1) // window_size - 1 # ещё -1 из-за known_covariates

current_data = train_data.copy()
all_models_predictions = {}

for i in range(max_iterations):
    for model_name in top_k_models:
        if model_name not in all_models_predictions:
            all_models_predictions[model_name] = []
            
        start_idx = i * window_size
        end_idx = start_idx + window_size
        
        future_covariates = test_data[start_idx:start_idx + config.prediction_length][known_covariates_names]
        prediction_covariates = pd.concat([current_data[known_covariates_names], future_covariates])
        
        predictions = predictor.predict(current_data, 
                                       model=model_name, 
                                       known_covariates=prediction_covariates)
                                       
        all_models_predictions[model_name].append(predictions)
        
    current_data = pd.concat([current_data, test_data[start_idx:end_idx]])

test_df_shape = test_df.shape[0]
all_models_predictions = {k: pd.concat(v)[:test_df_shape] for k, v in all_models_predictions.items()}

In [None]:
import ipywidgets as widgets
from IPython.display import display, clear_output
import datetime
from utils.plotting import plot_forecasts

min_date = test_df["timestamp"].min().date()
max_date = test_df["timestamp"].max().date()

start_date_picker = widgets.DatePicker(
    description='Start date:',
    disabled=False,
    value=min_date
)

end_date_picker = widgets.DatePicker(
    description='End date:',
    disabled=False,
    value=max_date
)

output_area = widgets.Output()

def on_button_clicked(b):
    with output_area:
        clear_output(wait=True)
        start_date = datetime.datetime.combine(start_date_picker.value, datetime.datetime.min.time())
        end_date = datetime.datetime.combine(end_date_picker.value, datetime.datetime.min.time())
        plot_forecasts(df=test_df[:config.prediction_length * max_iterations],models_predictions=all_models_predictions, start_date=start_date, end_date=end_date)

plot_button = widgets.Button(description="Plot Forecasts")
plot_button.on_click(on_button_clicked)

controls = widgets.VBox([
    widgets.HBox([start_date_picker, end_date_picker]),
    plot_button
])

display(controls, output_area)

with output_area:
    plot_forecasts(df=test_df[:config.prediction_length * max_iterations],models_predictions=all_models_predictions)

VBox(children=(HBox(children=(DatePicker(value=datetime.date(2020, 1, 1), description='Start date:'), DatePick…

Output()

In [None]:
y_true = test_df['target']
y_pred = all_models_predictions['ChronosFineTuned[bolt_small]']['mean'].values

sk_m.mean_squared_error(y_true=y_true[:y_pred.shape[0]], y_pred=y_pred)

5488.754710196724

In [31]:
test_df['timestamp'].max()

Timestamp('2020-01-31 23:00:00')

In [16]:
date_mask = (test_df['timestamp'] >= '2020-01-10') & (test_df['timestamp'] < '2020-01-17')

test_df[date_mask]

Unnamed: 0,timestamp,target,T2M_toc,QV2M_toc,TQL_toc,W2M_toc,T2M_san,QV2M_san,TQL_san,W2M_san,T2M_dav,QV2M_dav,TQL_dav,W2M_dav,holiday,school,item_id
216,2020-01-10 00:00:00,1131.5281,24.929865,0.017285,0.000013,21.301868,23.750177,0.016995,0.003576,10.696918,22.984552,0.016171,0.075134,6.629415,0,0,0
217,2020-01-10 01:00:00,1093.7796,24.922937,0.017270,0.001275,21.954500,23.501062,0.016682,0.004702,11.579647,22.813562,0.015920,0.072235,6.986607,0,0,0
218,2020-01-10 02:00:00,1061.8127,24.971002,0.017233,0.001014,22.545584,23.197565,0.016317,0.002250,12.126106,22.596002,0.015642,0.065491,7.229447,0,0,0
219,2020-01-10 03:00:00,1037.6880,25.026514,0.017226,0.001494,23.026767,22.940576,0.015960,0.002110,12.278517,22.370264,0.015418,0.055237,7.265826,0,0,0
220,2020-01-10 04:00:00,1038.8560,25.145166,0.017373,0.005375,23.509849,22.692041,0.015527,0.002665,12.065393,22.152979,0.015206,0.048904,7.164663,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
379,2020-01-16 19:00:00,1424.0133,26.575769,0.019315,0.007227,21.104333,25.677332,0.018018,0.015354,9.955149,24.091394,0.017201,0.089203,5.120333,0,0,0
380,2020-01-16 20:00:00,1394.5532,26.307031,0.019453,0.011784,21.419926,25.267969,0.017950,0.017555,10.111186,23.572656,0.017172,0.094604,5.211394,0,0,0
381,2020-01-16 21:00:00,1361.0084,26.213312,0.019370,0.015614,22.201817,25.025812,0.017951,0.017937,9.926674,23.369562,0.017188,0.092407,5.004218,0,0,0
382,2020-01-16 22:00:00,1311.0800,26.126337,0.019334,0.015160,22.534374,24.923212,0.018021,0.011055,10.030370,23.282587,0.017220,0.094116,5.005732,0,0,0


In [61]:
import numpy as np
import pandas as pd
import sklearn.metrics as sk_m
from typing import Dict, List, Optional, Union

def calculate_sklearn_metrics(df: pd.DataFrame,
                            target_column: str = 'target',
                           forecast_cols: List[str] = ['0.1', '0.5', '0.9'],
                           naive_forecast_col: Optional[str] = None,
                           metrics: List[str] = ['MASE', 'MAPE', 'MSE', 'MAE', 'SQL']) -> Dict[str, float]:
    for col in forecast_cols:
        if col not in df.columns:
            raise ValueError(f"Столбец с прогнозом '{col}' не найден в датафрейме")
    
    if 'MASE' in metrics and naive_forecast_col is None:
        df['naive_forecast'] = df['0.5'].shift(1)
        naive_forecast_col = 'naive_forecast'
    
    df = df.dropna(subset=['0.5'] + ([naive_forecast_col] if naive_forecast_col else []))
    
    if len(df) == 0:
        raise ValueError("После удаления NaN значений датафрейм пуст")
    
    results = {}
    
    y_true = df[target_column].values
    y_pred = df['0.5'].values
    
    if 'MSE' in metrics:
        results['MSE'] = sk_m.mean_squared_error(y_true, y_pred)
    
    if 'MAE' in metrics:
        results['MAE'] = sk_m.mean_absolute_error(y_true, y_pred)
    
    if 'MAPE' in metrics:
        mask = y_true != 0
        if not np.any(mask):
            results['MAPE'] = np.nan
        else:
            mape = np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100
            results['MAPE'] = mape
    
    if 'MASE' in metrics:
        if naive_forecast_col not in df.columns:
            results['MASE'] = np.nan
        else:
            naive_errors = np.abs(df['0.5'].values[1:] - df[naive_forecast_col].values[1:])
            denominator = np.mean(naive_errors)
            
            if denominator == 0:
                results['MASE'] = np.nan
            else:
                numerator = sk_m.mean_absolute_error(y_true, y_pred)
                results['MASE'] = numerator / denominator
    
    if 'SQL' in metrics:
        sql_losses = []
        for forecast_col in forecast_cols:
            y_pred = df[forecast_col].values
            
            try:
                quantile = float(forecast_col)
            except ValueError:
                continue
            
            errors = y_true - y_pred
            sql_loss = np.mean(np.maximum(quantile * errors, (quantile - 1) * errors))
            sql_losses.append(sql_loss)
        
        if sql_losses:
            results['SQL'] = sum(sql_losses) / len(sql_losses)
        else:
            results['SQL'] = np.nan
    
    return results

In [69]:
prefix = 'AllData'

for k, v in all_models_predictions.items():
    run_name = f"{k}_{prefix}"

    pred_df = pd.DataFrame(v).reset_index(drop=True)
    pred_df['target'] = test_df[:config.prediction_length * max_iterations]['target'].values
    _metrics = calculate_sklearn_metrics(pred_df)

    with mlflow.start_run(run_name=run_name):
        mlflow.log_metrics(_metrics)
        mlflow.log_param("model_name", model_name)

        mlflow.set_tag("prefix", prefix)

🏃 View run ChronosFineTuned[bolt_small]_AllData at: http://127.0.0.1:5000/#/experiments/185045746886025740/runs/4af835f2cf374245bb02cf5f41defa06
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/185045746886025740
🏃 View run ChronosZeroShot[bolt_small]_AllData at: http://127.0.0.1:5000/#/experiments/185045746886025740/runs/f72def3e767749159d564f28a3c92c22
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/185045746886025740
🏃 View run RecursiveTabular_AllData at: http://127.0.0.1:5000/#/experiments/185045746886025740/runs/0e58760991fc4b0b8d3b74b62e2448d0
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/185045746886025740
🏃 View run DirectTabular_AllData at: http://127.0.0.1:5000/#/experiments/185045746886025740/runs/e48faa94445f4b7588d7ac9ed629c72b
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/185045746886025740


In [62]:
pred_df = pd.DataFrame(all_models_predictions['ChronosFineTuned[bolt_small]'].reset_index(drop=True))
pred_df['target'] = test_df[:config.prediction_length * max_iterations]['target'].values

calculate_sklearn_metrics(pred_df)

{'MSE': 5496.380038097112,
 'MAE': 53.8071345690901,
 'MAPE': 4.216594596034557,
 'MASE': 1.2343038856109387,
 'SQL': 17.909953705210327}

In [60]:
pd.DataFrame(all_models_predictions['ChronosFineTuned[bolt_small]'].reset_index(drop=True)).shape

(720, 10)

In [58]:
y_true = test_df[:config.prediction_length * max_iterations]['target'].values
calculate_sklearn_metrics(pd.DataFrame(all_models_predictions['ChronosFineTuned[bolt_small]'].reset_index(drop=True)), y_true=y_true)

ValueError: Found input variables with inconsistent numbers of samples: [720, 719]