In [1]:
import os
import sys

import torch
import pandas as pd
import dotenv
import mlflow
from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame
import plotly.graph_objects as go
from huggingface_hub import login

sys.path.append("..")

from utils import calculate_metrics, TrainingConfig

# Load environment variables from the .env file two directories above this notebook.
dotenv.load_dotenv("../../.env")

# Indexing os.environ raises KeyError if HF_TOKEN is not defined.
token = os.environ["HF_TOKEN"]
# NOTE(review): the cell output reports that the HF_TOKEN environment variable is
# already the active Hugging Face token, so this explicit login looks redundant — confirm.
login(token=token)

# Point MLflow at the tracking server on localhost:5000 and select the experiment.
mlflow.set_tracking_uri("http://127.0.0.1:5000")
# The trailing semicolon keeps the returned object from being echoed as cell output.
mlflow.set_experiment("Time_Series_Forecasting");

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [2]:
# Directory that holds the processed train/test parquet splits.
data_dir = '../../data/panama-electricity-load-forecasting/processed/'

# Read both splits with the same join + read_parquet call.
train_df, test_df = (
    pd.read_parquet(os.path.join(data_dir, file_name))
    for file_name in ('train.parquet', 'test.parquet')
)

# Time span covered by the test split; only referenced by the disabled
# validation split kept below.
test_timestamps = test_df['datetime']
test_len = test_timestamps.max() - test_timestamps.min()
# val_df = train_df[train_df['datetime'] >= train_df['datetime'].max() - test_len]
# train_df = train_df[train_df['datetime'] < train_df['datetime'].max() - test_len]

In [3]:
# Source column name -> column name used for the AutoGluon long format.
COLUMN_RENAMES = {'datetime': 'timestamp', 'nat_demand': 'target'}


def to_long_format(df, item_id=0):
    """Return a long-format frame with `timestamp`, `target` and `item_id` columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Either a raw split containing `datetime` and `nat_demand`, or a frame
        that has already been converted by this function.
    item_id : int, default 0
        Identifier written to every row; all rows belong to one series.

    Returns
    -------
    pandas.DataFrame
        A new frame; `df` itself is not modified.

    Raises
    ------
    KeyError
        If `df` contains neither the raw nor the converted column pair.
    """
    source_cols = list(COLUMN_RENAMES)
    if set(source_cols).issubset(df.columns):
        selected = df[source_cols].rename(columns=COLUMN_RENAMES)
    else:
        # Re-running the cell: the columns were renamed by a previous run, so
        # select them by their new names instead of failing on the old ones.
        selected = df[list(COLUMN_RENAMES.values())]
    return selected.assign(item_id=item_id)


# The names train_df / test_df are reused because later cells read their
# `timestamp` and `target` columns; the helper makes that reuse safe to re-run.
train_df = to_long_format(train_df)
test_df = to_long_format(test_df)

# No separate validation frame is built here (the val_df split is disabled
# where the data is loaded).
train_data = TimeSeriesDataFrame.from_data_frame(train_df)
test_data = TimeSeriesDataFrame.from_data_frame(test_df)

In [None]:
config = TrainingConfig(
    prediction_length=24 * 3,  # 3 days
    artifact_path="../../models/auto_ml_single_target",
)

# Build the predictor first, then fit it. The result of fit() is assigned back
# so the name is bound exactly as in a chained call and no repr is displayed.
predictor = TimeSeriesPredictor(
    prediction_length=config.prediction_length,
    path=config.artifact_path,
)
predictor = predictor.fit(
    train_data=train_data,
    verbosity=4,
    presets="high_quality",
    enable_ensemble=False,
)

Beginning AutoGluon training...
AutoGluon will save models to '/home/nikita/projects/time_series_analysis/models/auto_ml_single_target'
AutoGluon Version:  1.2
Python Version:     3.12.7
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #59~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Wed Mar 19 17:07:41 UTC 2
CPU Count:          12
GPU Count:          1
Memory Avail:       23.30 GB / 30.95 GB (75.3%)
Disk Space Avail:   161.62 GB / 233.67 GB (69.2%)
Setting presets to: high_quality

Fitting with arguments:
{'enable_ensemble': False,
 'eval_metric': WQL,
 'hyperparameters': 'default',
 'known_covariates_names': [],
 'num_val_windows': 1,
 'prediction_length': 72,
 'quantile_levels': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
 'random_seed': 123,
 'refit_every_n_windows': 1,
 'refit_full': False,
 'skip_model_selection': False,
 'target': 'target',
 'verbosity': 4}

Inferred time series frequency: 'h'
Provided train_data has 43775 rows, 1 time series. Median time series

[1000]	valid_set's l1: 0.159531
[2000]	valid_set's l1: 0.152268


	-0.152	 = Validation score   (-mean_absolute_error)
	3.08s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ...
	Ensemble Weights: {'LightGBM': 1.0}
	-0.152	 = Validation score   (-mean_absolute_error)
	0.0s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 3.24s ... Best model: WeightedEnsemble_L2 | Estimated inference throughput: 37177.1 rows/s (72 batch size)
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("/home/nikita/projects/time_series_analysis/models/auto_ml_single_target/models/RecursiveTabular/W0/tabular_predictor")
Shortening all series to at most 1000096
		-0.0344      = Validation score (-WQL)
		3.380   s    = Training runtime
		0.864   s    = Prediction runtime
	-0.0344       = Validation score (-WQL)
	3.39    s     = Training runtime
	0.86    s     = Validation (prediction) runtime
Training timeseries model DirectTabular. 
	Window 0
Shortening all series to at most 

In [139]:
# Score every trained model on the test data with additional metrics, then
# label the default score columns as WQL (the eval metric shown in the fit log).
leaderboard = predictor.leaderboard(
    test_data,
    extra_metrics=['MASE', 'MAPE', 'MSE', 'MAE', 'SQL'],
).rename(columns={'score_test': 'WQL_test', 'score_val': 'WQL_val'})

Generating leaderboard for all models trained
Additional data provided, testing on additional data. Resulting leaderboard will be sorted according to test score (`score_test`).
Loaded cached predictions for models ['DirectTabular', 'DynamicOptimizedTheta', 'AutoETS', 'PatchTST', 'NPTS', 'ChronosZeroShot[bolt_base]', 'TemporalFusionTransformer', 'DeepAR', 'TiDE', 'RecursiveTabular', 'SeasonalNaive', 'ChronosFineTuned[bolt_small]']
Prediction order: ['DirectTabular', 'DynamicOptimizedTheta', 'AutoETS', 'PatchTST', 'NPTS', 'ChronosZeroShot[bolt_base]', 'TemporalFusionTransformer', 'DeepAR', 'TiDE', 'RecursiveTabular', 'SeasonalNaive', 'ChronosFineTuned[bolt_small]']
Extending existing cached predictions
Cached predictions saved to /home/nikita/projects/time_series_analysis/models/auto_ml_single_target/models/cached_predictions.pkl


In [None]:
from utils.mlflow_logging import log_leaderboard_to_mlflow

# Send the leaderboard to MLflow. Judging by the cell output, this creates one
# run per model named "<model>_SingleSeries"; the helper's internals are not
# visible from this notebook.
log_leaderboard_to_mlflow(leaderboard, 'SingleSeries')

🏃 View run ChronosFineTuned[bolt_small]_SingleSeries at: http://127.0.0.1:5000/#/experiments/185045746886025740/runs/0d275f8aff4e4811aaee3690310a67ca
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/185045746886025740
🏃 View run ChronosZeroShot[bolt_base]_SingleSeries at: http://127.0.0.1:5000/#/experiments/185045746886025740/runs/5cc888089c7b446b9237f5e9044b670d
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/185045746886025740
🏃 View run PatchTST_SingleSeries at: http://127.0.0.1:5000/#/experiments/185045746886025740/runs/7d77d2af91cc457199d9b022f0744c06
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/185045746886025740
🏃 View run NPTS_SingleSeries at: http://127.0.0.1:5000/#/experiments/185045746886025740/runs/63edaaec21ff479f8db0b8e1cac6cd72
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/185045746886025740
🏃 View run TiDE_SingleSeries at: http://127.0.0.1:5000/#/experiments/185045746886025740/runs/08517c75512345249b6dda819bf6a6a4
🧪 View exper

In [120]:
k = 8

# Take the first k models after sorting the leaderboard by SQL, descending.
# NOTE(review): this assumes leaderboard metrics are in higher-is-better form,
# so descending order puts the best models first — confirm against AutoGluon docs.
top_k_models = (
    leaderboard
    .sort_values(['SQL'], ascending=False)
    .head(k)['model']
    .tolist()
)

# One forecast per selected model, conditioned on the training data only.
all_models_predictions = {
    model_name: predictor.predict(train_data, model=model_name)
    for model_name in top_k_models
}

Loaded cached predictions for models ['ChronosFineTuned[bolt_small]', 'ChronosZeroShot[bolt_base]', 'PatchTST', 'NPTS', 'TiDE', 'TemporalFusionTransformer', 'DeepAR', 'RecursiveTabular', 'AutoETS', 'DynamicOptimizedTheta', 'SeasonalNaive', 'DirectTabular']
Prediction order: {'ChronosFineTuned[bolt_small]'}
Extending existing cached predictions
Cached predictions saved to /home/nikita/projects/time_series_analysis/models/auto_ml_single_target/models/cached_predictions.pkl
Loaded cached predictions for models ['ChronosFineTuned[bolt_small]', 'ChronosZeroShot[bolt_base]', 'PatchTST', 'NPTS', 'TiDE', 'TemporalFusionTransformer', 'DeepAR', 'RecursiveTabular', 'AutoETS', 'DynamicOptimizedTheta', 'SeasonalNaive', 'DirectTabular']
Prediction order: {'ChronosZeroShot[bolt_base]'}
Extending existing cached predictions
Cached predictions saved to /home/nikita/projects/time_series_analysis/models/auto_ml_single_target/models/cached_predictions.pkl
Loaded cached predictions for models ['ChronosFine

In [121]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

# Number of trailing training observations drawn as context in every panel.
train_size = 500
# NOTE(review): DataFrame.keys() returns the column labels, so this holds the
# first column name rather than a series id. It is not used in this cell.
sample_series_id = list(train_df.keys())[0]

model_names = list(all_models_predictions.keys())
n_models = len(model_names)

# Two panels per row; the last row is half empty when n_models is odd.
cols = 2
rows = int(np.ceil(n_models / cols))

fig = make_subplots(rows=rows, cols=cols, subplot_titles=model_names, vertical_spacing=0.1)

# The context and ground-truth series are identical in every panel.
train_tail = train_df.iloc[-train_size:]

for i, model_name in enumerate(model_names):
    # Row-major placement, converted to plotly's 1-based grid coordinates.
    row_idx, col_idx = divmod(i, cols)
    row, col = row_idx + 1, col_idx + 1

    model_pred = all_models_predictions[model_name]
    # Plot the forecast against its own timestamps. The forecast can be shorter
    # than the test split, so reusing test_df["timestamp"] would depend on
    # positional alignment and on plotly truncating mismatched arrays.
    forecast_times = model_pred.index.get_level_values("timestamp")

    fig.add_trace(
        go.Scatter(
            x=train_tail["timestamp"],
            y=train_tail["target"],
            name="Train",
            line=dict(color="#6495ED"),
        ),
        row=row, col=col
    )

    fig.add_trace(
        go.Scatter(
            x=test_df["timestamp"],
            y=test_df["target"],
            name="Test",
            line=dict(color="#50C878"),
        ),
        row=row, col=col
    )

    fig.add_trace(
        go.Scatter(
            x=forecast_times,
            y=model_pred["mean"],
            name=f"{model_name} (mean)",
            line=dict(color="#D70040", dash="dot"),
        ),
        row=row, col=col
    )

    # Shaded 0.1-0.9 quantile band: draw the upper bound as an invisible line,
    # then fill from the lower bound up to it with fill='tonexty'.
    fig.add_trace(
        go.Scatter(
            x=forecast_times,
            y=model_pred["0.9"],
            mode='lines',
            line=dict(width=0),
            showlegend=False
        ),
        row=row, col=col
    )

    fig.add_trace(
        go.Scatter(
            x=forecast_times,
            y=model_pred["0.1"],
            mode='lines',
            fill='tonexty',
            fillcolor='rgba(255, 127, 14, 0.6)',
            line=dict(width=0),
            name=f"{model_name} CI (0.1-0.9)"
        ),
        row=row, col=col
    )

fig.update_layout(
    title="Forecasts for Different Models",
    template="plotly_white",
    height=300*rows,
    width=1400,
    showlegend=False
)

# Without row/col these calls apply to every subplot axis. The previous
# per-cell loop computed the row as i // cols + 1, which skipped panel (1, 2)
# and addressed a row past the end of the grid.
fig.update_xaxes(title_text="Date")
fig.update_yaxes(title_text="National Demand")

fig.show()

In [130]:
k = 12

top_k_models = (
    leaderboard
    .sort_values(['SQL'], ascending=False)
    .head(k)['model']
    .tolist()
)
window_size = config.prediction_length
test_length = len(test_df)
# Ceil division: number of forecast windows needed to cover the test split.
max_iterations = -(-test_length // window_size)

# Rolling-origin forecast: predict one window per model, then append the
# matching slice of test rows to the history before predicting the next one.
current_data = train_data.copy()
all_models_predictions = {}

for i in range(max_iterations):
    for model_name in top_k_models:
        model_windows = all_models_predictions.setdefault(model_name, [])
        predictions = predictor.predict(current_data, model=model_name)
        model_windows.append(predictions)

    start_idx = i * window_size
    end_idx = start_idx + window_size
    current_data = pd.concat([current_data, test_data[start_idx:end_idx]])

# Stitch each model's windows together and cut the result to the test length,
# since the last window may extend past the end of the test split.
test_df_shape = test_df.shape[0]
all_models_predictions = {
    name: pd.concat(windows)[:test_df_shape]
    for name, windows in all_models_predictions.items()
}

Loaded cached predictions for models ['ChronosFineTuned[bolt_small]', 'ChronosZeroShot[bolt_base]', 'PatchTST', 'NPTS', 'TiDE', 'TemporalFusionTransformer', 'DeepAR', 'RecursiveTabular', 'AutoETS', 'DynamicOptimizedTheta', 'SeasonalNaive', 'DirectTabular']
Prediction order: {'ChronosFineTuned[bolt_small]'}
Extending existing cached predictions
Cached predictions saved to /home/nikita/projects/time_series_analysis/models/auto_ml_single_target/models/cached_predictions.pkl
Loaded cached predictions for models ['ChronosFineTuned[bolt_small]', 'ChronosZeroShot[bolt_base]', 'PatchTST', 'NPTS', 'TiDE', 'TemporalFusionTransformer', 'DeepAR', 'RecursiveTabular', 'AutoETS', 'DynamicOptimizedTheta', 'SeasonalNaive', 'DirectTabular']
Prediction order: {'ChronosZeroShot[bolt_base]'}
Extending existing cached predictions
Cached predictions saved to /home/nikita/projects/time_series_analysis/models/auto_ml_single_target/models/cached_predictions.pkl
Loaded cached predictions for models ['ChronosFine

In [135]:
from utils.plotting import plot_forecasts

# Plot the model forecasts against the test data for a fixed date range.
plot_forecasts(
    df=test_df,
    models_predictions=all_models_predictions,
    start_date='2020-01-04',
    end_date='2020-01-08',
)

In [None]:
import ipywidgets as widgets
from IPython.display import display, clear_output
import datetime

# First and last calendar dates in the test split; used as the pickers' initial values.
min_date = test_df["timestamp"].min().date()
max_date = test_df["timestamp"].max().date()

# Picker for the start of the plotted range.
start_date_picker = widgets.DatePicker(
    description='Start date:',
    disabled=False,
    value=min_date
)

# Picker for the end of the plotted range.
end_date_picker = widgets.DatePicker(
    description='End date:',
    disabled=False,
    value=max_date
)

# Output widget that the plots are rendered into.
output_area = widgets.Output()

def on_button_clicked(b):
    """Redraw the forecast plot for the date range chosen in the pickers.

    Parameters
    ----------
    b : ipywidgets.Button
        The clicked button; passed by `on_click` and not used.

    The range runs from 00:00:00 on the start date to 23:59:59 on the end
    date. If either picker has no value (its `value` is None), a message is
    shown in the output area instead of raising a TypeError from
    `datetime.combine`.
    """
    picked_start = start_date_picker.value
    picked_end = end_date_picker.value

    with output_area:
        clear_output(wait=True)

        if picked_start is None or picked_end is None:
            print("Select both a start date and an end date before plotting.")
            return

        midnight = datetime.time.min
        start_date = datetime.datetime.combine(picked_start, midnight)
        # Last whole second of the end date, so the end day is fully included.
        end_date = (
            datetime.datetime.combine(picked_end, midnight)
            + datetime.timedelta(days=1)
            - datetime.timedelta(seconds=1)
        )
        plot_forecasts(
            df=test_df,
            models_predictions=all_models_predictions,
            start_date=start_date,
            end_date=end_date,
        )

# Button that triggers a redraw for the selected date range.
plot_button = widgets.Button(description="Plot Forecasts")
plot_button.on_click(on_button_clicked)

# Layout: both pickers side by side, with the button underneath.
date_row = widgets.HBox([start_date_picker, end_date_picker])
controls = widgets.VBox([date_row, plot_button])

display(controls, output_area)

# Initial render without a date filter, before any button click.
with output_area:
    plot_forecasts(df=test_df, models_predictions=all_models_predictions)

VBox(children=(HBox(children=(DatePicker(value=datetime.date(2020, 1, 1), description='Start date:'), DatePick…

Output()