More recent models, such as TSMixer, TFT, and NHITS, achieve better accuracy than LSTM in most settings.

In [8]:
import logging
import pandas as pd
from ray import tune
from neuralforecast import NeuralForecast
from neuralforecast.auto import (
    AutoNHITS, AutoNBEATS, AutoNBEATSx, AutoTFT, AutoVanillaTransformer,
    AutoInformer, AutoAutoformer, AutoFEDformer, AutoPatchTST, AutoLSTM, AutoGRU,
    AutoRNN, AutoDilatedRNN, AutoTCN, AutoMLP, AutoMLPMultivariate, AutoDLinear,
    AutoNLinear, AutoTimeXer, AutoTSMixer, AutoTSMixerx, AutoTimeMixer,
    AutoTimesNet, AutoiTransformer, AutoSOFTS, AutoStemGNN, AutoBiTCN,
    AutoDeepAR, AutoDeepNPTS, AutoHINT, AutoKAN
)
from neuralforecast.utils import AirPassengersDF
from utilsforecast.plotting import plot_series

# Keep cell output readable: only ERROR-level (and above) records from the
# 'pytorch_lightning' logger are emitted.
logging.getLogger(name='pytorch_lightning').setLevel(level=logging.ERROR)

In [None]:
# Load the cleaned sensor table; the first CSV column is used as the index.
selected_sensors_df = pd.read_csv(
    "../data/selected_sensors2_cleaned.csv",
    index_col=0,
)


In [None]:
# Training windows per sensor: {sensor column name: {scenario name: date window}}.
# Each window holds ISO date strings under "train_start" and "train_end".
scenarios_sensors = {
    # 0: 1, 4372603
    # "0_12M_train_7M_test": {"train_start": "2017-03-25", "train_end": "2018-03-25", "test_start": "2018-03-26", "test_end": "2018-10-10"},
    '2': {
        "26M_train":  {"train_start": "2017-04-01", "train_end": "2019-06-01"},
        "24M_train":  {"train_start": "2017-04-01", "train_end": "2019-04-01"},
        "22M_train":  {"train_start": "2017-04-01", "train_end": "2019-02-01"},
        "20M_train":  {"train_start": "2017-04-01", "train_end": "2018-12-01"},
        "18M_train":  {"train_start": "2017-04-01", "train_end": "2018-10-01"},
        "12M_train":  {"train_start": "2017-04-01", "train_end": "2018-04-01"},
        "10M_train":  {"train_start": "2017-04-01", "train_end": "2018-01-25"},
        "8M_train":   {"train_start": "2017-04-01", "train_end": "2017-10-25"},

        # Non-Heating Periods
        # "NH_3M_train":  {"train_start": "2017-04-15", "train_end": "2017-07-15"},
        # "NH_4M_train":  {"train_start": "2017-04-15", "train_end": "2017-08-15"},
        # "NH_2M_train":  {"train_start": "2017-04-15", "train_end": "2017-06-15"},
        # "NH_1M_train":  {"train_start": "2017-04-15", "train_end": "2017-05-15"},
        # "NH_15D_train": {"train_start": "2017-04-15", "train_end": "2017-04-30"},
        # "NH_feb_2M_train": {"train_start": "2017-02-15", "train_end": "2017-04-15"},
        # "NH_feb_1M_train": {"train_start": "2017-02-15", "train_end": "2017-04-15"},
        # "NH_mar_2M_train": {"train_start": "2017-03-15", "train_end": "2017-05-15"},
        # "NH_mar_1M_train": {"train_start": "2017-03-15", "train_end": "2017-04-15"},

        # # Heating Periods
        # "H_5M_train":     {"train_start": "2017-06-01", "train_end": "2017-11-01"},
        # "H_3M_jul_train": {"train_start": "2017-07-01", "train_end": "2017-10-10"},
        # "H_3M_sep_train": {"train_start": "2017-09-01", "train_end": "2017-12-10"},
        # "H_3M_nov_train": {"train_start": "2017-11-01", "train_end": "2018-02-10"},
        },
}
# Sensors '5' and '6' reuse the windows of sensor '2'. Every inner window dict
# is copied as well (a plain ``.copy()`` would share them), so editing one
# sensor's scenario can never silently change another sensor's.
scenarios_sensors['5'] = {name: dict(window) for name, window in scenarios_sensors['2'].items()}
scenarios_sensors['6'] = {name: dict(window) for name, window in scenarios_sensors['2'].items()}

def split_data(df, scenario, date_col="ds"):
    """Split ``df`` into train and test rows around the scenario's train end date.

    Rows dated on or before ``scenario['train_end']`` form the training set;
    rows dated on or after the following day form the test set. Only
    ``'train_end'`` is read from ``scenario``; ``'train_start'`` is not
    applied here.

    Args:
        df: DataFrame containing the date column.
        scenario: Mapping with a ``'train_end'`` date (string or timestamp).
        date_col: Name of the date column. Its values may be datetimes or
            date strings; they are parsed for the comparison only and the
            returned rows keep the column's original dtype.

    Returns:
        Tuple ``(train_data, test_data)`` of row subsets of ``df``.
    """
    train_end = pd.to_datetime(scenario['train_end'])
    test_start = train_end + pd.Timedelta(days=1)

    # Compare on parsed timestamps so both masks use the same type: a
    # string-typed column would otherwise be compared lexicographically for
    # the train mask and raise against the Timestamp for the test mask.
    dates = pd.to_datetime(df[date_col])

    train_data = df[dates <= train_end]
    test_data = df[dates >= test_start]
    return train_data, test_data

from joblib import Parallel, delayed
import time
from MLForecastPipeline import *

# Registry of the NeuralForecast Auto* classes to benchmark, keyed by class
# name. Insertion order follows the tuple below.
# NOTE(review): AutoHINT appears to need extra mandatory constructor arguments
# compared with the other classes — confirm it can be built like the rest.
auto_models = {
    model_cls.__name__: model_cls
    for model_cls in (
        AutoNHITS, AutoNBEATS, AutoNBEATSx,
        AutoTFT, AutoVanillaTransformer,
        AutoInformer, AutoAutoformer, AutoFEDformer,
        AutoPatchTST, AutoLSTM, AutoGRU,
        AutoRNN, AutoDilatedRNN, AutoTCN,
        AutoMLP, AutoMLPMultivariate, AutoDLinear,
        AutoNLinear, AutoTimeXer, AutoTSMixer,
        AutoTSMixerx, AutoTimeMixer,
        AutoTimesNet, AutoiTransformer, AutoSOFTS,
        AutoStemGNN, AutoBiTCN, AutoDeepAR,
        AutoDeepNPTS, AutoKAN,
        AutoHINT,
    )
}

# Function to determine horizon for a single scenario
def determine_scenario_horizon(scenario):
    """Return the number of days between a scenario's test start and test end.

    The test period starts one day after ``scenario['train_end']``. The
    result is the whole-day difference ``test_end - test_start``. When the
    scenario has no ``'test_end'`` key, the test start is used in its place,
    so the result is 0.
    """
    first_test_day = pd.to_datetime(scenario['train_end']) + pd.Timedelta(days=1)
    last_test_day = pd.to_datetime(scenario.get('test_end', first_test_day))
    return (last_test_day - first_test_day).days

# Function to create models dynamically for each scenario
def get_auto_model(model_cls, horizon, n_series=1, num_samples=3):
    """Instantiate an Auto model for ``horizon`` using the Ray backend.

    The class's default search space is taken from
    ``model_cls.get_default_config`` and extended with a tunable
    ``random_seed`` drawn via ``tune.randint(1, 10)``.

    Args:
        model_cls: Auto model class exposing ``get_default_config``.
        horizon: Forecast horizon, passed to the model as ``h``.
        n_series: Number of series. Forwarded only to classes whose
            constructor declares an ``n_series`` parameter (the multivariate
            Auto models require it); ignored otherwise. Defaults to 1.
        num_samples: Value passed to the model's ``num_samples`` argument.
            Defaults to 3, the value previously hard-coded.

    Returns:
        The instantiated Auto model.
    """
    import inspect  # local import so the file's import section stays untouched

    # Multivariate Auto classes take a mandatory ``n_series`` in both
    # ``get_default_config`` and the constructor; calling them without it
    # raises TypeError. Univariate classes do not accept it in the
    # constructor, so it is forwarded only where declared.
    # NOTE(review): classes with other mandatory constructor arguments
    # (AutoHINT appears to be one) still cannot be built here — confirm.
    extra_kwargs = {}
    if "n_series" in inspect.signature(model_cls).parameters:
        extra_kwargs["n_series"] = n_series

    config = model_cls.get_default_config(h=horizon, backend="ray", **extra_kwargs)
    config["random_seed"] = tune.randint(1, 10)
    return model_cls(
        h=horizon,
        config=config,
        backend='ray',
        num_samples=num_samples,
        **extra_kwargs,
    )

# Function to process each scenario with its specific horizon
def process_scenario(sensor_name, scenario_name, scenario, selected_sensors_df, models=None):
    """Train and evaluate every registered Auto model on one sensor/scenario pair.

    The sensor column is reshaped to ``ds``/``y``/``unique_id`` columns,
    split around the scenario's train end date, and evaluated with one freshly
    built model per entry of the module-level ``auto_models`` registry. The
    results are written to ``results/run_10/<sensor>_<scenario>.csv``.

    Args:
        sensor_name: Column of ``selected_sensors_df`` holding the sensor
            values; also used as the series ``unique_id``.
        scenario_name: Label of the scenario, used in the log line and in the
            output file name.
        scenario: Mapping with at least ``'train_end'``; ``'test_end'`` is
            optional.
        selected_sensors_df: DataFrame with a ``'full_date'`` column and one
            column per sensor.
        models: Unused; accepted for backward compatibility. It is optional
            so callers that do not supply it no longer fail with TypeError.

    Returns:
        Whatever ``evaluate_models_nfcst`` returns for this scenario.

    Raises:
        ValueError: If no positive forecast horizon can be derived.
    """
    print(f'Processing: {sensor_name}_{scenario_name}')

    # format_df_to_mlforecast / evaluate_models_nfcst / save_results are
    # presumably provided by the MLForecastPipeline star import.
    formatted_df = format_df_to_mlforecast(selected_sensors_df[['full_date', sensor_name]], 'full_date', sensor_name, unique_id=sensor_name)
    formatted_df = formatted_df[['ds', 'y', 'unique_id']]

    train_df, test_df = split_data(formatted_df, scenario)

    scenario_horizon = determine_scenario_horizon(scenario)
    if scenario_horizon < 1:
        # Scenarios without a 'test_end' yield a horizon of 0, which no model
        # can forecast. Fall back to the number of distinct test timestamps so
        # the horizon covers the whole test set.
        # NOTE(review): assumes one row per forecast step in test_df — confirm.
        scenario_horizon = int(test_df['ds'].nunique())
    if scenario_horizon < 1:
        raise ValueError(
            f"No test data after train_end={scenario['train_end']!r} for "
            f"{sensor_name}_{scenario_name}; cannot derive a forecast horizon."
        )

    scenario_models = {name: get_auto_model(model_cls, scenario_horizon) for name, model_cls in auto_models.items()}

    results = evaluate_models_nfcst(train_df, test_df, scenario_models)
    save_results(results, f"results/run_10/{sensor_name}_{scenario_name}.csv")

    return results

def run_all_scenarios_parallel(scenarios_sensors, selected_sensors_df, n_jobs=12):
    """Run ``process_scenario`` for every sensor/scenario pair in parallel.

    Args:
        scenarios_sensors: Mapping ``{sensor_name: {scenario_name: scenario}}``.
        selected_sensors_df: DataFrame handed unchanged to every job.
        n_jobs: Number of joblib workers. Defaults to 12, the value
            previously hard-coded.

    Returns:
        List with one ``process_scenario`` result per sensor/scenario pair.
    """
    jobs = (
        # process_scenario takes a fifth ``models`` argument; omitting it
        # made every job fail with TypeError, so the registry is passed.
        delayed(process_scenario)(
            sensor_name, scenario_name, scenario, selected_sensors_df, auto_models
        )
        for sensor_name, scenarios in scenarios_sensors.items()
        for scenario_name, scenario in scenarios.items()
    )
    return Parallel(n_jobs=n_jobs)(jobs)



In [None]:
# Both arguments are required; calling without them raises TypeError.
run_all_scenarios_parallel(scenarios_sensors, selected_sensors_df)