In [1]:
import pandas as pd
import time
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

In [2]:
from numba import njit
from window_ops.expanding import expanding_mean
from window_ops.rolling import rolling_mean

@njit
def rolling_mean_14(x):
    """Return ``rolling_mean(x, window_size=14)`` from ``window_ops.rolling``.

    Compiled with ``numba.njit``. This notebook lists the function under
    keys 1 and 7 of ``lag_transforms_options``.
    """
    return rolling_mean(x, window_size=14)
@njit
def rolling_mean_30(x):
    """Return ``rolling_mean(x, window_size=30)`` from ``window_ops.rolling``.

    Compiled with ``numba.njit``. This notebook lists the function under
    key 7 of ``lag_transforms_options``.
    """
    return rolling_mean(x, window_size=30)

In [3]:
def format_df_to_mlforecast(df, date_col, target_col, unique_id='mean'):
    """Build a new frame carrying the ``ds`` / ``y`` / ``unique_id`` columns.

    Args:
        df: Source frame; it is not modified.
        date_col: Column renamed to ``ds`` and parsed with ``pd.to_datetime``.
        target_col: Column whose values are copied into a new ``y`` column.
            The original column is kept alongside ``y``.
        unique_id: Value stored in the ``unique_id`` column for every row.

    Returns:
        A new DataFrame with every input column (``date_col`` now named
        ``ds``) followed by ``y`` and ``unique_id``.
    """
    formatted = df.rename(columns={date_col: "ds"}).assign(
        ds=lambda frame: pd.to_datetime(frame["ds"]),
        y=lambda frame: frame[target_col].copy(),
    )
    # Item assignment broadcasts the id to every row of the new frame.
    formatted["unique_id"] = unique_id
    return formatted

In [4]:
# Sensor table read from CSV; the first CSV column becomes the index.
# Later cells select its 'full_date' column plus one column per sensor id.
selected_sensors_df = pd.read_csv("../data/selected_sensors2_cleaned.csv", index_col=0)

In [5]:
# First date (inclusive) of the held-out test window; default value of
# ``test_start_date`` in ``full_split_data``.
TEST_START_DATE = "2019-04-02"

# Layout: {sensor column name: {scenario name: date bounds}}.
# ``val_start`` equals ``train_end`` on purpose: ``full_split_data`` treats
# ``val_start`` as an exclusive lower bound, so validation begins the day after
# training ends.
scenarios_sensors = {
    # 0: 1, 4372603
    # "0_12M_train_7M_test": {"train_start": "2017-03-25", "train_end": "2018-03-25", "test_start": "2018-03-26", "test_end": "2018-10-10"},
    '2': {
        # "18M_train":  {"train_start": "2017-04-01", "train_end": "2018-10-01"},
        # "12M_train":  {"train_start": "2017-04-01", "train_end": "2018-04-01", "val_start": "2017-04-01", "val_end": "2018-04-01"},
        # "12M_train_3M_val":  {"train_start": "2017-04-01", "train_end": "2018-04-01", "val_start": "2018-04-01", "val_end": "2018-07-01"},
        "12M_train_6M_val":  {"train_start": "2017-04-01", "train_end": "2018-04-01", "val_start": "2018-04-01", "val_end": "2018-10-01"},
        "12M_train_9M_val":  {"train_start": "2017-04-01", "train_end": "2018-04-01", "val_start": "2018-04-01", "val_end": "2019-01-01"},
        "12M_train_12M_val":  {"train_start": "2017-04-01", "train_end": "2018-04-01", "val_start": "2018-04-01", "val_end": "2019-04-01"},
        },
}
# Sensors '5' and '6' reuse the scenarios of sensor '2'. Each inner dict is copied
# as well: a plain ``dict.copy()`` is shallow, which would leave all three sensors
# sharing the same date-bound dicts, so editing one scenario would alter the others.
scenarios_sensors['5'] = {name: dict(bounds) for name, bounds in scenarios_sensors['2'].items()}
scenarios_sensors['6'] = {name: dict(bounds) for name, bounds in scenarios_sensors['2'].items()}

In [6]:
from MLForecastPipeline import *

In [7]:
def full_split_data(df, scenario, test_start_date=TEST_START_DATE, date_col="ds"):
    """Slice ``df`` into train, validation and test frames by date.

    Args:
        df: Frame holding a date column named ``date_col``.
        scenario: Mapping providing ``train_end``, ``val_start`` and ``val_end``.
        test_start_date: First date (inclusive) of the test frame.
        date_col: Name of the date column to compare against.

    Returns:
        ``(train, val, test)`` where
        train has ``date <= train_end``,
        val has ``val_start < date <= val_end``,
        test has ``date >= test_start_date``.

    NOTE(review): ``scenario['train_start']`` is not read here, so the train
    frame reaches back to the earliest row of ``df`` -- confirm this is intended.
    """
    dates = df[date_col]
    in_train = dates <= scenario['train_end']
    in_val = (dates > scenario['val_start']) & (dates <= scenario['val_end'])
    in_test = dates >= test_start_date
    return df[in_train], df[in_val], df[in_test]

def split_data(df, scenario, date_col="ds"):
    """Split ``df`` at ``scenario['train_end']``.

    Returns:
        ``(train, rest)``: rows dated up to and including ``train_end``, and
        rows dated from the following day onwards.
    """
    cutoff = scenario['train_end']
    dates = df[date_col]
    day_after_cutoff = pd.to_datetime(cutoff) + pd.Timedelta(days=1)
    return df[dates <= cutoff], df[dates >= day_after_cutoff]

# Candidate estimators keyed by the label that ends up in the "Model" column
# of the results.
# NOTE(review): evaluate_model_for_configuration (defined in a later cell) builds
# its own SGDRegressor from the Optuna best params and never fits the instance
# stored here; only the dict key is used by that function.
models = {
    "SGD_Optuna": SGDRegressor( penalty='elasticnet', l1_ratio=0.5, alpha=0.001, random_state=42 ),
    # "SGDRegressor": SGDRegressor(random_state=42),
    # "SGD_ElasticNet": SGDRegressor( penalty='elasticnet', l1_ratio=0.5, alpha=0.001, random_state=42 ),
}

# Define lag transformations
# Each list entry is one candidate dict handed to MLForecast via `lag_transforms=`;
# the evaluation grid iterates over every entry.
# The class-style transforms (RollingMean, RollingStd, ...) are presumably
# provided by the star import below -- TODO confirm.
from mlforecast.lag_transforms import *
lag_transforms_options = [
    # {},
    # Function-style transforms: njit wrappers defined in this notebook plus
    # window_ops' expanding_mean.
    {1: [rolling_mean_14], 7: [rolling_mean_30], 30: [expanding_mean]},
    {1: [expanding_mean], 7: [rolling_mean_14], 30: [expanding_mean]},
    # {7: [RollingMean(window_size=7)], 30: [RollingMean(window_size=30)], 60: [RollingMean(window_size=60)], },
    # Class-style transforms.
    {7: [RollingMean(7), RollingStd(7)], 30: [RollingMean(30)], 60: [ExpandingMean()], 14: [ExponentiallyWeightedMean(alpha=0.3)],},
    {7: [RollingMean(7), RollingStd(7), ExpandingStd()], 14: [RollingMean(14), ExpandingStd(), ExponentiallyWeightedMean(alpha=0.3)], 30: [RollingMean(30)], 60: [ExpandingMean()],},
]

In [8]:
# Reshaping to MLForecast format
def format_multi_df_to_mlforecast(df):
    """Melt a wide frame into long ``ds`` / ``unique_id`` / ``y`` form.

    ``full_date`` is kept as the identifier column (renamed to ``ds``); every
    other column name becomes a ``unique_id`` value and its cells become ``y``.
    """
    long_df = pd.melt(
        df,
        id_vars=['full_date'],
        var_name='unique_id',
        value_name='y',
    )
    long_df = long_df.rename(columns={'full_date': 'ds'})
    return long_df

In [9]:
def optuna_objective(trial, train_df, test_df, transforms, lags, lag_transforms):
    """Optuna objective: fit an elastic-net SGDRegressor forecaster and score it.

    Args:
        trial: Optuna trial used to sample ``alpha``, ``l1_ratio``, ``max_iter``,
            ``eta0`` and ``tol``.
        train_df: Frame passed to ``MLForecast.fit``.
        test_df: Frame whose ``y`` column is compared with the forecast; its
            length is the forecast horizon.
        transforms: Passed to MLForecast as ``target_transforms``.
        lags: Passed to MLForecast as ``lags``.
        lag_transforms: Passed to MLForecast as ``lag_transforms``.

    Returns:
        The value of ``mape_met(actual, forecast)`` (presumably a MAPE score),
        or ``float('inf')`` when fitting, predicting or scoring raises; the
        exception is printed so the study keeps running.
    """
    alpha = trial.suggest_float('alpha', 1e-6, 1, log=True)
    l1_ratio = trial.suggest_float('l1_ratio', 0.0, 1.0)
    max_iter = trial.suggest_int('max_iter', 300, 1000, step=100)  # Optimizing max_iter (number of iterations)
    eta0 = trial.suggest_float('eta0', 1e-6, 1, log=True)
    # suggest_loguniform is deprecated in Optuna; suggest_float(log=True) samples
    # the same log-uniform range and matches the other suggestions above.
    tol = trial.suggest_float('tol', 1e-6, 1e-3, log=True)

    model = SGDRegressor(alpha=alpha, l1_ratio=l1_ratio, max_iter=max_iter, eta0=eta0, tol=tol, penalty='elasticnet', random_state=42)

    try:
        fcst = MLForecast(
            models=[model],
            freq='D',
            lags=lags,
            target_transforms=transforms,
            lag_transforms=lag_transforms,
            num_threads=1,
        )
        fcst.fit(train_df)
        predictions = fcst.predict(h=len(test_df))
        # The prediction column is read under the estimator's class name.
        mape = mape_met(test_df['y'].values, predictions['SGDRegressor'].values)
        return mape
    except Exception as e:
        # Best effort: a failing configuration is reported and ranked worst.
        print(e)
        return float('inf')
    
import optuna

def run_optuna_search(train_df, test_df, transforms, lags, lag_transforms, n_trials=30):
    """Minimise ``optuna_objective`` over ``n_trials`` trials and return the best params.

    NOTE(review): a later cell defines ``run_optuna_search`` again; when cells
    run top to bottom that later definition replaces this one.
    """
    def objective(trial):
        return optuna_objective(trial, train_df, test_df, transforms, lags, lag_transforms)

    search = optuna.create_study(direction='minimize')
    search.optimize(objective, n_trials=n_trials)
    return search.best_params


In [10]:
from joblib import Parallel, delayed
import time

def process_scenario(sensor_name, scenario_name, scenario, selected_sensors_df, models, lag_transforms_options, ratios=[0.33, 0.66, 1]):
    """Run the tuned evaluation for one sensor/scenario pair and save the results.

    Args:
        sensor_name: Column of ``selected_sensors_df`` to forecast; also used as
            the series ``unique_id`` and in the output file name.
        scenario_name: Label used in the log line and the output file name.
        scenario: Date bounds consumed by ``split_data`` / ``full_split_data``.
        selected_sensors_df: Frame with a ``full_date`` column and the sensor column.
        models: Passed through to ``evaluate_models_sgd_tune``.
        lag_transforms_options: Passed through to ``evaluate_models_sgd_tune``.
        ratios: Forwarded to ``get_optimal_lags``.

    Returns:
        Whatever ``evaluate_models_sgd_tune`` returns (that function is not
        defined in the visible cells; presumably it comes from the
        ``MLForecastPipeline`` star import -- TODO confirm).
    """
    print(f'{sensor_name}_{scenario_name}')

    sensor_frame = selected_sensors_df[['full_date', sensor_name]]
    series_df = format_df_to_mlforecast(sensor_frame, 'full_date', sensor_name, unique_id=sensor_name)
    series_df = series_df[['ds', 'y', 'unique_id']]

    # Everything dated after train_end, used for the final scoring.
    val_test_df = split_data(series_df, scenario)[1]
    # Scenario-specific validation window; the fixed test frame is not used here.
    train_df, val_df, _test_df = full_split_data(series_df, scenario)

    optimal_lags_list = get_optimal_lags(train_df, 'y', ratios=ratios, low_resources=True)
    target_transforms = get_dynamic_transforms(train_df, remove_boxcox=True)

    # Log the size of each axis of the search grid.
    for grid_axis in (target_transforms, optimal_lags_list, models, lag_transforms_options):
        print(len(grid_axis))

    results = evaluate_models_sgd_tune(
        train_df, val_df, val_test_df, models, target_transforms,
        lag_transforms_options, optimal_lags_list, n_jobs=-1, n_trials=30,
    )

    save_results(results, f"results/run_18/{sensor_name}_{scenario_name}.csv")
    return results

def run_all_scenarios_parallel(scenarios_sensors, selected_sensors_df, models, lag_transforms_options, ratios=[0.33, 0.66, 1], n_jobs=-2):
    """Run ``process_scenario`` for every sensor/scenario pair in parallel.

    Args:
        scenarios_sensors: ``{sensor name: {scenario name: scenario}}``.
        selected_sensors_df: Forwarded to ``process_scenario``.
        models: Forwarded to ``process_scenario``.
        lag_transforms_options: Forwarded to ``process_scenario``.
        ratios: Forwarded to ``process_scenario``.
        n_jobs: Worker count for ``joblib.Parallel``. The default ``-2`` is
            joblib's spelling of "all CPUs but one", which is what the previous
            hard-coded ``15`` meant on a 16-core machine.

    Returns:
        List with one ``process_scenario`` result per pair, in iteration order.
    """
    # don't use all cpus (instead all but one)
    results = Parallel(n_jobs=n_jobs)(
        delayed(process_scenario)(sensor_name, scenario_name, scenario, selected_sensors_df, models, lag_transforms_options, ratios=ratios)
        for sensor_name, scenarios in scenarios_sensors.items()
        for scenario_name, scenario in scenarios.items()
    )

    return results

In [11]:
from joblib import Parallel, delayed

def sgd_optuna_objective(trial, train_df, test_df, transforms, lags, lag_transforms):
    """Score one sampled elastic-net SGDRegressor configuration.

    Samples ``alpha``, ``l1_ratio``, ``max_iter``, ``eta0`` and ``tol`` from
    ``trial``, fits an MLForecast pipeline on ``train_df`` and forecasts
    ``len(test_df)`` steps.

    Returns:
        ``mape_met(actual, forecast)`` (presumably a MAPE score), or
        ``float('inf')`` after printing the exception if anything in the
        fit/predict/score step raises.
    """
    # Dict literals evaluate in order, so the parameters are suggested in the
    # sequence alpha, l1_ratio, max_iter, eta0, tol.
    hyperparams = {
        'alpha': trial.suggest_float('alpha', 1e-6, 1, log=True),
        'l1_ratio': trial.suggest_float('l1_ratio', 0.0, 1.0),
        'max_iter': trial.suggest_int('max_iter', 300, 1000, step=100),
        'eta0': trial.suggest_float('eta0', 1e-6, 1, log=True),
        'tol': trial.suggest_float('tol', 1e-6, 1e-3, log=True),
    }
    regressor = SGDRegressor(penalty='elasticnet', random_state=42, **hyperparams)

    try:
        forecaster = MLForecast(
            models=[regressor],
            freq='D',
            lags=lags,
            target_transforms=transforms,
            lag_transforms=lag_transforms,
            num_threads=1,
        )
        forecaster.fit(train_df)
        forecast = forecaster.predict(h=len(test_df))
        actual = test_df['y'].values
        return mape_met(actual, forecast['SGDRegressor'].values)
    except Exception as e:
        print(e)
        return float('inf')
    
import optuna
def run_optuna_search(train_df, test_df, transforms, lags, lag_transforms, n_trials=30, n_jobs=-1):
    """Minimise ``sgd_optuna_objective`` and return the best sampled parameters.

    ``n_trials`` and ``n_jobs`` are forwarded to ``study.optimize``.

    NOTE(review): ``evaluate_model_for_configuration`` calls this from joblib
    workers without overriding ``n_jobs``, so the default ``-1`` applies inside
    every worker -- check for CPU oversubscription.
    """
    def objective(trial):
        return sgd_optuna_objective(trial, train_df, test_df, transforms, lags, lag_transforms)

    search = optuna.create_study(direction='minimize')
    search.optimize(objective, n_trials=n_trials, n_jobs=n_jobs)
    return search.best_params

def evaluate_model_for_configuration(lag_name, optimal_lags, transform_combination, lag_transforms, model_name, model, train_df, val_df, val_test_df, date_features, max_test_length, transforms, n_trials=42):
    """Tune an SGDRegressor for one configuration, refit it and score the forecast.

    Args:
        lag_name: Label of the lag set; stored in the result as "Lag Name".
        optimal_lags: Lags passed to MLForecast.
        transform_combination: Iterable of target transforms for this run.
        lag_transforms: Lag-transform mapping passed to MLForecast.
        model_name: Label stored in the result as "Model".
        model: Unused; the estimator is rebuilt from the tuned parameters.
        train_df: Training frame for both the search and the final fit.
        val_df: Validation frame scored by the hyper-parameter search.
        val_test_df: Frame the final forecast is compared against. The forecast
            is attached positionally, so it must have ``max_test_length`` rows.
        date_features: Passed to the final MLForecast only.
        max_test_length: Forecast horizon of the final model.
        transforms: Unused.
        n_trials: Number of Optuna trials.

    Returns:
        A dict with the configuration labels, one ``test_<n>_days`` score per
        evaluation length, the forecast array (``preds``) and the tuned
        ``params``; ``None`` if the final fit/predict/scoring raises.

    NOTE(review): the search objective does not receive ``date_features``, so
    parameters are tuned on a feature set without them -- confirm intended.
    """
    best_params = run_optuna_search(train_df, val_df, list(transform_combination), optimal_lags, lag_transforms, n_trials=n_trials)
    # The search objective fits SGDRegressor(penalty='elasticnet', ...), but
    # best_params only holds the sampled values. Restate the penalty here;
    # otherwise scikit-learn falls back to its default 'l2' penalty and the
    # tuned l1_ratio has no effect on the final model.
    optuna_model = SGDRegressor(penalty='elasticnet', random_state=42, **best_params)
    # Slicing with a length beyond the frame returns the whole frame, so entries
    # larger than max_test_length score the same rows as max_test_length.
    test_lengths = list(range(30, 181, 30)) + [240, 300, 360, 480, 600, 720, max_test_length]

    try:
        fcst = MLForecast(
            models=[optuna_model],
            freq='D',
            lags=optimal_lags,
            target_transforms=list(transform_combination),
            date_features=date_features,
            num_threads=1,  # single-threaded here; the caller parallelises across configurations
            lag_transforms=lag_transforms,
        )

        fcst.fit(train_df)
        predictions = fcst.predict(h=max_test_length)

        test_df_copy = val_test_df.copy()
        test_df_copy['forecast'] = predictions['SGDRegressor'].values

        error_dict = {}
        for test_length in test_lengths:  # Define test segment lengths
            eval_subset = test_df_copy.iloc[:test_length]  # Take subset for evaluation
            error_dict[f"test_{test_length}_days"] = mape_met(eval_subset['y'].values, eval_subset['forecast'].values)

        return {
            "Model": model_name,
            "Transforms": stringify_transform(list(transform_combination)),
            "Lags": optimal_lags,
            "Lag Name": lag_name,
            "Lag Transforms": str(lag_transforms),
            **error_dict,
            "preds": test_df_copy['forecast'].values,
            "params": best_params
        }
    except Exception as e:
        # Best effort: report the failing configuration and let the caller drop it.
        print(f"Error evaluating model {model_name}: {e}")
        return None

from itertools import combinations, chain
def evaluate_models_sgd_tune_parallel(train_df, val_df, val_test_df, models, target_transforms, lag_transforms_options, optimal_lags_list, date_features=['dayofweek', 'month'], n_trials=42, n_jobs=-1):
    """Evaluate every lag set x transform stack x lag-transform x model combination.

    Each combination is handed to ``evaluate_model_for_configuration`` through
    ``joblib.Parallel``; configurations that return ``None`` are dropped.

    Returns:
        A DataFrame with one row per successfully evaluated configuration.
    """
    print(target_transforms)

    # Candidate target-transform stacks: none, every single transform, every
    # unordered pair; stacks rejected by filter_conflicting_transforms are dropped.
    singles = combinations(target_transforms, 1)
    pairs = combinations(target_transforms, 2)
    transform_stacks = [
        stack
        for stack in [(), *chain(singles, pairs)]
        if filter_conflicting_transforms(stack)
    ]

    # Full grid; the model loop is innermost.
    configs = []
    for lag_name, optimal_lags in optimal_lags_list.items():
        for stack in transform_stacks:
            for lag_transforms in lag_transforms_options:
                for model_name, model in models.items():
                    configs.append((lag_name, optimal_lags, stack, lag_transforms, model_name, model))

    print(len(configs))

    horizon = len(val_test_df)
    run_one = delayed(evaluate_model_for_configuration)
    outcomes = Parallel(n_jobs=n_jobs, verbose=30)(
        run_one(*config, train_df, val_df, val_test_df, date_features, horizon, target_transforms, n_trials)
        for config in configs
    )

    succeeded = [outcome for outcome in outcomes if outcome is not None]
    return pd.DataFrame(succeeded)


In [13]:
# Driver: for every sensor/scenario pair, build the series, split it, derive
# lags and target transforms, run the tuned evaluation grid and save one CSV.
# NOTE(review): this repeats the steps of process_scenario but calls
# get_optimal_lags with low_resources=False (and no ratios) and uses
# evaluate_models_sgd_tune_parallel.
for sensor_name, scenarios in scenarios_sensors.items():
    for scenario_name, scenario in scenarios.items():
        formatted_df = format_df_to_mlforecast(selected_sensors_df[['full_date', sensor_name]], 'full_date', sensor_name, unique_id=sensor_name)
        # Keep only the three columns built above for the forecasting frame.
        formatted_df = formatted_df[['ds', 'y', 'unique_id']]
        
        _, val_test_df = split_data(formatted_df, scenario) # everything after train
        # test_df is produced here but not used further in this cell.
        train_df, val_df, test_df = full_split_data(formatted_df, scenario) # here different validation set but same test 
        optimal_lags_list = get_optimal_lags(train_df, 'y', low_resources=False)
        target_transforms = get_dynamic_transforms(train_df, remove_boxcox=True)

        # Hyper-parameters are tuned against val_df; the final scores use val_test_df.
        results = evaluate_models_sgd_tune_parallel(train_df, val_df, val_test_df, models, target_transforms, lag_transforms_options, optimal_lags_list, n_trials=30)

        save_results(results, f"results/run_18/{sensor_name}_{scenario_name}.csv")

[<mlforecast.target_transforms.AutoDifferences object at 0x000001884815F020>, <mlforecast.target_transforms.AutoSeasonalDifferences object at 0x000001884813E450>, <mlforecast.target_transforms.AutoSeasonalityAndDifferences object at 0x000001884813FAD0>, <mlforecast.target_transforms.LocalStandardScaler object at 0x000001884813D550>, <mlforecast.target_transforms.LocalMinMaxScaler object at 0x000001884813D3D0>]
432


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   19.9s
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:   21.4s
[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:   23.7s
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:   24.4s
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:   25.2s
[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:   25.4s
[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:   27.6s
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:   29.6s
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:   30.1s
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:   32.7s
[Parallel(n_jobs=-1)]: Done  11 tasks      | elapsed:   33.5s
[Parallel(n_jobs=-1)]: Done  12 tasks      | elapsed:   41.9s
[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:   50.8s
[Parallel(n_jobs=-1)]: Done  14 tasks      | elapsed:   52.4s
[Parallel(n_jobs=-1)]: Done  15 tasks      | elapsed:  

Results saved to results/run_18/2_12M_train_6M_val.csv
[<mlforecast.target_transforms.AutoDifferences object at 0x0000018845A2D4C0>, <mlforecast.target_transforms.AutoSeasonalDifferences object at 0x00000188499C5CA0>, <mlforecast.target_transforms.AutoSeasonalityAndDifferences object at 0x00000188499C58B0>, <mlforecast.target_transforms.LocalStandardScaler object at 0x00000188499C54C0>, <mlforecast.target_transforms.LocalMinMaxScaler object at 0x00000188499C66C0>]
432


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   18.6s
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:   19.4s
[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:   21.1s
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:   21.9s
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:   22.2s
[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:   26.2s
[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:   28.6s
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:   32.9s
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:   32.9s
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:   33.2s
[Parallel(n_jobs=-1)]: Done  11 tasks      | elapsed:   38.1s
[Parallel(n_jobs=-1)]: Done  12 tasks      | elapsed:   39.9s
[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:   56.8s
[Parallel(n_jobs=-1)]: Done  14 tasks      | elapsed:   59.3s
[Parallel(n_jobs=-1)]: Done  15 tasks      | elapsed:  

Results saved to results/run_18/2_12M_train_9M_val.csv
[<mlforecast.target_transforms.AutoDifferences object at 0x0000018848062B40>, <mlforecast.target_transforms.AutoSeasonalDifferences object at 0x0000018848134B90>, <mlforecast.target_transforms.AutoSeasonalityAndDifferences object at 0x0000018848136E10>, <mlforecast.target_transforms.LocalStandardScaler object at 0x0000018848135C10>, <mlforecast.target_transforms.LocalMinMaxScaler object at 0x0000018848137170>]
432


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   24.8s
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:   26.6s
[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:   27.6s
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:   28.4s
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:   28.9s
[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:   31.9s
[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:   32.5s
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:   32.7s
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:   34.7s
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:   36.8s
[Parallel(n_jobs=-1)]: Done  11 tasks      | elapsed:   43.3s
[Parallel(n_jobs=-1)]: Done  12 tasks      | elapsed:   49.1s
[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done  14 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done  15 tasks      | elapsed:  

Results saved to results/run_18/2_12M_train_12M_val.csv
[<mlforecast.target_transforms.AutoDifferences object at 0x0000018847FE2BA0>, <mlforecast.target_transforms.AutoSeasonalDifferences object at 0x000001884997E840>, <mlforecast.target_transforms.AutoSeasonalityAndDifferences object at 0x000001884997C6B0>, <mlforecast.target_transforms.LocalStandardScaler object at 0x000001884997FCB0>, <mlforecast.target_transforms.LocalMinMaxScaler object at 0x000001884997D040>]
432


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   13.9s
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:   14.0s
[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:   15.0s
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:   15.6s
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:   16.3s
[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:   17.5s
[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:   17.7s
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:   24.1s
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:   25.0s
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:   25.4s
[Parallel(n_jobs=-1)]: Done  11 tasks      | elapsed:   26.8s
[Parallel(n_jobs=-1)]: Done  12 tasks      | elapsed:   28.1s
[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:   40.3s
[Parallel(n_jobs=-1)]: Done  14 tasks      | elapsed:   40.7s
[Parallel(n_jobs=-1)]: Done  15 tasks      | elapsed:  

Results saved to results/run_18/5_12M_train_6M_val.csv
[<mlforecast.target_transforms.AutoDifferences object at 0x0000018848062810>, <mlforecast.target_transforms.AutoSeasonalDifferences object at 0x0000018849AF3350>, <mlforecast.target_transforms.AutoSeasonalityAndDifferences object at 0x0000018849AF1A60>, <mlforecast.target_transforms.LocalStandardScaler object at 0x0000018849AF3410>, <mlforecast.target_transforms.LocalMinMaxScaler object at 0x0000018849AF1880>]
432


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   17.9s
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:   19.9s
[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:   20.7s
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:   21.4s
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:   23.3s
[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:   24.6s
[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:   27.1s
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:   29.9s
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:   30.2s
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:   32.8s
[Parallel(n_jobs=-1)]: Done  11 tasks      | elapsed:   33.2s
[Parallel(n_jobs=-1)]: Done  12 tasks      | elapsed:   35.0s
[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:   55.6s
[Parallel(n_jobs=-1)]: Done  14 tasks      | elapsed:   59.9s
[Parallel(n_jobs=-1)]: Done  15 tasks      | elapsed:  

Results saved to results/run_18/5_12M_train_9M_val.csv
[<mlforecast.target_transforms.AutoDifferences object at 0x00000188455D0320>, <mlforecast.target_transforms.AutoSeasonalDifferences object at 0x0000018849B933E0>, <mlforecast.target_transforms.AutoSeasonalityAndDifferences object at 0x0000018849B90440>, <mlforecast.target_transforms.LocalStandardScaler object at 0x0000018849B92660>, <mlforecast.target_transforms.LocalMinMaxScaler object at 0x0000018849B90E00>]
432


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   20.9s
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:   22.4s
[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:   23.2s
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:   25.3s
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:   28.2s
[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:   30.3s
[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:   30.7s
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:   31.1s
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:   35.4s
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:   42.5s
[Parallel(n_jobs=-1)]: Done  11 tasks      | elapsed:   43.0s
[Parallel(n_jobs=-1)]: Done  12 tasks      | elapsed:   45.0s
[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done  14 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done  15 tasks      | elapsed:  

Results saved to results/run_18/5_12M_train_12M_val.csv
[<mlforecast.target_transforms.AutoDifferences object at 0x0000018847EAC3B0>, <mlforecast.target_transforms.AutoSeasonalDifferences object at 0x00000188480185F0>, <mlforecast.target_transforms.AutoSeasonalityAndDifferences object at 0x0000018848018CB0>, <mlforecast.target_transforms.LocalStandardScaler object at 0x000001884819DAC0>, <mlforecast.target_transforms.LocalMinMaxScaler object at 0x000001884819FBC0>]
432


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   16.6s
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:   16.8s
[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:   16.8s
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:   17.9s
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:   19.2s
[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:   20.4s
[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:   24.6s
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:   24.9s
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:   26.4s
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:   27.1s
[Parallel(n_jobs=-1)]: Done  11 tasks      | elapsed:   29.4s
[Parallel(n_jobs=-1)]: Done  12 tasks      | elapsed:   32.2s
[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:   41.3s
[Parallel(n_jobs=-1)]: Done  14 tasks      | elapsed:   44.4s
[Parallel(n_jobs=-1)]: Done  15 tasks      | elapsed:  

Results saved to results/run_18/6_12M_train_6M_val.csv
[<mlforecast.target_transforms.AutoDifferences object at 0x0000018847BB7DA0>, <mlforecast.target_transforms.AutoSeasonalDifferences object at 0x0000018849AF3200>, <mlforecast.target_transforms.AutoSeasonalityAndDifferences object at 0x0000018849AF34D0>, <mlforecast.target_transforms.LocalStandardScaler object at 0x0000018849AF0290>, <mlforecast.target_transforms.LocalMinMaxScaler object at 0x0000018849AF3860>]
432


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   20.8s
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:   21.0s
[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:   21.4s
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:   24.3s
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:   24.8s
[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:   29.2s
[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:   29.2s
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:   30.1s
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:   33.9s
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:   36.3s
[Parallel(n_jobs=-1)]: Done  11 tasks      | elapsed:   36.7s
[Parallel(n_jobs=-1)]: Done  12 tasks      | elapsed:   39.2s
[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:   57.9s
[Parallel(n_jobs=-1)]: Done  14 tasks      | elapsed:   58.3s
[Parallel(n_jobs=-1)]: Done  15 tasks      | elapsed:  

Results saved to results/run_18/6_12M_train_9M_val.csv
[<mlforecast.target_transforms.AutoDifferences object at 0x0000018846804440>, <mlforecast.target_transforms.AutoSeasonalDifferences object at 0x000001884819DA00>, <mlforecast.target_transforms.AutoSeasonalityAndDifferences object at 0x000001884819FA70>, <mlforecast.target_transforms.LocalStandardScaler object at 0x000001884819C560>, <mlforecast.target_transforms.LocalMinMaxScaler object at 0x000001884819E8D0>]
432


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:   24.3s
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:   25.4s
[Parallel(n_jobs=-1)]: Done   3 tasks      | elapsed:   26.2s
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:   27.3s
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:   29.4s
[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:   31.7s
[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:   32.2s
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:   34.1s
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:   42.0s
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:   43.3s
[Parallel(n_jobs=-1)]: Done  11 tasks      | elapsed:   48.0s
[Parallel(n_jobs=-1)]: Done  12 tasks      | elapsed:   52.7s
[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done  14 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done  15 tasks      | elapsed:  

Results saved to results/run_18/6_12M_train_12M_val.csv


In [None]:
# run_all_scenarios_parallel(scenarios_sensors, selected_sensors_df, models, lag_transforms_options)

In [None]:
# for sensor_name, scenarios in scenarios_sensors.items():
#     for scenario_name, scenario in scenarios.items():
#         process_scenario(sensor_name, scenario_name, scenario, selected_sensors_df, models, lag_transforms_options)