In [None]:
import pandas as pd

import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

In [None]:
from numba import njit
from window_ops.expanding import expanding_mean
from window_ops.rolling import rolling_mean

@njit
def rolling_mean_14(x):
    """Apply ``window_ops.rolling.rolling_mean`` to ``x`` with ``window_size=14``.

    Compiled with ``numba.njit``. This notebook uses it as a lag transform
    inside ``lag_transforms_options``.
    """
    return rolling_mean(x, window_size=14)
@njit
def rolling_mean_30(x):
    """Apply ``window_ops.rolling.rolling_mean`` to ``x`` with ``window_size=30``.

    Compiled with ``numba.njit``; the 30-step counterpart of ``rolling_mean_14``.
    """
    return rolling_mean(x, window_size=30)

In [None]:
def format_df_to_mlforecast(df, date_col, target_col, unique_id='mean'):
    """Return a copy of ``df`` carrying the ``ds`` / ``y`` / ``unique_id`` columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    date_col : str
        Column holding the timestamps. It is renamed to ``ds`` and parsed with
        ``pd.to_datetime``.
    target_col : str
        Column holding the series values. It is copied into a new ``y`` column;
        the original column is kept in the result as well.
    unique_id : scalar, default 'mean'
        Value written to every row of the ``unique_id`` column.

    Returns
    -------
    pandas.DataFrame
        The renamed frame with ``y`` and ``unique_id`` appended.
    """
    formatted = (
        df.rename(columns={date_col: "ds"})
        .assign(
            ds=lambda frame: pd.to_datetime(frame["ds"]),
            y=lambda frame: frame[target_col].copy(),
        )
    )
    # Plain item assignment (not ``assign``) so the value is always stored as-is.
    formatted["unique_id"] = unique_id
    return formatted

In [None]:
# Load the cleaned sensor table; the first CSV column becomes the DataFrame index.
# Later cells read a 'full_date' column plus one column per sensor id
# (the keys of `scenarios_sensors`) from this frame.
selected_sensors_df = pd.read_csv("../data/selected_sensors2_cleaned.csv", index_col=0)

In [None]:
# Train/test windows, keyed first by sensor id (a column of `selected_sensors_df`)
# and then by scenario name. All four dates are inclusive "YYYY-MM-DD" bounds and
# every test window starts the day after its training window ends.
scenarios_sensors = {
    # 0: 1, 4372603
    # "0_12M_train_7M_test": {"train_start": "2017-03-25", "train_end": "2018-03-25", "test_start": "2018-03-26", "test_end": "2018-10-10"},
    '2': {
        "0_10M_train_9M_test":  {"train_start": "2017-04-01", "train_end": "2018-01-25", "test_start": "2018-01-26", "test_end": "2018-10-10"},
        "0_8M_train_11M_test":  {"train_start": "2017-04-01", "train_end": "2017-10-25", "test_start": "2017-10-26", "test_end": "2018-10-10"},

        # Non-Heating Periods
        "0_NonHeating_3M_train_3M_test":  {"train_start": "2017-04-15", "train_end": "2017-07-15", "test_start": "2017-07-16", "test_end": "2017-10-01"},
        "0_NonHeating_4M_train_2M_test":  {"train_start": "2017-04-15", "train_end": "2017-08-15", "test_start": "2017-08-16", "test_end": "2017-10-01"},
        "0_NonHeating_2M_train_4M_test":  {"train_start": "2017-04-15", "train_end": "2017-06-15", "test_start": "2017-06-16", "test_end": "2017-10-01"},
        "0_NonHeating_1M_train_5M_test":  {"train_start": "2017-04-15", "train_end": "2017-05-15", "test_start": "2017-05-16", "test_end": "2017-10-01"},
        "0_NonHeating_15D_train_5M_test": {"train_start": "2017-04-15", "train_end": "2017-04-30", "test_start": "2017-05-01", "test_end": "2017-10-01"},
        "0_NonHeating_feb_2M_train_4M_test": {"train_start": "2017-02-15", "train_end": "2017-04-15", "test_start": "2017-04-16", "test_end": "2017-08-16"},
        # Fixed: this entry used to repeat the feb_2M dates verbatim, so the
        # "1M" run silently re-ran the 2-month experiment. It now trains on one
        # month and tests on the four months that follow.
        "0_NonHeating_feb_1M_train_4M_test": {"train_start": "2017-02-15", "train_end": "2017-03-15", "test_start": "2017-03-16", "test_end": "2017-07-16"},
        "0_NonHeating_mar_2M_train_4M_test": {"train_start": "2017-03-15", "train_end": "2017-05-15", "test_start": "2017-05-16", "test_end": "2017-09-16"},
        # Fixed: test_start was left at 2017-05-16 (copied from mar_2M), which
        # skipped a month between training and testing. The 4-month test window
        # now begins the day after train_end, like every other scenario.
        "0_NonHeating_mar_1M_train_4M_test": {"train_start": "2017-03-15", "train_end": "2017-04-15", "test_start": "2017-04-16", "test_end": "2017-08-16"},

        # Heating Periods
        "0_Heating_5M_train_1Y_test":     {"train_start": "2017-06-01", "train_end": "2017-11-01", "test_start": "2017-11-02", "test_end": "2018-10-10"},
        "0_Heating_3M_jul_train_1Y_test": {"train_start": "2017-07-01", "train_end": "2017-10-10", "test_start": "2017-10-11", "test_end": "2018-10-10"},
        "0_Heating_3M_sep_train_1Y_test": {"train_start": "2017-09-01", "train_end": "2017-12-10", "test_start": "2017-12-11", "test_end": "2018-12-10"},
        "0_Heating_3M_nov_train_1Y_test": {"train_start": "2017-11-01", "train_end": "2018-02-10", "test_start": "2018-02-11", "test_end": "2018-12-10"},
        },
}
# Sensors '5' and '6' reuse the windows defined for sensor '2'. Each window dict
# is copied individually: a plain `.copy()` of the outer dict would leave the
# inner dicts shared, so editing one sensor's dates would change all three.
scenarios_sensors['5'] = {name: dict(window) for name, window in scenarios_sensors['2'].items()}
scenarios_sensors['6'] = {name: dict(window) for name, window in scenarios_sensors['2'].items()}

In [None]:
from MLForecastPipeline import *

In [None]:
def get_seasonal_data(df, start_date, end_date, date_col="ds"):
    """Return the rows of ``df`` whose ``date_col`` lies in ``[start_date, end_date]``.

    Both bounds are inclusive. The original index labels of the selected rows
    are preserved.
    """
    in_window = df[date_col].between(start_date, end_date)
    return df[in_window]

def split_data(df, scenario, date_col="ds"):
    """Slice ``df`` into the train and test windows described by ``scenario``.

    ``scenario`` must provide the keys ``train_start``, ``train_end``,
    ``test_start`` and ``test_end``; each pair is passed to
    ``get_seasonal_data`` as an inclusive date range.

    Returns
    -------
    tuple
        ``(train_data, test_data)``.
    """
    windows = (
        (scenario["train_start"], scenario["train_end"]),
        (scenario["test_start"], scenario["test_end"]),
    )
    train_data, test_data = (
        get_seasonal_data(df, first_day, last_day, date_col)
        for first_day, last_day in windows
    )
    return train_data, test_data

# Candidate regressors keyed by a display name. Only Lasso is active; the
# commented entries are alternatives that can be switched back on.
# NOTE(review): Lasso is not imported explicitly in this notebook — presumably
# it arrives through `from MLForecastPipeline import *`; confirm.
models = {
    # "XGBRegressor": XGBRegressor(),
    # "SGDRegressor_42": SGDRegressor(random_state=42),
    # "SGDRegressor_1": SGDRegressor(random_state=1),
    # "Ridge": Ridge(),
    "Lasso": Lasso()
}

# Define lag transformations
# Each list element is one option handed to `evaluate_models`: a dict mapping a
# lag (1, 7, 30) to the list of window functions applied at that lag.
# Only the first option is active; the commented ones are alternatives.

lag_transforms_options = [
    {1: [expanding_mean], 7: [rolling_mean_14], 30: [expanding_mean]},
    # {1: [rolling_mean_14], 7: [rolling_mean_30], 30: [expanding_mean]},
    # {1: [rolling_mean_14], 30: [expanding_mean]},
    # {1: [rolling_mean_14]},
    # {},
]

In [None]:
# from tqdm.notebook import trange, tqdm
# from time import sleep

# for i in trange(3, desc='1st loop'):
#     for j in tqdm(range(100), desc='2nd loop'):
#         sleep(0.01)

In [9]:
# Loop through scenarios and evaluate models.
# `all_results` keeps every (sensor, scenario) table so earlier runs are not
# lost; `results` still points at the most recent table so the inspection cell
# below keeps working.
all_results = {}
results = None

for sensor_name, scenarios in scenarios_sensors.items():
    # Build the ds / y / unique_id frame for this sensor once, outside the scenario loop.
    formatted_df = format_df_to_mlforecast(
        selected_sensors_df[['full_date', sensor_name]],
        'full_date',
        sensor_name,
        unique_id=sensor_name,
    )[['ds', 'y', 'unique_id']]

    for scenario_name, scenario in scenarios.items():
        train_df, test_df = split_data(formatted_df, scenario)

        # The windows were defined for sensor '2' and copied to the other
        # sensors, so a sensor may have no rows in a given window. Skip with a
        # message instead of failing somewhere inside the pipeline.
        if train_df.empty or test_df.empty:
            print(
                f"Skipping sensor {sensor_name} / {scenario_name}: "
                f"{len(train_df)} train rows, {len(test_df)} test rows"
            )
            continue

        optimal_lags_list = get_optimal_lags(
            train_df,
            'y',
            ratios=[1],
            # ratios=[0.33, 0.66, 1]
            # ratios=[0.25, 0.5, 0.75, 1]
        )
        target_transforms = get_dynamic_transforms(train_df)
        results = evaluate_models(
            train_df,
            test_df,
            models,
            target_transforms,
            lag_transforms_options,
            optimal_lags_list,
        )
        all_results[(sensor_name, scenario_name)] = results

        # Scenario names are shared by all sensors, so the sensor id has to be
        # part of the file name; otherwise each sensor overwrites the CSV
        # written by the previous one.
        save_results(results, f"results/{sensor_name}_{scenario_name}.csv")

Lasso MAPE: 40.20% with transforms (<mlforecast.target_transforms.AutoSeasonalDifferences object at 0x000002BBB4B49250>, <mlforecast.target_transforms.LocalMinMaxScaler object at 0x000002BBB58766C0>), lags [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104], and lag_transforms {1: [CPUDispatcher(<function expanding_mean at 0x000002BBA2C74360>)], 7: [CPUDispatcher(<function rolling_mean_14 at 0x000002BB90DE6AC0>)], 30: [CPUDispatcher(<function expanding_mean at 0x000002BBA2C74360>)]}
44/48 Training Lasso with transforms (<mlforecast.target_transforms.AutoSeasonalDifferences object at 0x000002BBB4B49250>, <mlforecast.target_transform

  df[names] = values
  df[names] = values
  df[names] = values
  df[names] = values
  df[names] = values
  df[names] = values
  df[names] = values
  df[names] = values
  df[names] = values
  df[names] = values
  df[feat_name] = feat_vals[restore_idxs]
  df[feat_name] = feat_vals[restore_idxs]


Lasso MAPE: 40.61% with transforms (<mlforecast.target_transforms.AutoSeasonalDifferences object at 0x000002BBB4B49250>, <mlforecast.target_transforms.LocalBoxCox object at 0x000002BBB4B28EF0>), lags [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104], and lag_transforms {1: [CPUDispatcher(<function expanding_mean at 0x000002BBA2C74360>)], 7: [CPUDispatcher(<function rolling_mean_14 at 0x000002BB90DE6AC0>)], 30: [CPUDispatcher(<function expanding_mean at 0x000002BBA2C74360>)]}
45/48 Training Lasso with transforms (<mlforecast.target_transforms.AutoSeasonalityAndDifferences object at 0x000002BBB5877230>, <mlforecast.target_transform

  df[names] = values
  df[names] = values
  df[names] = values
  df[names] = values
  df[names] = values
  df[names] = values
  df[names] = values
  df[names] = values
  df[names] = values
  df[names] = values
  df[feat_name] = feat_vals[restore_idxs]
  df[feat_name] = feat_vals[restore_idxs]


Lasso MAPE: 40.20% with transforms (<mlforecast.target_transforms.AutoSeasonalityAndDifferences object at 0x000002BBB5877230>, <mlforecast.target_transforms.LocalStandardScaler object at 0x000002BBB5874380>), lags [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104], and lag_transforms {1: [CPUDispatcher(<function expanding_mean at 0x000002BBA2C74360>)], 7: [CPUDispatcher(<function rolling_mean_14 at 0x000002BB90DE6AC0>)], 30: [CPUDispatcher(<function expanding_mean at 0x000002BBA2C74360>)]}
46/48 Training Lasso with transforms (<mlforecast.target_transforms.AutoSeasonalityAndDifferences object at 0x000002BBB5877230>, <mlforecast.ta

  df[names] = values
  df[names] = values
  df[names] = values
  df[names] = values
  df[names] = values
  df[names] = values
  df[names] = values
  df[names] = values
  df[names] = values
  df[names] = values
  df[feat_name] = feat_vals[restore_idxs]
  df[feat_name] = feat_vals[restore_idxs]


Lasso MAPE: 40.20% with transforms (<mlforecast.target_transforms.AutoSeasonalityAndDifferences object at 0x000002BBB5877230>, <mlforecast.target_transforms.LocalMinMaxScaler object at 0x000002BBB58766C0>), lags [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104], and lag_transforms {1: [CPUDispatcher(<function expanding_mean at 0x000002BBA2C74360>)], 7: [CPUDispatcher(<function rolling_mean_14 at 0x000002BB90DE6AC0>)], 30: [CPUDispatcher(<function expanding_mean at 0x000002BBA2C74360>)]}
47/48 Training Lasso with transforms (<mlforecast.target_transforms.AutoSeasonalityAndDifferences object at 0x000002BBB5877230>, <mlforecast.targ

  df[names] = values
  df[names] = values
  df[names] = values
  df[names] = values
  df[names] = values
  df[names] = values
  df[names] = values
  df[names] = values
  df[names] = values
  df[names] = values
  df[feat_name] = feat_vals[restore_idxs]
  df[feat_name] = feat_vals[restore_idxs]


Lasso MAPE: 40.61% with transforms (<mlforecast.target_transforms.AutoSeasonalityAndDifferences object at 0x000002BBB5877230>, <mlforecast.target_transforms.LocalBoxCox object at 0x000002BBB4B28EF0>), lags [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104], and lag_transforms {1: [CPUDispatcher(<function expanding_mean at 0x000002BBA2C74360>)], 7: [CPUDispatcher(<function rolling_mean_14 at 0x000002BB90DE6AC0>)], 30: [CPUDispatcher(<function expanding_mean at 0x000002BBA2C74360>)]}
Results saved to results/0_8M_train_11M_test.csv
Total model fits to run: 48
0/48 Training Lasso with transforms (), lags [1, 2, 3, 4, 5, 6, 7, 8, 9, 1

KeyboardInterrupt: 

In [10]:
# Show the metrics table returned by the most recent `evaluate_models` call;
# the loop above rebinds `results` on every scenario.
results

Unnamed: 0,Model,Transforms,Lags,Lag Transforms,Lag Name,test_30_days,test_60_days,test_90_days,test_120_days,test_150_days,test_180_days,test_240_days,test_300_days,test_350_days
0,Lasso,(),"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",{1: [CPUDispatcher(<function expanding_mean at...,lags_26_features_26,25.937552,36.726503,44.229218,47.909544,43.811895,41.211552,46.842566,43.940497,40.320784
1,Lasso,(<mlforecast.target_transforms.AutoDifferences...,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",{1: [CPUDispatcher(<function expanding_mean at...,lags_26_features_26,27.161347,36.983814,42.808567,44.227898,42.750567,48.956148,91.071874,116.230058,122.413551
2,Lasso,(<mlforecast.target_transforms.AutoSeasonalDif...,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",{1: [CPUDispatcher(<function expanding_mean at...,lags_26_features_26,25.937552,36.726503,44.229218,47.909544,43.811895,41.211552,46.842566,43.940497,40.320784
3,Lasso,(<mlforecast.target_transforms.AutoSeasonality...,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",{1: [CPUDispatcher(<function expanding_mean at...,lags_26_features_26,25.937552,36.726503,44.229218,47.909544,43.811895,41.211552,46.842566,43.940497,40.320784
4,Lasso,(<mlforecast.target_transforms.LocalStandardSc...,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",{1: [CPUDispatcher(<function expanding_mean at...,lags_26_features_26,55.942247,63.970645,68.734508,70.940767,67.446049,61.61434,51.681581,44.407099,42.165418
5,Lasso,(<mlforecast.target_transforms.LocalMinMaxScal...,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",{1: [CPUDispatcher(<function expanding_mean at...,lags_26_features_26,56.086627,64.101643,68.801092,70.959498,67.419517,61.555403,51.658324,44.391143,42.16626
6,Lasso,(<mlforecast.target_transforms.LocalBoxCox obj...,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",{1: [CPUDispatcher(<function expanding_mean at...,lags_26_features_26,59.983264,67.28707,71.569515,73.536396,70.310534,64.94364,53.560531,46.474636,44.769349
7,Lasso,(<mlforecast.target_transforms.AutoDifferences...,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",{1: [CPUDispatcher(<function expanding_mean at...,lags_26_features_26,26.047628,35.714382,41.590058,43.008514,42.069127,48.925084,92.378372,118.080094,124.399089
8,Lasso,(<mlforecast.target_transforms.AutoDifferences...,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",{1: [CPUDispatcher(<function expanding_mean at...,lags_26_features_26,26.047628,35.714382,41.590058,43.008514,42.069127,48.925084,92.378372,118.080094,124.399089
9,Lasso,(<mlforecast.target_transforms.AutoSeasonalDif...,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",{1: [CPUDispatcher(<function expanding_mean at...,lags_26_features_26,55.942247,63.970645,68.734508,70.940767,67.446049,61.61434,51.681581,44.407099,42.165418
