In [1]:
import pandas as pd

import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

In [2]:
from numba import njit
from window_ops.expanding import expanding_mean
from window_ops.rolling import rolling_mean

@njit
def rolling_mean_14(x):
    """Rolling mean of ``x`` with the window size fixed at 14.

    Thin numba-compiled wrapper around ``window_ops.rolling.rolling_mean``.
    Fixing the window here lets the function be referenced as a bare callable
    (see its use inside ``lag_transforms_options``).
    """
    return rolling_mean(x, window_size=14)
@njit
def rolling_mean_30(x):
    """Rolling mean of ``x`` with the window size fixed at 30.

    Thin numba-compiled wrapper around ``window_ops.rolling.rolling_mean``.
    Fixing the window here lets the function be referenced as a bare callable
    (see its use inside ``lag_transforms_options``).
    """
    return rolling_mean(x, window_size=30)

In [3]:
def format_df_to_mlforecast(df, date_col, target_col, unique_id='mean'):
    """Return a copy of ``df`` carrying the ``ds`` / ``y`` / ``unique_id`` columns.

    The input frame is not modified. In the returned frame:

    * ``date_col`` is renamed to ``ds`` and converted with ``pd.to_datetime``;
    * ``y`` is added as a copy of ``target_col`` (the original target column is
      kept under its own name as well);
    * ``unique_id`` is added and filled with the ``unique_id`` value on every row.

    Args:
        df: Source DataFrame.
        date_col: Name of the column holding the timestamps.
        target_col: Name of the column holding the values to forecast.
        unique_id: Series identifier written to every row.

    Returns:
        A new DataFrame with the columns described above.

    Raises:
        KeyError: If ``date_col`` or ``target_col`` is not a column of ``df``.
    """
    missing = [col for col in (date_col, target_col) if col not in df.columns]
    if missing:
        # rename() silently ignores unknown labels, so without this check a
        # wrong date_col only surfaces later as the misleading KeyError: 'ds'.
        raise KeyError(f"Column(s) not found in DataFrame: {missing}")

    # rename() returns a new frame, so the assignments below never touch `df`.
    df_ = df.rename(columns={date_col: "ds"})

    df_['ds'] = pd.to_datetime(df_['ds'])
    df_['y'] = df_[target_col].copy()
    df_['unique_id'] = unique_id
    return df_

In [4]:
# Load the cleaned sensor table; index_col=0 turns the first CSV column into the index.
# Later cells read `full_date` and the sensor columns ('2', '5') from this frame.
# NOTE(review): no date parsing is requested here, so `full_date` keeps whatever dtype read_csv infers.
selected_sensors_df = pd.read_csv("../data/selected_sensors2_cleaned.csv", index_col=0)

In [5]:
from functions import holt_winters_imputation_and_expand

# Run the Holt-Winters helper over each listed sensor column and write the
# result back into the same column.
# NOTE(review): the meaning of 365 is defined by `holt_winters_imputation_and_expand`
# in functions.py (presumably a length in days) — confirm there.
for sensor_id in ('2', '5'):
    selected_sensors_df[sensor_id] = holt_winters_imputation_and_expand(selected_sensors_df[sensor_id], 365)

In [6]:
# Nested scenario table: outer key -> scenario name -> date bounds given as
# "YYYY-MM-DD" strings (train_start / train_end / test_start / test_end).
# The outer keys ('2', '1') look like sensor column names — TODO confirm.
# NOTE(review): the next two cells rebind `scenarios` to flat mappings, so on a
# top-to-bottom run this nested version is replaced before any visible cell reads it.
scenarios = {
    # 0: 1, 4372603
    # "0_12M_train_7M_test": {"train_start": "2017-03-25", "train_end": "2018-03-25", "test_start": "2018-03-26", "test_end": "2018-10-10"},
    '2': {
        "0_10M_train_9M_test":  {"train_start": "2017-03-25", "train_end": "2018-01-25", "test_start": "2018-01-26", "test_end": "2018-10-10"},
        "0_8M_train_11M_test":  {"train_start": "2017-03-25", "train_end": "2017-10-25", "test_start": "2017-10-26", "test_end": "2018-10-10"},
        
        # Non-Heating Periods
        "0_NonHeating_3M_train_3M_test":  {"train_start": "2017-04-15", "train_end": "2017-07-15", "test_start": "2017-07-16", "test_end": "2017-10-01"},
        "0_NonHeating_4M_train_2M_test":  {"train_start": "2017-04-15", "train_end": "2017-08-15", "test_start": "2017-08-16", "test_end": "2017-10-01"},
        "0_NonHeating_2M_train_4M_test":  {"train_start": "2017-04-15", "train_end": "2017-06-15", "test_start": "2017-06-16", "test_end": "2017-10-01"},
        "0_NonHeating_1M_train_5M_test":  {"train_start": "2017-04-15", "train_end": "2017-05-15", "test_start": "2017-05-16", "test_end": "2017-10-01"},
        "0_NonHeating_15D_train_5M_test": {"train_start": "2017-04-15", "train_end": "2017-04-30", "test_start": "2017-05-01", "test_end": "2017-10-01"},
        "0_NonHeating_feb_2M_train_4M_test": {"train_start": "2017-02-15", "train_end": "2017-04-15", "test_start": "2017-04-16", "test_end": "2017-08-16"},
        "0_NonHeating_mar_2M_train_4M_test": {"train_start": "2017-03-15", "train_end": "2017-05-15", "test_start": "2017-05-16", "test_end": "2017-09-16"},

        # Heating Periods
        "0_Heating_5M_train_1Y_test":     {"train_start": "2017-06-01", "train_end": "2017-11-01", "test_start": "2017-11-02", "test_end": "2018-10-10"},
        "0_Heating_3M_jul_train_1Y_test": {"train_start": "2017-07-01", "train_end": "2017-10-10", "test_start": "2017-10-11", "test_end": "2018-10-10"},
        # NOTE(review): this test_end (2018-12-10) is later than every other scenario's
        # (at most 2018-10-10) — confirm the series actually extends that far.
        "0_Heating_3M_sep_train_1Y_test": {"train_start": "2017-09-01", "train_end": "2017-12-10", "test_start": "2017-12-11", "test_end": "2018-12-10"},
        },
    '1': {
        "0_10M_train_9M_test": {"train_start": "2017-03-25", "train_end": "2018-01-25", "test_start": "2018-01-26", "test_end": "2018-10-10"},
        "0_8M_train_11M_test": {"train_start": "2017-03-25", "train_end": "2017-10-25", "test_start": "2017-10-26", "test_end": "2018-10-10"},
        
        # Non-Heating Periods
        "0_NonHeating_3M_train_3M_test": {"train_start": "2017-04-15", "train_end": "2017-07-15", "test_start": "2017-07-16", "test_end": "2017-10-01"},
        "0_NonHeating_4M_train_2M_test": {"train_start": "2017-04-15", "train_end": "2017-08-15", "test_start": "2017-08-16", "test_end": "2017-10-01"},
        "0_NonHeating_2M_train_4M_test": {"train_start": "2017-04-15", "train_end": "2017-06-15", "test_start": "2017-06-16", "test_end": "2017-10-01"},
        "0_NonHeating_1M_train_5M_test": {"train_start": "2017-04-15", "train_end": "2017-05-15", "test_start": "2017-05-16", "test_end": "2017-10-01"},

        # Heating Periods
        "0_Heating_5M_train_1Y_test": {"train_start": "2017-06-01", "train_end": "2017-11-01", "test_start": "2017-11-02", "test_end": "2018-10-10"},
        "0_Heating_3M_train_1Y_test": {"train_start": "2017-07-01", "train_end": "2017-10-10", "test_start": "2017-10-11", "test_end": "2018-10-10"},
        }
}


In [7]:
# Flat scenario table: scenario name -> date bounds given as "YYYY-MM-DD" strings
# (train_start / train_end / test_start / test_end), the keys `split_data` reads.
# NOTE(review): this rebinds `scenarios`, replacing the nested mapping from the
# previous cell, and is itself replaced by the next cell, which holds the same
# nine entries in a different order.
scenarios = {
    # 0: 1, 4372603
    "0_12M_train_7M_test": {"train_start": "2017-03-25", "train_end": "2018-03-25", "test_start": "2018-03-26", "test_end": "2018-10-10"},
    "0_10M_train_9M_test":  {"train_start": "2017-03-25", "train_end": "2018-01-25", "test_start": "2018-01-26", "test_end": "2018-10-10"},
    "0_8M_train_11M_test": {"train_start": "2017-03-25", "train_end": "2017-10-25", "test_start": "2017-10-26", "test_end": "2018-10-10"},
    
    # Non-Heating Periods
    "0_NonHeating_3M_train_3M_test": {"train_start": "2017-04-15", "train_end": "2017-07-15", "test_start": "2017-07-16", "test_end": "2017-10-01"},
    "0_NonHeating_4M_train_2M_test": {"train_start": "2017-04-15", "train_end": "2017-08-15", "test_start": "2017-08-16", "test_end": "2017-10-01"},
    "0_NonHeating_2M_train_4M_test": {"train_start": "2017-04-15", "train_end": "2017-06-15", "test_start": "2017-06-16", "test_end": "2017-10-01"},
    "0_NonHeating_1M_train_5M_test": {"train_start": "2017-04-15", "train_end": "2017-05-15", "test_start": "2017-05-16", "test_end": "2017-10-01"},

    # Heating Periods
    "0_Heating_5M_train_1Y_test": {"train_start": "2017-06-01", "train_end": "2017-11-01", "test_start": "2017-11-02", "test_end": "2018-10-10"},
    "0_Heating_3M_train_1Y_test": {"train_start": "2017-07-01", "train_end": "2017-10-10", "test_start": "2017-10-11", "test_end": "2018-10-10"},
}


In [8]:
# Flat scenario table: scenario name -> date bounds given as "YYYY-MM-DD" strings
# (train_start / train_end / test_start / test_end), the keys `split_data` reads.
# This is the third assignment to `scenarios`, so it is the one in effect after a
# top-to-bottom run. It holds the same nine entries as the previous cell; only the
# insertion (and therefore iteration) order differs.
scenarios = {
    # 0: 1, 4372603
    "0_NonHeating_1M_train_5M_test": {"train_start": "2017-04-15", "train_end": "2017-05-15", "test_start": "2017-05-16", "test_end": "2017-10-01"},

    # Heating Periods
    "0_Heating_5M_train_1Y_test": {"train_start": "2017-06-01", "train_end": "2017-11-01", "test_start": "2017-11-02", "test_end": "2018-10-10"},
    "0_Heating_3M_train_1Y_test": {"train_start": "2017-07-01", "train_end": "2017-10-10", "test_start": "2017-10-11", "test_end": "2018-10-10"},
    
    # Splits not tied to a single heating / non-heating period
    "0_12M_train_7M_test": {"train_start": "2017-03-25", "train_end": "2018-03-25", "test_start": "2018-03-26", "test_end": "2018-10-10"},
    "0_10M_train_9M_test":  {"train_start": "2017-03-25", "train_end": "2018-01-25", "test_start": "2018-01-26", "test_end": "2018-10-10"},
    "0_8M_train_11M_test": {"train_start": "2017-03-25", "train_end": "2017-10-25", "test_start": "2017-10-26", "test_end": "2018-10-10"},
    
    # Non-Heating Periods
    "0_NonHeating_3M_train_3M_test": {"train_start": "2017-04-15", "train_end": "2017-07-15", "test_start": "2017-07-16", "test_end": "2017-10-01"},
    "0_NonHeating_4M_train_2M_test": {"train_start": "2017-04-15", "train_end": "2017-08-15", "test_start": "2017-08-16", "test_end": "2017-10-01"},
    "0_NonHeating_2M_train_4M_test": {"train_start": "2017-04-15", "train_end": "2017-06-15", "test_start": "2017-06-16", "test_end": "2017-10-01"},
    
}


In [9]:
from MLForecastPipeline import *

In [10]:
def get_seasonal_data(df, start_date, end_date, date_col="ds"):
    """Return the rows of ``df`` whose ``date_col`` falls within a date window.

    Both ``start_date`` and ``end_date`` are inclusive. The result keeps the
    original index labels of the selected rows.
    """
    in_window = df[date_col].between(start_date, end_date)
    return df[in_window]

def split_data(df, scenario, date_col="ds"):
    """Slice ``df`` into the train and test windows described by ``scenario``.

    ``scenario`` must provide the keys ``train_start``, ``train_end``,
    ``test_start`` and ``test_end``; each window is selected with
    ``get_seasonal_data`` on ``date_col``.

    Returns:
        A ``(train_data, test_data)`` tuple.
    """
    train_from, train_to = scenario["train_start"], scenario["train_end"]
    train_part = get_seasonal_data(df, train_from, train_to, date_col)

    test_from, test_to = scenario["test_start"], scenario["test_end"]
    test_part = get_seasonal_data(df, test_from, test_to, date_col)

    return train_part, test_part

# Candidate regressors, keyed by a display name.
# A fixed seed is passed to the estimators that draw random numbers (SGDRegressor
# shuffles the training data between epochs), so a fresh Restart & Run All
# reproduces the same fits instead of varying from run to run.
# NOTE(review): assumes these are the scikit-learn / xgboost estimator classes
# re-exported by the `MLForecastPipeline` star import — confirm.
MODEL_RANDOM_STATE = 42

models = {
    "XGBRegressor": XGBRegressor(random_state=MODEL_RANDOM_STATE),
    "SGDRegressor": SGDRegressor(random_state=MODEL_RANDOM_STATE),
    "Ridge": Ridge(),
    "Lasso": Lasso()
}

# Define lag transformations

# Each entry is one candidate configuration: a dict whose integer keys (1, 7, 30)
# map to a list of window functions (expanding_mean or the fixed-window
# rolling_mean_14 / rolling_mean_30 wrappers).
# NOTE(review): presumably each dict is tried as one MLForecast `lag_transforms`
# setting (lag -> transforms) by the evaluation code — confirm in MLForecastPipeline.
lag_transforms_options = [
    {1: [expanding_mean], 7: [rolling_mean_14], 30: [expanding_mean]},
    {1: [rolling_mean_14], 7: [rolling_mean_30], 30: [expanding_mean]},
    {1: [rolling_mean_14], 30: [expanding_mean]},
    # Disabled candidates:
    # {1: [rolling_mean_14]},
    # {},
]

In [11]:
# from tqdm.notebook import trange, tqdm
# from time import sleep

# for i in trange(3, desc='1st loop'):
#     for j in tqdm(range(100), desc='2nd loop'):
#         sleep(0.01)

In [12]:
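# # Loop through scenarios and evaluate models (currently disabled).
# # NOTE(review): `selected_sensors1_df` below is not defined in any visible cell (only
# # `selected_sensors_df` is loaded), and `results = []` is overwritten on every iteration —
# # fix both before re-enabling.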
# results = []
# sensor_name = '0'

# formatted_df = format_df_to_mlforecast(selected_sensors1_df[['full_date', sensor_name]], 'full_date', sensor_name, sensor_name)
# formatted_df = formatted_df[['ds', 'y', 'unique_id']]

# for scenario_name, scenario in scenarios.items():
#     print(scenario_name)

#     train_df, test_df = split_data(formatted_df, scenario)

#     optimal_lags_list = get_optimal_lags(train_df, 'y')
#     target_transforms = get_dynamic_transforms(train_df)
#     results = evaluate_models(train_df, test_df, models, target_transforms, lag_transforms_options, optimal_lags_list)

#     save_results(results, f"results/{scenario_name}.json") 

In [13]:
# Draw one interactive line chart per sensor column, with `full_date` on the x-axis.
for column in selected_sensors_df.columns:
    # The date column is the x-axis and 'Unnamed: 0' is not a sensor; skip both.
    if column in ("full_date", 'Unnamed: 0'):
        continue

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=selected_sensors_df['full_date'],
        y=selected_sensors_df[column],
        mode='lines+markers',
        name=column,
        connectgaps=False,  # leave visible gaps where values are NaN
    ))
    fig.update_layout(
        title=f"Values of {column} Over Time",
        xaxis_title="Date",
        yaxis_title="Value",
        legend_title="Feature",
    )
    fig.show()