In [88]:
# root_dir = "../outputs/exp2_init_models/exp2_init_models"
# root_dir = "../../outputs/exp1_all"
root_dir = "../../outputs/Exp3/exp3_merged"
# root_dir = "../../outputs/Exp3"
exp_name = "exp3"

In [89]:
import numpy as np
import os
import pandas as pd
import warnings
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
import seaborn as sns

def process_model_directories(root_dir, lista_de_modelos,exp_name):
    MAE_values = []
    RMSE_values = []
    SMAPE_values = []
    r2_values = []
    MASE_values = []
    training_time_values = []
    inference_time_values = []
    model_memory_values = []

    for subdir in os.listdir(root_dir):
        subdir_path = os.path.join(root_dir, subdir)
        if os.path.isdir(subdir_path):
            for model_dir in lista_de_modelos:
                model_dir_path = os.path.join(subdir_path, model_dir)
                if os.path.isdir(model_dir_path):
                    if exp_name == "exp3":
                        try:
                            for file_name in os.listdir(model_dir_path):
                                if "model_metrics" in file_name and file_name.endswith('.csv'):
                                    model_metrics_path = os.path.join(model_dir_path, file_name)
                                    if os.path.isfile(model_metrics_path):
                                                model_metrics = pd.read_csv(model_metrics_path)
                                                rename_dict = {
                                                        'mae': 'MAE',
                                                        'rmse': 'RMSE',
                                                        'r2': 'r2',
                                                        'smape': 'SMAPE',
                                                        'mase': 'MASE',
                                                        'Training Time': 'Training_time',
                                                        'Inference Time': 'Inference_time',
                                                        'Model memory (MB)': 'Model memory (MB)'
                                                    }
                                                model_metrics.rename(columns=rename_dict, inplace=True)

                        except Exception as e:
                            print(f"An error occurred: {e} and {model_metrics_path} does not exist")
                    else:
                        model_metrics_path = os.path.join(model_dir_path, "model_data.csv")
                        model_metrics = pd.read_csv(model_metrics_path)
                    MAE_values.append(float(model_metrics.MAE))
                    RMSE_values.append(float(model_metrics['RMSE']))
                    SMAPE_values.append(float(model_metrics['SMAPE']))
                    r2_values.append(float(model_metrics['r2']))
                    MASE_values.append(float(model_metrics['MASE']))
                    training_time_values.append(float(model_metrics['Training_time']))
                    inference_time_values.append(float(model_metrics['Inference_time']))
                    model_memory_values.append(float(model_metrics['Model memory (MB)']))
        
    MAE_array = np.array(MAE_values)
    RMSE_array = np.array(RMSE_values)
    SMAPE_array = np.array(SMAPE_values)
    r2_array = np.array(r2_values)
    MASE_array = np.array(MASE_values)
    training_time_array = np.array(training_time_values)
    inference_time_array = np.array(inference_time_values)
    model_memory_array = np.array(model_memory_values)
    print(f"processed {len(model_memory_array)} seeds")
    return (MAE_array, RMSE_array, SMAPE_array, r2_array, MASE_array,
            training_time_array, inference_time_array, model_memory_array)
def arrays_to_dataframe(MAE_array, RMSE_array, SMAPE_array, r2_array, MASE_array,
                        training_time_array, inference_time_array, model_memory_array):
    SMALL_CONSTANT = 1
    log_training_time = np.log(training_time_array + SMALL_CONSTANT)
    log_inference_time = np.log(inference_time_array + SMALL_CONSTANT)

    # Create a DataFrame
    df = pd.DataFrame({
        'MAE': MAE_array,
        'RMSE': RMSE_array,
        'SMAPE': SMAPE_array,
        'r2': r2_array,
        'MASE': MASE_array,
        'Training_time': training_time_array,
        'Training_time_log': log_training_time,
        'Inference_time': inference_time_array,
        'Inference_time_log': log_inference_time,
        'Model_memory': model_memory_array
    })
    
    return df
import pandas as pd

def extract_window_size(model_name):
    parts = model_name.split('_ws_')
    if len(parts) > 1:
        try:
            window_size = int(parts[1])
            return parts[0], window_size
        except ValueError:
            pass
    return model_name, None

def add_window_size_column(df):
    if 'Window Size' not in df.columns:
        df[['Model', 'Window Size']] = df['Model'].apply(extract_window_size).apply(pd.Series)
    return df

def update_model_names(df, name_mapping):
    df['Model'] = df['Model'].replace(name_mapping)
    return df

## Generate csv for every seed

In [90]:
root_dir_exp1 = os.path.join(root_dir,"testbed_0")# because model names are inside seed folders
folder_names = []

# Iterate through each item in the directory
for item in os.listdir(root_dir_exp1):
    if os.path.isdir(os.path.join(root_dir_exp1, item)):

        folder_names.append(item)
folder_names

['Finetuned_128CL_True_RoPE_tested_on_64CL',
 'Finetuned_256CL_False_RoPE_tested_on_64CL',
 'Finetuned_32CL_False_RoPE_tested_on_32CL',
 'Finetuned_128CL_False_RoPE_tested_on_128CL',
 'Finetuned_128CL_False_RoPE_tested_on_256CL',
 'Finetuned_256CL_True_RoPE_tested_on_256CL',
 'Finetuned_32CL_True_RoPE_tested_on_32CL',
 'Finetuned_128CL_True_RoPE_tested_on_32CL',
 'Finetuned_32CL_False_RoPE_tested_on_256CL',
 'Finetuned_256CL_True_RoPE_tested_on_32CL',
 'Finetuned_64CL_True_RoPE_tested_on_64CL',
 'Finetuned_128CL_True_RoPE_tested_on_256CL',
 'Finetuned_32CL_False_RoPE_tested_on_128CL',
 'Finetuned_32CL_True_RoPE_tested_on_256CL',
 'Finetuned_128CL_False_RoPE_tested_on_64CL',
 'Finetuned_256CL_False_RoPE_tested_on_256CL',
 'Finetuned_64CL_True_RoPE_tested_on_32CL',
 'Finetuned_32CL_False_RoPE_tested_on_64CL',
 'Finetuned_64CL_True_RoPE_tested_on_256CL',
 'Finetuned_256CL_False_RoPE_tested_on_32CL',
 'Finetuned_128CL_True_RoPE_tested_on_128CL',
 'Finetuned_256CL_True_RoPE_tested_on_128CL'

In [108]:
import pandas as pd

def extract_window_size(model_name):
    parts = model_name.split('_ws_')
    if len(parts) > 1:
        try:
            return parts[0], int(parts[1])
        except ValueError:
            pass
    return model_name, None

def add_window_size_column(df):
    df[['Model', 'Window Size']] = df['Model'].apply(extract_window_size).apply(pd.Series)
    return df

def update_model_names(df, name_mapping):
    df['Model'] = df['Model'].replace(name_mapping)
    return df

# Initialize a list to collect dataframes
all_metrics = []

# Iterate through each model to process directories and collect metrics
for model_name in folder_names:
    lista_de_modelos = [f"{model_name}"]
    metrics_arrays = process_model_directories(root_dir, lista_de_modelos, exp_name)
    df = arrays_to_dataframe(*metrics_arrays)
    df["Model"] = model_name
    all_metrics.append(df)


all_models_df = pd.concat(all_metrics, ignore_index=True)

all_models_df.rename(columns={
    'Inference_time': 'Inference Time',
    'Training_time': 'Training Time',
    'Model_memory':'Model memory (MB)'
}, inplace=True)

all_models_df = add_window_size_column(all_models_df)
print(all_models_df.Model.unique())
if exp_name == "exp2":
    name_mapping = {
        'AdaptiveRandomForest': 'Adaptive Random Forest (ARF)',
        'HoeffdingAdaptiveTreeRegressor': 'Hoeffding Adaptive Tree Regressor',
        'HoeffdingTreeRegressor': 'Hoeffding Tree Regressor',
        'MLP_partialfit': 'MLP partialfit',
        'PassiveAggressive': 'Passive Aggressive (PA)',
        'SGDRegressor': 'SGD Regressor',
        'SRPRegressor': 'SRP Regressor',
        'XGBRegressor': 'XGBoost Regressor',
    }
    all_models_df = update_model_names(all_models_df, name_mapping)
elif exp_name=="exp1":
    name_mapping = {
        'SVR': 'Support Vector Regressor (SVR)',
        'XGBRegressor': 'XGBoost Regressor',
        'RandomForestRegressor': 'Random Forest',
        'LinearRegression': 'Linear Regression (LR)',
        'AdaBoostRegressor': 'Ada Boost Regressor',
        'DecisionTreeRegressor': 'Decision Tree Regressor',
        'MLPRegressor': 'MLP partialfit',
        'PassiveAggressiveRegressor': 'Passive Aggressive (PA)',
        'KNeighborsRegressor': 'K-Neighbors Regressor',
        'LSTM': 'LSTM',
        'GRU': 'GRU',
        'SGDRegressor': 'SGD Regressor',
        'BI-LSTM': 'BI-LSTM',
        'LSTM_ATTN': 'LSTM with Attention'
    }
    all_models_df = update_model_names(all_models_df, name_mapping)

    all_models_df = all_models_df[all_models_df['Model'] != 'MLP partialfit']
    all_models_df = all_models_df[all_models_df['Model'] != 'K-Neighbors Regressor']

all_models_df.to_csv(f"../../outputs/{exp_name}_full_results.csv", index=False)


processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 16 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 16 seeds
processed 20 seeds
processed 15 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 15 seeds
processed 20 seeds
['Finetuned_128CL_True_RoPE_tested_on_64CL'
 'Finetuned_256CL_False_RoPE_tested_on_64CL'
 'Finetuned_32CL_False_RoPE_tested_on_32CL'
 'Finetuned_128CL_False_RoPE_tested_on_128CL'
 'Finetuned_128CL_False_RoPE_tested_on_256CL'
 'Finetuned_256CL_True_RoPE_tested_on_256CL'
 'Finetuned_32CL_True_RoPE_tested_on_32CL'
 'Finetuned_128CL_True_RoPE_tested_on_32CL'
 'Finetuned_32CL_False_RoPE_tested_on_256CL'
 'Finetuned_256CL_True_RoPE_tested_on_32CL'
 'Finetuned_64CL_True_

In [109]:
all_models_df

Unnamed: 0,MAE,RMSE,SMAPE,r2,MASE,Training Time,Training_time_log,Inference Time,Inference_time_log,Model memory (MB),Model,Window Size
0,3.979095,8.427499,23.472632,0.913205,1.125024,1470.660387,7.294147,76.969064,4.356312,0.316566,Finetuned_128CL_True_RoPE_tested_on_64CL,
1,5.061859,8.177308,24.805921,0.910219,1.605502,1467.349281,7.291894,76.777868,4.353857,0.316566,Finetuned_128CL_True_RoPE_tested_on_64CL,
2,4.735010,8.081811,24.093711,0.911998,1.305857,1478.667276,7.299573,76.599787,4.351565,0.316505,Finetuned_128CL_True_RoPE_tested_on_64CL,
3,5.736139,10.110521,26.056445,0.867899,2.175479,1469.414234,7.293299,76.758397,4.353607,0.316566,Finetuned_128CL_True_RoPE_tested_on_64CL,
4,4.198985,8.101142,22.758916,0.917276,1.163660,1466.501148,7.291316,76.607342,4.351662,0.316566,Finetuned_128CL_True_RoPE_tested_on_64CL,
...,...,...,...,...,...,...,...,...,...,...,...,...
537,4.684654,9.123501,23.969622,0.895643,1.212260,1477.973678,7.299104,136.439170,4.923181,0.316566,Finetuned_256CL_True_RoPE_tested_on_64CL,
538,3.833669,7.984131,22.693442,0.923137,1.175680,1478.619176,7.299540,136.345406,4.922499,0.316566,Finetuned_256CL_True_RoPE_tested_on_64CL,
539,4.502005,8.037910,23.373216,0.917776,1.260558,1481.795525,7.301684,136.539286,4.923910,0.316566,Finetuned_256CL_True_RoPE_tested_on_64CL,
540,6.104835,11.565793,27.485174,0.829384,1.386435,1481.444956,7.301448,136.494026,4.923580,0.316566,Finetuned_256CL_True_RoPE_tested_on_64CL,


In [110]:
all_models_df.Model.unique()

array(['Finetuned_128CL_True_RoPE_tested_on_64CL',
       'Finetuned_256CL_False_RoPE_tested_on_64CL',
       'Finetuned_32CL_False_RoPE_tested_on_32CL',
       'Finetuned_128CL_False_RoPE_tested_on_128CL',
       'Finetuned_128CL_False_RoPE_tested_on_256CL',
       'Finetuned_256CL_True_RoPE_tested_on_256CL',
       'Finetuned_32CL_True_RoPE_tested_on_32CL',
       'Finetuned_128CL_True_RoPE_tested_on_32CL',
       'Finetuned_32CL_False_RoPE_tested_on_256CL',
       'Finetuned_256CL_True_RoPE_tested_on_32CL',
       'Finetuned_64CL_True_RoPE_tested_on_64CL',
       'Finetuned_128CL_True_RoPE_tested_on_256CL',
       'Finetuned_32CL_False_RoPE_tested_on_128CL',
       'Finetuned_32CL_True_RoPE_tested_on_256CL',
       'Finetuned_128CL_False_RoPE_tested_on_64CL',
       'Finetuned_256CL_False_RoPE_tested_on_256CL',
       'Finetuned_64CL_True_RoPE_tested_on_32CL',
       'Finetuned_32CL_False_RoPE_tested_on_64CL',
       'Finetuned_64CL_True_RoPE_tested_on_256CL',
       'Finetuned_256C

In [111]:
# all_models_df[(all_models_df['Model'] == "Adaptive Random Forest (ARF)") & (all_models_df['Window Size'] == 64)][["MAE","Training Time","Inference Time"]]

In [112]:
all_models_df

Unnamed: 0,MAE,RMSE,SMAPE,r2,MASE,Training Time,Training_time_log,Inference Time,Inference_time_log,Model memory (MB),Model,Window Size
0,3.979095,8.427499,23.472632,0.913205,1.125024,1470.660387,7.294147,76.969064,4.356312,0.316566,Finetuned_128CL_True_RoPE_tested_on_64CL,
1,5.061859,8.177308,24.805921,0.910219,1.605502,1467.349281,7.291894,76.777868,4.353857,0.316566,Finetuned_128CL_True_RoPE_tested_on_64CL,
2,4.735010,8.081811,24.093711,0.911998,1.305857,1478.667276,7.299573,76.599787,4.351565,0.316505,Finetuned_128CL_True_RoPE_tested_on_64CL,
3,5.736139,10.110521,26.056445,0.867899,2.175479,1469.414234,7.293299,76.758397,4.353607,0.316566,Finetuned_128CL_True_RoPE_tested_on_64CL,
4,4.198985,8.101142,22.758916,0.917276,1.163660,1466.501148,7.291316,76.607342,4.351662,0.316566,Finetuned_128CL_True_RoPE_tested_on_64CL,
...,...,...,...,...,...,...,...,...,...,...,...,...
537,4.684654,9.123501,23.969622,0.895643,1.212260,1477.973678,7.299104,136.439170,4.923181,0.316566,Finetuned_256CL_True_RoPE_tested_on_64CL,
538,3.833669,7.984131,22.693442,0.923137,1.175680,1478.619176,7.299540,136.345406,4.922499,0.316566,Finetuned_256CL_True_RoPE_tested_on_64CL,
539,4.502005,8.037910,23.373216,0.917776,1.260558,1481.795525,7.301684,136.539286,4.923910,0.316566,Finetuned_256CL_True_RoPE_tested_on_64CL,
540,6.104835,11.565793,27.485174,0.829384,1.386435,1481.444956,7.301448,136.494026,4.923580,0.316566,Finetuned_256CL_True_RoPE_tested_on_64CL,


In [113]:
all_models_df.columns

Index(['MAE', 'RMSE', 'SMAPE', 'r2', 'MASE', 'Training Time',
       'Training_time_log', 'Inference Time', 'Inference_time_log',
       'Model memory (MB)', 'Model', 'Window Size'],
      dtype='object')

In [117]:
if exp_name != "exp3":

    all_models_df = all_models_df.dropna()

    summary = all_models_df.groupby(['Model', 'Window Size']).agg({
        'MAE': ['mean', 'std'],
        'RMSE': ['mean', 'std'],
        'SMAPE': ['mean', 'std'],
        'r2': ['mean', 'std'],
        'MASE': ['mean', 'std'],
        'Training Time': ['mean'],
        'Inference Time': ['mean'],
        'Model memory (MB)': ['mean'],
    }).reset_index()

    summary = summary.round(3)

    summary.columns = ['_'.join(col).strip() if col[1] else col[0] for col in summary.columns.values]


    summary = summary.rename(columns={
        'Model_': 'Model',
        'Window Size_': 'Window Size',
        'MAE_mean': 'MAE_mean',
        'MAE_std': 'MAE_std',
        'RMSE_mean': 'RMSE_mean',
        'RMSE_std': 'RMSE_std',
        'SMAPE_mean': 'SMAPE_mean',
        'SMAPE_std': 'SMAPE_std',
        'r2_mean': 'r2_mean',
        'r2_std': 'r2_std',
        'MASE_mean': 'MASE_mean',
        'MASE_std': 'MASE_std',
        'Training Time_mean': 'Training Time',
        'Inference Time_mean': 'Inference Time',
        'Model memory (MB)_mean': 'Model memory (MB)'
    })

    summary.to_csv(f"../../outputs/{exp_name}_model_metrics_avg.csv", index=False)

else:
    
    summary = all_models_df.groupby(['Model']).agg({
        'MAE': ['mean', 'std'],
        'RMSE': ['mean', 'std'],
        'SMAPE': ['mean', 'std'],
        'r2': ['mean', 'std'],
        'MASE': ['mean', 'std'],
        'Training Time': ['min', 'max'],#['mean'],
        'Inference Time': ['min', 'max'],#['mean'],
        'Model memory (MB)': ['mean'],
    }).reset_index()

    summary = summary.round(3)
    summary.to_csv(f"../../outputs/{exp_name}_model_metrics_avg.csv", index=False)

summary 

Unnamed: 0_level_0,Model,MAE,MAE,RMSE,RMSE,SMAPE,SMAPE,r2,r2,MASE,MASE,Training Time,Training Time,Inference Time,Inference Time,Model memory (MB)
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,min,max,min,max,mean
0,Finetuned_128CL_False_RoPE_tested_on_128CL,3.668,0.189,7.229,0.402,22.71,0.555,0.938,0.007,1.054,0.108,1464.059,1479.32,76.326,76.96,0.316
1,Finetuned_128CL_False_RoPE_tested_on_256CL,3.529,0.131,7.427,0.277,21.791,0.431,0.937,0.005,0.873,0.039,1462.988,1479.112,75.386,75.939,0.316
2,Finetuned_128CL_False_RoPE_tested_on_32CL,5.439,0.67,11.5,0.708,25.21,1.234,0.826,0.022,1.691,0.172,1469.785,1481.935,76.693,77.446,0.316
3,Finetuned_128CL_False_RoPE_tested_on_64CL,4.836,0.561,10.536,1.479,23.567,1.029,0.861,0.037,1.755,0.308,1466.388,1482.254,76.551,77.096,0.316
4,Finetuned_128CL_True_RoPE_tested_on_128CL,3.608,0.216,7.102,0.473,22.37,0.675,0.94,0.008,1.068,0.059,1466.695,1593.816,76.048,81.64,0.317
5,Finetuned_128CL_True_RoPE_tested_on_256CL,3.601,0.148,7.523,0.351,22.403,0.599,0.935,0.006,0.955,0.056,1461.805,1632.746,75.467,77.78,0.317
6,Finetuned_128CL_True_RoPE_tested_on_32CL,5.6,0.805,11.877,1.061,24.876,1.489,0.821,0.034,1.909,0.31,1463.755,1537.669,76.67,78.779,0.317
7,Finetuned_128CL_True_RoPE_tested_on_64CL,4.898,0.585,9.126,1.426,24.304,0.889,0.891,0.036,1.587,0.352,1466.501,1539.456,76.495,78.764,0.317
8,Finetuned_256CL_False_RoPE_tested_on_128CL,3.764,0.152,7.756,0.541,22.754,0.626,0.929,0.009,0.988,0.057,1479.262,1540.526,135.756,136.279,0.316
9,Finetuned_256CL_False_RoPE_tested_on_256CL,3.784,0.278,7.882,0.371,22.681,0.57,0.928,0.007,0.929,0.071,1478.983,1643.409,134.342,134.759,0.316


In [127]:
print(summary[["Training Time"]].mean())
print(summary[["Inference Time"]].mean())
print(summary[["Model memory (MB)"]].mean())

Training Time  min    1570.388857
               max    1850.087250
dtype: float64
Inference Time  min     89.308679
                max    107.533750
dtype: float64
Model memory (MB)  mean    0.316571
dtype: float64
