In [15]:
root_dir = "../exp2_init_models/"
exp_name = "exp2"

In [16]:
import numpy as np
import os
import pandas as pd
import warnings
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
import seaborn as sns

def process_model_directories(root_dir, lista_de_modelos,exp_name):
    MAE_values = []
    RMSE_values = []
    SMAPE_values = []
    r2_values = []
    MASE_values = []
    training_time_values = []
    inference_time_values = []
    model_memory_values = []

    for subdir in os.listdir(root_dir):
        subdir_path = os.path.join(root_dir, subdir)
        if os.path.isdir(subdir_path):
            for model_dir in lista_de_modelos:
                model_dir_path = os.path.join(subdir_path, model_dir)
                if os.path.isdir(model_dir_path):
                    if exp_name == "exp3":
                        try:
                            for file_name in os.listdir(model_dir_path):
                                if "model_metrics" in file_name and file_name.endswith('.csv'):
                                    model_metrics_path = os.path.join(model_dir_path, file_name)
                                    if os.path.isfile(model_metrics_path):
                                                model_metrics = pd.read_csv(model_metrics_path)
                                                rename_dict = {
                                                        'mae': 'MAE',
                                                        'rmse': 'RMSE',
                                                        'r2': 'r2',
                                                        'smape': 'SMAPE',
                                                        'mase': 'MASE',
                                                        'Training Time': 'Training_time',
                                                        'Inference Time': 'Inference_time',
                                                        'Model memory (MB)': 'Model memory (MB)'
                                                    }
                                                model_metrics.rename(columns=rename_dict, inplace=True)

                        except Exception as e:
                            print(f"An error occurred: {e} and {model_metrics_path} does not exist")
                    else:
                        model_metrics_path = os.path.join(model_dir_path, "model_data.csv")
                        model_metrics = pd.read_csv(model_metrics_path)
                    MAE_values.append(float(model_metrics.MAE))
                    RMSE_values.append(float(model_metrics['RMSE']))
                    SMAPE_values.append(float(model_metrics['SMAPE']))
                    r2_values.append(float(model_metrics['r2']))
                    MASE_values.append(float(model_metrics['MASE']))
                    training_time_values.append(float(model_metrics['Training_time']))
                    inference_time_values.append(float(model_metrics['Inference_time']))
                    model_memory_values.append(float(model_metrics['Model memory (MB)']))
        
    MAE_array = np.array(MAE_values)
    RMSE_array = np.array(RMSE_values)
    SMAPE_array = np.array(SMAPE_values)
    r2_array = np.array(r2_values)
    MASE_array = np.array(MASE_values)
    training_time_array = np.array(training_time_values)
    inference_time_array = np.array(inference_time_values)
    model_memory_array = np.array(model_memory_values)
    print(f"processed {len(model_memory_array)} seeds")
    return (MAE_array, RMSE_array, SMAPE_array, r2_array, MASE_array,
            training_time_array, inference_time_array, model_memory_array)
def arrays_to_dataframe(MAE_array, RMSE_array, SMAPE_array, r2_array, MASE_array,
                        training_time_array, inference_time_array, model_memory_array):
    SMALL_CONSTANT = 1
    log_training_time = np.log(training_time_array + SMALL_CONSTANT)
    log_inference_time = np.log(inference_time_array + SMALL_CONSTANT)

    # Create a DataFrame
    df = pd.DataFrame({
        'MAE': MAE_array,
        'RMSE': RMSE_array,
        'SMAPE': SMAPE_array,
        'r2': r2_array,
        'MASE': MASE_array,
        'Training_time': training_time_array,
        'Training_time_log': log_training_time,
        'Inference_time': inference_time_array,
        'Inference_time_log': log_inference_time,
        'Model_memory': model_memory_array
    })
    
    return df
import pandas as pd

def extract_window_size(model_name):
    parts = model_name.split('_ws_')
    if len(parts) > 1:
        try:
            window_size = int(parts[1])
            return parts[0], window_size
        except ValueError:
            pass
    return model_name, None

def add_window_size_column(df):
    if 'Window Size' not in df.columns:
        df[['Model', 'Window Size']] = df['Model'].apply(extract_window_size).apply(pd.Series)
    return df

def update_model_names(df, name_mapping):
    df['Model'] = df['Model'].replace(name_mapping)
    return df

## Generate csv for every seed

In [17]:
root_dir_exp1 = os.path.join(root_dir,"testbed_0")# because model names are inside seed folders
folder_names = []

# Iterate through each item in the directory
for item in os.listdir(root_dir_exp1):
    if os.path.isdir(os.path.join(root_dir_exp1, item)):

        folder_names.append(item)
folder_names

['HoeffdingTreeRegressor_ws_32',
 'PassiveAggressive_ws_32',
 'MLP_partialfit_ws_9',
 'MLP_partialfit_ws_20',
 'SRPRegressor_ws_9',
 'XGBRegressor_ws_20',
 'HoeffdingTreeRegressor_ws_12',
 'HoeffdingTreeRegressor_ws_64',
 'PassiveAggressive_ws_12',
 'AdaptiveRandomForest_ws_12',
 'SRPRegressor_ws_6',
 'XGBRegressor_ws_9',
 'PassiveAggressive_ws_6',
 'MLP_partialfit_ws_64',
 'AdaptiveRandomForest_ws_64',
 'AdaptiveRandomForest_ws_9',
 'SGDRegressor_ws_64',
 'PassiveAggressive_ws_20',
 'PassiveAggressive_ws_9',
 'PassiveAggressive_ws_64',
 'SGDRegressor_ws_20',
 'HoeffdingAdaptiveTreeRegressor_ws_12',
 'XGBRegressor_ws_64',
 'HoeffdingTreeRegressor_ws_20',
 'SRPRegressor_ws_32',
 'HoeffdingAdaptiveTreeRegressor_ws_6',
 'HoeffdingAdaptiveTreeRegressor_ws_20',
 'SGDRegressor_ws_32',
 'MLP_partialfit_ws_12',
 'SGDRegressor_ws_6',
 'MLP_partialfit_ws_32',
 'HoeffdingTreeRegressor_ws_6',
 'MLP_partialfit_ws_6',
 'SRPRegressor_ws_64',
 'SRPRegressor_ws_20',
 'AdaptiveRandomForest_ws_20',
 'Hoe

In [18]:
import pandas as pd

def extract_window_size(model_name):
    parts = model_name.split('_ws_')
    if len(parts) > 1:
        try:
            return parts[0], int(parts[1])
        except ValueError:
            pass
    return model_name, None

def add_window_size_column(df):
    df[['Model', 'Window Size']] = df['Model'].apply(extract_window_size).apply(pd.Series)
    return df

def update_model_names(df, name_mapping):
    df['Model'] = df['Model'].replace(name_mapping)
    return df

# Initialize a list to collect dataframes
all_metrics = []

# Iterate through each model to process directories and collect metrics
for model_name in folder_names:
    lista_de_modelos = [f"{model_name}"]
    metrics_arrays = process_model_directories(root_dir, lista_de_modelos, exp_name)
    df = arrays_to_dataframe(*metrics_arrays)
    df["Model"] = model_name
    all_metrics.append(df)


all_models_df = pd.concat(all_metrics, ignore_index=True)

all_models_df.rename(columns={
    'Inference_time': 'Inference Time',
    'Training_time': 'Training Time',
    'Model_memory':'Model memory (MB)'
}, inplace=True)

all_models_df = add_window_size_column(all_models_df)
print(all_models_df.Model.unique())
if exp_name == "exp2":
    name_mapping = {
        'AdaptiveRandomForest': 'Adaptive Random Forest (ARF)',
        'HoeffdingAdaptiveTreeRegressor': 'Hoeffding Adaptive Tree Regressor',
        'HoeffdingTreeRegressor': 'Hoeffding Tree Regressor',
        'MLP_partialfit': 'MLP partialfit',
        'PassiveAggressive': 'Passive Aggressive (PA)',
        'SGDRegressor': 'SGD Regressor',
        'SRPRegressor': 'SRP Regressor',
        'XGBRegressor': 'XGBoost Regressor',
    }
    all_models_df = update_model_names(all_models_df, name_mapping)
elif exp_name=="exp1":
    name_mapping = {
        'SVR': 'Support Vector Regressor (SVR)',
        'XGBRegressor': 'XGBoost Regressor',
        'RandomForestRegressor': 'Random Forest',
        'LinearRegression': 'Linear Regression (LR)',
        'AdaBoostRegressor': 'Ada Boost Regressor',
        'DecisionTreeRegressor': 'Decision Tree Regressor',
        'MLPRegressor': 'MLP partialfit',
        'PassiveAggressiveRegressor': 'Passive Aggressive (PA)',
        'KNeighborsRegressor': 'K-Neighbors Regressor',
        'LSTM': 'LSTM',
        'GRU': 'GRU',
        'SGDRegressor': 'SGD Regressor',
        'BI-LSTM': 'BI-LSTM',
        'LSTM_ATTN': 'LSTM with Attention'
    }
    all_models_df = update_model_names(all_models_df, name_mapping)

    all_models_df = all_models_df[all_models_df['Model'] != 'MLP partialfit']
    all_models_df = all_models_df[all_models_df['Model'] != 'K-Neighbors Regressor']

all_models_df.to_csv(f"{root_dir}/{exp_name}_full_results.csv", index=False)


processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
['HoeffdingTreeRegressor' 'PassiveAggressive' 'MLP_partialfit'
 'SRPRegressor' 'XGBRegre

In [19]:
all_models_df

Unnamed: 0,MAE,RMSE,SMAPE,r2,MASE,Training Time,Training_time_log,Inference Time,Inference_time_log,Model memory (MB),Model,Window Size
0,4.312017,9.709203,23.301031,0.896128,1.112356,14.681324,2.752470,5.564582,1.881689,12.997726,Hoeffding Tree Regressor,32
1,4.312017,9.709203,23.301031,0.896128,1.112356,14.668052,2.751624,5.236668,1.830446,12.997726,Hoeffding Tree Regressor,32
2,4.312017,9.709203,23.301031,0.896128,1.112356,20.067662,3.047739,7.042866,2.084786,12.997726,Hoeffding Tree Regressor,32
3,4.312017,9.709203,23.301031,0.896128,1.112356,19.397012,3.015388,5.024833,1.795890,12.997726,Hoeffding Tree Regressor,32
4,4.312017,9.709203,23.301031,0.896128,1.112356,16.269219,2.848926,7.188344,2.102712,12.997726,Hoeffding Tree Regressor,32
...,...,...,...,...,...,...,...,...,...,...,...,...
955,4.137304,9.554504,23.161217,0.899412,1.067286,0.242643,0.217241,2189.719319,7.691985,0.003525,XGBoost Regressor,32
956,4.085987,9.429154,23.025020,0.902034,1.054048,0.228477,0.205775,2219.412131,7.705448,0.003517,XGBoost Regressor,32
957,4.291498,9.617171,24.144724,0.898088,1.107063,0.240145,0.215228,2212.626487,7.702387,0.003525,XGBoost Regressor,32
958,4.070249,9.431148,23.048721,0.901992,1.049988,0.276109,0.243815,2170.217190,7.683043,0.003525,XGBoost Regressor,32


In [20]:
all_models_df.Model.unique()

array(['Hoeffding Tree Regressor', 'Passive Aggressive (PA)',
       'MLP partialfit', 'SRP Regressor', 'XGBoost Regressor',
       'Adaptive Random Forest (ARF)', 'SGD Regressor',
       'Hoeffding Adaptive Tree Regressor'], dtype=object)

In [21]:
# all_models_df[(all_models_df['Model'] == "Adaptive Random Forest (ARF)") & (all_models_df['Window Size'] == 64)][["MAE","Training Time","Inference Time"]]

In [22]:
all_models_df

Unnamed: 0,MAE,RMSE,SMAPE,r2,MASE,Training Time,Training_time_log,Inference Time,Inference_time_log,Model memory (MB),Model,Window Size
0,4.312017,9.709203,23.301031,0.896128,1.112356,14.681324,2.752470,5.564582,1.881689,12.997726,Hoeffding Tree Regressor,32
1,4.312017,9.709203,23.301031,0.896128,1.112356,14.668052,2.751624,5.236668,1.830446,12.997726,Hoeffding Tree Regressor,32
2,4.312017,9.709203,23.301031,0.896128,1.112356,20.067662,3.047739,7.042866,2.084786,12.997726,Hoeffding Tree Regressor,32
3,4.312017,9.709203,23.301031,0.896128,1.112356,19.397012,3.015388,5.024833,1.795890,12.997726,Hoeffding Tree Regressor,32
4,4.312017,9.709203,23.301031,0.896128,1.112356,16.269219,2.848926,7.188344,2.102712,12.997726,Hoeffding Tree Regressor,32
...,...,...,...,...,...,...,...,...,...,...,...,...
955,4.137304,9.554504,23.161217,0.899412,1.067286,0.242643,0.217241,2189.719319,7.691985,0.003525,XGBoost Regressor,32
956,4.085987,9.429154,23.025020,0.902034,1.054048,0.228477,0.205775,2219.412131,7.705448,0.003517,XGBoost Regressor,32
957,4.291498,9.617171,24.144724,0.898088,1.107063,0.240145,0.215228,2212.626487,7.702387,0.003525,XGBoost Regressor,32
958,4.070249,9.431148,23.048721,0.901992,1.049988,0.276109,0.243815,2170.217190,7.683043,0.003525,XGBoost Regressor,32


In [23]:
all_models_df.columns

Index(['MAE', 'RMSE', 'SMAPE', 'r2', 'MASE', 'Training Time',
       'Training_time_log', 'Inference Time', 'Inference_time_log',
       'Model memory (MB)', 'Model', 'Window Size'],
      dtype='object')

In [24]:
if exp_name != "exp3":

    all_models_df = all_models_df.dropna()

    summary = all_models_df.groupby(['Model', 'Window Size']).agg({
        'MAE': ['mean', 'std'],
        'RMSE': ['mean', 'std'],
        'SMAPE': ['mean', 'std'],
        'r2': ['mean', 'std'],
        'MASE': ['mean', 'std'],
        'Training Time': ['mean'],
        'Inference Time': ['mean'],
        'Model memory (MB)': ['mean'],
    }).reset_index()

    summary = summary.round(3)

    summary.columns = ['_'.join(col).strip() if col[1] else col[0] for col in summary.columns.values]


    summary = summary.rename(columns={
        'Model_': 'Model',
        'Window Size_': 'Window Size',
        'MAE_mean': 'MAE_mean',
        'MAE_std': 'MAE_std',
        'RMSE_mean': 'RMSE_mean',
        'RMSE_std': 'RMSE_std',
        'SMAPE_mean': 'SMAPE_mean',
        'SMAPE_std': 'SMAPE_std',
        'r2_mean': 'r2_mean',
        'r2_std': 'r2_std',
        'MASE_mean': 'MASE_mean',
        'MASE_std': 'MASE_std',
        'Training Time_mean': 'Training Time',
        'Inference Time_mean': 'Inference Time',
        'Model memory (MB)_mean': 'Model memory (MB)'
    })

    summary.to_csv(f"{root_dir}/{exp_name}_model_metrics_avg.csv", index=False)

else:
    
    summary = all_models_df.groupby(['Model']).agg({
        'MAE': ['mean', 'std'],
        'RMSE': ['mean', 'std'],
        'SMAPE': ['mean', 'std'],
        'r2': ['mean', 'std'],
        'MASE': ['mean', 'std'],
        'Training Time': ['min', 'max'],#['mean'],
        'Inference Time': ['min', 'max'],#['mean'],
        'Model memory (MB)': ['mean'],
    }).reset_index()

    summary = summary.round(3)
    summary.to_csv(f"{root_dir}/{exp_name}_model_metrics_avg.csv", index=False)

summary 

Unnamed: 0,Model,Window Size,MAE_mean,MAE_std,RMSE_mean,RMSE_std,SMAPE_mean,SMAPE_std,r2_mean,r2_std,MASE_mean,MASE_std,Training Time,Inference Time,Model memory (MB)
0,Adaptive Random Forest (ARF),6,3.427,0.018,9.078,0.023,20.17,0.131,0.909,0.0,0.885,0.005,71.431,31.641,146.031
1,Adaptive Random Forest (ARF),9,3.553,0.038,9.212,0.066,20.291,0.162,0.906,0.001,0.918,0.01,95.346,40.418,184.937
2,Adaptive Random Forest (ARF),12,3.729,0.08,9.429,0.141,20.721,0.204,0.902,0.003,0.963,0.021,99.159,41.007,187.451
3,Adaptive Random Forest (ARF),20,4.193,0.177,9.952,0.256,21.949,0.445,0.891,0.006,1.083,0.046,113.908,42.076,157.025
4,Adaptive Random Forest (ARF),32,5.63,0.765,11.28,0.756,25.978,1.812,0.859,0.019,1.452,0.197,140.108,44.412,93.727
5,Adaptive Random Forest (ARF),64,11.661,0.213,17.159,0.24,38.946,0.481,0.676,0.009,3.011,0.055,153.104,38.984,3.986
6,Hoeffding Adaptive Tree Regressor,6,3.795,0.063,9.34,0.085,21.583,0.42,0.904,0.002,0.981,0.016,4.567,2.575,2.819
7,Hoeffding Adaptive Tree Regressor,9,3.924,0.062,9.454,0.108,22.128,0.311,0.901,0.002,1.014,0.016,5.786,2.886,4.411
8,Hoeffding Adaptive Tree Regressor,12,4.039,0.085,9.61,0.203,22.626,0.624,0.898,0.004,1.043,0.022,7.075,3.185,5.815
9,Hoeffding Adaptive Tree Regressor,20,4.27,0.134,9.679,0.183,23.467,0.609,0.897,0.004,1.102,0.035,10.8,4.007,10.013


In [25]:
print(summary[["Training Time"]].mean())
print(summary[["Inference Time"]].mean())
print(summary[["Model memory (MB)"]].mean())

Training Time    54.685437
dtype: float64
Inference Time    275.405729
dtype: float64
Model memory (MB)    18.193208
dtype: float64
