In [83]:
import os

# Root folder of the experiment outputs. The cells below expect it to contain one
# sub-folder per seed/testbed (e.g. "testbed_0"), each holding one folder per model.
# Set the ARF_EXP2_ROOT_DIR environment variable to point at another location instead of
# editing this cell; when it is unset the original default path is used.
# Alternative location used previously:
#   "/media/enc/vera1/icos_shared/ARF_paper/outputs/Exp2/exp2"
root_dir = os.environ.get(
    "ARF_EXP2_ROOT_DIR",
    "/home/sebastian/codes/ARF_STUFF/arf_paper/outputs/Exp2_REAL/exp2",
)
# Experiment tag: used in the output CSV file names and to decide whether the
# display-name mapping for models is applied.
exp_name = "exp2"

In [84]:
import numpy as np
import os
import pandas as pd
import warnings
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
import seaborn as sns

def process_model_directories(root_dir, lista_de_modelos):
    """Collect the metrics stored in ``model_data.csv`` for the given model folders.

    Every immediate sub-directory of ``root_dir`` is visited in sorted order. For each
    name in ``lista_de_modelos`` that exists as a folder inside it, ``model_data.csv``
    is read and one value per metric is recorded.

    Parameters
    ----------
    root_dir : str
        Directory whose immediate sub-directories are scanned.
    lista_de_modelos : iterable of str
        Model folder names to look for inside each sub-directory.

    Returns
    -------
    tuple of numpy.ndarray
        ``(MAE, RMSE, SMAPE, r2, MASE, training_time, inference_time, model_memory)``;
        each array has one entry per ``model_data.csv`` that was read.

    Raises
    ------
    ValueError
        If a metric column of a ``model_data.csv`` does not hold exactly one value.
    """
    # CSV column names, in the order the arrays are returned.
    metric_columns = ('MAE', 'RMSE', 'SMAPE', 'r2', 'MASE',
                      'Training_time', 'Inference_time', 'Model memory (MB)')
    collected = {column: [] for column in metric_columns}

    # Sorted so the resulting row order does not depend on the filesystem's listing order.
    for subdir in sorted(os.listdir(root_dir)):
        subdir_path = os.path.join(root_dir, subdir)
        if not os.path.isdir(subdir_path):
            continue
        for model_dir in lista_de_modelos:
            model_dir_path = os.path.join(subdir_path, model_dir)
            if not os.path.isdir(model_dir_path):
                continue
            model_metrics = pd.read_csv(os.path.join(model_dir_path, "model_data.csv"))
            for column in metric_columns:
                # .item() extracts the single stored value explicitly; calling float()
                # directly on a Series is deprecated in recent pandas versions.
                collected[column].append(float(model_metrics[column].item()))

    arrays = tuple(np.array(collected[column]) for column in metric_columns)
    print(f"processed {len(arrays[-1])} seeds")
    return arrays
def arrays_to_dataframe(MAE_array, RMSE_array, SMAPE_array, r2_array, MASE_array,
                        training_time_array, inference_time_array, model_memory_array):
    """Assemble the per-run metric arrays into a single DataFrame.

    Besides the raw metrics, two derived columns are added: the natural log of the
    training and inference times, each shifted by 1 before taking the log.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: ``MAE``, ``RMSE``, ``SMAPE``, ``r2``, ``MASE``,
        ``Training_time``, ``Training_time_log``, ``Inference_time``,
        ``Inference_time_log``, ``Model_memory``.
    """
    shift = 1  # added before the log so that a time of 0 maps to 0

    # Insertion order of this dict determines the column order of the frame.
    columns = {}
    columns['MAE'] = MAE_array
    columns['RMSE'] = RMSE_array
    columns['SMAPE'] = SMAPE_array
    columns['r2'] = r2_array
    columns['MASE'] = MASE_array
    columns['Training_time'] = training_time_array
    columns['Training_time_log'] = np.log(training_time_array + shift)
    columns['Inference_time'] = inference_time_array
    columns['Inference_time_log'] = np.log(inference_time_array + shift)
    columns['Model_memory'] = model_memory_array

    return pd.DataFrame(columns)
import pandas as pd

def extract_window_size(model_name):
    """Split a ``'<model>_ws_<N>'`` label into ``(model, N)``.

    Returns ``(model_name, None)`` when the label contains no ``'_ws_'`` separator or
    when the text following the first separator is not an integer.
    """
    pieces = model_name.split('_ws_')
    if len(pieces) <= 1:
        return model_name, None
    try:
        return pieces[0], int(pieces[1])
    except ValueError:
        # Suffix is not a number: hand the label back unchanged.
        return model_name, None

def add_window_size_column(df):
    """Split ``df['Model']`` into a base model name and a ``'Window Size'`` column.

    The frame is modified in place and returned. Nothing is done when a
    ``'Window Size'`` column already exists.
    """
    if 'Window Size' in df.columns:
        return df
    name_and_size = df['Model'].apply(extract_window_size)
    # Expand each (name, window size) tuple into two columns.
    df[['Model', 'Window Size']] = name_and_size.apply(pd.Series)
    return df

def update_model_names(df, name_mapping):
    """Replace the values of ``df['Model']`` according to ``name_mapping``.

    Values that are not keys of ``name_mapping`` are kept as they are. The frame is
    modified in place and returned.
    """
    renamed = df['Model'].replace(name_mapping)
    df['Model'] = renamed
    return df

## Generate a CSV with the results of every seed

In [85]:
# Model folder names are read from a single seed folder ("testbed_0"),
# because model names are inside seed folders.
root_dir_exp1 = os.path.join(root_dir, "testbed_0")

# Keep only the entries that are directories; plain files are skipped.
folder_names = [
    entry
    for entry in os.listdir(root_dir_exp1)
    if os.path.isdir(os.path.join(root_dir_exp1, entry))
]
folder_names

['HoeffdingTreeRegressor_ws_32',
 'PassiveAggressive_ws_32',
 'HoeffdingTreeRegressor_ws_8',
 'MLP_partialfit_ws_9',
 'MLP_partialfit_ws_20',
 'SRPRegressor_ws_9',
 'HoeffdingTreeRegressor_ws_12',
 'HoeffdingTreeRegressor_ws_64',
 'PassiveAggressive_ws_12',
 'SRPRegressor_ws_8',
 'AdaptiveRandomForest_ws_12',
 'SRPRegressor_ws_6',
 'AdaptiveRandomForest_ws_8',
 'PassiveAggressive_ws_6',
 'MLP_partialfit_ws_64',
 'SGDRegressor_ws_8',
 'AdaptiveRandomForest_ws_64',
 'AdaptiveRandomForest_ws_9',
 'MLP_partialfit_ws_8',
 'SGDRegressor_ws_64',
 'PassiveAggressive_ws_20',
 'PassiveAggressive_ws_9',
 'PassiveAggressive_ws_64',
 'SGDRegressor_ws_20',
 'HoeffdingAdaptiveTreeRegressor_ws_12',
 'PassiveAggressive_ws_8',
 'HoeffdingTreeRegressor_ws_20',
 'SRPRegressor_ws_32',
 'HoeffdingAdaptiveTreeRegressor_ws_6',
 'HoeffdingAdaptiveTreeRegressor_ws_20',
 'HoeffdingAdaptiveTreeRegressor_ws_8',
 'SGDRegressor_ws_32',
 'MLP_partialfit_ws_12',
 'SGDRegressor_ws_6',
 'MLP_partialfit_ws_32',
 'Hoeffdi

In [117]:
import pandas as pd

def extract_window_size(model_name):
    """Return ``(base_name, window_size)`` parsed from a ``'<base>_ws_<N>'`` label.

    Falls back to ``(model_name, None)`` if there is no ``'_ws_'`` separator or the
    text after the first separator cannot be converted to ``int``.
    """
    result = (model_name, None)
    base, *suffixes = model_name.split('_ws_')
    if suffixes:
        try:
            result = (base, int(suffixes[0]))
        except ValueError:
            pass  # not a number: keep the fallback
    return result

def add_window_size_column(df):
    """Split the ``'<name>_ws_<N>'`` labels in ``df['Model']`` into two columns.

    ``'Model'`` receives the base name and ``'Window Size'`` the parsed integer (or
    ``None`` when a label has no parsable suffix). The frame is modified in place and
    also returned.

    If a ``'Window Size'`` column is already present the frame is returned untouched.
    Without this guard, a second call on an already-split frame would re-parse labels
    that no longer carry the ``_ws_<N>`` suffix and overwrite their window sizes with
    ``None``.
    """
    if 'Window Size' in df.columns:
        return df
    df[['Model', 'Window Size']] = df['Model'].apply(extract_window_size).apply(pd.Series)
    return df

def update_model_names(df, name_mapping):
    """Map the entries of ``df['Model']`` through ``name_mapping``.

    Entries without a matching key stay unchanged. The frame is updated in place and
    the same object is returned.
    """
    df['Model'] = df['Model'].replace(to_replace=name_mapping)
    return df

# One metrics frame per model folder, each tagged with the folder name it came from.
all_metrics = []
for model_name in folder_names:
    df = arrays_to_dataframe(*process_model_directories(root_dir, [model_name]))
    df["Model"] = model_name
    all_metrics.append(df)

# Stack all frames and switch the raw metric columns to their display labels.
display_labels = {
    'Inference_time': 'Inference Time',
    'Training_time': 'Training Time',
    'Model_memory': 'Model memory (MB)',
}
all_models_df = pd.concat(all_metrics, ignore_index=True).rename(columns=display_labels)

# Separate the "<model>_ws_<N>" folder names into model name and window size.
all_models_df = add_window_size_column(all_models_df)

# Human-readable model names are only defined for experiment "exp2".
if exp_name == "exp2":
    name_mapping = {
        'AdaptiveRandomForest': 'Adaptive Random Forest (ARF)',
        'HoeffdingAdaptiveTreeRegressor': 'Hoeffding Adaptive Tree Regressor',
        'HoeffdingTreeRegressor': 'Hoeffding Tree Regressor',
        'MLP_partialfit': 'MLP partialfit',
        'PassiveAggressive': 'Passive Aggressive (PA)',
        'SGDRegressor': 'SGD Regressor',
        'SRPRegressor': 'SRP Regressor',
    }
    all_models_df = update_model_names(all_models_df, name_mapping)

# Persist the full table (one row per processed model_data.csv).
all_models_df.to_csv(f"../../outputs/{exp_name}_full_results.csv", index=False)


processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds
processed 20 seeds


In [118]:
# Show the combined results table built in the previous cell.
all_models_df

Unnamed: 0,MAE,RMSE,SMAPE,r2,MASE,Training Time,Training_time_log,Inference Time,Inference_time_log,Model memory (MB),Model,Window Size
0,3.286322,8.604321,19.750337,0.918424,0.847761,17.803598,2.934048,6.757077,2.048606,456.870440,Hoeffding Tree Regressor,32
1,3.253851,8.582090,19.619776,0.918845,0.839385,18.600983,2.975580,6.073897,1.956412,536.443273,Hoeffding Tree Regressor,32
2,3.473062,8.846662,20.571204,0.913764,0.895934,14.265496,2.725595,5.227467,1.828970,339.960547,Hoeffding Tree Regressor,32
3,3.530282,8.911189,20.508241,0.912501,0.910695,17.626902,2.924607,6.304663,1.988513,373.381795,Hoeffding Tree Regressor,32
4,3.359753,8.683785,19.980693,0.916910,0.866704,15.251554,2.788189,5.609841,1.888560,429.459672,Hoeffding Tree Regressor,32
...,...,...,...,...,...,...,...,...,...,...,...,...
975,4.904821,11.099057,24.528932,0.863996,1.266792,45.986953,3.849870,12.900311,2.631911,0.161770,SRP Regressor,12
976,4.926476,11.094333,24.390741,0.864112,1.272385,90.711144,4.518644,30.716121,3.456825,0.177773,SRP Regressor,12
977,4.968919,11.139051,24.796023,0.863014,1.283347,62.335208,4.148441,18.474203,2.969091,0.159031,SRP Regressor,12
978,4.900282,11.131230,24.648084,0.863206,1.265620,45.778557,3.845425,12.746889,2.620813,0.169474,SRP Regressor,12


In [119]:
# Distinct model names present in the combined results.
all_models_df['Model'].unique()

array(['Hoeffding Tree Regressor', 'Passive Aggressive (PA)',
       'MLP partialfit', 'SRP Regressor', 'Adaptive Random Forest (ARF)',
       'SGD Regressor', 'Hoeffding Adaptive Tree Regressor'], dtype=object)

In [125]:
# Training/inference times of the Adaptive Random Forest runs with window size 64.
is_arf_ws64 = (all_models_df['Model'] == "Adaptive Random Forest (ARF)") & (all_models_df['Window Size'] == 64)
all_models_df.loc[is_arf_ws64, ["Training Time", "Inference Time"]]

Unnamed: 0,Training Time,Inference Time
320,6483.943267,1232.22909
321,5721.9784,1075.446992
322,4702.349188,895.052446
323,3203.157578,726.593073
324,6609.554566,1251.386844
325,7270.362343,1267.635906
326,14766.618504,3688.795129
327,6162.364741,1145.596648
328,1802.862764,393.911654
329,6829.144629,1186.005573


In [126]:
# Rows containing any missing value are excluded from the summary.
all_models_df = all_models_df.dropna()

# Error metrics are reported as mean and standard deviation; the cost metrics
# (times and memory) as mean only, under their plain column name.
mean_and_std_metrics = ['MAE', 'RMSE', 'SMAPE', 'r2', 'MASE']
mean_only_metrics = ['Training Time', 'Inference Time', 'Model memory (MB)']

# Output column name -> (source column, aggregation); the order here is the column order.
named_aggregations = {}
for metric in mean_and_std_metrics:
    named_aggregations[f"{metric}_mean"] = (metric, 'mean')
    named_aggregations[f"{metric}_std"] = (metric, 'std')
for metric in mean_only_metrics:
    named_aggregations[metric] = (metric, 'mean')

summary = (
    all_models_df
    .groupby(['Model', 'Window Size'])
    .agg(**named_aggregations)
    .reset_index()
    .round(3)
)

summary.to_csv(f"../../outputs/{exp_name}_model_metrics_avg.csv", index=False)

summary

Unnamed: 0,Model,Window Size,MAE_mean,MAE_std,RMSE_mean,RMSE_std,SMAPE_mean,SMAPE_std,r2_mean,r2_std,MASE_mean,MASE_std,Training Time,Inference Time,Model memory (MB)
0,Adaptive Random Forest (ARF),6,3.167,0.076,8.475,0.16,19.294,0.279,0.921,0.003,0.818,0.02,36765.313,12135.261,4035.426
1,Adaptive Random Forest (ARF),8,2.917,0.143,8.176,0.218,18.198,0.634,0.926,0.004,0.754,0.037,10985.082,2104.979,4049.356
2,Adaptive Random Forest (ARF),9,2.841,0.24,8.271,0.325,17.102,1.128,0.924,0.006,0.734,0.062,1403.575,394.632,1453.863
3,Adaptive Random Forest (ARF),12,2.594,0.238,7.772,0.327,16.594,1.104,0.933,0.006,0.67,0.061,930.893,267.114,4088.65
4,Adaptive Random Forest (ARF),20,2.114,0.413,7.172,0.532,14.006,2.159,0.943,0.009,0.546,0.107,1267.728,261.401,4142.016
5,Adaptive Random Forest (ARF),32,1.76,0.515,6.658,0.721,12.232,2.774,0.951,0.011,0.454,0.133,2363.09,386.467,4225.895
6,Adaptive Random Forest (ARF),64,1.276,0.675,5.092,1.411,9.951,3.479,0.969,0.017,0.33,0.174,5991.68,1170.914,4367.824
7,Hoeffding Adaptive Tree Regressor,6,3.838,0.711,24.027,65.853,21.783,0.716,-4.189,22.777,0.992,0.184,1294.042,291.991,304.292
8,Hoeffding Adaptive Tree Regressor,8,3.882,0.668,25.723,60.047,21.917,0.472,-3.514,18.886,1.003,0.173,806.718,187.705,273.816
9,Hoeffding Adaptive Tree Regressor,9,3.704,0.201,9.415,0.281,21.752,0.669,0.902,0.006,0.957,0.052,17.874,6.295,27.802
