### Get additional metrics for models (additional PasterRepo Code - without modyfing the original code)

In [1]:
# to catch any changes to libraries without restarting the notebook kernel every time
%load_ext autoreload
%autoreload 2

import json
import os
import sys
from copy import deepcopy
from pathlib import Path
import numpy as np
import pandas as pd

REPO_DIR = os.path.abspath('..')  # path to the root of the repository
sys.path.append(REPO_DIR)
os.environ["PROJECT_DIR"] = REPO_DIR
import lib
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, mean_absolute_error, r2_score

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

# Initialize notebook parameters

In [2]:
project_dir = os.environ["PROJECT_DIR"]
dataset_name = 'tabular_100_trials_32_batch_size'
os.makedirs(f'{project_dir}/paster_scripts/report_metrics/{dataset_name}', exist_ok=True)
output_folder_single_models = f'{project_dir}/output_pasterquality/{dataset_name}'

n_seeds = 15

### Calculate MAPE, RMSE, MAE, R2 metrics for each seed of each model

In [34]:
metrics_json = {}

y_train = np.load(f'{project_dir}/data/{dataset_name}/y_train.npy')
y_val = np.load(f'{project_dir}/data/{dataset_name}/y_val.npy')
y_test = np.load(f'{project_dir}/data/{dataset_name}/y_test.npy')

for model in sorted(os.listdir(output_folder_single_models)):
    metrics_json[model] = {}
    for folder in sorted(os.listdir(f'{output_folder_single_models}/{model}')):
        if folder == 'tuned':
            for seed_folder in sorted(os.listdir(f'{output_folder_single_models}/{model}/{folder}')):
                if not seed_folder.endswith('.toml'):
                    if model != 'node':
                        p_test = np.load(f'{output_folder_single_models}/{model}/{folder}/{seed_folder}/p_test.npy')
                        p_val = np.load(f'{output_folder_single_models}/{model}/{folder}/{seed_folder}/p_val.npy')
                        p_train = np.load(f'{output_folder_single_models}/{model}/{folder}/{seed_folder}/p_train.npy')
                        y_info = lib.load_pickle(f'{output_folder_single_models}/{model}/{folder}/{seed_folder}/y_info.pickle')

                        # Convert predictions to the original scale based on y_info mean and std (extracted from y_train)
                        p_test = p_test * y_info['std'] + y_info['mean']
                        p_val = p_val * y_info['std'] + y_info['mean']
                        p_train = p_train * y_info['std'] + y_info['mean']

                        # Convert MAPE to percentage
                        mape_train = mean_absolute_percentage_error(y_train, p_train) * 100
                        mape_val = mean_absolute_percentage_error(y_val, p_val) * 100
                        mape_test = mean_absolute_percentage_error(y_test, p_test) * 100
                        
                        rmse_train = np.sqrt(mean_squared_error(y_train, p_train))
                        rmse_val = np.sqrt(mean_squared_error(y_val, p_val))
                        rmse_test = np.sqrt(mean_squared_error(y_test, p_test))
                        
                        mae_train = mean_absolute_error(y_train, p_train)
                        mae_val = mean_absolute_error(y_val, p_val)
                        mae_test = mean_absolute_error(y_test, p_test)
                        
                        r2_train = r2_score(y_train, p_train)
                        r2_val = r2_score(y_val, p_val)
                        r2_test = r2_score(y_test, p_test)
                        
                        if model not in metrics_json:
                            metrics_json[model] = {}
                        if seed_folder not in metrics_json[model]:
                            metrics_json[model][seed_folder] = {}
                        metrics_json[model][seed_folder]['MAPE'] = {}
                        metrics_json[model][seed_folder]['RMSE'] = {}
                        metrics_json[model][seed_folder]['MAE'] = {}
                        metrics_json[model][seed_folder]['R2'] = {}

                        metrics_json[model][seed_folder]['MAPE']['train'] = mape_train
                        metrics_json[model][seed_folder]['MAPE']['val'] = mape_val
                        metrics_json[model][seed_folder]['MAPE']['test'] = mape_test
                        metrics_json[model][seed_folder]['RMSE']['train'] = rmse_train
                        metrics_json[model][seed_folder]['RMSE']['val'] = rmse_val
                        metrics_json[model][seed_folder]['RMSE']['test'] = rmse_test
                        metrics_json[model][seed_folder]['MAE']['train'] = mae_train
                        metrics_json[model][seed_folder]['MAE']['val'] = mae_val
                        metrics_json[model][seed_folder]['MAE']['test'] = mae_test
                        metrics_json[model][seed_folder]['R2']['train'] = r2_train
                        metrics_json[model][seed_folder]['R2']['val'] = r2_val
                        metrics_json[model][seed_folder]['R2']['test'] = r2_test

# Save metrics to json file
json_file_path = f'{project_dir}/paster_scripts/report_metrics/{dataset_name}/detailed_metrics_per_model_metric_seed_set.json'
with open(json_file_path, 'w') as f:
    json.dump(metrics_json, f, indent=4, default=str)

### Calculate mean and std, across all seeds of each models, for each metric per (train, val, test) set

In [35]:
metrics_mean_std = {}

for model, model_data in metrics_json.items():
    metrics_mean_std[model] = {}
    for seed_folder, seed_data in model_data.items():
        for metric, metric_data in seed_data.items():
            if metric not in metrics_mean_std[model]:
                metrics_mean_std[model][metric] = {}
            for set_name, values in metric_data.items():
                if set_name not in metrics_mean_std[model][metric]:
                    metrics_mean_std[model][metric][set_name] = {'mean': [], 'std': []}
                metrics_mean_std[model][metric][set_name]['mean'].append(values)
                metrics_mean_std[model][metric][set_name]['std'].append(values)

# Calculate mean and std for each metric set
for model, model_data in metrics_mean_std.items():
    for metric, metric_data in model_data.items():
        for set_name, set_data in metric_data.items():
            set_data['mean'] = round(np.mean(set_data['mean']).item(),4)
            set_data['std'] = round(np.std(set_data['std']).item(),4)

# Save metrics to json file
json_file_path = f'{project_dir}/paster_scripts/report_metrics/{dataset_name}/aggregated_metrics.json'
with open(json_file_path, 'w') as f:
    json.dump(metrics_mean_std, f, indent=4, default=str)


# Create a DataFrame from the list of dictionaries for the aggregated metrics (mean and sted)
metrics_list = []
for model, model_data in metrics_mean_std.items():
    for metric, metric_data in model_data.items():
        for set_name, values in metric_data.items():
            metrics_list.append({
                'Model': model,
                'Metric': metric,
                'Set': set_name,
                'Mean': values['mean'],
                'Std': values['std']
            })
df = pd.DataFrame(metrics_list)
df.to_csv(f'{project_dir}/paster_scripts/report_metrics/{dataset_name}/aggregated_metrics.csv', index=False)
df = df.sort_values(by=['Metric', 'Set', 'Mean'], ascending=[True, True, True]) 

In [38]:
# Transpose the DataFrame to have the metrics as columns and the models as rows, only for the test set
df_test = df[df['Set'] == 'val']
df_test = df_test.pivot(index='Model', columns='Metric', values='Mean')
df_test.sort_values(by=['RMSE'], ascending=[True], inplace=True)
df_test
# df_test.to_latex(f'{project_dir}/paster_scripts/report_metrics/{dataset_name}/aggregated_metrics_test.tex', index=True, float_format="%.2f")

Metric,MAE,MAPE,R2,RMSE
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
resnet,2.147,12.1941,0.8409,2.6966
ft_transformer,2.1387,12.1707,0.84,2.705
mlp,2.1967,12.572,0.8336,2.757
autoint,2.2335,12.1982,0.8155,2.8925
dcn2,2.2403,12.9191,0.8065,2.9626
snn,2.2846,12.6111,0.7954,3.0403
tabnet,2.5719,14.4862,0.7752,3.188
grownet,2.5506,14.1089,0.7643,3.274
lightgbm_,2.2907,11.5963,0.7335,3.4906
xgboost_,2.4815,12.8348,0.6951,3.7192


### Create ensemble groups and calculate their metrics

In [15]:
# For each model, obtain three ensembles by splitting the 15 single models (seeds) into three disjoint groups of equal size and 
# averaging predictions of single models within each group.

y_train = np.load(f'{project_dir}/data/{dataset_name}/y_train.npy')
y_val = np.load(f'{project_dir}/data/{dataset_name}/y_val.npy')
y_test = np.load(f'{project_dir}/data/{dataset_name}/y_test.npy')

ensemble_metrics = {}

for model in sorted(os.listdir(output_folder_single_models)):

    # Skip node model
    if model == 'node':
        continue
   
    if model not in ensemble_metrics:
        ensemble_metrics[model] = {}
    
    for folder in sorted(os.listdir(f'{output_folder_single_models}/{model}')):
        if folder == 'tuned':
            for seeds in [range(0, 5), range(5, 10), range(10, 15)]:
                train_pred_list = []
                val_pred_list = []
                test_pred_list = []
                for seed in seeds:
                    if not folder.endswith('.toml'):
                            p_test = np.load(f'{output_folder_single_models}/{model}/{folder}/{seed}/p_test.npy')
                            p_val = np.load(f'{output_folder_single_models}/{model}/{folder}/{seed}/p_val.npy')
                            p_train = np.load(f'{output_folder_single_models}/{model}/{folder}/{seed}/p_train.npy')
                            y_info = lib.load_pickle(f'{output_folder_single_models}/{model}/{folder}/{seed}/y_info.pickle')

                            # Convert predictions to the original scale based on y_info mean and std (extracted from y_train)
                            p_test = p_test * y_info['std'] + y_info['mean']
                            p_val = p_val * y_info['std'] + y_info['mean']
                            p_train = p_train * y_info['std'] + y_info['mean']

                            train_pred_list.append(p_train)
                            val_pred_list.append(p_val)
                            test_pred_list.append(p_test)
                
                #Taking the average of this seed_ensemble group
                train_pred_list = np.array(train_pred_list)
                val_pred_list = np.array(val_pred_list)
                test_pred_list = np.array(test_pred_list)
                train_pred_list = np.mean(train_pred_list, axis=0)
                val_pred_list = np.mean(val_pred_list, axis=0)
                test_pred_list = np.mean(test_pred_list, axis=0)

                # Convert MAPE to percentage
                mape_train = mean_absolute_percentage_error(y_train, train_pred_list) * 100
                mape_val = mean_absolute_percentage_error(y_val, val_pred_list) * 100
                mape_test = mean_absolute_percentage_error(y_test, test_pred_list) * 100

                rmse_train = np.sqrt(mean_squared_error(y_train, train_pred_list))
                rmse_val = np.sqrt(mean_squared_error(y_val, val_pred_list))
                rmse_test = np.sqrt(mean_squared_error(y_test, test_pred_list))

                mae_train = mean_absolute_error(y_train, train_pred_list)
                mae_val = mean_absolute_error(y_val, val_pred_list)
                mae_test = mean_absolute_error(y_test, test_pred_list)

                r2_train = r2_score(y_train, train_pred_list)
                r2_val = r2_score(y_val, val_pred_list)
                r2_test = r2_score(y_test, test_pred_list)

                # Save metrics to json file for this model and this seed_ensemble group
                group = f'{min(seeds)}_{max(seeds)}'
                ensemble_metrics[model][group] = {}
                ensemble_metrics[model][group]['MAPE'] = {}
                ensemble_metrics[model][group]['RMSE'] = {}
                ensemble_metrics[model][group]['MAE'] = {}
                ensemble_metrics[model][group]['R2'] = {}

                ensemble_metrics[model][group]['MAPE']['train'] = mape_train
                ensemble_metrics[model][group]['MAPE']['val'] = mape_val
                ensemble_metrics[model][group]['MAPE']['test'] = mape_test

                ensemble_metrics[model][group]['RMSE']['train'] = rmse_train
                ensemble_metrics[model][group]['RMSE']['val'] = rmse_val
                ensemble_metrics[model][group]['RMSE']['test'] = rmse_test

                ensemble_metrics[model][group]['MAE']['train'] = mae_train
                ensemble_metrics[model][group]['MAE']['val'] = mae_val
                ensemble_metrics[model][group]['MAE']['test'] = mae_test

                ensemble_metrics[model][group]['R2']['train'] = r2_train
                ensemble_metrics[model][group]['R2']['val'] = r2_val
                ensemble_metrics[model][group]['R2']['test'] = r2_test

# Save metrics to json file
json_file_path = f'{project_dir}/paster_scripts/report_metrics/{dataset_name}/ensemble_detailed_metrics.json'
with open(json_file_path, 'w') as f:
    json.dump(ensemble_metrics, f, indent=4, default=str)

# Create a DataFrame from the list of dictionaries for the ensemble_metrics (mean and sted)
ensemble_metrics_list = []
for model, model_data in ensemble_metrics.items():
    for group, group_data in model_data.items():
        for metric, metric_data in group_data.items():
            for set_name, values in metric_data.items():
                ensemble_metrics_list.append({
                    'Model': model,
                    'Group': group,
                    'Metric': metric,
                    'Set': set_name,
                    'Value': values
                })

df_detailed_metrics_ensemble = pd.DataFrame(ensemble_metrics_list)
df_detailed_metrics_ensemble.to_csv(f'{project_dir}/paster_scripts/report_metrics/{dataset_name}/ensemble_detailed_metrics.csv', index=False)
df_detailed_metrics_ensemble = df_detailed_metrics_ensemble.sort_values(by=['Metric', 'Set', 'Value'], ascending=[True, True, True])

### Calculate the mean and std for the metrics between the ensemble groups of each model and as always for each (train, val, test) set

In [None]:
data = ensemble_metrics
model_results = {}
for model, groups in data.items():
    model_results[model] = {}
    for group, metrics in groups.items():
        for metric, values in metrics.items():
            if metric not in model_results[model]:
                model_results[model][metric] = {}
            for set_name, value in values.items():
                if set_name not in model_results[model][metric]:
                    model_results[model][metric][set_name] = []
                model_results[model][metric][set_name].append(value)

mean_std_results = {}
for model, metrics in model_results.items():
    mean_std_results[model] = {}
    for metric, sets in metrics.items():
        mean_std_results[model][metric] = {}
        for set_name, values in sets.items():
            mean = round(np.mean(values),4)
            std_dev = round(np.std(values),4)
            mean_std_results[model][metric][set_name] = {
                "mean": mean,
                "std_dev": std_dev
            }

# Save metrics to json file
json_file_path = f'{project_dir}/paster_scripts/report_metrics/{dataset_name}/ensemble_aggregated_metrics.json'
with open(json_file_path, 'w') as f:
    json.dump(mean_std_results, f, indent=4, default=str)

# Create a DataFrame from the list of dictionaries for the aggregated ensemble_metrics (mean and sted)
ensemble_metrics_list = []
for model, model_data in mean_std_results.items():
    for metric, metric_data in model_data.items():
        for set_name, values in metric_data.items():
            ensemble_metrics_list.append({
                'Model': model,
                'Metric': metric,
                'Set': set_name,
                'Mean': values['mean'],
                'Std': values['std_dev']
            })

df_aggregated_metrics_ensemble = pd.DataFrame(ensemble_metrics_list)
df_aggregated_metrics_ensemble.to_csv(f'{project_dir}/paster_scripts/report_metrics/{dataset_name}/ensemble_aggregated_metrics.csv', index=False)
df_aggregated_metrics_ensemble = df_aggregated_metrics_ensemble.sort_values(by=['Metric', 'Set', 'Mean'], ascending=[True, True, True])
df_aggregated_metrics_ensemble

In [31]:
# Transpose the DataFrame to have the metrics as columns and the models as rows, only for the test set
# also keep the std in parenthesis next to the mean

tmpp = df_aggregated_metrics_ensemble.copy()
tmpp = tmpp[tmpp['Set'] == 'test']
tmpp = tmpp.pivot(index='Model', columns='Metric', values='Mean')
tmpp.to_latex(f'{project_dir}/paster_scripts/report_metrics/{dataset_name}/ensemble_aggregated_metrics_test.tex', index=True, float_format="%.2f")

### Create an ensemble by mixing the top five performing ensemble groups

In [9]:
# just to have one idea in the test
# ens_df[(ens_df['Metric'] == 'RMSE') & (ens_df['Set'] == 'test')].sort_values(by=['Value'], ascending=[True]).head(pool_of_models)

# Create an ensemble by mixing the top X performing ensemble groups in the validation (val) set
pool_of_models = 20
ens_df = df_detailed_metrics_ensemble.copy()
ens_mape = ens_df[(ens_df['Metric'] == 'MAPE') & (ens_df['Set'] == 'val')].sort_values(by=['Value'], ascending=[True]).head(pool_of_models)
ens_r2 = ens_df[(ens_df['Metric'] == 'R2') & (ens_df['Set'] == 'val')].sort_values(by=['Value'], ascending=[False]).head(pool_of_models)
ens_mae = ens_df[(ens_df['Metric'] == 'MAE') & (ens_df['Set'] == 'val')].sort_values(by=['Value'], ascending=[True]).head(pool_of_models)
ens_rmse = ens_df[(ens_df['Metric'] == 'RMSE') & (ens_df['Set'] == 'val')].sort_values(by=['Value'], ascending=[True]).head(pool_of_models)

# Merge the four DataFrames to find the common model names and groups
common_df = ens_mape.merge(ens_r2, on=['Model', 'Group'], how='inner')
common_df = common_df.merge(ens_mae, on=['Model', 'Group'], how='inner')
common_df = common_df.merge(ens_rmse, on=['Model', 'Group'], how='inner')

# TODO: continue this
# Inspect the common_df and choose models and their ensemble groups to put in the models_to_ensemble
models_to_ensemble = {}
models_to_ensemble['dcn2'] = "10_14"

### Create ensemble from the top x performing single models

In [32]:
# TODO: Try with three or five models
# TODO: simpliy above cells with for loops, based on this one
# TODO: replace the multiple lines for metrics assignment to json, with for loops
# TODO: find for each model in models_to_ensemble, the best seed (with the best metrics in the val set)
#       compare the results with the random (e.g. first seed)
# TODO: in the first cell of the notebook find also the best performing seed for each model

# Inspect below, to find the models to ensemble
# df[(df['Metric'] == 'MAE') & (df['Set'] == 'val')].sort_values(by=['Mean'], ascending=[True])

# models_to_ensemble = ["dcn2", "catboost_", "resnet", "autoint", "mlp"]
# models_to_ensemble = ["ft_transformer", "catboost_", "snn", "mlp", "resnet"]
models_to_ensemble = ["ft_transformer", "dcn2", "autoint", "mlp", "resnet"]

seed_folder = 0 #(take predictions from 0.toml)
folder = 'tuned'

train_predictions = []
val_predictions = []
test_predictions = []

y_train = np.load(f'{project_dir}/data/{dataset_name}/y_train.npy')
y_val = np.load(f'{project_dir}/data/{dataset_name}/y_val.npy')
y_test = np.load(f'{project_dir}/data/{dataset_name}/y_test.npy')

for model in sorted(os.listdir(output_folder_single_models)):
    if model in models_to_ensemble:
        p_test = np.load(f'{output_folder_single_models}/{model}/{folder}/{seed_folder}/p_test.npy')
        p_val = np.load(f'{output_folder_single_models}/{model}/{folder}/{seed_folder}/p_val.npy')
        p_train = np.load(f'{output_folder_single_models}/{model}/{folder}/{seed_folder}/p_train.npy')
        y_info = lib.load_pickle(f'{output_folder_single_models}/{model}/{folder}/{seed_folder}/y_info.pickle')

        # Convert predictions to the original scale based on y_info mean and std (extracted from y_train)
        p_test = p_test * y_info['std'] + y_info['mean']
        p_val = p_val * y_info['std'] + y_info['mean']
        p_train = p_train * y_info['std'] + y_info['mean']

        train_predictions.append(p_train)
        val_predictions.append(p_val)
        test_predictions.append(p_test)

#Taking the average of this seed_ensemble group
train_predictions_orig = np.array(train_predictions)
val_predictions_orig = np.array(val_predictions)
test_predictions_orig = np.array(test_predictions)
train_predictions = np.mean(train_predictions_orig, axis=0)
val_predictions = np.mean(val_predictions_orig, axis=0)
test_predictions = np.mean(test_predictions_orig, axis=0)

# Convert MAPE to percentage
mape_train = mean_absolute_percentage_error(y_train, train_predictions) * 100
mape_val = mean_absolute_percentage_error(y_val, val_predictions) * 100
mape_test = mean_absolute_percentage_error(y_test, test_predictions) * 100

rmse_train = np.sqrt(mean_squared_error(y_train, train_predictions))
rmse_val = np.sqrt(mean_squared_error(y_val, val_predictions))
rmse_test = np.sqrt(mean_squared_error(y_test, test_predictions))

mae_train = mean_absolute_error(y_train, train_predictions)
mae_val = mean_absolute_error(y_val, val_predictions)
mae_test = mean_absolute_error(y_test, test_predictions)

r2_train = r2_score(y_train, train_predictions)
r2_val = r2_score(y_val, val_predictions)
r2_test = r2_score(y_test, test_predictions)

# Save metrics to json file
json_file_path = f'{project_dir}/paster_scripts/report_metrics/{dataset_name}/ensemble_top_single_models_metrics.json'
with open(json_file_path, 'w') as f:
    json.dump({'MAPE': {'train': mape_train, 'val': mape_val, 'test': mape_test}, 
               'RMSE': {'train': rmse_train, 'val': rmse_val, 'test': rmse_test},
               'MAE':  {'train': mae_train, 'val': mae_val, 'test': mae_test},
               'R2':   {'train': r2_train, 'val': r2_val, 'test': r2_test},
               "models:" : models_to_ensemble}, f, indent=4, default=str)    

## Create ensemble with the above and a default RF or XGB

In [11]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error, mean_absolute_error
import xgboost as xgb

# model = RandomForestRegressor()
model = xgb.XGBRegressor()

y_train = np.load(f'{project_dir}/data/{dataset_name}/y_train.npy')
y_val = np.load(f'{project_dir}/data/{dataset_name}/y_val.npy')
y_test = np.load(f'{project_dir}/data/{dataset_name}/y_test.npy')

x_train = np.load(f'{project_dir}/data/{dataset_name}/N_train.npy')
x_val = np.load(f'{project_dir}/data/{dataset_name}/N_val.npy')
x_test = np.load(f'{project_dir}/data/{dataset_name}/N_test.npy')

model.fit(x_train, y_train)


### Check from here and below, kati paizei lathos
# Get predictions for all sets
y_pred_train = model.predict(x_train)
y_pred_val = model.predict(x_val)
y_pred_test = model.predict(x_test)

train_predictions_orig = np.insert(train_predictions_orig, 0, y_pred_train, axis=0)
val_predictions_orig = np.insert(val_predictions_orig, 0, y_pred_val, axis=0)
test_predictions_orig = np.insert(test_predictions_orig, 0, y_pred_test, axis=0)

#Taking the average of this ensemble group
train_predictions = np.mean(train_predictions_orig, axis=0)
val_predictions = np.mean(val_predictions_orig, axis=0)
test_predictions = np.mean(test_predictions_orig, axis=0)

#### Metrics with all models together with RF
mape_train = mean_absolute_percentage_error(y_train, y_pred_train) * 100
mape_val = mean_absolute_percentage_error(y_val, y_pred_val) * 100
mape_test = mean_absolute_percentage_error(y_test, y_pred_test) * 100

rmse_train = np.sqrt(mean_squared_error(y_train, y_pred_train))
rmse_val = np.sqrt(mean_squared_error(y_val, y_pred_val))
rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))

mae_train = mean_absolute_error(y_train, y_pred_train)
mae_val = mean_absolute_error(y_val, y_pred_val)
mae_test = mean_absolute_error(y_test, y_pred_test)

r2_train = r2_score(y_train, y_pred_train)
r2_val = r2_score(y_val, y_pred_val)
r2_test = r2_score(y_test, y_pred_test)


##### metrics for RF alone
rf_mape_train = mean_absolute_percentage_error(y_train, train_predictions) * 100
rf_mape_val = mean_absolute_percentage_error(y_val, val_predictions) * 100
rf_mape_test = mean_absolute_percentage_error(y_test, test_predictions) * 100

rf_rmse_train = np.sqrt(mean_squared_error(y_train, train_predictions))
rf_rmse_val = np.sqrt(mean_squared_error(y_val, val_predictions))
rf_rmse_test = np.sqrt(mean_squared_error(y_test, test_predictions))

rf_mae_train = mean_absolute_error(y_train, train_predictions)
rf_mae_val = mean_absolute_error(y_val, val_predictions)
rf_mae_test = mean_absolute_error(y_test, test_predictions)

rf_r2_train = r2_score(y_train, train_predictions)
rf_r2_val = r2_score(y_val, val_predictions)
rf_r2_test = r2_score(y_test, test_predictions)

# Save metrics to json file
json_file_path = f'{project_dir}/paster_scripts/report_metrics/{dataset_name}/ensemble_top_single_models_RF_metrics.json'
with open(json_file_path, 'w') as f:
    json.dump({"ensemble_with_RF": {
                    'MAPE': {'train': mape_train, 'val': mape_val, 'test': mape_test}, 
                    'RMSE': {'train': rmse_train, 'val': rmse_val, 'test': rmse_test},
                    'MAE':  {'train': mae_train, 'val': mae_val, 'test': mae_test},
                    'R2':   {'train': r2_train, 'val': r2_val, 'test': r2_test}},
                    "models:" : models_to_ensemble.append("RF"),
                "RF_alone": {
                    'MAPE': {'train': rf_mape_train, 'val': rf_mape_val, 'test': rf_mape_test},
                    'RMSE': {'train': rf_rmse_train, 'val': rf_rmse_val, 'test': rf_rmse_test},
                    'MAE':  {'train': rf_mae_train, 'val': rf_mae_val, 'test': rf_mae_test},
                    'R2':   {'train': rf_r2_train, 'val': rf_r2_val, 'test': rf_r2_test}}}, f, indent=4, default=str)
    

with open(json_file_path) as f:
    data = json.load(f)

ensemble_data = data['ensemble_with_RF']
rf_alone_data = data['RF_alone']

ensemble_df = pd.DataFrame(ensemble_data).T
rf_alone_df = pd.DataFrame(rf_alone_data).T

ensemble_df.columns = [f'ensemble_{col}' for col in ensemble_df.columns]
rf_alone_df.columns = [f'rf_alone_{col}' for col in rf_alone_df.columns]

comparison_df = pd.concat([ensemble_df, rf_alone_df], axis=1)
comparison_df.to_csv(f'{project_dir}/paster_scripts/report_metrics/{dataset_name}/ensemble_top_single_models_RF_metrics.csv', index=False)

In [12]:
comparison_df

Unnamed: 0,ensemble_train,ensemble_val,ensemble_test,rf_alone_train,rf_alone_val,rf_alone_test
MAPE,0.005711,13.150758,12.12141,6.337018,10.480887,9.234461
RMSE,0.0014376971,4.5154862,3.8829393,1.4745461,2.4618459,2.6997578
MAE,0.0009550007,2.647571,2.3295279,1.1214733,1.8887786,1.722553
R2,1.0,0.554805,0.793707,0.957771,0.867669,0.900273
