#### Collect result metrics

In [None]:
%load_ext autoreload
%autoreload 2

import os
from pathlib import Path
import pandas as pd

import radipop_utils 
import radipop_utils.visualization
import radipop_utils.features
import radipop_utils.inference
import radipop_utils.data


# User-/system-specific settings are read from the .env file located by find_dotenv().
from dotenv import dotenv_values, find_dotenv
config = dotenv_values(find_dotenv())  # the .env entries as a plain dictionary

# Root directory of all data, taken from the .env entry of the same name.
DATA_ROOT_DIRECTORY = Path(config["DATA_ROOT_DIRECTORY"])

# Package root: the parent of the directory that contains radipop_utils' module file.
path = Path(os.path.abspath(radipop_utils.__file__))
RADIPOP_PACKAGE_ROOT = path.parent.parent


# Echo the data root as the cell output.
DATA_ROOT_DIRECTORY


In [None]:
# Define the experiments and load metric files
#
# Every experiment is a dict with four entries:
#   "notes"        - free-text remark
#   "spacing"      - spacing triple ("?" where the value is not known)
#   "segmentation" - "ground_truth" or "nnUNet"
#   "path"         - directory holding the experiment's output files
#                    (e.g. the feature-importance spreadsheets read further below)

_REGRESSION_DIR_302 = DATA_ROOT_DIRECTORY / "radiomics/Dataset302/regression"
_REGRESSION_DIR_125 = DATA_ROOT_DIRECTORY / "radiomics/Dataset125_LSS/regression"

# --- Dataset302, ground-truth segmentation ---------------------------------
exp_302_WFD_no_autoseg = dict(
    notes="50 healthy patients were used for training (with fake data = WFD)",
    spacing=(20, "?", "?"),
    segmentation="ground_truth",
    path=_REGRESSION_DIR_302 / "radipop_no_autoseg_WFD",
)
exp_302_ORD_no_autoseg = dict(
    notes="Only patients with a HVPG measurement were used for training (Only real data = ORD)",
    spacing=(20, "?", "?"),
    segmentation="ground_truth",
    path=_REGRESSION_DIR_302 / "radipop_no_autoseg_ORD",
)

# --- Dataset125_LSS, spacing (1, 1, 1) -------------------------------------
exp_125_111_ORD_autoseg = dict(
    notes="",
    spacing=(1, 1, 1),
    segmentation="nnUNet",
    path=_REGRESSION_DIR_125 / "radipop_nnUNet_spacing_111_ORD",
)
exp_125_111_WFD_autoseg = dict(
    notes="",
    spacing=(1, 1, 1),
    segmentation="nnUNet",
    path=_REGRESSION_DIR_125 / "radipop_nnUNet_spacing_111_WFD",
)
exp_125_111_WFD_no_autoseg = dict(
    notes="",
    spacing=(1, 1, 1),
    segmentation="ground_truth",
    path=_REGRESSION_DIR_125 / "radipop_no_autoseg_spacing_111_WFD",
)
exp_125_111_ORD_no_autoseg = dict(
    notes="",
    spacing=(1, 1, 1),
    segmentation="ground_truth",
    path=_REGRESSION_DIR_125 / "radipop_no_autoseg_spacing_111_ORD",
)

# --- Dataset125_LSS, spacing (2.0, 0.71875, 0.71875) -----------------------
exp_125_median_ORD_autoseg = dict(
    notes="",
    spacing=(2.0, 0.71875, 0.71875),
    segmentation="nnUNet",
    path=_REGRESSION_DIR_125 / "radipop_nnUNet_spacing_median_ORD",
)
exp_125_median_WFD_autoseg = dict(
    notes="",
    spacing=(2.0, 0.71875, 0.71875),
    segmentation="nnUNet",
    path=_REGRESSION_DIR_125 / "radipop_nnUNet_spacing_median_WFD",
)
exp_125_median_ORD_no_autoseg = dict(
    notes="",
    spacing=(2.0, 0.71875, 0.71875),
    segmentation="ground_truth",
    path=_REGRESSION_DIR_125 / "radipop_no_autoseg_spacing_median_ORD",
)
exp_125_median_WFD_no_autoseg = dict(
    notes="",
    spacing=(2.0, 0.71875, 0.71875),
    segmentation="ground_truth",
    path=_REGRESSION_DIR_125 / "radipop_no_autoseg_spacing_median_WFD",
)

# --- Shape-features-only runs (not yet part of `experiments`) --------------
# NOTE(review): these two are named "..._median_..._no_autoseg_..." but declare
# segmentation "nnUNet" and point at "..._nnUNet_spacing_222_..." directories -
# confirm which naming is the intended one before enabling them.
exp_125_median_ORD_no_autoseg_only_shape_features = dict(
    notes="only_shape_features",
    spacing=(2.0, 0.71875, 0.71875),
    segmentation="nnUNet",
    path=_REGRESSION_DIR_125 / "radipop_nnUNet_spacing_222_ORD_only_shape",
)
exp_125_median_WFD_no_autoseg_only_shape_features = dict(
    notes="only_shape_features",
    spacing=(2.0, 0.71875, 0.71875),
    segmentation="nnUNet",
    path=_REGRESSION_DIR_125 / "radipop_nnUNet_spacing_222_WFD_only_shape",
)


# Registry of all active experiments, keyed by the name of the variable above.
experiments = {
    # Dataset302
    "exp_302_WFD_no_autoseg": exp_302_WFD_no_autoseg,
    "exp_302_ORD_no_autoseg": exp_302_ORD_no_autoseg,
    # Dataset125_LSS, spacing (1, 1, 1)
    "exp_125_111_ORD_autoseg": exp_125_111_ORD_autoseg,
    "exp_125_111_WFD_autoseg": exp_125_111_WFD_autoseg,
    "exp_125_111_ORD_no_autoseg": exp_125_111_ORD_no_autoseg,
    "exp_125_111_WFD_no_autoseg": exp_125_111_WFD_no_autoseg,
    # Dataset125_LSS, spacing (2.0, 0.71875, 0.71875)
    "exp_125_median_ORD_autoseg": exp_125_median_ORD_autoseg,
    "exp_125_median_WFD_autoseg": exp_125_median_WFD_autoseg,
    "exp_125_median_ORD_no_autoseg": exp_125_median_ORD_no_autoseg,
    "exp_125_median_WFD_no_autoseg": exp_125_median_WFD_no_autoseg,
    # Shape-features-only runs stay disabled for now:
    # 'exp_125_median_ORD_no_autoseg_only_shape_features': exp_125_median_ORD_no_autoseg_only_shape_features,  # TODO wait for the results
    # 'exp_125_median_WFD_no_autoseg_only_shape_features': exp_125_median_WFD_no_autoseg_only_shape_features
}

# Keep only the experiments whose name does not contain "no_autoseg".
experiments_autoseg = {
    name: exp for name, exp in experiments.items() if "no_autoseg" not in name
}

# Metric spreadsheets handed to process_metric_files for every experiment.
metric_files = [
    "metrics_training_set.xlsx",
    "metrics_training_CV5.xlsx",
    "metrics_internal_test_set.xlsx",
    "metrics_external_test_set.xlsx",
]

# `r` is indexed below by metric-file name without its ".xlsx" suffix.
r = radipop_utils.inference.process_metric_files(experiments_autoseg, metric_files)
# r = radipop_utils.inference.process_metric_files(experiments, metric_files)

# Constants for comparing against the DL results.
# NOTE(review): presumably the shift/scale that was applied to the target for
# the DL model - confirm against the DL training code.
# Bug fix: `y_shift` used to end in a trailing comma, which bound it to the
# 1-tuple `(16.246,)` instead of a float.
y_shift = 16.246
y_rescaling = 8.244

# Add the MSE divided by the squared rescaling factor to every metrics table.
# The table names are derived from `metric_files` so the two cannot drift apart.
for metric_file in metric_files:
    table = r[Path(metric_file).stem]
    table["MSEN loss"] = table["mean_squared_error"] / (y_rescaling ** 2)


In [None]:
# Training-set metrics, rounded to two decimals, lowest mean absolute error first.
r["metrics_training_set"].round(2).sort_values(by="mean_absolute_error")

In [None]:
# "metrics_training_CV5" table, rounded to two decimals, lowest mean absolute error first.
r["metrics_training_CV5"].round(2).sort_values(by="mean_absolute_error")

In [None]:
# Internal-test-set metrics, rounded to two decimals, lowest mean absolute error first.
r["metrics_internal_test_set"].round(2).sort_values(by="mean_absolute_error")

In [None]:
# External-test-set metrics, rounded to two decimals, lowest mean absolute error first.
r["metrics_external_test_set"].round(2).sort_values(by="mean_absolute_error")

In [None]:
# Extract 'best' model
import numpy as np

# Reload the metric tables for the auto-segmentation experiments.
r = radipop_utils.inference.process_metric_files(experiments_autoseg, metric_files)

# Selection score: sum of the out-of-sample R2 on the internal and external
# test sets. The training-set and CV5 terms carry weight 0.0, so they do not
# contribute to the ranking; change the weights here to include them.
selection_score = (
    r['metrics_internal_test_set']["oos_r2_score"]
    + r['metrics_external_test_set']["oos_r2_score"]
    + r['metrics_training_set']["oos_r2_score"] * 0.0
    + r["metrics_training_CV5"]["oos_r2_score"] * 0.0
)

# Position of the entry with the highest selection score.
idx = np.argmax(selection_score)

def combine_metrics_for_one_experiment(r: dict, idx: int) -> pd.DataFrame:
    """Collect the metrics of one row from every metrics table into one frame.

    Args:
        r: Mapping that holds one metrics DataFrame under each of the keys
            'metrics_training_set', 'metrics_training_CV5',
            'metrics_internal_test_set' and 'metrics_external_test_set'.
        idx: Positional (``iloc``) index of the row to extract; the same
            position is used for all four DataFrames.

    Returns:
        A DataFrame with four rows (one per table, in the order listed above),
        indexed by 'metric_type' (the key the row was taken from), with all
        numeric columns rounded to two decimals.

    Raises:
        KeyError: If one of the four keys is missing from ``r``.
        IndexError: If ``idx`` is out of bounds for one of the DataFrames.
    """
    metric_types = (
        'metrics_training_set',
        'metrics_training_CV5',
        'metrics_internal_test_set',
        'metrics_external_test_set',
    )

    # Select the row with a list indexer so it stays a DataFrame and every
    # column keeps its dtype. Going through a Series (`.iloc[idx].to_frame().T`)
    # turns all columns into `object` as soon as one column is non-numeric,
    # and `round` then silently leaves the values unrounded.
    rows = [
        r[metric_type].iloc[[idx]].assign(metric_type=metric_type)
        for metric_type in metric_types
    ]

    # set_index replaces the original row labels, so no reset_index is needed.
    return pd.concat(rows, axis=0).set_index('metric_type').round(2)

# Show the metrics of the selected row across all four metric tables.
# The trailing round(2) repeats the rounding the helper already applies.
combine_metrics_for_one_experiment(r, idx).round(2)


In [None]:
### Feature importance
# Pick the experiment and the model whose feature importances should be shown.
exp_name = "exp_125_111_ORD_autoseg"
# exp_name = "exp_125_111_WFD_autoseg"
model = "RF"

# The spreadsheet lives inside the experiment's output directory.
exp = experiments[exp_name]
file = exp["path"] / f"feature_importances_{model}_on_iTs.xlsx"
importances = pd.read_excel(file, index_col=0)

# Plot the first 20 rows as horizontal bars; the order is reversed so the
# first row ends up at the top of the chart.
# NOTE(review): assumes the file is already sorted by descending importance - confirm.
first_rows_reversed = importances.head(20).iloc[::-1]
_ = first_rows_reversed.plot.barh(title=f"Top features for {exp_name}: {model}", legend=False)
print(f"Number of features after reduction: {importances.shape[0]}")