In [24]:
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Function to read and process the evaluation results from a CSV file
def process_results(csv_file, model_name):
    df = pd.read_csv(csv_file)
    df['Model'] = model_name
    return df[['Model', 'Subpart', 'MSE', 'RMSE', 'MAE']]

# Directory containing the evaluation results
results_dir = "../Predictions/Surface/"


# DataFrame to store the evaluation results for each subpart
all_results = []

# Iterate over the models in the directory
for model_name in os.listdir(results_dir):
    model_path = os.path.join(results_dir, model_name)
    eval_file = os.path.join(model_path, "eval.csv")
    try:
        assert os.path.exists(eval_file)
    except AssertionError:
        print(f"Missing evaluation results for {model_name}")
        continue
    # Process the evaluation results for the current model
    results = process_results(eval_file, model_name)
    all_results.append(results)

results = "/home/ngaggion/DATA/HybridGNet3D/Predictions/YanSurface/Results.csv"
df = pd.read_csv(results)

#all_results.append(df)
all_results = pd.concat(all_results, ignore_index=True)

def get_nice_dataframe_sub(df, metrics, subpart = "Full", vertical = False):
    models = df["Model"].unique()
    df = df.copy()
    df = df[df["Subpart"] == subpart]

    # creates a dataframe where each metric has a column for its mean and std.
    # the mean and std. are computed for each model
    # the dataframe is then saved as a csv file
    df_std = pd.DataFrame(columns=metrics)
    for metric in metrics:
        df_std[metric] = df.groupby(["Model"])[metric].std()

    df_mean = pd.DataFrame(columns=metrics)
    for metric in metrics:
        df_mean[metric] = df.groupby(["Model"])[metric].mean()

    df_mean = df_mean.round(2)
    df_std = df_std.round(2)

    #combine both dataframes, intercalating columns

    empty_df = pd.DataFrame(columns = metrics)
    for metric in metrics:
        i = 0
        for model in models:
            mean_str = str(df_mean.loc[model, metric]) 
            std_str = str(df_std.loc[model, metric]) 
            if len(mean_str) == 3 or (mean_str[2] == '.' and len(mean_str) == 4):
                mean_str += '0'
            if len(std_str) == 3 or (std_str[2] == '.' and len(std_str) == 4):
                std_str += '0'
              
            empty_df.loc[model, metric] = mean_str + " (" + std_str + ")"
            i+=1

    # transposes the dataframe
    if vertical:
        empty_df = empty_df.T
    
    return empty_df

metrics = ['MAE', 'MSE', 'RMSE']
subparts = ['Full', 'LV', 'RV', 'LA', 'RA', 'aorta']

for subpart in subparts:
    print(subpart)
    nice = get_nice_dataframe_sub(all_results, metrics, subpart, vertical=1)
    display(nice)


Full


Unnamed: 0,FULL_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5,FULL_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5_noLAX,ROI_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5,ROI_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5_noLAX
MAE,2.26 (0.55),2.56 (0.62),2.18 (0.54),2.43 (0.59)
MSE,9.29 (5.48),12.20 (7.11),8.80 (5.31),11.27 (6.69)
RMSE,2.95 (0.76),3.38 (0.89),2.87 (0.76),3.25 (0.86)


LV


Unnamed: 0,FULL_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5,FULL_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5_noLAX,ROI_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5,ROI_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5_noLAX
MAE,1.79 (0.55),1.90 (0.57),1.70 (0.54),1.75 (0.54)
MSE,5.60 (4.03),6.23 (4.28),5.11 (3.67),5.35 (3.83)
RMSE,2.26 (0.71),2.39 (0.73),2.15 (0.70),2.21 (0.70)


RV


Unnamed: 0,FULL_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5,FULL_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5_noLAX,ROI_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5,ROI_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5_noLAX
MAE,2.08 (0.60),2.18 (0.64),1.97 (0.59),2.00 (0.58)
MSE,7.69 (4.93),8.39 (5.64),7.04 (4.72),7.12 (4.84)
RMSE,2.66 (0.78),2.78 (0.82),2.54 (0.78),2.56 (0.76)


LA


Unnamed: 0,FULL_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5,FULL_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5_noLAX,ROI_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5,ROI_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5_noLAX
MAE,2.37 (0.78),2.90 (1.00),2.30 (0.77),2.84 (0.99)
MSE,10.07 (9.74),15.40 (13.73),9.58 (9.24),14.88 (13.29)
RMSE,3.00 (1.02),3.69 (1.33),2.92 (1.02),3.63 (1.31)


RA


Unnamed: 0,FULL_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5,FULL_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5_noLAX,ROI_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5,ROI_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5_noLAX
MAE,2.57 (0.76),3.07 (0.96),2.51 (0.80),2.98 (0.93)
MSE,12.00 (9.42),17.46 (13.65),11.75 (10.16),16.67 (13.13)
RMSE,3.30 (1.05),3.97 (1.32),3.24 (1.11),3.87 (1.30)


aorta


Unnamed: 0,FULL_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5,FULL_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5_noLAX,ROI_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5,ROI_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5_noLAX
MAE,2.37 (0.84),2.66 (0.93),2.34 (0.83),2.56 (0.89)
MSE,10.24 (8.71),13.17 (11.05),10.04 (8.43),12.38 (10.52)
RMSE,3.01 (1.09),3.41 (1.23),2.97 (1.09),3.31 (1.20)


In [28]:
def analyze_pairs(df, metrics, subpart="Full"):
   df = df[df["Subpart"] == subpart].copy()
   df_mean = df.groupby(["Model"])[metrics].mean()
   df_std = df.groupby(["Model"])[metrics].std()
   
   n = len(df) // len(df["Model"].unique())
   results = []
   metrics = ['MAE', 'MSE']
   
   for base_model in df["Model"].unique():
       if "noLAX" in base_model:
           lax_model = base_model.replace("_noLAX", "")
           if lax_model in df["Model"].unique():
               model_results = []
               for metric in metrics:
                   t_stat = (df_mean.loc[lax_model, metric] - df_mean.loc[base_model, metric]) / \
                           np.sqrt((df_std.loc[lax_model, metric]**2 + df_std.loc[base_model, metric]**2)/n)
                   if abs(t_stat) > 1.96:  # p < 0.05
                       better = t_stat < 0 if metric in ['MSE', 'MAE'] else t_stat > 0
                       model_results.append(f"{metric}: {'better' if better else 'worse'} (t={t_stat:.2f})")
               if model_results:
                   results.append(f"\n{subpart} - {base_model} vs {lax_model}:")
                   results.extend(model_results)
   
   return "\n".join(results)

# Usage:
for subpart in subparts:
   print(analyze_pairs(all_results, metrics, subpart))


Full - FULL_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5_noLAX vs FULL_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5:
MAE: better (t=-12.53)
MSE: better (t=-11.24)

Full - ROI_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5_noLAX vs ROI_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5:
MAE: better (t=-11.05)
MSE: better (t=-10.03)

LV - FULL_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5_noLAX vs FULL_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5:
MAE: better (t=-4.90)
MSE: better (t=-3.68)

LV - ROI_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5_noLAX vs ROI_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5:
MAE: better (t=-2.25)

RV - FULL_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5_noLAX vs FULL_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5:
MAE: better (t=-4.14)
MSE: better (t=-3.23)

LA - FULL_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5_noLAX vs FULL_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5:
MAE: better (t=-14.52)
MSE: better (t=-10.96)

LA - ROI_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5_noLAX vs ROI_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5:
MAE: better (t=-14.82)
MSE: better (t=-11.35)

RA - FULL_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5_noLAX vs FULL_WDS_1_

In [18]:
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Function to read and process the evaluation results from a CSV file
def process_results(csv_file, model_name):
    df = pd.read_csv(csv_file)
    df['Model'] = model_name
    return df

# Directory containing the evaluation results
results_dir = "../Predictions/Surface/"

# DataFrame to store the evaluation results for each subpart
all_results = []

# Iterate over the models in the directory
for model_name in os.listdir(results_dir):
    model_path = os.path.join(results_dir, model_name)
    eval_file = os.path.join(model_path, "metrics.csv")

    if not os.path.exists(eval_file):
        continue
    
    # Process the evaluation results for the current model
    results = process_results(eval_file, model_name)
    all_results.append(results)

def get_nice_dataframe(df, metrics, vertical = False):
    models = df["Model"].unique()

    # creates a dataframe where each metric has a column for its mean and std.
    # the mean and std. are computed for each model
    # the dataframe is then saved as a csv file
    df_std = pd.DataFrame(columns=metrics)
    for metric in metrics:
        df_std[metric] = df.groupby(["Model"])[metric].std()

    df_mean = pd.DataFrame(columns=metrics)
    for metric in metrics:
        df_mean[metric] = df.groupby(["Model"])[metric].mean()

    df_mean = df_mean.round(2)
    df_std = df_std.round(2)

    #combine both dataframes, intercalating columns

    empty_df = pd.DataFrame(columns = metrics)
    for metric in metrics:
        i = 0
        for model in models:
            mean_str = str(df_mean.loc[model, metric]) 
            std_str = str(df_std.loc[model, metric]) 
            if len(mean_str) == 3 or (mean_str[2] == '.' and len(mean_str) == 4):
                mean_str += '0'
            if len(std_str) == 3 or (std_str[2] == '.' and len(std_str) == 4):
                std_str += '0'
              
            empty_df.loc[model, metric] = mean_str + " (" + std_str + ")"
            i+=1

    # transposes the dataframe
    if vertical:
        empty_df = empty_df.T
    
    return empty_df


all_results = pd.concat(all_results, ignore_index=True)
metrics = ['LV Endo - DC', 'LV Endo - HD', 'LV Endo - MCD', 
           'LV Myo - DC', 'LV Myo - HD', 'LV Myo - MCD', 
           'RV Endo - DC', 'RV Endo - HD', 'RV Endo - MCD']

nice = get_nice_dataframe(all_results, metrics, vertical=1)

nice["MCSI-Net-Paper"] = ["0.88 (0.05)", "4.74 (1.75)", "1.86 (0.79)",
                    "0.78 (0.08)", "4.75 (1.76)", "1.86 (0.82)", 
                    "0.85 (0.06)", "7.06 (2.64)", "2.27 (0.95)"]

display(nice)

Unnamed: 0,FULL_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5,FULL_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5_noLAX,ROI_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5,ROI_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5_noLAX,MCSI-Net-Paper
LV Endo - DC,0.90 (0.04),0.89 (0.05),0.91 (0.04),0.90 (0.04),0.88 (0.05)
LV Endo - HD,4.22 (1.22),4.48 (1.32),3.89 (1.18),4.08 (1.22),4.74 (1.75)
LV Endo - MCD,1.55 (0.51),1.67 (0.55),1.39 (0.46),1.49 (0.49),1.86 (0.79)
LV Myo - DC,0.81 (0.05),0.80 (0.06),0.84 (0.04),0.83 (0.05),0.78 (0.08)
LV Myo - HD,4.40 (1.26),4.71 (1.36),3.96 (1.23),4.23 (1.27),4.75 (1.76)
LV Myo - MCD,1.57 (0.52),1.71 (0.56),1.35 (0.46),1.49 (0.51),1.86 (0.82)
RV Endo - DC,0.86 (0.05),0.85 (0.05),0.87 (0.05),0.86 (0.05),0.85 (0.06)
RV Endo - HD,6.79 (2.23),6.98 (2.32),6.13 (2.23),6.44 (2.19),7.06 (2.64)
RV Endo - MCD,1.99 (0.59),2.11 (0.64),1.76 (0.59),1.90 (0.57),2.27 (0.95)


In [22]:
def analyze_significance(df, metrics):
    baseline_means_full = {
        'LV Endo - DC': 0.88, 'LV Endo - HD': 4.74, 'LV Endo - MCD': 1.86,
        'LV Myo - DC': 0.78, 'LV Myo - HD': 4.75, 'LV Myo - MCD': 1.86,
        'RV Endo - DC': 0.85, 'RV Endo - HD': 7.06, 'RV Endo - MCD': 2.27
    }
    baseline_stds_full = {
        'LV Endo - DC': 0.05, 'LV Endo - HD': 1.75, 'LV Endo - MCD': 0.79,
        'LV Myo - DC': 0.08, 'LV Myo - HD': 1.76, 'LV Myo - MCD': 0.82,
        'RV Endo - DC': 0.06, 'RV Endo - HD': 2.64, 'RV Endo - MCD': 0.95
    }

    baseline_means_roi = {
        'LV Endo - DC': 0.87, 'LV Endo - HD': 5.13, 'LV Endo - MCD': 1.93,
        'LV Myo - DC': 0.76, 'LV Myo - HD': 5.31, 'LV Myo - MCD': 1.97,
        'RV Endo - DC': 0.85, 'RV Endo - HD': 7.11, 'RV Endo - MCD': 2.34
    }

    baseline_stds_roi = {
        'LV Endo - DC': 0.05, 'LV Endo - HD': 1.97, 'LV Endo - MCD': 0.83,
        'LV Myo - DC': 0.09, 'LV Myo - HD': 1.98, 'LV Myo - MCD': 0.95,
        'RV Endo - DC': 0.06, 'RV Endo - HD': 2.78, 'RV Endo - MCD': 0.98
    }

    n = len(df) // len(df["Model"].unique())  # samples per model
    
    results = []
    df_mean = df.groupby(["Model"])[metrics].mean()
    df_std = df.groupby(["Model"])[metrics].std()
    
    for model in df_mean.index:
        if "ROI" in model:
                baseline_means = baseline_means_roi
                baseline_stds = baseline_stds_roi
        else: 
                baseline_means = baseline_means_full
                baseline_stds = baseline_stds_full
        
        model_results = []
        for metric in metrics:
            t_stat = (df_mean.loc[model, metric] - baseline_means[metric]) / \
                    np.sqrt((df_std.loc[model, metric]**2 + baseline_stds[metric]**2)/n)
            if abs(t_stat) > 1.96:  # p < 0.05
                better = t_stat > 0
                model_results.append(f"{metric}: {'higher' if better else 'lower'} (t={t_stat:.2f})")
        if model_results:
            results.append(f"\n{model}:")
            results.extend(model_results)
    
    return "\n".join(results)

# Usage:
significance_text = analyze_significance(all_results, metrics)
print(significance_text)


FULL_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5:
LV Endo - DC: higher (t=11.07)
LV Endo - HD: lower (t=-8.51)
LV Endo - MCD: lower (t=-11.48)
LV Myo - DC: higher (t=12.38)
LV Myo - HD: lower (t=-5.69)
LV Myo - MCD: lower (t=-10.49)
RV Endo - DC: higher (t=3.22)
RV Endo - HD: lower (t=-2.70)
RV Endo - MCD: lower (t=-8.77)

FULL_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5_noLAX:
LV Endo - DC: higher (t=6.68)
LV Endo - HD: lower (t=-4.13)
LV Endo - MCD: lower (t=-6.82)
LV Myo - DC: higher (t=7.42)
LV Myo - MCD: lower (t=-5.32)
RV Endo - MCD: lower (t=-4.98)

ROI_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5:
LV Endo - DC: higher (t=20.25)
LV Endo - HD: lower (t=-18.73)
LV Endo - MCD: lower (t=-19.84)
LV Myo - DC: higher (t=26.91)
LV Myo - HD: lower (t=-20.08)
LV Myo - MCD: lower (t=-20.48)
RV Endo - DC: higher (t=9.42)
RV Endo - HD: lower (t=-9.55)
RV Endo - MCD: lower (t=-17.70)

ROI_WDS_1_WL_0.01_3D_32_2D_8_KL_1e-5_noLAX:
LV Endo - DC: higher (t=17.88)
LV Endo - HD: lower (t=-15.73)
LV Endo - MCD: lower (t=-15.75)
LV M