# Segmentation results
This notebook presents the segmentation evaluation results of the two final models (`Scratch-Pretrained-FineTuned` and `TL-Pretrained-FineTuned`) on different test sets.

In [11]:
import os
import sys
import argparse
import traceback


import logging
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import pandas as pd
from itertools import combinations, permutations

import cmbnet.utils.utils_plotting as utils_plotting
import cmbnet.utils.utils_evaluation as utils_eval
import ast

In [12]:
# Location of the stored evaluation outputs, relative to this notebook.
eval_dir = "../../data-misc/evaluations"

# Names passed to the loader for each directory level.
# NOTE(review): presumably level 1 = model, level 2 = prediction set,
# level 3 = checkpoint-selection criterion — confirm against
# utils_eval.load_evaluation_data.
l1_dirs = [
    "Scratch-Pretrained-FineTuned",
    "TL-Pretrained-FineTuned",
]
l2_dirs = [
    "predict_cmb_valid",
    "predict_cmb_dou",
    "predict_cmb_crb",
]
l3_dirs = [
    "valloss",
    "F1macro",
]

# The loader returns six objects; unpack them into the names used by the
# rest of the notebook.
evaluation_outputs = utils_eval.load_evaluation_data(
    eval_dir, l1_dirs, l2_dirs, l3_dirs
)
(
    df_classification,
    df_detection,
    df_segmentation,
    detection_details,
    segmentation_details,
    cmb_results,
) = evaluation_outputs

In [13]:
# Macro-averaged segmentation metrics per model and dataset, rendered as
# "mean ± std" strings and pivoted so that each metric becomes a column.
# TODO: select which to show

dataset = ["dou", "crb", "valid"]
model = ["Scratch-Pretrained-FineTuned", "TL-Pretrained-FineTuned"]
# criteria = ["F1macro", "valloss"]
criteria = ["valloss"]
typemetric = "Macro - "
df_tmp = df_segmentation.copy()

selected = df_tmp[
    df_tmp["Dataset"].isin(dataset)
    & df_tmp["Model"].isin(model)
    & df_tmp["Criteria"].isin(criteria)
]
# The prefix is a literal label, not a pattern (regex=False); rows with a
# missing metric name are excluded instead of breaking the boolean mask.
selected = selected[selected["Metric"].str.contains(typemetric, regex=False, na=False)]

# Take an explicit copy before adding a column: assigning into a slice of
# `selected` triggers pandas' SettingWithCopyWarning.
table4results_macro = selected[["Model", "Dataset", "Metric", "Mean", "Std."]].copy()
table4results_macro["Mean ± Std."] = (
    table4results_macro["Mean"].round(2).astype(str)
    + " ± "
    + table4results_macro["Std."].round(2).astype(str)
)
table4results_macro = table4results_macro.pivot_table(
    index=["Model", "Dataset"],
    columns="Metric",
    values="Mean ± Std.",
    aggfunc="first",  # Since there should be only one entry per group, 'first' is a safe choice here
).reset_index().sort_values(by=["Dataset"])
table4results_macro

Metric,Model,Dataset,Macro - DiceTP Score
0,Scratch-Pretrained-FineTuned,crb,0.41 ± 0.2
3,TL-Pretrained-FineTuned,crb,0.49 ± 0.18
1,Scratch-Pretrained-FineTuned,dou,0.56 ± 0.2
4,TL-Pretrained-FineTuned,dou,0.65 ± 0.14
2,Scratch-Pretrained-FineTuned,valid,0.6 ± 0.18
5,TL-Pretrained-FineTuned,valid,0.63 ± 0.16


In [14]:
# Micro-averaged segmentation score per model and dataset (mean only),
# rounded to two decimals.
# TODO: select which to show

dataset = ["dou", "crb", "valid"]
model = ["Scratch-Pretrained-FineTuned", "TL-Pretrained-FineTuned"]
# criteria = ["F1macro", "valloss"]
criteria = ["valloss"]
typemetric = "Micro - "
df_tmp = df_segmentation.copy()

selected = df_tmp[
    df_tmp["Dataset"].isin(dataset)
    & df_tmp["Model"].isin(model)
    & df_tmp["Criteria"].isin(criteria)
]
# The prefix is a literal label, not a pattern (regex=False); rows with a
# missing metric name are excluded instead of breaking the boolean mask.
# The explicit copy makes the column assignment below act on an independent
# frame rather than on a slice (avoids SettingWithCopyWarning).
selected = selected[
    selected["Metric"].str.contains(typemetric, regex=False, na=False)
].copy()
selected["Micro - DiceTP Score"] = selected["Mean"]
table4results_micro = selected[["Model", "Dataset", "Micro - DiceTP Score"]].round(2)
table4results_micro

Unnamed: 0,Model,Dataset,Micro - DiceTP Score
1,Scratch-Pretrained-FineTuned,valid,0.64
5,Scratch-Pretrained-FineTuned,dou,0.58
9,Scratch-Pretrained-FineTuned,crb,0.42
13,TL-Pretrained-FineTuned,valid,0.66
17,TL-Pretrained-FineTuned,dou,0.66
21,TL-Pretrained-FineTuned,crb,0.53


In [15]:
# Put the macro and micro summaries side by side: one row per
# (Model, Dataset) pair present in both tables (default inner join).
merge_keys = ["Model", "Dataset"]
table4results_all = table4results_macro.merge(table4results_micro, on=merge_keys)
table4results_all

Unnamed: 0,Model,Dataset,Macro - DiceTP Score,Micro - DiceTP Score
0,Scratch-Pretrained-FineTuned,crb,0.41 ± 0.2,0.42
1,TL-Pretrained-FineTuned,crb,0.49 ± 0.18,0.53
2,Scratch-Pretrained-FineTuned,dou,0.56 ± 0.2,0.58
3,TL-Pretrained-FineTuned,dou,0.65 ± 0.14,0.66
4,Scratch-Pretrained-FineTuned,valid,0.6 ± 0.18,0.64
5,TL-Pretrained-FineTuned,valid,0.63 ± 0.16,0.66


# Per-scan analysis

In [17]:
# Names assigned, in order, to the components of each key of
# `segmentation_details`. zip() stops at the shorter sequence, so names
# beyond the key's length are simply not used.
key_fields = ["Model", "Dataset", "Criteria", "Location"]


def _tag_with_key(key, entry):
    """Return one flat record: the named key components overlaid with `entry`."""
    record = dict(zip(key_fields, key))
    record.update(entry)  # entry values win if a name appears in both
    return record


# One record per entry; keys whose entry collection is empty/falsy are skipped.
flattened_data = [
    _tag_with_key(key, entry)
    for key, entries in segmentation_details.items()
    if entries
    for entry in entries
]

# Tabular view of all records
df_seg_serieslevel = pd.DataFrame(flattened_data)


In [18]:
# Show the flattened per-entry segmentation table built in the previous cell.
df_seg_serieslevel

Unnamed: 0,Model,Dataset,Criteria,seriesUID,CM,dice_score,overlap,n_voxels_pred,n_voxels_GT
0,Scratch-Pretrained-FineTuned,valid,valloss,RODEJA-00008-CMB-1,"(212, 247, 211)",0.761194,51,62,72
1,Scratch-Pretrained-FineTuned,valid,valloss,RODEJA-00051-CMB-3,"(223, 58, 95)",0.652361,76,76,157
2,Scratch-Pretrained-FineTuned,valid,valloss,RODEJA-00051-CMB-3,"(232, 124, 154)",0.606061,30,30,69
3,Scratch-Pretrained-FineTuned,valid,valloss,RODEJA-00070-CMB-10,"(81, 109, 33)",0.600000,15,28,22
4,Scratch-Pretrained-FineTuned,valid,valloss,RODEJA-00070-CMB-10,"(106, 42, 100)",0.859873,675,695,875
...,...,...,...,...,...,...,...,...,...
596,TL-Pretrained-FineTuned,crb,F1macro,CRB-1.2.840.113845.13.13849.872975378.63476729...,"(296, 215, 166)",0.449799,56,172,77
597,TL-Pretrained-FineTuned,crb,F1macro,CRB-1.3.12.2.1107.5.2.6.24111.3000001908270351...,"(186, 75, 146)",0.421739,97,328,132
598,TL-Pretrained-FineTuned,crb,F1macro,CRB-1.3.12.2.1107.5.2.6.24111.3000001908270351...,"(245, 116, 193)",0.663102,62,76,111
599,TL-Pretrained-FineTuned,crb,F1macro,CRB-1.3.12.2.1107.5.2.6.24111.3000001908270351...,"(98, 92, 191)",0.364964,50,183,91


In [27]:
# Export the per-entry segmentation rows for the selected datasets, models and
# criteria to a CSV file, sorted by series and CM, for manual inspection.
dataset = ["dou", "crb", "valid"]
models = ["Scratch-Pretrained-FineTuned", "TL-Pretrained-FineTuned"]

criteria = ["valloss"]
typemetric = ""  # not used in this cell; kept so the notebook-level value is unchanged
df_tmp = df_seg_serieslevel.copy()

selected = df_tmp[
    (df_tmp["Dataset"].isin(dataset))
    & (df_tmp["Model"].isin(models))
    & (df_tmp["Criteria"].isin(criteria))
]

# Output location: defaults to the original path, but can be redirected with
# the CMB_CHECK_SEG_CSV environment variable so the cell also runs on machines
# where that absolute path does not exist.
check_csv_path = os.environ.get(
    "CMB_CHECK_SEG_CSV", "/storage/evo1/jorge/CHECK_Seg.csv"
)
selected.sort_values(["seriesUID", "CM"]).to_csv(check_csv_path)

In [None]:
# NOTE(review): pasted scratch value (series UID and CM coordinates), not executable code:
# CRB-1.2.826.1.3680043.9.5282.150415.25794.2579422232752-CMB-2	(195, 220, 162)