# Image-level Classification results
This notebook presents scan-level classification evaluation results of two final models on different test sets

In [1]:
import os
import sys
import argparse
import traceback


import logging
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import pandas as pd
from itertools import combinations, permutations

import cmbnet.utils.utils_plotting as utils_plotting
import cmbnet.utils.utils_evaluation as utils_eval
import ast

In [2]:
eval_dir = "../../data-misc/evaluations"
l1_dirs = ['Scratch-Pretrained-FineTuned', 'TL-Pretrained-FineTuned']
l2_dirs = ['predict_cmb_valid', 'predict_cmb_dou', 'predict_cmb_crb']
l3_dirs = ['valloss']

df_classification, df_detection, df_segmentation, \
    detection_details, segmentation_details, cmb_results = \
        utils_eval.load_evaluation_data(eval_dir, l1_dirs, l2_dirs, l3_dirs)

In [3]:
df_classification.sort_values("threshold")

Unnamed: 0,threshold,Precision,Recall,F1-Score,Specificity,Model,Criteria,Dataset
0,1,0.976744,0.75,0.848485,0.961538,Scratch-Pretrained-FineTuned,valloss,valid
16,1,0.0,,0.0,0.85,TL-Pretrained-FineTuned,valloss,dou
12,1,0.954545,0.75,0.84,0.923077,TL-Pretrained-FineTuned,valloss,valid
4,1,0.0,,0.0,0.9,Scratch-Pretrained-FineTuned,valloss,dou
8,1,0.0,,0.0,0.833333,Scratch-Pretrained-FineTuned,valloss,crb
20,1,0.0,,0.0,0.722222,TL-Pretrained-FineTuned,valloss,crb
13,3,0.971831,0.932432,0.951724,0.75,TL-Pretrained-FineTuned,valloss,valid
17,3,1.0,0.833333,0.909091,1.0,TL-Pretrained-FineTuned,valloss,dou
9,3,0.5,0.857143,0.631579,0.454545,Scratch-Pretrained-FineTuned,valloss,crb
5,3,1.0,0.833333,0.909091,1.0,Scratch-Pretrained-FineTuned,valloss,dou


In [4]:
dataset = ["dou", "crb", "valid"]
model = ["Scratch-Pretrained-FineTuned", "TL-Pretrained-FineTuned"]
thresholds = [3, 5, 11]
# criteria = ["F1macro", "valloss"]
criteria = ["valloss"]
df_tmp = df_classification.copy()

selected = df_tmp[
    (df_tmp["Dataset"].isin(dataset))
    & (df_tmp["Model"].isin(model))
    & (df_tmp["Criteria"].isin(criteria))
    & (df_tmp["threshold"].isin(thresholds))
]
selected = selected[
    ["Dataset", "threshold", "Model", "Precision", "Recall", "F1-Score", "Specificity"]
]
selected.sort_values(["Dataset", "threshold", "Model"])
selected = selected.round(3)
selected

Unnamed: 0,Dataset,threshold,Model,Precision,Recall,F1-Score,Specificity
1,valid,3,Scratch-Pretrained-FineTuned,0.972,0.932,0.952,0.75
2,valid,5,Scratch-Pretrained-FineTuned,1.0,0.987,0.993,1.0
3,valid,11,Scratch-Pretrained-FineTuned,0.987,0.975,0.981,0.667
5,dou,3,Scratch-Pretrained-FineTuned,1.0,0.833,0.909,1.0
6,dou,5,Scratch-Pretrained-FineTuned,0.882,1.0,0.938,0.6
7,dou,11,Scratch-Pretrained-FineTuned,0.895,1.0,0.944,0.333
9,crb,3,Scratch-Pretrained-FineTuned,0.5,0.857,0.632,0.455
10,crb,5,Scratch-Pretrained-FineTuned,0.571,0.8,0.667,0.25
11,crb,11,Scratch-Pretrained-FineTuned,0.706,1.0,0.828,0.167
13,valid,3,TL-Pretrained-FineTuned,0.972,0.932,0.952,0.75


In [5]:
df = selected.copy()

metrics = ["Precision", "Recall", "F1-Score", "Specificity"]
minimize_metrics = []  # Metrics where lower is better

# Loop through each dataset, threshold, and metric
for dataset in df['Dataset'].unique():
    for threshold in df['threshold'].unique():
        for metric in metrics:
            # Filter the DataFrame for the current dataset and threshold
            subset = df[(df['Dataset'] == dataset) & (df['threshold'] == threshold)]
            
            # Determine the best value depending on whether higher is better
            # if the are equal do not use bold
            if subset[metric].unique().size == 1:
                best_value = np.inf
            elif metric in minimize_metrics:
                best_value = subset[metric].min()  # Lower is better for these metrics
            else:
                best_value = subset[metric].max()  # Higher is better for these metrics
            
            
            # Apply bold to the best value
            
            df.loc[(df['Dataset'] == dataset) & (df['threshold'] == threshold) & (df[metric] == best_value), metric] = '\\textbf{' + f'{best_value:.6g}' + '}'

# Restructure the DataFrame for display
formatted_df = df[["Dataset", "threshold", "Model", "Precision", "Recall", "F1-Score", "Specificity"]]

# Display the formatted DataFrame
formatted_df.sort_values(["Dataset", "threshold", "Model"], inplace=True)


  df.loc[(df['Dataset'] == dataset) & (df['threshold'] == threshold) & (df[metric] == best_value), metric] = '\\textbf{' + f'{best_value:.6g}' + '}'


In [6]:
formatted_df

Unnamed: 0,Dataset,threshold,Model,Precision,Recall,F1-Score,Specificity
9,crb,3,Scratch-Pretrained-FineTuned,0.5,0.857,0.632,0.455
21,crb,3,TL-Pretrained-FineTuned,0.5,0.857,0.632,0.455
10,crb,5,Scratch-Pretrained-FineTuned,0.571,0.8,0.667,0.25
22,crb,5,TL-Pretrained-FineTuned,\textbf{0.643},\textbf{0.9},\textbf{0.75},\textbf{0.375}
11,crb,11,Scratch-Pretrained-FineTuned,0.706,1.0,0.828,0.167
23,crb,11,TL-Pretrained-FineTuned,0.706,1.0,0.828,0.167
5,dou,3,Scratch-Pretrained-FineTuned,1.0,0.833,0.909,1.0
17,dou,3,TL-Pretrained-FineTuned,1.0,0.833,0.909,1.0
6,dou,5,Scratch-Pretrained-FineTuned,0.882,\textbf{1},0.938,0.6
18,dou,5,TL-Pretrained-FineTuned,\textbf{1},0.933,\textbf{0.966},\textbf{1}


In [7]:
lattex = formatted_df.round(3).to_latex(index=False, escape=False)
print(lattex)

\begin{tabular}{lrlllll}
\toprule
Dataset & threshold & Model & Precision & Recall & F1-Score & Specificity \\
\midrule
crb & 3 & Scratch-Pretrained-FineTuned & 0.500000 & 0.857000 & 0.632000 & 0.455000 \\
crb & 3 & TL-Pretrained-FineTuned & 0.500000 & 0.857000 & 0.632000 & 0.455000 \\
crb & 5 & Scratch-Pretrained-FineTuned & 0.571000 & 0.800000 & 0.667000 & 0.250000 \\
crb & 5 & TL-Pretrained-FineTuned & \textbf{0.643} & \textbf{0.9} & \textbf{0.75} & \textbf{0.375} \\
crb & 11 & Scratch-Pretrained-FineTuned & 0.706000 & 1.000000 & 0.828000 & 0.167000 \\
crb & 11 & TL-Pretrained-FineTuned & 0.706000 & 1.000000 & 0.828000 & 0.167000 \\
dou & 3 & Scratch-Pretrained-FineTuned & 1.000000 & 0.833000 & 0.909000 & 1.000000 \\
dou & 3 & TL-Pretrained-FineTuned & 1.000000 & 0.833000 & 0.909000 & 1.000000 \\
dou & 5 & Scratch-Pretrained-FineTuned & 0.882000 & \textbf{1} & 0.938000 & 0.600000 \\
dou & 5 & TL-Pretrained-FineTuned & \textbf{1} & 0.933000 & \textbf{0.966} & \textbf{1} \\
dou & 11 &