# Image-level Classification results
This notebook presents scan-level classification evaluation results of two final models on different test sets

In [1]:
import os
import sys
import argparse
import traceback


import logging
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import pandas as pd
from itertools import combinations, permutations

import cmbnet.utils.utils_plotting as utils_plotting
import cmbnet.utils.utils_evaluation as utils_eval
import ast

In [2]:
eval_dir = "../../data-misc/evaluations"
l1_dirs = ['Scratch-Pretrained-FineTuned', 'TL-Pretrained-FineTuned']
l2_dirs = ['predict_cmb_valid', 'predict_cmb_dou', 'predict_cmb_crb']
l3_dirs = ['valloss']

df_classification, df_detection, df_segmentation, \
    detection_details, segmentation_details, cmb_results = \
        utils_eval.load_evaluation_data(eval_dir, l1_dirs, l2_dirs, l3_dirs)

In [3]:
df_classification.sort_values("threshold")

Unnamed: 0,threshold,Precision,Recall,F1-Score,Specificity,Model,Criteria,Dataset
0,1,0.976744,0.75,0.848485,0.961538,Scratch-Pretrained-FineTuned,valloss,valid
15,1,0.0,,0.0,0.722222,TL-Pretrained-FineTuned,valloss,crb
3,1,0.0,,0.0,0.9,Scratch-Pretrained-FineTuned,valloss,dou
6,1,0.0,,0.0,0.833333,Scratch-Pretrained-FineTuned,valloss,crb
9,1,0.954545,0.75,0.84,0.923077,TL-Pretrained-FineTuned,valloss,valid
12,1,0.0,,0.0,0.85,TL-Pretrained-FineTuned,valloss,dou
1,2,0.95082,0.828571,0.885496,0.75,Scratch-Pretrained-FineTuned,valloss,valid
4,2,1.0,0.9,0.947368,1.0,Scratch-Pretrained-FineTuned,valloss,dou
7,2,0.222222,0.5,0.307692,0.5,Scratch-Pretrained-FineTuned,valloss,crb
16,2,0.3,0.75,0.428571,0.5,TL-Pretrained-FineTuned,valloss,crb


In [4]:
dataset = ["dou", "crb", "valid"]
model = ["Scratch-Pretrained-FineTuned", "TL-Pretrained-FineTuned"]
thresholds = [2, 5]
# criteria = ["F1macro", "valloss"]
criteria = ["valloss"]
df_tmp = df_classification.copy()

selected = df_tmp[
    (df_tmp["Dataset"].isin(dataset))
    & (df_tmp["Model"].isin(model))
    & (df_tmp["Criteria"].isin(criteria))
    & (df_tmp["threshold"].isin(thresholds))
]
selected = selected[
    ["Dataset", "threshold", "Model", "Precision", "Recall", "F1-Score", "Specificity"]
]
selected.sort_values(["Dataset", "threshold", "Model"])
selected = selected.round(2)
selected

Unnamed: 0,Dataset,threshold,Model,Precision,Recall,F1-Score,Specificity
1,valid,2,Scratch-Pretrained-FineTuned,0.95,0.83,0.89,0.75
2,valid,5,Scratch-Pretrained-FineTuned,1.0,0.99,0.99,1.0
4,dou,2,Scratch-Pretrained-FineTuned,1.0,0.9,0.95,1.0
5,dou,5,Scratch-Pretrained-FineTuned,0.88,1.0,0.94,0.6
7,crb,2,Scratch-Pretrained-FineTuned,0.22,0.5,0.31,0.5
8,crb,5,Scratch-Pretrained-FineTuned,0.57,0.8,0.67,0.25
10,valid,2,TL-Pretrained-FineTuned,0.92,0.86,0.89,0.58
11,valid,5,TL-Pretrained-FineTuned,0.99,0.99,0.99,0.83
13,dou,2,TL-Pretrained-FineTuned,1.0,0.7,0.82,1.0
14,dou,5,TL-Pretrained-FineTuned,1.0,0.93,0.97,1.0


In [5]:
df = selected.copy()

metrics = ["Precision", "Recall", "F1-Score", "Specificity"]
minimize_metrics = []  # Metrics where lower is better

# Loop through each dataset, threshold, and metric
for dataset in df['Dataset'].unique():
    for threshold in df['threshold'].unique():
        for metric in metrics:
            # Filter the DataFrame for the current dataset and threshold
            subset = df[(df['Dataset'] == dataset) & (df['threshold'] == threshold)]
            
            # Determine the best value depending on whether higher is better
            if metric in minimize_metrics:
                best_value = subset[metric].min()  # Lower is better for these metrics
            else:
                best_value = subset[metric].max()  # Higher is better for these metrics
            
            # Apply bold to the best value
            df.loc[(df['Dataset'] == dataset) & (df['threshold'] == threshold) & (df[metric] == best_value), metric] = '\\textbf{' + f'{best_value:.6g}' + '}'

# Restructure the DataFrame for display
formatted_df = df[["Dataset", "threshold", "Model", "Precision", "Recall", "F1-Score", "Specificity"]]

# Display the formatted DataFrame
formatted_df.sort_values(["Dataset", "threshold", "Model"], inplace=True)


  df.loc[(df['Dataset'] == dataset) & (df['threshold'] == threshold) & (df[metric] == best_value), metric] = '\\textbf{' + f'{best_value:.6g}' + '}'
  df.loc[(df['Dataset'] == dataset) & (df['threshold'] == threshold) & (df[metric] == best_value), metric] = '\\textbf{' + f'{best_value:.6g}' + '}'
  df.loc[(df['Dataset'] == dataset) & (df['threshold'] == threshold) & (df[metric] == best_value), metric] = '\\textbf{' + f'{best_value:.6g}' + '}'
  df.loc[(df['Dataset'] == dataset) & (df['threshold'] == threshold) & (df[metric] == best_value), metric] = '\\textbf{' + f'{best_value:.6g}' + '}'


In [6]:
formatted_df

Unnamed: 0,Dataset,threshold,Model,Precision,Recall,F1-Score,Specificity
7,crb,2,Scratch-Pretrained-FineTuned,0.22,0.5,0.31,\textbf{0.5}
16,crb,2,TL-Pretrained-FineTuned,\textbf{0.3},\textbf{0.75},\textbf{0.43},\textbf{0.5}
8,crb,5,Scratch-Pretrained-FineTuned,0.57,0.8,0.67,0.25
17,crb,5,TL-Pretrained-FineTuned,\textbf{0.64},\textbf{0.9},\textbf{0.75},\textbf{0.38}
4,dou,2,Scratch-Pretrained-FineTuned,\textbf{1},\textbf{0.9},\textbf{0.95},\textbf{1}
13,dou,2,TL-Pretrained-FineTuned,\textbf{1},0.7,0.82,\textbf{1}
5,dou,5,Scratch-Pretrained-FineTuned,0.88,\textbf{1},0.94,0.6
14,dou,5,TL-Pretrained-FineTuned,\textbf{1},0.93,\textbf{0.97},\textbf{1}
1,valid,2,Scratch-Pretrained-FineTuned,\textbf{0.95},0.83,\textbf{0.89},\textbf{0.75}
10,valid,2,TL-Pretrained-FineTuned,0.92,\textbf{0.86},\textbf{0.89},0.58


In [7]:
lattex = formatted_df.to_latex(index=False, escape=False)
print(lattex)

\begin{tabular}{lrlllll}
\toprule
Dataset & threshold & Model & Precision & Recall & F1-Score & Specificity \\
\midrule
crb & 2 & Scratch-Pretrained-FineTuned & 0.220000 & 0.500000 & 0.310000 & \textbf{0.5} \\
crb & 2 & TL-Pretrained-FineTuned & \textbf{0.3} & \textbf{0.75} & \textbf{0.43} & \textbf{0.5} \\
crb & 5 & Scratch-Pretrained-FineTuned & 0.570000 & 0.800000 & 0.670000 & 0.250000 \\
crb & 5 & TL-Pretrained-FineTuned & \textbf{0.64} & \textbf{0.9} & \textbf{0.75} & \textbf{0.38} \\
dou & 2 & Scratch-Pretrained-FineTuned & \textbf{1} & \textbf{0.9} & \textbf{0.95} & \textbf{1} \\
dou & 2 & TL-Pretrained-FineTuned & \textbf{1} & 0.700000 & 0.820000 & \textbf{1} \\
dou & 5 & Scratch-Pretrained-FineTuned & 0.880000 & \textbf{1} & 0.940000 & 0.600000 \\
dou & 5 & TL-Pretrained-FineTuned & \textbf{1} & 0.930000 & \textbf{0.97} & \textbf{1} \\
valid & 2 & Scratch-Pretrained-FineTuned & \textbf{0.95} & 0.830000 & \textbf{0.89} & \textbf{0.75} \\
valid & 2 & TL-Pretrained-FineTuned & 0.