# 3D Segmentation Evaluation
ULS23

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os
import warnings; warnings.filterwarnings("ignore", category=SyntaxWarning)

import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
from tqdm import tqdm

import SimpleITK as sitk
from evaluation.uls import unzip_files_in_dir, train_val_split

## For unzipping files
# unzip_files_in_dir("/media/liushifeng/KINGSTON/ULS Jan 2025/ULS23/novel_data/ULS23_DeepLesion3D/labels/")

Train/val split

In [None]:
# Root folder of the ULS23 DeepLesion3D data; later cells read its
# "images" and "labels" sub-folders.
data_folder = Path("/media/liushifeng/KINGSTON/ULS Jan 2025/ULS23/novel_data/ULS23_DeepLesion3D")

# Original author's note on the split: 25% val, deterministic.
train_names, val_names = train_val_split(data_folder)

# Report the size of each split.
print(*map(len, (train_names, val_names)))

In [None]:
# Load one hand-picked lesion (CT volume and its label) for inspection.
# NOTE(review): lesion_name carries no file extension here, whereas other
# cells in this notebook load images as f"{s}.nii.gz" -- confirm that these
# two paths actually resolve to files on disk.
lesion_name = "003026_02_01_042_lesion_01" # val_names[0] + ".nii.gz"
print(lesion_name)

# Read the CT from "images" and the segmentation from "labels".
ct, seg = (
    sitk.ReadImage(data_folder / subdir / lesion_name)
    for subdir in ("images", "labels")
)

# Convert both SimpleITK images to arrays.
ct_array, seg_array = map(sitk.GetArrayFromImage, (ct, seg))

In [None]:
import sys
# Make modules under ../medsam (a path relative to the current working
# directory) importable from this notebook.
sys.path.append("../medsam")

# Project helper used by the batch-evaluation cell to score one prediction
# file against its ground-truth file.
from evaluation.eval_utils import evaluate_3d_metrics

### Batch Eval

In [None]:
# Batch evaluation: score every "*_pred.nrrd" prediction against its
# ground-truth label for BOTH experiments, so that df_win and df_full exist
# after a single top-to-bottom run (the cells below need both).
pred_root = Path("/home/liushifeng/Documents/GitHub/MedSAM/experiments/outputs/extend_3d")
pred_folder = pred_root / "ULS_full_10Feb2025"          # also used by the overlay-export cell
pred_folder_win = pred_root / "ULS_windowed_10Feb2025"
gt_folder = Path("/media/liushifeng/KINGSTON/ULS Jan 2025/ULS23/novel_data/ULS23_DeepLesion3D/labels/")

PRED_SUFFIX = "_pred.nrrd"


def evaluate_pred_folder(folder, gt_dir):
    """Evaluate every prediction file in ``folder`` against ``gt_dir``.

    Only files whose name ends with ``PRED_SUFFIX`` are considered, so stray
    files in the output folder are never handed to the evaluator. Files are
    processed in sorted order to make the row order reproducible.

    Args:
        folder: Directory containing ``<lesion_name>_pred.nrrd`` files.
        gt_dir: Directory containing ``<lesion_name>.nii.gz.zip`` labels.

    Returns:
        A list with one result per prediction, each tagged with a
        ``'lesion_name'`` entry.

    Raises:
        FileNotFoundError: If ``folder`` holds no matching prediction file.
    """
    pred_paths = sorted(Path(folder).glob(f"*{PRED_SUFFIX}"))
    if not pred_paths:
        raise FileNotFoundError(f"No '*{PRED_SUFFIX}' files found in {folder}")

    rows = []
    for pred_path in tqdm(pred_paths):
        lesion_name = pred_path.name[: -len(PRED_SUFFIX)]
        gt_path = Path(gt_dir) / f"{lesion_name}.nii.gz.zip"
        # res must support item assignment (presumably a dict of metrics).
        res = evaluate_3d_metrics(pred_path, gt_path)
        res['lesion_name'] = lesion_name
        rows.append(res)
    return rows


results_win = evaluate_pred_folder(pred_folder_win, gt_folder)
results = evaluate_pred_folder(pred_folder, gt_folder)

df_win = pd.DataFrame(results_win).set_index('lesion_name')
df_full = pd.DataFrame(results).set_index('lesion_name')

In [None]:
# Tag each result table with the experiment it came from so the two can be
# told apart once they are stacked into one frame.
for frame, label in ((df_win, "windowed"), (df_full, "full")):
    frame['group'] = label

In [None]:
# Stack the windowed and full results row-wise into a single table.
df = pd.concat((df_win, df_full), axis=0).copy()

In [None]:
# Persist the combined results (path is relative to the working directory).
results_csv = Path("medsam_extend3d_full_vs_win_results_250.csv")
df.to_csv(results_csv)

### Visualize

In [None]:
import seaborn as sns

# Plot styling applied to the figures drawn after this cell.
sns.set_context("paper", font_scale=1.2)
sns.set_style('darkgrid')

# Reload the saved results, indexed by lesion name.
df = pd.read_csv("medsam_extend3d_full_vs_win_results_250.csv", index_col="lesion_name")

# Bucket every row by its dice score: below the 0.33 quantile -> "low",
# below the 0.67 quantile -> "mid", everything else -> "high".
low_cut, high_cut = df['dice'].quantile([0.33, 0.67])
df['dice_cat'] = "high"
df.loc[df['dice'] < high_cut, 'dice_cat'] = "mid"
df.loc[df['dice'] < low_cut, 'dice_cat'] = "low"

In [None]:
# Keep only rows whose gt_volume is below 1e4 (outlier removal).
within_volume_limit = df['gt_volume'] < 1e4
df = df[within_volume_limit]

# Long format: one row per (lesion, group, metric), used for faceted plots.
dfm = df.reset_index().melt(
    id_vars=['lesion_name', 'group'],
    value_vars=['dice', 'volume_similarity', 'iou'],
    var_name='metric_type',
    value_name='metric_value',
)

In [None]:
# Median and mean of each metric per group, rounded to two decimals.
df.groupby("group")[['dice', 'volume_similarity', 'iou']].agg(["median", "mean"]).round(2)

In [None]:
# Box plot of each metric (one column per metric), split by group.
sns.catplot(
    data=dfm,
    x="group", y="metric_value", hue="group",
    col="metric_type", kind="box",
    height=4, aspect=0.7,
);

In [None]:
# Bar plot of each metric (one column per metric), split by group.
sns.catplot(
    data=dfm,
    x="group", y="metric_value", hue="group",
    col="metric_type", kind="bar",
    height=4, aspect=0.7,
);

In [None]:
# Light-grey box plots of dice and volume_similarity with the individual
# points drawn on top as a swarm. The box-plot fliers are coloured white.
metrics_wide = df[['dice', 'volume_similarity']]
white_fliers = {'markerfacecolor': 'white', 'markeredgecolor': "white"}

sns.catplot(
    metrics_wide,
    kind="box", color=".9",
    height=3, aspect=1,
    flierprops=white_fliers,
)
sns.swarmplot(metrics_wide, size=2);

In [None]:
# Relate segmentation quality (rows: dice, volume_similarity) to properties
# of the ground-truth lesion (columns: volume and the two aspect ratios),
# with a regression fit in every panel.
# The grid is built from `df`: the previously referenced `df_viz` is not
# defined anywhere in this notebook and raised a NameError.
g = sns.PairGrid(
    df,
    y_vars=["dice", "volume_similarity"],
    x_vars=['gt_volume', 'gt_ar_horizontal', 'gt_ar_vertical'],
)
g.map(
    sns.regplot, scatter_kws={"s": 3}
    # robust=True,
)

# Switch the first panel (dice vs. gt_volume) to a logarithmic x axis.
g.axes.flat[0].set_xscale('log')
# g.add_legend();

In [None]:
# Preview two random rows whose dice lies below the 0.1 quantile.
df.loc[df['dice'] < df['dice'].quantile(0.1)].sample(2)

In [None]:
# Draw up to n lesion names from each performance tier of `metric`:
# the bottom 10%, the middle 40-60% and the top 10%.
metric = "dice"
n = 10


def _sample_names(rows, k):
    """Return the index labels of up to ``k`` randomly sampled rows.

    Capping at ``len(rows)`` avoids the ValueError that ``DataFrame.sample``
    raises when asked for more rows than are available.
    """
    return rows.sample(min(k, len(rows))).index.tolist()


scores = df[metric]
q10, q40, q60, q90 = scores.quantile([0.1, 0.4, 0.6, 0.9])

bad_samples = _sample_names(df[scores.lt(q10)], n)
mid_samples = _sample_names(df[scores.between(q40, q60)], n)
top_samples = _sample_names(df[scores.gt(q90)], n)

In [None]:
import numpy as np

# Project helpers for loading volumes, CT windowing and overlay plotting.
from evaluation.eval_utils import load_seg, plot_seg
from utils.plot import window_ct

# Destination for the exported overlay images (relative to the working directory).
results_folder = Path("results")

In [None]:
# Export overlay PNGs (prediction vs. ground truth on the CT) for every row
# of df, grouped into one folder per row whose name encodes the dice and
# volume-similarity scores.
# NOTE(review): df, as built by the concat cell, holds rows for both groups,
# while pred_folder points at a single experiment's predictions -- confirm
# that overlays should really be generated for every row.
for s, d, v in tqdm(zip(df.index, df['dice'], df['volume_similarity']), total=len(df)):
    # print(s)

    ct = load_seg(data_folder / "images" / f"{s}.nii.gz")
    pred = load_seg(pred_folder / (s + "_pred.nrrd"))
    gt = load_seg(gt_folder / f"{s}.nii.gz.zip")

    # Indices along axis 0 where the ground truth is non-empty.
    relevant_slices = [int(x) for x in np.nonzero(gt.sum(axis=(1,2)))[0]]
    if not relevant_slices:
        # Nothing to draw; do not leave an empty folder behind.
        continue

    # The folder depends only on the row, so create it once per row.
    # parents=True also creates results_folder itself on a first run.
    lesion_folder = results_folder / f"dice{str(int(d*100)).zfill(3)}_vol{str(int(v*100)).zfill(3)}__{s}"
    lesion_folder.mkdir(parents=True, exist_ok=True)

    # Only the first five non-empty slices are exported.
    for i in relevant_slices[:5]:
        _lung, abdomen = window_ct(ct[i])  # only the abdomen window is plotted
        fig = plot_seg(abdomen, pred[i], gt[i])
        # plt.show()

        fig.savefig(lesion_folder / f"{i}.png")
        # close figure completely
        plt.close(fig)

## Analysis
Compare metrics against:
- distance from key slice (max horizontal slice seg)
- lesion size, category, aspect ratio

### Visualize
- Show worst, median, best

### Robustness test
- Add noise to the initial box (simulate human error)

## Improvements to the MedSAM approach
- Train a model to stop when the object is no longer visible
- Use an object tracker to get the bounding box, then run MedSAM on it
