In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scripts.datasets.constant import FLARE22_LABEL_ENUM
%matplotlib inline

AREA_TOP_DATA_PATH = "../runs/all_area_data-T.json"
AREA_SIDE_DATA_PATH = "../runs/all_area_data-W.json"

POINT_TOP_DATA_PATH = "../runs/all_point_data-T.json"

point_df = pd.read_json(POINT_TOP_DATA_PATH)
point_df = point_df.T.rename(
    columns={
        value.value: value.name.lower() for value in FLARE22_LABEL_ENUM
    },
)


FileNotFoundError: File ../runs/all_point_data-T.json does not exist

In [None]:
start_point = point_df.applymap(lambda x: x[0])
end_point = point_df.applymap(lambda x: x[1])
display(start_point)
display(end_point)

In [None]:
display(start_point.describe())
display(end_point.describe())

In [None]:
dist_point = end_point - start_point

In [None]:
dist_point.describe()

In [None]:
area_df = pd.read_json(AREA_TOP_DATA_PATH).T.rename(
    columns={
        value.value: value.name.lower() for value in FLARE22_LABEL_ENUM
    },
)

max_area_projection_top = area_df.applymap(np.max)
display(max_area_projection_top.describe())


In [None]:
area_df = pd.read_json(AREA_SIDE_DATA_PATH).T.rename(
    columns={
        value.value: value.name.lower() for value in FLARE22_LABEL_ENUM
    },
)

max_area_projection_side = area_df.applymap(np.max)
display(max_area_projection_side.describe())


In [None]:
(max_area_projection_side / max_area_projection_top).describe()

In [None]:
import itertools


area_df = pd.read_json(AREA_TOP_DATA_PATH).T.rename(
    columns={
        value.value: value.name.lower() for value in FLARE22_LABEL_ENUM
    },
)

def batched(iterable, n):
    "Batch data into tuples of length n. The last batch may be shorter."
    # batched('ABCDEFG', 3) --> ABC DEF G
    if n < 1:
        raise ValueError('n must be at least one')
    it = iter(iterable)
    while batch := tuple(itertools.islice(it, n)):
        yield batch

def binning(data, reduce, n_bin=10):
    return [reduce(_bin) for _bin in list(batched(data, len(data) // n_bin + 1))]

def bin_exist(data, n_bin=10):
    # Check if certain organ existed at that bin
    return binning(data, n_bin=n_bin, reduce=lambda xs: (np.array(xs) > 0.0).any())

area_df.applymap(lambda x: bin_exist(x, 10))


In [None]:
top_area_df = pd.read_json(AREA_TOP_DATA_PATH).T.rename(
    columns={
        value.value: value.name.lower() for value in FLARE22_LABEL_ENUM
    },
)
top_area_df.applymap(np.max)['gallbladder'].describe()

In [None]:
top_area_train_df = pd.read_json('../train-all_area_data-T.json').T.rename(
    columns={
        value.value: value.name.lower() for value in FLARE22_LABEL_ENUM
    },
)
display(top_area_train_df.applymap(np.max)['gallbladder'].describe())

top_area_test_df = pd.read_json('../test-all_area_data-T.json').T.rename(
    columns={
        value.value: value.name.lower() for value in FLARE22_LABEL_ENUM
    },
)
display(top_area_test_df.applymap(np.max)['gallbladder'].describe())

In [None]:
train_dist = top_area_train_df.applymap(np.max)['liver']
test_dist = top_area_test_df.applymap(np.max)['liver']
data = {
    'train': train_dist,
    'test': test_dist
}
sns.displot(data)

In [None]:
display(top_area_train_df.applymap(np.max)['liver'].describe())
display(top_area_test_df.applymap(np.max)['liver'].describe())

In [4]:
VALLOG_PATH = "../runs/vallog-archive-230524-.csv"
from glob import glob

def remove_model_num(name: str):
    return name.rsplit("-", 1)[0]

def get_checkpoint_num(name: str):
    try:
        return int(name.rsplit("-", 1)[-1])
    except:
        return -1
    
OMIT_KEY = ['op-', 'e3-0', 'e3-1', 'e3-2', 'Fine-tune Liver', 'Fine' 'Pretrain Liver', 'mp-', 'maug_all', 'merge-min', 'merge-min-area']
EXP_PREFIX = {
    "bi_drop": "Bidirection with mask drop aug.",
    "bi_maug": "Bidirection with mul. mask aug.",
    "bi_raw": "Bidirection with raw mask prop.",
    "e3": "Ensemble 3 keyframes",
    "faug": "Focus augmentation",
    "maug_19": "Mask aug",
    "maug_all": "Mask aug but run inference for all 13 class",
    "merge-min": "Same as mgmin",
    "mgmin": "Mask merging though min prior",
    "mp": "Mask prop (no merge)",
    "op": "One point experiment",
    "org_ctx": "Organ context ver1",
    "org_ctx2": "Organ context ver2 (meaning-ful)",
}

def get_note(name: str):
    for key in EXP_PREFIX.keys():
        if name.startswith(key): return EXP_PREFIX[key]
    
def keep_max(x: pd.core.frame.DataFrame):
    best_mean = x.iloc[[x['DSC_mean'].argmax()]]
    best_liver = x.iloc[[x['DSC_1'].argmax()]]
    best_gall = x.iloc[[x['DSC_9'].argmax()]]

    best_mean['note'] = f"Best mean: ({best_mean['note'].values[0]})"
    best_liver['note'] = f"Best gall: ({best_liver['note'].values[0]})"
    best_gall['note'] = f"Best liver: ({best_gall['note'].values[0]})"
    return pd.concat([best_mean, best_liver, best_gall])

def collect_max(path):
    df = pd.read_csv(path)
    new_df = df[['Date', 'Name', 'DSC_mean', 'NSD-1mm_mean', 'DSC_1', 'NSD-1mm_1', 'DSC_9', 'NSD-1mm_9']]
    # Remove no-model-info report
    new_df = new_df[new_df['Name'] != 'bidir']
    new_df = new_df[new_df['Name'] != 'pad']
    new_df = new_df[new_df['Name'] != 'organ-ctx']
    new_df = new_df[new_df['Name'] != 'e3']
    new_df = new_df[new_df['DSC_1'] > 0.0]
    new_df = new_df[new_df['DSC_9'] > 0.0]
    for key in OMIT_KEY:
        new_df = new_df[
            new_df.Name.transform(lambda x: not x.startswith(key))
        ]
        pass
    # remote exp list in omit
    new_df['exp_name'] = df['Name'].apply(lambda x: remove_model_num(x))
    new_df['checkpoint'] = df['Name'].apply(lambda x: get_checkpoint_num(x))
    new_df['checkpoint'] = df['Name'].apply(lambda x: get_checkpoint_num(x))
    new_df['note'] = df['Name'].apply(lambda x: get_note(x))
    new_df = new_df.groupby('exp_name').apply(keep_max)
    new_df.set_index(['exp_name', 'checkpoint'], inplace=True)
    return new_df

VAL_LIST = [
#  '../runs/vallog-ensemble-3.csv',
 '../runs/vallog-archive/vallog.csv',
#  '../runs/vallog-orgctx2.csv',
#  '../runs/vallog-bi-inference-25-05-23.csv',
#  '../runs/vallog-archive-230524-.csv',
#  '../runs/vallog-mask-aug-600.csv'
]



In [6]:
dfs = []
for p in VAL_LIST:
    df = collect_max(p)
    dfs.append(df)

all_exp = pd.concat(dfs)
# all_exp.to_csv('all_exp.csv')


In [7]:
all_exp

Unnamed: 0_level_0,Unnamed: 1_level_0,Date,Name,DSC_mean,NSD-1mm_mean,DSC_1,NSD-1mm_1,DSC_9,NSD-1mm_9,note
exp_name,checkpoint,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
thesis-imp-230601-213326,20,2023-06-08 17:06:29.239356,thesis-imp-230601-213326-20,0.127082,0.093813,0.914968,0.638499,0.737096,0.581067,Best mean: (None)
thesis-imp-230601-213326,30,2023-06-08 17:08:34.074352,thesis-imp-230601-213326-30,0.126342,0.094212,0.927814,0.668517,0.714627,0.556244,Best gall: (None)
thesis-imp-230601-213326,70,2023-06-08 17:16:55.468023,thesis-imp-230601-213326-70,0.118619,0.092011,0.781101,0.60649,0.760942,0.589654,Best liver: (None)
thesis-imp-230603-150046,20,2023-06-08 16:25:35.481503,thesis-imp-230603-150046-20,0.133156,0.103108,0.951172,0.727202,0.779855,0.613203,Best mean: (None)
thesis-imp-230603-150046,5,2023-06-08 16:22:26.774265,thesis-imp-230603-150046-5,0.12718,0.100254,0.952479,0.733368,0.70086,0.569929,Best gall: (None)
thesis-imp-230603-150046,20,2023-06-08 16:25:35.481503,thesis-imp-230603-150046-20,0.133156,0.103108,0.951172,0.727202,0.779855,0.613203,Best liver: (None)
thesis-imp-230608-204325,25,2023-06-09 09:37:17.641582,thesis-imp-230608-204325-25,0.124642,0.092306,0.934404,0.675663,0.685947,0.524312,Best mean: (None)
thesis-imp-230608-204325,25,2023-06-09 09:37:17.641582,thesis-imp-230608-204325-25,0.124642,0.092306,0.934404,0.675663,0.685947,0.524312,Best gall: (None)
thesis-imp-230608-204325,20,2023-06-09 09:36:16.061189,thesis-imp-230608-204325-20,0.117936,0.088771,0.844666,0.617887,0.6885,0.536137,Best liver: (None)
thesis-imp-230608-220335,5,2023-06-09 09:45:36.550546,thesis-imp-230608-220335-5,0.130856,0.101184,0.922353,0.684546,0.778776,0.630851,Best mean: (None)
