In [1]:
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from scripts.datasets.constant import FLARE22_LABEL_ENUM
%matplotlib inline

CONFIG_PATH = "../runs/config-230711-110114.csv"

df = pd.read_csv(CONFIG_PATH)

def parse_aug_dict(aug_dict_string):
    if pd.isna(aug_dict_string): 
        aug_dict = {}
    else:
        aug_dict : dict = json.loads(aug_dict_string.replace("\'", "\""))
    organ_list = list(map(lambda x: x.replace("json://", ""), list(aug_dict.keys())))
    organ_list = ['0'] if len(organ_list) == 0 else organ_list
    return '-'.join(organ_list)

df['aug-symbol'] = df['aug_dict'].map(parse_aug_dict)
# The mask drop is the liver drop one
df.loc[df['name'] == 'mask-drop', 'aug-symbol'] = '1'
df['logdir'] = df['logdir'].apply(lambda x: x.replace('runs/', ''))
df['class_selected'] = df['class_selected'].apply(lambda x: x if not pd.isna(x) else 'all')

pivot_non_custom = pd.pivot_table(
    df[
        (pd.isna(df['custom_model_path']) == False) & (df['name'] != 'organ-ctx2')
        ], 
    values='logdir', 
    index=['aug-symbol'], 
    columns=['class_selected'], 
    aggfunc=lambda x: ','.join(x)
    # aggfunc=lambda x: len(x)
)
pivot_custom = pd.pivot_table(
    df[
        (pd.isna(df['custom_model_path'])) & (df['name'] != 'organ-ctx2')
    ], 
    values='logdir', 
    index=['aug-symbol'], 
    columns=['class_selected'], 
    # aggfunc=lambda x: ','.join(x)
    aggfunc=lambda x: len(x)
)
display(
    pivot_non_custom, pivot_custom
)

class_selected,"[1, 9, 6, 2]","[1, 9]",all
aug-symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,"imp-230603-150046,mp-focus-230513-151234,mp-fo...","imp-230608-220335,imp-aug-230607-230424,mp-foc...","imp-230602-234856,mask-prop-230511-153918"
1,"imp-aug-230605-000452,imp-aug-230603-215252,ma...",,
1-6-9,"mask-aug-230521-210435,mask-aug-230524-233309,...",,
1-9,imp-aug-230605-165716,,


class_selected,"[1, 9, 6, 2]","[1, 9]",all
aug-symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,3.0,3.0,4.0
1,1.0,,4.0
1-6-9,1.0,,2.0
1-9,,,1.0


In [2]:
df = pd.read_csv(CONFIG_PATH)
df['aug-symbol'] = df['aug_dict'].map(parse_aug_dict)
# The mask drop is the liver drop one
df.loc[df['name'] == 'mask-drop', 'aug-symbol'] = '1'
df['logdir'] = df['logdir'].apply(lambda x: x.replace('runs/', ''))
df['class_selected'] = df['class_selected'].apply(lambda x: x if not pd.isna(x) else 'all')
tmp = pd.pivot_table(
    df[
        ((df['name'] == 'imp') | \
        (df['name'] == 'imp-aug')) & \
        # (df['name'] == 'organ-ctx2') & \
        # (df['name'] != 'mask-drop') & \
        # ~pd.isna(df['custom_model_path']) & \
        (df['name'] != '')

    ],
    values='logdir', 
    index=['aug-symbol'], 
    columns=['class_selected'], 
    aggfunc=lambda x: ','.join(x)
    # aggfunc=lambda x: len(x)
)
display(tmp[['all']])
# display(tmp.values.flatten().tolist())
# # a = tmp[['all']].loc['0'].tolist()
# # b = tmp[['all']].loc['1-9'].tolist()
# # display(a + b)

class_selected,all
aug-symbol,Unnamed: 1_level_1
0,"imp-230601-213326,imp-230602-234856"
1,"imp-aug-230610-211354,imp-230610-011507"
1-6-9,imp-aug-230610-104249
1-9,imp-aug-230606-002414


In [3]:
# def isin(x: pd.Series, arr): 
#     return x.apply(lambda x: x in arr)

# tmp_df = df[isin(df['logdir'], cond)]
# exp_name = tmp_df[tmp_df['n_epochs'] > 10]['logdir'].tolist()

In [23]:
VALLOG_PATH = "../runs/vallog-archive-230524-.csv"
from glob import glob

def remove_model_num(name: str):
    return name.rsplit("-", 1)[0]

def get_checkpoint_num(name: str):
    try:
        return int(name.rsplit("-", 1)[-1])
    except:
        return -1
    
OMIT_KEY = ['op-', 'e3-0', 'e3-1', 'e3-2', 'Fine-tune Liver', 'Fine' 'Pretrain Liver', 'mp-', 'maug_all', 'merge-min', 'merge-min-area']
EXP_PREFIX = {
    "bi_drop": "Bidirection with mask drop aug.",
    "bi_maug": "Bidirection with mul. mask aug.",
    "bi_raw": "Bidirection with raw mask prop.",
    "e3": "Ensemble 3 keyframes",
    "faug": "Focus augmentation",
    "maug_19": "Mask aug",
    "maug_all": "Mask aug but run inference for all 13 class",
    "merge-min": "Same as mgmin",
    "mgmin": "Mask merging though min prior",
    "mp": "Mask prop (no merge)",
    "op": "One point experiment",
    "org_ctx": "Organ context ver1",
    "org_ctx2": "Organ context ver2 (meaning-ful)",
}

def get_note(name: str):
    for key in EXP_PREFIX.keys():
        if name.startswith(key): return EXP_PREFIX[key]
    
def keep_max(x: pd.core.frame.DataFrame):
    best_mean = x.iloc[[x['DSC_mean'].argmax()]]
    best_liver = x.iloc[[x['DSC_1'].argmax()]]
    best_gall = x.iloc[[x['DSC_9'].argmax()]]

    best_mean['note'] = f"Best mean: ({best_mean['note'].values[0]})"
    best_liver['note'] = f"Best gall: ({best_liver['note'].values[0]})"
    best_gall['note'] = f"Best liver: ({best_gall['note'].values[0]})"
    return pd.concat([best_mean, best_liver, best_gall])

def match_sub_string(x: pd.Series, cond):
    tmp = [x.apply(lambda k: e in k) for e in cond]
    return np.all(tmp, axis=0)

def collect_max(path, exp_name=None):
    df = pd.read_csv(path)
    if exp_name is not None:
        df = df[match_sub_string(df['Name'], exp_name)]
    if df.empty:
        return df
    new_df = df[['Date', 'Name', 'DSC_mean', 'NSD-1mm_mean', 'DSC_1', 'NSD-1mm_1', 'DSC_9', 'NSD-1mm_9']]
    # Remove no-model-info report
    new_df = new_df[new_df['Name'] != 'bidir']
    new_df = new_df[new_df['Name'] != 'pad']
    new_df = new_df[new_df['Name'] != 'organ-ctx']
    new_df = new_df[new_df['Name'] != 'e3']
    new_df = new_df[new_df['DSC_1'] > 0.0]
    new_df = new_df[new_df['DSC_9'] > 0.0]
    for key in OMIT_KEY:
        new_df = new_df[
            new_df.Name.transform(lambda x: not x.startswith(key))
        ]
        pass
    # remote exp list in omit
    new_df['exp_name'] = df['Name'].apply(lambda x: remove_model_num(x))
    new_df['checkpoint'] = df['Name'].apply(lambda x: get_checkpoint_num(x))
    new_df['checkpoint'] = df['Name'].apply(lambda x: get_checkpoint_num(x))
    new_df['note'] = df['Name'].apply(lambda x: get_note(x))
    new_df = new_df.groupby('exp_name').apply(keep_max)
    new_df.set_index(['exp_name', 'checkpoint'], inplace=True)
    return new_df

VAL_LIST = [
#  '../runs/vallog-archive/vallog-ensemble-3.csv',
#  '../runs/vallog-archive/vallog-orgctx2.csv',
#  '../runs/vallog-archive/vallog-bi-inference-25-05-23.csv',
#  '../runs/vallog-archive/vallog-archive-230524-.csv',
#  '../runs/vallog-archive/vallog-mask-aug-600.csv',
#  '../runs/vallog-archive/vallog-train-archived.csv',
#  '../runs/vallog-archive/vallog-train-archived2.csv',
#  '../runs/vallog-archive/vallog.csv',
#  '../runs/vallog-archive/vallog-thesis-final-1.csv',
 '../runs/vallog-archive/vallog-thesis-e3-final-1.csv',
]

val_log_dfs = []
for p in VAL_LIST:
    val_log_df = collect_max(p, None)
    val_log_dfs.append(val_log_df)
    pass

val_log_dfs = pd.concat(val_log_dfs)
# match_sub_string(val_log_dfs['exp_name'], exp_name)
# val_log_dfs['logdir'] = val_log_dfs['logdir'].apply(lambda x: x.replace("runs/", ""))
# val_log_dfs[isin(val_log_dfs['logdir'], exp_name)].drop_duplicates()




In [24]:
val_log_dfs

Unnamed: 0_level_0,Unnamed: 1_level_0,Date,Name,DSC_mean,NSD-1mm_mean,DSC_1,NSD-1mm_1,DSC_9,NSD-1mm_9,note
exp_name,checkpoint,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
thesis-e3-imp-230603-150046,20,2023-07-14 09:49:18.104608,thesis-e3-imp-230603-150046-20,0.133721,0.105017,0.955413,0.740672,0.782958,0.624544,Best mean: (None)
thesis-e3-imp-230603-150046,20,2023-07-14 09:49:18.104608,thesis-e3-imp-230603-150046-20,0.133721,0.105017,0.955413,0.740672,0.782958,0.624544,Best gall: (None)
thesis-e3-imp-230603-150046,10,2023-07-14 09:44:42.281036,thesis-e3-imp-230603-150046-10,0.133533,0.104923,0.95195,0.733414,0.783974,0.630581,Best liver: (None)
thesis-e3-imp-230610-011507,30,2023-07-14 09:55:30.436290,thesis-e3-imp-230610-011507-30,0.132799,0.099939,0.935363,0.686648,0.791028,0.612558,Best mean: (None)
thesis-e3-imp-230610-011507,40,2023-07-14 10:00:11.761968,thesis-e3-imp-230610-011507-40,0.132271,0.098447,0.948039,0.718227,0.771487,0.561582,Best gall: (None)
thesis-e3-imp-230610-011507,30,2023-07-14 09:55:30.436290,thesis-e3-imp-230610-011507-30,0.132799,0.099939,0.935363,0.686648,0.791028,0.612558,Best liver: (None)
thesis-e3-imp-aug-230605-000452,25,2023-07-14 09:53:02.424260,thesis-e3-imp-aug-230605-000452-25,0.135186,0.104278,0.952732,0.723973,0.804684,0.631635,Best mean: (None)
thesis-e3-imp-aug-230605-000452,5,2023-07-14 09:42:41.489314,thesis-e3-imp-aug-230605-000452-5,0.133448,0.104692,0.956298,0.741791,0.778527,0.619209,Best gall: (None)
thesis-e3-imp-aug-230605-000452,25,2023-07-14 09:53:02.424260,thesis-e3-imp-aug-230605-000452-25,0.135186,0.104278,0.952732,0.723973,0.804684,0.631635,Best liver: (None)
thesis-e3-imp-aug-230605-165716,5,2023-07-14 09:42:37.334936,thesis-e3-imp-aug-230605-165716-5,0.133556,0.103797,0.953664,0.728832,0.782567,0.620528,Best mean: (None)


In [25]:
val_log_dfs.index = val_log_dfs.index.map(lambda x: (x[0].replace("thesis-", ""), x[1]))


In [26]:
tmp_df = val_log_dfs[['Name', 'DSC_mean', 'DSC_1', 'DSC_9', 'note']]
tmp_df = tmp_df[tmp_df['note'] == 'Best mean: (None)']
# .drop(index='mask-prop-230508-222109')
# tmp_df
tmp_df['DSC_mean'] = (tmp_df['DSC_1'] + tmp_df['DSC_9']) / 2
tmp_df = (tmp_df
    .reset_index()
    .drop(columns=['Name', 'note'])
    .drop_duplicates(subset=['exp_name', 'checkpoint'])
    .sort_values(by='DSC_mean', ascending=False)
)
display(tmp_df)
# .to_csv('thesis_config.csv', index=None)
# For augmentation
# exp_name,DSC_mean,DSC_1,DSC_9
# raw,0.8260318344843465,0.9149680007720452,0.7370956681966478
# 1,0.852842,0.940365,0.765319
# 1-9,0.8491595143354436,0.9451452597425496,0.7531737689283375
# 1-6-9,0.8703018829741453,0.945820822243538,0.7947829437047526


Unnamed: 0,exp_name,checkpoint,DSC_mean,DSC_1,DSC_9
2,e3-imp-aug-230605-000452,25,0.878708,0.952732,0.804684
4,e3-imp-aug-230610-104249,65,0.874652,0.951599,0.797704
0,e3-imp-230603-150046,20,0.869185,0.955413,0.782958
3,e3-imp-aug-230605-165716,5,0.868116,0.953664,0.782567
1,e3-imp-230610-011507,30,0.863196,0.935363,0.791028
5,e3-imp-aug-230610-211354,30,0.857277,0.939284,0.775271


In [18]:
for entry in tmp_df.iterrows():
    exp_name = entry[1]['exp_name']
    ckpt = entry[1]['checkpoint']
    # print(f"runs/{exp_name}/model-{ckpt}.pt")
    print(exp_name)

    pass

imp-aug-230610-104249
imp-230603-150046
imp-230610-011507
imp-aug-230605-165716
imp-aug-230605-000452
imp-aug-230610-211354
imp-230608-220335
imp-aug-230606-002414
imp-230608-231031
imp-aug-230607-230424
imp-230601-213326
imp-230608-204325
imp-aug-230608-003029
imp-aug-230607-213820
imp-aug-230603-215252


In [None]:
import re
from pandas import DataFrame

from sympy import false

match_url_eval = 'https://flare22.grand-challenge.org/evaluation'

from bs4 import BeautifulSoup
LEADER_BOARD = "../dataset/flare22.leaderboard.50.json"
with open(LEADER_BOARD, 'r') as out:
    data = json.load(out)


def search_metric_func(tag):
    if tag.attrs.get('href', False):
        if tag.attrs['href'].startswith(match_url_eval): return True
    return False

def search_user_func(tag):
    if tag.attrs.get('href', False):
        # print(tag)
        if tag.attrs['href'].startswith("https://grand-challenge.org/users"): return True
    return False

orders = [
        FLARE22_LABEL_ENUM.LIVER,
        FLARE22_LABEL_ENUM.RIGHT_KIDNEY,
        FLARE22_LABEL_ENUM.SPLEEN,
        FLARE22_LABEL_ENUM.PANCREAS,
        FLARE22_LABEL_ENUM.AORTA,
        FLARE22_LABEL_ENUM.IVC,
        FLARE22_LABEL_ENUM.RAG,
        FLARE22_LABEL_ENUM.LAG,
        FLARE22_LABEL_ENUM.GALLBLADDER,
        FLARE22_LABEL_ENUM.ESOPHAGUS,
        FLARE22_LABEL_ENUM.STOMACH,
        FLARE22_LABEL_ENUM.DUODENUM,
        FLARE22_LABEL_ENUM.LEFT_KIDNEY
    ]
col_name = ['name', 'DSC_mean'] + [f'DSC_{i.value}' for i in orders]
orders = [0] + [o.value for o in orders]
result = []
for html_row in data['data']:
    html_text = "".join(html_row)
    parser = BeautifulSoup(html_text)
    row = parser.find_all(search_metric_func)
    user = parser.find(search_user_func)
    user = user['href'].replace('https://grand-challenge.org/users/', '').replace('/', '').strip()
    row_result = []
    for parse_cell, target_num in zip(row, orders):
        # print(target_num, parse_cell.text.strip())
        row_result.append(float(parse_cell.text.strip()))
        pass
    result.append([user] + row_result)


leader_df = DataFrame(result, columns=col_name, index=None)
    


In [None]:
leader_df['note'] = None
leader_df = leader_df.sort_values('DSC_mean').groupby('name').apply(keep_max)

In [None]:
new_leader = leader_df[leader_df['note'] == 'Best mean: (None)'].sort_values('DSC_mean', ascending=False)[['DSC_mean', 'DSC_1', 'DSC_9']]
new_leader['DSC_mean'] = (new_leader['DSC_1'] + new_leader['DSC_9']) / 2.0
new_leader.reset_index()[['name', 'DSC_mean', 'DSC_1', 'DSC_9']].to_csv('../dataset/compact-leader-board.csv', index=None)