In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
from rpy2.robjects.packages import importr, data
from rpy2.robjects import numpy2ri
def delong(y_true, pred_a, pred_b):
    y_true = np.array(y_true)
    pred_a, pred_b = np.array(pred_a), np.array(pred_b)
    pROC = importr('pROC')
    numpy2ri.activate()
    roc_a = pROC.roc(y_true, pred_a)
    roc_b = pROC.roc(y_true, pred_b)
    roc_test_result = pROC.roc_test(roc_a, roc_b, alternative='greater')
    numpy2ri.deactivate()
    
    return next(np.array(v).item() for k, v in roc_test_result.items() if k == 'p.value')

In [3]:
def get_best_model(path, features='clinical'):
    datasets = ['ictcf_Union', 'ictcf_Liyuan', 'cwz_cwz', 'cwz_rumc']
    dfs = [pd.read_csv(os.path.join(path, dataset, features, 'results/data.csv'), index_col=0) for dataset in datasets]
    return (sum(df[(df.metric == 'AUC') & (df.model != 'Baseline')].set_index(list(df.columns[5:])).value for df in dfs)/4).idxmax(), (sum(df[(df.metric == 'AUC') & (df.model != 'Baseline')].set_index(list(df.columns[5:])).value for df in dfs)/4).max()
#     return df.loc[df[df.metric == 'AUC'].value.idxmax()][5:].item()

In [4]:
def get_best_model(experiment, dataset, features='clinical'):
    df = pd.read_csv(os.path.join('../experiments', experiment, dataset, features, 'results', 'data.csv'))
    return df.loc[df[df.metric == 'AUC'].value.idxmax()].iloc[6:].values, df.loc[df[df.metric == 'AUC'].value.idxmax()].value

In [5]:
def get_all(experiment, dataset, features='clinical'):
    df = pd.read_csv(os.path.join('../experiments', experiment, dataset, features, 'results', 'data.csv'), index_col=0)
    df['Experiment'] = experiment
    df['Dataset'] = dataset
    df['feature_set'] = features
    return df

In [6]:
get_best_model('predict_with_impute', 'cwz_cwz')

(array(['MEAN', 'LR'], dtype=object), 0.8967223252937538)

In [7]:
from covidcf.evaluation.base import get_cat_codes
import yaml
import joblib
def load_data(experiments_path):
    dfs = []
    shap_df = []
    is_multi = False
    for f in os.scandir(experiments_path):
        if f.name != 'meta.yaml' and f.name.startswith('result_'):
#             print(f.name)
            path_to_file, filename = os.path.split(f.path)
            config_path = os.path.join(path_to_file, f'{filename.split(".")[0].replace("result_", "")}.yaml')
            with open(config_path, 'r') as fh:
                config_dict = yaml.load(fh, Loader=yaml.SafeLoader)
            data = joblib.load(f.path)
#             print(len(data['preds_proba']))
#             print(data.keys())
            if not isinstance(data['preds_proba'], list):
#                 print({'true_0': get_cat_codes(data['y_test']), 'pred_0': data['preds_proba'],
#                                      'pred_label_0': data['preds']})
                if data['preds'].ndim > 1:
                    data['preds'] = np.argmax(data['preds'], axis=1)
                data = pd.DataFrame({'true_0': get_cat_codes(data['y_test']), 'pred_0': data['preds_proba'],
                                     'pred_label_0': data['preds']})
            # assert all(not (df.model == config_dict['human_name']).any() for df in
            #            dfs), f'{[df.model.unique() for df in dfs]}, {config_dict["human_name"]}'
            for k, v in config_dict['meta'].items():
                data[f'meta_{k}'] = v

            dfs.append(data)

    df = pd.concat(dfs, ignore_index=True)
    return df

In [279]:
y_true = np.random.rand(40) < 0.5
preds1 = y_true + np.random.rand(40)
preds2 = preds1 * ((np.random.rand(40)<0.5)*2-1)

delong(y_true, preds2, preds1)

R[write to console]: Setting levels: control = FALSE, case = TRUE

R[write to console]: Setting direction: controls > cases

R[write to console]: Setting levels: control = FALSE, case = TRUE

R[write to console]: Setting direction: controls < cases



0.0015132971290430844


0.0015132971290430844

In [262]:
load_data('../experiments/within_dataset/cwz/clinical/')

Unnamed: 0,true_0,pred_0,pred_label_0,meta_model
0,1,0.868380,1,BaggedGBDT
1,1,0.583818,1,BaggedGBDT
2,1,0.695027,1,BaggedGBDT
3,1,0.918778,1,BaggedGBDT
4,1,0.691489,1,BaggedGBDT
...,...,...,...,...
604,1,0.944000,1,RFsklearn
605,1,0.704000,1,RFsklearn
606,0,0.538000,1,RFsklearn
607,0,0.462000,0,RFsklearn


In [8]:
def load_best(experiment, dataset, features='clinical'):
    best, auc = get_best_model(experiment, dataset, features)
    data = load_data(os.path.join('../experiments', experiment, dataset, features))
    if 'meta_impute_method' in data.columns:
        data = data[(data.meta_model == best[1]) & (data.meta_impute_method == best[0])]
    else:
        data = data[data.meta_model == best[0]] 
    return data, auc

In [9]:
def bold_best(df):
    for col in df.columns:
        max_idx = df[col].idxmax()
        for i in df.index:
            df.loc[i, col] = f'{df.loc[i, col]:.3f}'
        df.loc[max_idx, col] = f'\textbf{{{df.loc[max_idx, col]}}}'
    return df

In [11]:
from tqdm import tqdm
# experiments_within = {'within_dataset': 'Base', 'within_dataset_hyperopt': "Optimized"}
experiments_within = {'within_dataset': 'Base', 'within_dataset_hyperopt': 'Optimized', 'within_predict_with_impute': 'Impute', 
                      'within_shap_zeroing': 'SHAP zeroing', 'within_shap_zeroing_hyperopt': 'SHAP zeroing optimized', 
                      'within_test_time_impute': 'Test-time impute'}
datasets_within = {'cwz': 'CWZ', 'ictcf': 'iCTCF', 'rumc': 'RUMC'}
dfs = []
datasets_visited = set()
true_data_within = []
pred_data_within = {}
# for experiment_path, experiment in experiments_within.items():
#     for dataset_path, dataset in datasets_within.items():
#         df = get_all(experiment_path, dataset_path)
#         preds = load_data(os.path.join('../experiments', experiment_path, dataset_path, 'clinical'))
#         if dataset_path not in datasets_visited:
#             true_data_within += list(preds.loc[preds.meta_model == preds.meta_model.unique()[0], 'true_0'])
#             true_data_within += list(preds.loc[preds.meta_model == preds.meta_model.unique()[0], 'true_0'])
#             datasets_visited.add(dataset_path)
        
#         for model in df.model.unique():
#             model_preds = preds.loc[preds.meta_model == model, 'pred_0'].values
#             key = (experiment, model)
#             if key not in pred_data_within:
#                 pred_data_within[key] = []
#             pred_data_within[key] += list(model_preds)
#         dfs.append(df)
#         df = get_all(experiment_path, dataset_path, features='combined')
#         preds = load_data(os.path.join('../experiments', experiment_path, dataset_path, 'combined'))
#         for model in df.model.unique():
#             model_preds = preds.loc[preds.meta_model == model, 'pred_0'].values
#             key = (experiment, model)
#             if key not in pred_data_within:
#                 pred_data_within[key] = []
#             pred_data_within[key] += list(model_preds)
#         dfs.append(df)

for experiment_path, experiment in (list(experiments_within.items())):
    print(experiment_path)
#     if experiment_path != 'within_test_time_impute':
#         continue
    for dataset_path, dataset in datasets_within.items():
        df = get_all(experiment_path, dataset_path)
        preds = load_data(os.path.join('../experiments', experiment_path, dataset_path, 'clinical'))
        
        if dataset_path not in datasets_visited and 'meta_impute_method' not in preds.columns:
#             if experiment_path == 'within_test_time_impute':
#                 print(preds.meta_model.unique())
#             print([len(preds.loc[preds.meta_model == model, 'true_0']) 
#                                   for model in preds.meta_model.unique()])
            assert len(np.unique([len(preds.loc[preds.meta_model == model, 'true_0']) 
                                  for model in preds.meta_model.unique()])) == 1
            true_data_within += list(preds.loc[preds.meta_model == preds.meta_model.unique()[0], 'true_0'])
            true_data_within += list(preds.loc[preds.meta_model == preds.meta_model.unique()[0], 'true_0'])
            datasets_visited.add(dataset_path)
        
        for model in tqdm(df.model.unique()):
            if 'meta_impute_method' not in preds.columns:
                model_preds = preds.loc[preds.meta_model == model, 'pred_0'].values
                key = (experiment, model)
                if key not in pred_data_within:
                    pred_data_within[key] = []
                pred_data_within[key] += list(model_preds)
            else:
                for imp in preds.meta_impute_method.unique():
                    model_preds = preds.loc[(preds.meta_model == model) & (preds.meta_impute_method == imp), 'pred_0'].values
                    key = ('Imputation: '+imp, model)
                    if key not in pred_data_within:
                        pred_data_within[key] = []
                    pred_data_within[key] += list(model_preds)
        
        dfs.append(df)
        df = get_all(experiment_path, dataset_path, features='combined')
        preds = load_data(os.path.join('../experiments', experiment_path, dataset_path, 'combined'))
        for model in df.model.unique():
            if 'meta_impute_method' not in preds.columns:
                model_preds = preds.loc[preds.meta_model == model, 'pred_0'].values
                key = (experiment, model)
                if key not in pred_data_within:
                    pred_data_within[key] = []
                pred_data_within[key] += list(model_preds)
            else:
                for imp in preds.meta_impute_method.unique():
                    model_preds = preds.loc[(preds.meta_model == model) & (preds.meta_impute_method == imp), 'pred_0'].values
                    key = ('Imputation: '+imp, model)
                    if key not in pred_data_within:
                        pred_data_within[key] = []
                    pred_data_within[key] += list(model_preds)
        dfs.append(df)

within_dataset


100%|███████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 1998.00it/s]
100%|███████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 2003.25it/s]
100%|███████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 3995.53it/s]
100%|███████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 1499.93it/s]

within_dataset_hyperopt



100%|███████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 1498.50it/s]
100%|███████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 1501.54it/s]


within_predict_with_impute


100%|████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 571.33it/s]
100%|████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 363.62it/s]
100%|████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 571.31it/s]
100%|███████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 4006.98it/s]

within_shap_zeroing



100%|███████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 4008.89it/s]
100%|███████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 3984.14it/s]
100%|█████████████████████████████████████████████████████████████████████| 2/2 [00:00<?, ?it/s]

within_shap_zeroing_hyperopt



100%|███████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 2002.05it/s]
100%|███████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 2003.01it/s]


within_test_time_impute


100%|███████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 1996.81it/s]
100%|███████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 1996.34it/s]
100%|████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 998.88it/s]


In [12]:
overview_within = pd.concat(dfs)
overview_within = overview_within.rename(columns={'model': 'Model'})
overview_within.head(30)

Unnamed: 0,metric,feat,value,lb,ub,Model,Experiment,Dataset,feature_set,impute_method
0,AUC,0,0.869048,0.786301,0.935704,BaggedGBDT,within_dataset,cwz,clinical,
1,F1,0,0.857143,0.790312,0.910513,BaggedGBDT,within_dataset,cwz,clinical,
2,Precision,0,0.814286,0.724638,0.901443,BaggedGBDT,within_dataset,cwz,clinical,
3,Recall,0,0.904762,0.820865,0.968774,BaggedGBDT,within_dataset,cwz,clinical,
4,AUC,0,0.87037,0.793788,0.93508,GBDT,within_dataset,cwz,clinical,
5,F1,0,0.859375,0.786885,0.918548,GBDT,within_dataset,cwz,clinical,
6,Precision,0,0.846154,0.757565,0.925373,GBDT,within_dataset,cwz,clinical,
7,Recall,0,0.873016,0.784583,0.95,GBDT,within_dataset,cwz,clinical,
8,AUC,0,0.79828,0.688383,0.900142,LR,within_dataset,cwz,clinical,
9,F1,0,0.846154,0.77476,0.906505,LR,within_dataset,cwz,clinical,


In [13]:
overview_within.loc[~overview_within.impute_method.isna(), 'Experiment'] = 'Imputation: ' + overview_within.loc[~overview_within.impute_method.isna(), 'impute_method']

In [14]:
from itertools import permutations
import functools

def compute_pvalues(true_data, pred_data, overview):
    pvalues = pd.DataFrame(index=overview.index, columns=overview.index)
    
    for a, b in permutations(overview.index, 2):
        pred_a, pred_b = pred_data[a], pred_data[b]
        assert len(true_data) == len(pred_a), f'{a}, {len(pred_a)}, {len(true_data)}'
        assert len(true_data) == len(pred_b), f'{b}, {len(pred_b)}, {len(true_data)}'
        pvalues.loc[a, b] = delong(true_data, pred_a, pred_b)
        
    pvalues *= len(overview)**2 - len(overview)
    return pvalues

def compute_ranking(pvalues, overview, p_thresh):
    def is_better_than(a, b):
        return pvalues.loc[a, b] < p_thresh

    def comp(a, b):
        a_better = is_better_than(a, b)
        b_better = is_better_than(b, a)
        return 1 if b_better else (-1 if a_better else 0)

    ranking = pd.Series(sorted(overview.index, key=functools.cmp_to_key(comp))).to_frame()
#     ranking = ranking.reindex(index=base.index)
    
    rank = 1
    ranking['Rank'] = 1
    for i in range(len(ranking)):
        ranking.loc[i, 'Rank'] = rank
#         if i < len(ranking)-1:
#             print(ranking.loc[i+1, 0], ranking.loc[i, 0])
#             print(functools.cmp_to_key(comp)(ranking.loc[i+1, 0]) > functools.cmp_to_key(comp)(ranking.loc[i, 0]))
        if i < len(ranking)-1 and functools.cmp_to_key(comp)(ranking.loc[i+1, 0]) > functools.cmp_to_key(comp)(ranking.loc[i, 0]):
            rank += 1
    
    ranking.index = pd.MultiIndex.from_tuples(ranking[0])
    
    return ranking.Rank

In [15]:
from tqdm import tqdm
experiments_between = {'between_dataset': 'Base', 'between_dataset_hyperopt': 'Optimized', 'predict_with_impute': 'Impute', 'shap_zeroing': 'SHAP zeroing', 'shap_zeroing_hyperopt': 'SHAP zeroing optimized', 'test_time_impute': 'Test-time impute'}
datasets_between = {'ictcf_Union': 'iCTCF: Union', 'ictcf_Liyuan': 'iCTCF: Liyuan', 'cwz_cwz': 'CWZ: CWZ', 'cwz_rumc': 'CWZ: RUMC'}
# dfs = []
# for experiment_path, experiment in experiments_between.items():
#     for dataset_path, dataset in datasets_between.items():
#         df = get_all(experiment_path, dataset_path)
#         dfs.append(df)
#         df = get_all(experiment_path, dataset_path, features='combined')
#         dfs.append(df)
dfs = []
datasets_visited = set()
true_data_between = []
pred_data_between = {}
for experiment_path, experiment in (list(experiments_between.items())):
    print(experiment_path)
    for dataset_path, dataset in datasets_between.items():
        df = get_all(experiment_path, dataset_path)
        preds = load_data(os.path.join('../experiments', experiment_path, dataset_path, 'clinical'))
        
        if dataset_path not in datasets_visited and 'meta_impute_method' not in preds.columns:
            if 'meta_impute_method' not in preds.columns:
                true_data_between += list(preds.loc[preds.meta_model == preds.meta_model.unique()[0], 'true_0'])
                true_data_between += list(preds.loc[preds.meta_model == preds.meta_model.unique()[0], 'true_0'])
            else:
                true_data_between += list(preds.loc[(preds.meta_model == preds.meta_model.unique()[0]) & 
                                                    (preds.meta_impute_method == preds.meta_impute_method.unique()[0]), 'true_0'])
                true_data_between += list(preds.loc[(preds.meta_model == preds.meta_model.unique()[0]) &
                                                    (preds.meta_impute_method == preds.meta_impute_method.unique()[0]), 'true_0'])
            datasets_visited.add(dataset_path)
        
        for model in tqdm(df.model.unique()):
            if 'meta_impute_method' not in preds.columns:
                model_preds = preds.loc[preds.meta_model == model, 'pred_0'].values
                key = (experiment, model)
                if key not in pred_data_between:
                    pred_data_between[key] = []
                pred_data_between[key] += list(model_preds)
            else:
                for imp in preds.meta_impute_method.unique():
                    model_preds = preds.loc[(preds.meta_model == model) & (preds.meta_impute_method == imp), 'pred_0'].values
                    key = ('Imputation: '+imp, model)
                    if key not in pred_data_between:
                        pred_data_between[key] = []
                    pred_data_between[key] += list(model_preds)
        
        dfs.append(df)
        df = get_all(experiment_path, dataset_path, features='combined')
        preds = load_data(os.path.join('../experiments', experiment_path, dataset_path, 'combined'))
        for model in df.model.unique():
            if 'meta_impute_method' not in preds.columns:
                model_preds = preds.loc[preds.meta_model == model, 'pred_0'].values
                key = (experiment, model)
                if key not in pred_data_between:
                    pred_data_between[key] = []
                pred_data_between[key] += list(model_preds)
            else:
                for imp in preds.meta_impute_method.unique():
                    model_preds = preds.loc[(preds.meta_model == model) & (preds.meta_impute_method == imp), 'pred_0'].values
                    key = ('Imputation: '+imp, model)
                    if key not in pred_data_between:
                        pred_data_between[key] = []
                    pred_data_between[key] += list(model_preds)
        dfs.append(df)

between_dataset


100%|███████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 1333.01it/s]
100%|███████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 3999.34it/s]
100%|███████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 4001.24it/s]
100%|███████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 2000.14it/s]
100%|███████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 1499.57it/s]

between_dataset_hyperopt



100%|███████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 2994.51it/s]
100%|███████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 3001.65it/s]
100%|███████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 1498.68it/s]


predict_with_impute


100%|████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 181.82it/s]
100%|████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 399.92it/s]
100%|████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 399.94it/s]
100%|████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 499.90it/s]
100%|███████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 1333.96it/s]

shap_zeroing



100%|███████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 1999.19it/s]
100%|███████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 4001.24it/s]
100%|███████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 2000.38it/s]
100%|███████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 2000.14it/s]


shap_zeroing_hyperopt


100%|███████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 1998.24it/s]
100%|███████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 1997.29it/s]
100%|███████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 1998.24it/s]


test_time_impute


100%|███████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 1998.24it/s]
100%|███████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 1000.07it/s]
100%|████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 999.83it/s]
100%|███████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 1999.67it/s]


In [16]:
overview_between = pd.concat(dfs)
overview_between = overview_between.rename(columns={'model': 'Model'})
overview_between

Unnamed: 0,metric,feat,value,lb,ub,Model,Experiment,Dataset,feature_set,impute_method
0,AUC,0,0.758625,0.726016,0.791797,BaggedGBDT,between_dataset,ictcf_Union,clinical,
1,F1,0,0.867429,0.849045,0.884377,BaggedGBDT,between_dataset,ictcf_Union,clinical,
2,Precision,0,0.765893,0.738334,0.790699,BaggedGBDT,between_dataset,ictcf_Union,clinical,
3,Recall,0,1.000000,1.000000,1.000000,BaggedGBDT,between_dataset,ictcf_Union,clinical,
4,AUC,0,0.717054,0.679600,0.750684,GBDT,between_dataset,ictcf_Union,clinical,
...,...,...,...,...,...,...,...,...,...,...
3,Recall,0,0.940678,0.889807,0.976567,GBDT,test_time_impute,cwz_rumc,combined,
4,AUC,0,0.834746,0.773798,0.885291,LR,test_time_impute,cwz_rumc,combined,
5,F1,0,0.811475,0.754237,0.862745,LR,test_time_impute,cwz_rumc,combined,
6,Precision,0,0.785714,0.713177,0.853874,LR,test_time_impute,cwz_rumc,combined,


In [17]:
experiments_misc = {'within_dataset_missing': 'within_dataset_missing'}
datasets_misc = datasets_within
dfs = []
for experiment_path, experiment in experiments_misc.items():
    for dataset_path, dataset in datasets_misc.items():
        df = get_all(experiment_path, dataset_path)
        dfs.append(df)
        df = get_all(experiment_path, dataset_path, features='combined')
        dfs.append(df)

In [18]:
overview_misc = pd.concat(dfs)
overview_misc = overview_misc.rename(columns={'model': 'Model'})
overview_misc

Unnamed: 0,metric,feat,value,lb,ub,Model,Experiment,Dataset,feature_set
0,AUC,0,0.667328,0.533275,0.785695,BaggedGBDT,within_dataset_missing,cwz,clinical
1,F1,0,0.828571,0.759124,0.888889,BaggedGBDT,within_dataset_missing,cwz,clinical
2,Precision,0,0.753247,0.658529,0.851852,BaggedGBDT,within_dataset_missing,cwz,clinical
3,Recall,0,0.920635,0.855072,0.983051,BaggedGBDT,within_dataset_missing,cwz,clinical
4,AUC,0,0.702050,0.578801,0.825348,GBDT,within_dataset_missing,cwz,clinical
...,...,...,...,...,...,...,...,...,...
11,Recall,0,0.500000,,,LR,within_dataset_missing,rumc,combined
12,AUC,0,0.783730,0.599973,0.911461,RFsklearn,within_dataset_missing,rumc,combined
13,F1,0,0.125000,,,RFsklearn,within_dataset_missing,rumc,combined
14,Precision,0,0.500000,,,RFsklearn,within_dataset_missing,rumc,combined


In [19]:
overview_between.loc[~overview_between.impute_method.isna(), 'Experiment'] = 'Imputation: ' + overview_between.loc[~overview_between.impute_method.isna(), 'impute_method']

In [20]:
def compress_overview(df, models, value, feature_set, index_map, column_map):
    df = df[(df.feature_set == feature_set) & 
                                          (df.metric == 'AUC') &
                                          (df.Model.isin(models))]
    result = pd.pivot(df, index=['Experiment', 'Model'], columns=['Dataset'], values=value) \
                .rename(index=index_map, columns=column_map)
    return result

In [21]:
overview_within[(overview_within.feature_set == 'clinical') & (overview_within.metric == 'AUC') & (overview_within.Model.isin(models))][overview_within[(overview_within.feature_set == 'clinical') & (overview_within.metric == 'AUC') & (overview_within.Model.isin(models))][['Model', 'Experiment', 'Dataset']].duplicated()]

NameError: name 'models' is not defined

In [158]:
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedShuffleSplit, PredefinedSplit
def compute_roc_auc(path, splitter, target):
    df = pd.read_pickle(path)
#     print(df.Input.Visual.mean(axis=1).max())
#     print(df.Input.Visual.mean(axis=1).min())
#     error()
    train_idx, test_idx = next(splitter.split(df.Input, df.Target[target]))
    test_idx = df.iloc[test_idx, :][~df.iloc[test_idx, :].Target[target].isna()].index#.intersection(df.Input.Visual.mean(axis=1)[~df.Input.Visual.mean(axis=1).isna()].index)
    
#     return roc_auc_score(df.iloc[test_idx].Target[target], df.iloc[test_idx].Input.Visual.mean(axis=1))
    preds = df.loc[test_idx].Input.Visual.mean(axis=1).fillna(2)
    preds = preds.fillna(2.5)
    preds = np.clip(preds, 0, 5)
    return roc_auc_score(df.loc[test_idx].Target[target], preds)

cwz_score = compute_roc_auc('../data/processed/cwz.pkl', StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=38107), 'diagnosis')
rumc_score = compute_roc_auc('../data/processed/rumc.pkl', 
                             PredefinedSplit((pd.read_pickle('../data/processed/rumc.pkl').Meta['part'].isin(['val', 'test'])) - 1), 'pcr')
ictcf_score = compute_roc_auc('../data/processed/ictcf.pkl', 
                             PredefinedSplit((pd.read_pickle('../data/processed/ictcf.pkl').Meta['cohort'] == 'c2') - 1), 'pcr')

cwz_score, rumc_score, ictcf_score

(0.8002645502645502, 0.4642857142857143, 0.7607431651906046)

In [159]:
models = ['GBDT']
base = compress_overview(overview_within, models, 'value', 'clinical', experiments_within, datasets_within)
value = compress_overview(overview_within, models, 'value', 'clinical', experiments_within, datasets_within)
lb = compress_overview(overview_within, models, 'lb', 'clinical', experiments_within, datasets_within)
ub = compress_overview(overview_within, models, 'ub', 'clinical', experiments_within, datasets_within)

value_vis = compress_overview(overview_within, models, 'value', 'combined', experiments_within, datasets_within)
lb_vis = compress_overview(overview_within, models, 'lb', 'combined', experiments_within, datasets_within)
ub_vis = compress_overview(overview_within, models, 'ub', 'combined', experiments_within, datasets_within)

for col in base.columns:
    for idx in base.index:
#         base.loc[idx, col] = f'{value.loc[idx, col]:.3f} ({lb.loc[idx, col]:.3f}, {ub.loc[idx, col]:.3f}); {value_vis.loc[idx, col]:.3f} ({lb_vis.loc[idx, col]:.3f}, {ub_vis.loc[idx, col]:.3f})'
        base.loc[idx, col] = f'{value.loc[idx, col]:.3f};  {value_vis.loc[idx, col]:.3f}'
# base
pd.concat([pd.DataFrame([[f'{cwz_score:.3f}', f'{ictcf_score:.3f}', f'{rumc_score:.3f}']], index= pd.MultiIndex.from_tuples([('', 'CORADS-AI')], names=base.index.names), columns=base.columns), base])

Unnamed: 0_level_0,Dataset,CWZ,iCTCF,RUMC
Experiment,Model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
,CORADS-AI,0.800,0.761,0.464
Imputation: KNN5,GBDT,0.858; 0.844,0.823; 0.854,0.792; 0.732
Imputation: MEAN,GBDT,0.862; 0.836,0.925; 0.895,0.794; 0.782
Imputation: MICE,GBDT,0.870; 0.852,0.773; 0.836,0.754; 0.790
Base,GBDT,0.870; 0.836,0.940; 0.899,0.859; 0.835
Optimized,GBDT,0.884; 0.849,0.961; 0.902,0.911; 0.859
SHAP zeroing,GBDT,0.862; 0.841,0.908; 0.861,0.883; 0.843
SHAP zeroing optimized,GBDT,0.868; 0.874,0.939; 0.812,0.889; 0.865
Test-time impute,GBDT,0.827; 0.806,0.760; 0.799,0.780; 0.770


In [160]:
import pyperclip
result = pd.concat([pd.DataFrame([[f'{cwz_score:.3f}', f'{ictcf_score:.3f}', f'{rumc_score:.3f}']], index= pd.MultiIndex.from_tuples([('', 'CORADS-AI')], names=base.index.names), columns=base.columns), base])
result.index = [', '.join(idx) for idx in result.index]
pyperclip.copy(result.to_markdown())

In [89]:
def separate_clin_vis(df):
    result = pd.DataFrame(index=df.index, 
                          columns=pd.MultiIndex.from_tuples([(c, x) for c in df.columns for x in ['Clinical', 'Clin./Vis.']]))
    for col in df.columns:
        for ind in df.index:
            cell = df.loc[ind, col]
            if not isinstance(cell, str) or ';' not in cell:
                result.loc[ind, (col, 'Clinical')] = 'NA'
                result.loc[ind, (col, 'Clin./Vis.')] = cell
            else:
                result.loc[ind, (col, 'Clinical')] = cell.split('; ')[0]
                result.loc[ind, (col, 'Clin./Vis.')] = cell.split('; ')[1]
    return result

In [97]:
def mark_max(df):
    df = df.copy()
    for col in df.columns:
        max_idx = df.loc[df[col] != 'NA', col].astype(np.float).idxmax()
        df.loc[max_idx, col] = df.loc[max_idx, col]+'*'
    return df

In [127]:
def prepare_for_ppt(df, only_base=True, no_vis=True, drop=None):
    if 'within_dataset_missing' not in df.index:
        if len(df.columns) == 3:
            output = pd.concat([pd.DataFrame([[f'{cwz_score:.3f}', f'{ictcf_score:.3f}', f'{rumc_score:.3f}']], index= pd.MultiIndex.from_tuples([('', 'CORADS-AI')], names=df.index.names), columns=df.columns), df])
        else:
            output = pd.concat([pd.DataFrame([[f'{cwz_cwz_score:.3f}', f'{cwz_rumc_score:.3f}', f'{ictcf_liyuan_score:.3f}', f'{ictcf_union_score:.3f}']], index= pd.MultiIndex.from_tuples([('', 'CORADS-AI')], names=df.index.names), columns=df.columns), df])
        
        if only_base:
            output = output.loc[['Base', 'Optimized', '']]
    else:
        output = pd.concat([pd.DataFrame([[f'{cwz_score:.3f}', f'{ictcf_score:.3f}', f'{rumc_score:.3f}']], index= pd.MultiIndex.from_tuples([('', 'CORADS-AI')], names=df.index.names), columns=df.columns), df]).loc[['within_dataset_missing', '']].applymap(lambda x: x).reset_index(level=0, drop=True)
    
    if no_vis:
        output = output.applymap(lambda x: x.split('; ')[0] if ';' in x else x)
    else:
        output = separate_clin_vis(output)
    if output.index.nlevels == 2:
        output = output.reset_index(level=1, drop=True)
    output.index.name = None
    output.columns.name = None
    if drop is not None:
        output = output.drop(index=drop)
    output = mark_max(output)
    output = output.rename(index={'': 'CORADS-AI'})
    return output

In [111]:
prepare_for_ppt(base, no_vis=False)

Unnamed: 0_level_0,CWZ: CWZ,CWZ: CWZ,CWZ: RUMC,CWZ: RUMC,iCTCF: Liyuan,iCTCF: Liyuan,iCTCF: Union,iCTCF: Union
Unnamed: 0_level_1,Clinical,Clin./Vis.,Clinical,Clin./Vis.,Clinical,Clin./Vis.,Clinical,Clin./Vis.
Base,0.858,0.840,0.725,0.807,0.761*,0.864*,0.717*,0.730
Optimized,0.886*,0.838,0.751*,0.808,0.577,0.848,0.680,0.734
CORADS-AI,,0.885*,,0.826*,,0.812,,0.854*


In [131]:
prepare_for_ppt(base, no_vis=False, only_base=False, drop=['SHAP zeroing optimized'])

Unnamed: 0_level_0,CWZ,CWZ,iCTCF,iCTCF,RUMC,RUMC
Unnamed: 0_level_1,Clinical,Clin./Vis.,Clinical,Clin./Vis.,Clinical,Clin./Vis.
CORADS-AI,,0.800,,0.761,,0.464
Imputation: KNN5,0.858,0.844,0.823,0.854,0.792,0.732
Imputation: MEAN,0.862,0.836,0.925,0.895,0.794,0.782
Imputation: MICE,0.870,0.852*,0.773,0.836,0.754,0.790
Base,0.870,0.836,0.940,0.899,0.859,0.835
Optimized,0.884*,0.849,0.961*,0.902*,0.911*,0.859*
SHAP zeroing,0.862,0.841,0.908,0.861,0.883,0.843
Test-time impute,0.827,0.806,0.760,0.799,0.780,0.770


In [72]:
# output = pd.concat([pd.DataFrame([[f'{cwz_score:.3f}', f'{ictcf_score:.3f}', f'{rumc_score:.3f}']], index= pd.MultiIndex.from_tuples([('', 'CORADS-AI')], names=base.index.names), columns=base.columns), base]).applymap(lambda x: x).reset_index(level=1, drop=True)
output = pd.concat([pd.DataFrame([[f'{cwz_score:.3f}', f'{ictcf_score:.3f}', f'{rumc_score:.3f}']], index= pd.MultiIndex.from_tuples([('', 'CORADS-AI')], names=base.index.names), columns=base.columns), base]).loc[['Base', 'Optimized', '']].applymap(lambda x: x).reset_index(level=1, drop=True)
output.index.name = None
output.columns.name = None
output

Unnamed: 0,CWZ,iCTCF,RUMC
Base,0.870; 0.836,0.940; 0.899,0.859; 0.835
Optimized,0.884; 0.849,0.961; 0.902,0.911; 0.859
,0.800,0.761,0.464


In [31]:
pd.concat([pd.DataFrame([[f'{cwz_score:.3f}', f'{ictcf_score:.3f}', f'{rumc_score:.3f}']], index= pd.MultiIndex.from_tuples([('', 'CORADS-AI')], names=base.index.names), columns=base.columns), base]).loc[['Base', 'Optimized']].reset_index(level=1, drop=True).to_excel('base_within.xlsx')

In [239]:
base = base[~(base.CWZ.str.contains('nan') | base.iCTCF.str.contains('nan') | base.RUMC.str.contains('nan'))]

In [240]:
base

Unnamed: 0_level_0,Dataset,CWZ,iCTCF,RUMC
Experiment,Model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Imputation: KNN5,BaggedGBDT,0.881; 0.819,0.781; 0.821,0.831; 0.831
Imputation: KNN5,GBDT,0.858; 0.844,0.823; 0.854,0.792; 0.732
Imputation: KNN5,LR,0.807; 0.870,0.639; 0.636,0.629; 0.583
Imputation: KNN5,RFsklearn,0.887; 0.823,0.824; 0.814,0.732; 0.743
Imputation: MEAN,BaggedGBDT,0.862; 0.804,0.881; 0.870,0.833; 0.815
Imputation: MEAN,GBDT,0.862; 0.836,0.925; 0.895,0.794; 0.782
Imputation: MEAN,LR,0.798; 0.857,0.617; 0.689,0.601; 0.657
Imputation: MEAN,RFsklearn,0.877; 0.823,0.910; 0.836,0.731; 0.782
Imputation: MICE,GBDT,0.870; 0.852,0.773; 0.836,0.754; 0.790
Imputation: MICE,LR,0.809; 0.788,0.602; 0.613,0.661; 0.671


In [139]:
pvalues = compute_pvalues(true_data_within, pred_data_within, base)

R[write to console]: Setting levels: control = 0, case = 1

R[write to console]: Setting direction: controls < cases

R[write to console]: Setting levels: control = 0, case = 1

R[write to console]: Setting direction: controls < cases

R[write to console]: Setting levels: control = 0, case = 1

R[write to console]: Setting direction: controls < cases

R[write to console]: Setting levels: control = 0, case = 1

R[write to console]: Setting direction: controls < cases

R[write to console]: Setting levels: control = 0, case = 1

R[write to console]: Setting direction: controls < cases

R[write to console]: Setting levels: control = 0, case = 1

R[write to console]: Setting direction: controls < cases

R[write to console]: Setting levels: control = 0, case = 1

R[write to console]: Setting direction: controls < cases

R[write to console]: Setting levels: control = 0, case = 1

R[write to console]: Setting direction: controls < cases

R[write to console]: Setting levels: control = 0, case =

In [140]:
ranking = compute_ranking(pvalues, base, 0.01)

In [141]:
base['Rank'] = ranking

In [142]:
pd.concat([pd.DataFrame([[f'{cwz_score:.3f}', f'{ictcf_score:.3f}', f'{rumc_score:.3f}', '-']], index= pd.MultiIndex.from_tuples([('', 'CORADS-AI')], names=base.index.names), columns=base.columns), base.sort_values(by='Rank')])

Unnamed: 0_level_0,Dataset,CWZ,iCTCF,RUMC,Rank
Experiment,Model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
,CORADS-AI,0.800,0.761,0.464,-
Imputation: MEAN,GBDT,0.862; 0.836,0.925; 0.895,0.794; 0.782,1
Base,GBDT,0.870; 0.836,0.940; 0.899,0.859; 0.835,1
SHAP zeroing,GBDT,0.862; 0.841,0.908; 0.861,0.883; 0.843,2
Imputation: KNN5,GBDT,0.858; 0.844,0.823; 0.854,0.792; 0.732,3
Imputation: MICE,GBDT,0.870; 0.852,0.773; 0.836,0.754; 0.790,3
Optimized,GBDT,0.884; 0.849,0.961; 0.902,0.911; 0.859,3
SHAP zeroing optimized,GBDT,0.868; 0.874,0.939; 0.812,0.889; 0.865,4
Test-time impute,GBDT,0.827; 0.806,0.760; 0.799,0.780; 0.770,4


In [245]:
pd.concat([pd.DataFrame([[f'{cwz_score:.3f}', f'{ictcf_score:.3f}', f'{rumc_score:.3f}', '-']], index= pd.MultiIndex.from_tuples([('', 'CORADS-AI')], names=base.index.names), columns=base.columns), base.sort_values(by='Rank')]).to_latex('results/overview_within.tex')

In [147]:
cwz_cwz_score = compute_roc_auc('../data/processed/cwz.pkl',
                                PredefinedSplit((pd.read_pickle('../data/processed/cwz.pkl').Meta['hospital'] == 'cwz') - 1), 'diagnosis')
cwz_rumc_score = compute_roc_auc('../data/processed/cwz.pkl',
                                PredefinedSplit((pd.read_pickle('../data/processed/cwz.pkl').Meta['hospital'] == 'rumc') - 1), 'diagnosis')

ictcf_liyuan_score = compute_roc_auc('../data/processed/ictcf.pkl', 
                             PredefinedSplit((pd.read_pickle('../data/processed/ictcf.pkl').Meta['hospital'] == 'Liyuan') - 1), 'pcr')
ictcf_union_score = compute_roc_auc('../data/processed/ictcf.pkl', 
                             PredefinedSplit((pd.read_pickle('../data/processed/ictcf.pkl').Meta['hospital'] == 'Union') - 1), 'pcr')

cwz_cwz_score, cwz_rumc_score, ictcf_liyuan_score, ictcf_union_score

(0.8849721706864565,
 0.8259305210918113,
 0.8123449131513648,
 0.8536363636363637)

In [161]:
models = ['GBDT']
base = compress_overview(overview_between, models, 'value', 'clinical', experiments_between, datasets_between)
value = compress_overview(overview_between, models, 'value', 'clinical', experiments_between, datasets_between)
lb = compress_overview(overview_between, models, 'lb', 'clinical', experiments_between, datasets_between)
ub = compress_overview(overview_between, models, 'ub', 'clinical', experiments_between, datasets_between)

value_vis = compress_overview(overview_between, models, 'value', 'combined', experiments_between, datasets_between)
# value_vis_only = value_vis.copy()
# value_vis_only['CWZ: CWZ'] = cwz_cwz_score
# value_vis_only['CWZ: RUMC'] = cwz_rumc_score
# value_vis_only['iCTCF: Liyuan'] = ictcf_liyuan_score
# value_vis_only['iCTCF: Union'] = ictcf_union_score
lb_vis = compress_overview(overview_between, models, 'lb', 'combined', experiments_between, datasets_between)
ub_vis = compress_overview(overview_between, models, 'ub', 'combined', experiments_between, datasets_between)

for col in base.columns:
    for idx in base.index:
#         base.loc[idx, col] = f'{value.loc[idx, col]:.3f} ({lb.loc[idx, col]:.3f}, {ub.loc[idx, col]:.3f}); {value_vis.loc[idx, col]:.3f} ({lb_vis.loc[idx, col]:.3f}, {ub_vis.loc[idx, col]:.3f})'
        base.loc[idx, col] = f'{value.loc[idx, col]:.3f};  {value_vis.loc[idx, col]:.3f}'
# base.loc[(~base.index.get_level_values(0).str.startswith('Imputation')) | (base.index.get_level_values(0).isin(['Imputation: MEAN', 'Imputation: KNN5', 'Imputation: MICE']))]
pd.concat([pd.DataFrame([[f'{cwz_cwz_score:.3f}', f'{cwz_rumc_score:.3f}', f'{ictcf_liyuan_score:.3f}', f'{ictcf_union_score:.3f}']], index= pd.MultiIndex.from_tuples([('', 'CORADS-AI')], names=base.index.names), columns=base.columns), base])

Unnamed: 0_level_0,Dataset,CWZ: CWZ,CWZ: RUMC,iCTCF: Liyuan,iCTCF: Union
Experiment,Model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
,CORADS-AI,0.885,0.826,0.812,0.854
Imputation: KNN5,GBDT,0.842; 0.846,0.743; 0.848,0.666; 0.842,0.613; 0.772
Imputation: MEAN,GBDT,0.862; 0.857,0.697; 0.845,0.713; 0.875,0.640; 0.787
Imputation: MICE,GBDT,0.873; 0.855,0.694; 0.826,0.519; 0.879,0.624; 0.726
Base,GBDT,0.858; 0.840,0.725; 0.807,0.761; 0.864,0.717; 0.730
Optimized,GBDT,0.886; 0.838,0.751; 0.808,0.577; 0.848,0.680; 0.734
SHAP zeroing,GBDT,0.860; 0.846,0.776; 0.860,0.644; 0.849,0.711; 0.777
SHAP zeroing optimized,GBDT,0.893; 0.863,0.809; 0.867,0.525; 0.889,0.670; 0.760
Test-time impute,GBDT,0.874; 0.854,0.756; 0.840,0.643; 0.863,0.667; 0.868


In [162]:
import pyperclip
result = pd.concat([pd.DataFrame([[f'{cwz_cwz_score:.3f}', f'{cwz_rumc_score:.3f}', f'{ictcf_liyuan_score:.3f}', f'{ictcf_union_score:.3f}']], index= pd.MultiIndex.from_tuples([('', 'CORADS-AI')], names=base.index.names), columns=base.columns), base])
result.index = [', '.join(idx) for idx in result.index]
pyperclip.copy(result.to_markdown())

In [135]:
prepare_for_ppt(base, no_vis=False)

Unnamed: 0_level_0,CWZ: CWZ,CWZ: CWZ,CWZ: RUMC,CWZ: RUMC,iCTCF: Liyuan,iCTCF: Liyuan,iCTCF: Union,iCTCF: Union
Unnamed: 0_level_1,Clinical,Clin./Vis.,Clinical,Clin./Vis.,Clinical,Clin./Vis.,Clinical,Clin./Vis.
Base,0.858,0.840,0.725,0.807,0.761*,0.864*,0.717*,0.730
Optimized,0.886*,0.838,0.751*,0.808,0.577,0.848,0.680,0.734
CORADS-AI,,0.885*,,0.826*,,0.812,,0.854*


In [136]:
prepare_for_ppt(base, no_vis=False, only_base=False, drop=['SHAP zeroing optimized'])

Unnamed: 0_level_0,CWZ: CWZ,CWZ: CWZ,CWZ: RUMC,CWZ: RUMC,iCTCF: Liyuan,iCTCF: Liyuan,iCTCF: Union,iCTCF: Union
Unnamed: 0_level_1,Clinical,Clin./Vis.,Clinical,Clin./Vis.,Clinical,Clin./Vis.,Clinical,Clin./Vis.
CORADS-AI,,0.885*,,0.826,,0.812,,0.854
Imputation: KNN5,0.842,0.846,0.743,0.848,0.666,0.842,0.613,0.772
Imputation: MEAN,0.862,0.857,0.697,0.845,0.713,0.875,0.640,0.787
Imputation: MICE,0.873,0.855,0.694,0.826,0.519,0.879*,0.624,0.726
Base,0.858,0.840,0.725,0.807,0.761*,0.864,0.717*,0.730
Optimized,0.886*,0.838,0.751,0.808,0.577,0.848,0.680,0.734
SHAP zeroing,0.860,0.846,0.776*,0.860*,0.644,0.849,0.711,0.777
Test-time impute,0.874,0.854,0.756,0.840,0.643,0.863,0.667,0.868*


In [67]:
output = pd.concat([pd.DataFrame([[f'{cwz_cwz_score:.3f}', f'{cwz_rumc_score:.3f}', f'{ictcf_liyuan_score:.3f}', f'{ictcf_union_score:.3f}']], index= pd.MultiIndex.from_tuples([('', 'CORADS-AI')], names=base.index.names), columns=base.columns), base]).applymap(lambda x: x).reset_index(level=1, drop=True)
output.index.name = None
output.columns.name = None
output

Unnamed: 0,CWZ: CWZ,CWZ: RUMC,iCTCF: Liyuan,iCTCF: Union
,0.885,0.826,0.812,0.854
Imputation: KNN5,0.842; 0.846,0.743; 0.848,0.666; 0.842,0.613; 0.772
Imputation: MEAN,0.862; 0.857,0.697; 0.845,0.713; 0.875,0.640; 0.787
Imputation: MICE,0.873; 0.855,0.694; 0.826,0.519; 0.879,0.624; 0.726
Base,0.858; 0.840,0.725; 0.807,0.761; 0.864,0.717; 0.730
Optimized,0.886; 0.838,0.751; 0.808,0.577; 0.848,0.680; 0.734
SHAP zeroing,0.860; 0.846,0.776; 0.860,0.644; 0.849,0.711; 0.777
SHAP zeroing optimized,0.893; 0.863,0.809; 0.867,0.525; 0.889,0.670; 0.760
Test-time impute,0.874; 0.854,0.756; 0.840,0.643; 0.863,0.667; 0.868


In [253]:
base = base.loc[(~base.index.get_level_values(0).str.startswith('Imputation')) | (base.index.get_level_values(0).isin(['Imputation: MEAN', 'Imputation: KNN5', 'Imputation: MICE']))]#.drop(('SHAP zeroing optimized', 'BaggedGBDT'))

In [149]:
pvalues = compute_pvalues(true_data_between, pred_data_between, base)

R[write to console]: Setting levels: control = 0, case = 1

R[write to console]: Setting direction: controls < cases

R[write to console]: Setting levels: control = 0, case = 1

R[write to console]: Setting direction: controls < cases

R[write to console]: Setting levels: control = 0, case = 1

R[write to console]: Setting direction: controls < cases

R[write to console]: Setting levels: control = 0, case = 1

R[write to console]: Setting direction: controls < cases

R[write to console]: Setting levels: control = 0, case = 1

R[write to console]: Setting direction: controls < cases

R[write to console]: Setting levels: control = 0, case = 1

R[write to console]: Setting direction: controls < cases

R[write to console]: Setting levels: control = 0, case = 1

R[write to console]: Setting direction: controls < cases

R[write to console]: Setting levels: control = 0, case = 1

R[write to console]: Setting direction: controls < cases

R[write to console]: Setting levels: control = 0, case =

In [150]:
pvalues

Unnamed: 0_level_0,Experiment,Imputation: KNN5,Imputation: MEAN,Imputation: MICE,Base,Optimized,SHAP zeroing,SHAP zeroing optimized,Test-time impute
Unnamed: 0_level_1,Model,GBDT,GBDT,GBDT,GBDT,GBDT,GBDT,GBDT,GBDT
Experiment,Model,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
Imputation: KNN5,GBDT,,12.485015,5.3e-05,3.24491,0.0,55.698229,0.0,55.609237
Imputation: MEAN,GBDT,43.514985,,0.000131,4.311349,0.0,55.996646,0.0,55.943462
Imputation: MICE,GBDT,55.999947,55.999869,,55.96734,0.0,56.0,0.0,56.0
Base,GBDT,52.75509,51.688651,0.03266,,0.0,56.0,0.0,55.997845
Optimized,GBDT,56.0,56.0,56.0,56.0,,56.0,47.550931,56.0
SHAP zeroing,GBDT,0.301771,0.003354,0.0,0.0,0.0,,0.0,21.375448
SHAP zeroing optimized,GBDT,56.0,56.0,56.0,56.0,8.449069,56.0,,56.0
Test-time impute,GBDT,0.390763,0.056538,0.0,0.002155,0.0,34.624552,0.0,


In [151]:
ranking = compute_ranking(pvalues, base, 0.01)

In [152]:
base['Rank'] = ranking

In [154]:
pd.concat([pd.DataFrame([[f'{cwz_cwz_score:.3f}', f'{cwz_rumc_score:.3f}', f'{ictcf_liyuan_score:.3f}', f'{ictcf_union_score:.3f}', '-']], index= pd.MultiIndex.from_tuples([('', 'CORADS-AI')], names=base.index.names), columns=base.columns), base.sort_values(by='Rank')])

Unnamed: 0_level_0,Dataset,CWZ: CWZ,CWZ: RUMC,iCTCF: Liyuan,iCTCF: Union,Rank
Experiment,Model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
,CORADS-AI,0.885,0.826,0.812,0.854,-
Imputation: KNN5,GBDT,0.842; 0.846,0.743; 0.848,0.666; 0.842,0.613; 0.772,1
SHAP zeroing,GBDT,0.860; 0.846,0.776; 0.860,0.644; 0.849,0.711; 0.777,1
Imputation: MEAN,GBDT,0.862; 0.857,0.697; 0.845,0.713; 0.875,0.640; 0.787,2
Test-time impute,GBDT,0.874; 0.854,0.756; 0.840,0.643; 0.863,0.667; 0.868,2
Imputation: MICE,GBDT,0.873; 0.855,0.694; 0.826,0.519; 0.879,0.624; 0.726,3
Base,GBDT,0.858; 0.840,0.725; 0.807,0.761; 0.864,0.717; 0.730,3
Optimized,GBDT,0.886; 0.838,0.751; 0.808,0.577; 0.848,0.680; 0.734,4
SHAP zeroing optimized,GBDT,0.893; 0.863,0.809; 0.867,0.525; 0.889,0.670; 0.760,4


In [155]:
import pyperclip
result = pd.concat([pd.DataFrame([[f'{cwz_cwz_score:.3f}', f'{cwz_rumc_score:.3f}', f'{ictcf_liyuan_score:.3f}', f'{ictcf_union_score:.3f}', '-']], index= pd.MultiIndex.from_tuples([('', 'CORADS-AI')], names=base.index.names), columns=base.columns), base.sort_values(by='Rank')])
result.index = [', '.join(idx) for idx in result.index]
pyperclip.copy(result.to_markdown())

In [276]:
overview_between[['Experiment', 'Model']][~overview_between[['Experiment', 'Model']].duplicated()]

Unnamed: 0,Experiment,Model
0,between_dataset,BaggedGBDT
4,between_dataset,GBDT
8,between_dataset,LR
12,between_dataset,RFsklearn
0,between_dataset_hyperopt,GBDT
4,between_dataset_hyperopt,LR
8,between_dataset_hyperopt,RFsklearn
0,Imputation: KNN5,BaggedGBDT
4,Imputation: KNN5,GBDT
8,Imputation: KNN5,LR


In [287]:
len(overview_within), len(overview_between)

(599, 832)

In [288]:
set([(experiments_between[x.values[0]] if x.values[0] in experiments_between else x.values[0], x.values[1]) for _, x in overview_between[['Experiment', 'Model']][~overview_between[['Experiment', 'Model']].duplicated()].iterrows()]) \
    - set([(experiments_within[x.values[0]] if x.values[0] in experiments_within else x.values[0], x.values[1]) for _, x in overview_within[['Experiment', 'Model']][~overview_within[['Experiment', 'Model']].duplicated()].iterrows()])

set()

In [260]:
pd.concat([pd.DataFrame([[f'{cwz_cwz_score:.3f}', f'{cwz_rumc_score:.3f}', f'{ictcf_liyuan_score:.3f}', f'{ictcf_union_score:.3f}', '-']], index= pd.MultiIndex.from_tuples([('', 'CORADS-AI')], names=base.index.names), columns=base.columns), base.sort_values(by='Rank')]).to_latex('results/overview_between.tex')

In [156]:
base = compress_overview(overview_misc, ['BaggedGBDT', 'GBDT', 'LR'], 'value', 'clinical', experiments_misc, datasets_misc)
value = compress_overview(overview_misc, ['BaggedGBDT', 'GBDT', 'LR'], 'value', 'clinical', experiments_misc, datasets_misc)
lb = compress_overview(overview_misc, ['BaggedGBDT', 'GBDT', 'LR'], 'lb', 'clinical', experiments_misc, datasets_misc)
ub = compress_overview(overview_misc, ['BaggedGBDT', 'GBDT', 'LR'], 'ub', 'clinical', experiments_misc, datasets_misc)

value_vis = compress_overview(overview_misc, ['BaggedGBDT', 'GBDT', 'LR'], 'value', 'combined', experiments_misc, datasets_misc)
lb_vis = compress_overview(overview_misc, ['BaggedGBDT', 'GBDT', 'LR'], 'lb', 'combined', experiments_misc, datasets_misc)
ub_vis = compress_overview(overview_misc, ['BaggedGBDT', 'GBDT', 'LR'], 'ub', 'combined', experiments_misc, datasets_misc)

for col in base.columns:
    for idx in base.index:
#         base.loc[idx, col] = f'{value.loc[idx, col]:.3f} ({lb.loc[idx, col]:.3f}, {ub.loc[idx, col]:.3f}); {value_vis.loc[idx, col]:.3f} ({lb_vis.loc[idx, col]:.3f}, {ub_vis.loc[idx, col]:.3f})'
        base.loc[idx, col] = f'{value.loc[idx, col]:.3f};  {value_vis.loc[idx, col]:.3f}'
base.reset_index(level=0, drop=True)

Dataset,CWZ,iCTCF,RUMC
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
BaggedGBDT,0.667; 0.666,0.954; 0.955,0.879; 0.885
GBDT,0.702; 0.702,0.932; 0.942,0.863; 0.863
LR,0.634; 0.633,0.943; 0.944,0.853; 0.853


In [128]:
prepare_for_ppt(base, no_vis=False, drop=['BaggedGBDT'])

Unnamed: 0_level_0,CWZ,CWZ,iCTCF,iCTCF,RUMC,RUMC
Unnamed: 0_level_1,Clinical,Clin./Vis.,Clinical,Clin./Vis.,Clinical,Clin./Vis.
GBDT,0.702*,0.702,0.932,0.942,0.863*,0.863*
LR,0.634,0.633,0.943*,0.944*,0.853,0.853
CORADS-AI,,0.800*,,0.761,,0.464


In [118]:
'within_dataset_missing' in base.index

True

In [157]:
import pyperclip
result = pd.concat([pd.DataFrame([[f'{cwz_score:.3f}', f'{ictcf_score:.3f}', f'{rumc_score:.3f}']], index= pd.MultiIndex.from_tuples([('', 'CORADS-AI')], names=base.index.names), columns=base.columns), base]).loc[['within_dataset_missing', '']].applymap(lambda x: x).reset_index(level=0, drop=True)
# result.index = [', '.join(idx) for idx in result.index]
pyperclip.copy(result.to_markdown())

In [60]:
output = pd.concat([pd.DataFrame([[f'{cwz_score:.3f}', f'{ictcf_score:.3f}', f'{rumc_score:.3f}']], index= pd.MultiIndex.from_tuples([('', 'CORADS-AI')], names=base.index.names), columns=base.columns), base]).loc[['within_dataset_missing', '']].applymap(lambda x: x).reset_index(level=0, drop=True)
output.index.name = None
output.columns.name = None
output

Unnamed: 0,CWZ,iCTCF,RUMC
BaggedGBDT,0.667; 0.666,0.954; 0.955,0.879; 0.885
GBDT,0.702; 0.702,0.932; 0.942,0.863; 0.863
LR,0.634; 0.633,0.943; 0.944,0.853; 0.853
CORADS-AI,0.800,0.761,0.464


In [262]:
base.reset_index(level=0, drop=True).to_latex('results/within_dataset_missing.tex')

In [128]:
experiments = {'within_dataset': 'Base', 'within_dataset_hyperopt': "Optimized"}
datasets = ['cwz', 'ictcf', 'rumc']
overview_within = []
for experiment_path, experiment in experiments.items():
    for dataset in datasets:
        _, auc = load_best(experiment_path, dataset)
        overview_within.append({'Experiment': experiment, 'Dataset': dataset, 'auc': auc})

In [129]:
overview_within = pd.DataFrame(overview_within)

In [130]:
overview_within = overview_within.pivot(index='Experiment', columns='Dataset', values='auc')
overview_within.to_latex('results/overview_within_clinical.tex', float_format='%.3f', escape=False)
overview_within

Dataset,cwz,ictcf,rumc
Experiment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Base,0.859127,0.932576,0.833333
Optimized,0.907407,0.961417,0.796


In [131]:
experiments = {'within_dataset': 'Base', 'within_dataset_hyperopt': "Optimized"}
datasets = ['cwz', 'ictcf', 'rumc']
overview_within = []
for experiment_path, experiment in experiments.items():
    for dataset in datasets:
        _, auc = load_best(experiment_path, dataset, features='combined')
        overview_within.append({'Experiment': experiment, 'Dataset': dataset, 'auc': auc})

In [132]:
overview_within = pd.DataFrame(overview_within)

In [133]:
overview_within = overview_within.pivot(index='Experiment', columns='Dataset', values='auc')
overview_within.to_latex('results/overview_within_combined.tex', float_format='%.3f', escape=False)
overview_within

Dataset,cwz,ictcf,rumc
Experiment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Base,0.884259,0.852484,0.694444
Optimized,0.949735,0.89176,0.805556


In [150]:
experiments = {'between_dataset': 'Base', 'between_dataset_hyperopt': 'Optimized', 'predict_with_impute': 'Impute', 'shap_zeroing': 'SHAP zeroing', 'shap_zeroing_hyperopt': 'SHAP zeroing optimized', 'test_time_impute': 'Test-time impute'}
datasets = ['ictcf_Union', 'ictcf_Liyuan', 'cwz_cwz', 'cwz_rumc']
overview_between = []
for experiment_path, experiment in experiments.items():
    for dataset in datasets:
        _, auc = load_best(experiment_path, dataset)
        overview_between.append({'Experiment': experiment, 'Dataset': dataset, 'auc': auc})

In [151]:
overview_between = pd.DataFrame(overview_between)

In [152]:
overview_between = bold_best(overview_between.pivot(index='Experiment', columns='Dataset', values='auc'))
overview_between.to_latex('results/overview_between_clinical.tex', escape=False)
overview_between

Dataset,cwz_cwz,cwz_rumc,ictcf_Liyuan,ictcf_Union
Experiment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Base,0.893,0.768,0.669,0.702
Impute,0.888,\textbf{0.800},\textbf{0.681},0.700
Optimized,\textbf{0.897},0.751,0.653,\textbf{0.720}
SHAP zeroing,0.863,0.769,0.649,0.636
SHAP zeroing optimized,0.864,0.763,0.662,0.674
Test-time impute,0.858,0.760,0.658,0.647


In [154]:
experiments = {'between_dataset': 'Base', 'between_dataset_hyperopt': 'Optimized', 'predict_with_impute': 'Impute', 'shap_zeroing': 'SHAP zeroing', 'shap_zeroing_hyperopt': 'SHAP zeroing optimized', 'test_time_impute': 'Test-time impute'}
datasets = ['ictcf_Union', 'ictcf_Liyuan', 'cwz_cwz', 'cwz_rumc']
overview_between = []
for experiment_path, experiment in experiments.items():
    for dataset in datasets:
        _, auc = load_best(experiment_path, dataset, features='combined')
        overview_between.append({'Experiment': experiment, 'Dataset': dataset, 'auc': auc})

In [155]:
overview_between = pd.DataFrame(overview_between)

In [156]:
overview_between = bold_best(overview_between.pivot(index='Experiment', columns='Dataset', values='auc'))
overview_between.to_latex('results/overview_between_combined.tex', float_format='%.3f', escape=False)
overview_between

Dataset,cwz_cwz,cwz_rumc,ictcf_Liyuan,ictcf_Union
Experiment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Base,0.866,0.831,0.838,0.756
Impute,0.873,\textbf{0.863},0.846,\textbf{0.775}
Optimized,\textbf{0.875},0.852,\textbf{0.851},0.753
SHAP zeroing,0.866,0.863,0.779,0.764
SHAP zeroing optimized,0.870,0.855,0.782,0.755
Test-time impute,0.841,0.838,0.838,0.756


In [22]:
load_data('../experiments/within_dataset/ictcf/clinical/')

result_bagged_gbdt.pkl
351
dict_keys(['X_test', 'y_test', 'preds', 'preds_proba', 'pipeline'])
result_dart.pkl
351
dict_keys(['X_test', 'y_test', 'preds', 'preds_proba', 'pipeline'])
result_gbdt.pkl
351
dict_keys(['X_test', 'y_test', 'preds', 'preds_proba', 'pipeline'])
result_goss.pkl
351
dict_keys(['X_test', 'y_test', 'preds', 'preds_proba', 'pipeline'])
result_lr.pkl
351
dict_keys(['X_test', 'y_test', 'preds', 'preds_proba', 'pipeline'])
result_rf.pkl
351
dict_keys(['X_test', 'y_test', 'preds', 'preds_proba', 'pipeline'])
result_rf_sklearn.pkl
351
dict_keys(['X_test', 'y_test', 'preds', 'preds_proba', 'pipeline'])


Unnamed: 0,true_0,pred_0,pred_label_0,meta_model
0,1,0.744266,Positive,BaggedGBDT
1,1,0.732651,Positive,BaggedGBDT
2,1,0.893357,Positive,BaggedGBDT
3,1,0.894246,Positive,BaggedGBDT
4,1,0.308471,Negative,BaggedGBDT
...,...,...,...,...
2452,1,0.867000,Positive,RFsklearn
2453,1,0.554000,Positive,RFsklearn
2454,1,0.555000,Positive,RFsklearn
2455,1,0.671000,Positive,RFsklearn


In [12]:
get_best_model('within_dataset', 'ictcf')

Unnamed: 0.1,Unnamed: 0,metric,feat,value,lb,ub,model
0,0,AUC,0,0.932576,0.895208,0.961964,BaggedGBDT
1,1,F1,0,0.916179,0.89129,0.941446,BaggedGBDT
2,2,Precision,0,0.876866,0.839077,0.918598,BaggedGBDT
3,3,Recall,0,0.959184,0.934748,0.983193,BaggedGBDT
4,4,AUC,0,0.907239,0.870462,0.938382,DART
5,5,F1,0,0.915663,0.888539,0.942944,DART
6,6,Precision,0,0.901186,0.863334,0.930259,DART
7,7,Recall,0,0.930612,0.896038,0.961187,DART
8,8,AUC,0,0.914093,0.876445,0.944829,GBDT
9,9,F1,0,0.913043,0.891017,0.942308,GBDT
