In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
import os
from glob import glob

In [2]:
def open_cfg(file: str) -> dict:
    """Load a JSON config file and return it as a dict.

    Every top-level value that is a list is replaced by a tuple with the
    same elements; lists nested inside other containers are left untouched.

    Parameters
    ----------
    file : str
        Path of the JSON file to read.

    Returns
    -------
    dict
        The parsed config with top-level lists converted to tuples.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(file, 'rt', encoding='utf-8') as f:
        data = json.load(f)

    # NOTE(review): the tuple conversion presumably exists so these values are
    # hashable once they land in DataFrame cells -- confirm with the author.
    for key, value in list(data.items()):
        if isinstance(value, list):
            data[key] = tuple(value)
    return data


In [3]:
# Build `results`: one row per run, one column per config key.
#
# A run is a `<run>.csv` log in the results directory that has a sibling
# `<run>.cfg` file. The four printed counts are, in order: CSV logs found,
# logs loaded, runs that also have a .cfg, and runs left after dropping those
# whose name ends with 'pretrain'.
results_dir = 'results'

log_paths = sorted(glob(os.path.join(results_dir, '*.csv')))
print(len(log_paths))

# Derive the run name with os.path instead of stripping a literal 'results/'
# prefix, so it also works where glob returns backslash-separated paths.
logs_by_run = {
    os.path.splitext(os.path.basename(path))[0]: pd.read_csv(path)
    for path in log_paths
}
print(len(logs_by_run))

# Keep only runs with a config next to the log; both pieces stay available
# in `runs` under the 'log' and 'cfg' keys.
runs = {}
for run_name, log in logs_by_run.items():
    cfg_path = os.path.join(results_dir, f'{run_name}.cfg')
    if os.path.isfile(cfg_path):
        runs[run_name] = {'log': log, 'cfg': open_cfg(cfg_path)}
print(len(runs))

runs = {run_name: run for run_name, run in runs.items() if not run_name.endswith('pretrain')}
print(len(runs))

# Dict-of-dicts -> DataFrame puts runs in columns; transpose so runs are rows.
results = pd.DataFrame({run_name: run['cfg'] for run_name, run in runs.items()}).T

36
36
36
32


In [4]:
# Reduce each run's 'name' to the text before its first underscore; runs that
# share this prefix end up in the same bucket when grouping by 'name'.
results['name'] = results['name'].map(lambda run_name: run_name.partition('_')[0])

In [5]:
# Flatten every run into one flat record: the identifying config columns plus
# one 'metrics__<section>__<dataset>__<metric>' entry for the selected metric.
metrics_in_question = 'mean_rocaucs'
main_cols = ['name', 'valid_fold', 'text_encoder_model', 'ecg_encoder_model', 'train_datasets', 'pretrained']
# Config keys that each hold a {dataset: {metric name: value}} mapping.
metric_sections = ('test_metrics', 'zero_shot_test_metrics', 'exp2_metrics_trained', 'exp2_metrics_untrained')

metrics_data = []
for run_id, run in results.iterrows():
    record = {col: run[col] for col in main_cols}

    for section in metric_sections:
        for ds_name, ds_metrics in run[section].items():
            # Only the metric named by `metrics_in_question` is kept.
            record.update(
                (f'metrics__{section}__{ds_name}__{val_name}', val)
                for val_name, val in ds_metrics.items()
                if val_name == metrics_in_question
            )

    metrics_data.append(record)

In [6]:
# Turn the per-run records into a table and remember which columns carry
# metric values (those whose name starts with 'metrics__').
metrics_data = pd.DataFrame(data=metrics_data)
metrics_cols = list(filter(lambda col: col.startswith('metrics__'), metrics_data.columns))

In [7]:
# Per-'name' mean and standard deviation of every metric column; the result
# has two-level columns of the form (metric column, 'mean' | 'std').
groups = (
    metrics_data
    .groupby('name')[metrics_cols]
    .agg(['mean', 'std'])
)

In [8]:
# Print, for every group, the config of its first run followed by one
# "mean+/-std" line per metric column.
#
# The first level of `groups.columns` is the metric column name, in order of
# first appearance; it is the same for every group, so compute it once.
metric_names = groups.columns.get_level_values(0).unique()

for group_name, stats in groups.iterrows():
    # Config values of the first run in this group, shown as its description.
    config = metrics_data.loc[metrics_data['name'] == group_name, main_cols].iloc[0].to_dict()
    print()
    print()
    print(config)
    print()
    for metric_name in metric_names:
        # Metric column names look like 'metrics__<section>__<dataset>__<metric>'.
        parts = metric_name.split("__")
        # Look both statistics up by key instead of relying on 'std' directly
        # following 'mean' in iteration order.
        mean_val = stats.loc[(metric_name, 'mean')]
        std_val = stats.loc[(metric_name, 'std')]
        print(f'{parts[1]}: {parts[2]} Mean ROC-AUCS = {mean_val:.3f}+/-{std_val:.3f}')



{'name': '04728b1accd4', 'valid_fold': 1, 'text_encoder_model': 'emilyalsentzer/Bio_ClinicalBERT', 'ecg_encoder_model': 'CNN_model_v3', 'train_datasets': ('ptb_xl', 'ningbo', 'georgia'), 'pretrained': False}

test_metrics: ptb_xl Mean ROC-AUCS = 0.737+/-0.028
test_metrics: ningbo Mean ROC-AUCS = 0.792+/-0.013
test_metrics: georgia Mean ROC-AUCS = 0.704+/-0.008
zero_shot_test_metrics: ptb_xl Mean ROC-AUCS = 0.638+/-0.021
zero_shot_test_metrics: ningbo Mean ROC-AUCS = 0.661+/-0.012
zero_shot_test_metrics: georgia Mean ROC-AUCS = 0.694+/-0.005
exp2_metrics_trained: sph Mean ROC-AUCS = 0.817+/-0.024
exp2_metrics_trained: code15 Mean ROC-AUCS = nan+/-nan
exp2_metrics_untrained: sph Mean ROC-AUCS = 0.682+/-0.029
exp2_metrics_untrained: code15 Mean ROC-AUCS = 0.634+/-0.024


{'name': '29edb150bba1', 'valid_fold': 1, 'text_encoder_model': 'emilyalsentzer/Bio_ClinicalBERT', 'ecg_encoder_model': 'ISIBrno_model', 'train_datasets': ('ptb_xl', 'ningbo', 'georgia'), 'pretrained': False}

test_metr