In [37]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
import os
from glob import glob

In [38]:
def open_cfg(file):
    with open(file, 'rt') as f:
        data = json.loads(f.read())

    for key in list(data.keys()):
        if type(data[key]) == list:
            data[key] = tuple(data[key])
    return data

def dicts_equal(dict1, dict2):
    keys1 = tuple(sorted(list(dict1.keys())))
    keys2 = tuple(sorted(list(dict2.keys())))
    if keys1 != keys2:
        return False
    for key in keys1:
        if dict1[key] != dict2[key]:
            return False
    return True

In [44]:
MODELS = ['ISIBrno_model', 'CNN_model', 'RNN_model', 'ECGConvEncoder', 'CNN_model_v2', 'CNN_model_v3']
TEXT_MODELS = ['emilyalsentzer/Bio_ClinicalBERT', 'dmis-lab/biobert-v1.1']
TRAIN_DATASETS = [['ptb_xl'], ['ptb_xl', 'ningbo'], ['ptb_xl', 'ningbo', 'georgia']]
EXP2_DATASETS = ['sph', 'code15']

In [50]:
results = sorted(glob('results/*.csv'))
print(len(results))
results = {log: pd.read_csv(log) for log in results}
print(len(results))
results = {key.removeprefix('results/').removesuffix('.csv'): val for key, val in results.items()}
results = {key: {'log': val, 'cfg': open_cfg(f'results/{key}.cfg')} for key, val in results.items() if os.path.isfile(f'results/{key}.cfg')}
print(len(results))
results = {key: val for key, val in results.items() if val['cfg']['ecg_encoder_model'] in MODELS}
print(len(results))
results = {key: val for key, val in results.items() if not key.endswith('pretrain')}
print(len(results))
results = {key: val for key, val in results.items() if not key.endswith('pretrained')}
print(len(results))

76
76
73
73
60
60


In [51]:
for model in MODELS:
    print()
    print(model)
    model_results = {key: val for key, val in results.items() if val['cfg']['ecg_encoder_model'] == model}
    print('Number of results:', len(model_results))
    if len(model_results) > 0:

        for datasets in TRAIN_DATASETS:
            print('Train datasets:', datasets)
            ds_results = {key: val for key, val in model_results.items() if val['cfg']['train_datasets'] == tuple(datasets)}
            print('Number of datasets:', len(ds_results))
            if len(ds_results) > 0:

                for text_model in TEXT_MODELS:
                    print('Text model:', text_model)
                    txtmdl_results = {key: val for key, val in model_results.items() if val['cfg']['text_encoder_model'] == text_model}
                    print('Number of textmodel_results:', len(txtmdl_results))
                    if len(txtmdl_results) > 0:
                        # for train_class in val['cfg']['train_classes']:
                        #     for train_ds in datasets:
                        #         vals = [res['cfg']['test_metrics'][train_ds][f'{train_class}_rocauc'] for res in ds_results.values()]
                        #         vals = [val for val in vals if val is not None]
                        #         print(f'{train_ds} {train_class} metrics: {np.mean(vals):.3f}\u00b1{np.std(vals):.3f}')
            
                        for train_ds in datasets:
                            vals = [res['cfg']['test_metrics'][train_ds]['mean_rocaucs'] for res in txtmdl_results.values()]
                            print(f'{train_ds} test metrics: {np.mean(vals):.3f}+/-{np.std(vals):.3f}')
        
                        
                        for train_ds in datasets:
                            vals = [res['cfg']['zero_shot_test_metrics'][train_ds]['mean_rocaucs'] for res in txtmdl_results.values()]
                            print(f'{train_ds} zero-shot metrics: {np.mean(vals):.3f}+/-{np.std(vals):.3f}') 
        
                
                        for exp2_ds in EXP2_DATASETS:
                            vals = [res['cfg']['exp2_metrics_trained'][exp2_ds]['mean_rocaucs'] for res in txtmdl_results.values()]
                            print(f'{exp2_ds} trained metrics: {np.mean(vals):.3f}+/-{np.std(vals):.3f}')
        
                
                        for exp2_ds in EXP2_DATASETS:
                            vals = [res['cfg']['exp2_metrics_untrained'][exp2_ds]['mean_rocaucs'] for res in txtmdl_results.values()]
                            print(f'{exp2_ds} zero-shot metrics: {np.mean(vals):.3f}+/-{np.std(vals):.3f}')
                        print()
                        
                        #plt.figure(figsize=(30, 5))
                        #for result in ds_results.values():
                        #    plt.plot(result['log']['valid_mean_rocaucs'])
                        #plt.grid()
                        #plt.show()


ISIBrno_model
Number of results: 12
Train datasets: ['ptb_xl']
Number of datasets: 4
Text model: emilyalsentzer/Bio_ClinicalBERT
Number of textmodel_results: 12
ptb_xl test metrics: 0.723+/-0.016
ptb_xl zero-shot metrics: 0.791+/-0.061
sph trained metrics: 0.778+/-0.047
code15 trained metrics: nan+/-nan
sph zero-shot metrics: nan+/-nan
code15 zero-shot metrics: nan+/-nan

Text model: dmis-lab/biobert-v1.1
Number of textmodel_results: 0
Train datasets: ['ptb_xl', 'ningbo']
Number of datasets: 4
Text model: emilyalsentzer/Bio_ClinicalBERT
Number of textmodel_results: 12
ptb_xl test metrics: 0.723+/-0.016


KeyError: 'ningbo'

In [None]:
for key, val in results.items():
    print('*'*80)
    print('Train classes:')
    print(len(val['cfg']['train_classes']))
    print(val['cfg']['train_classes'])
    print()
    print('Zeroshot classes:')
    print(len(val['cfg']['zeroshot_classes']))
    print(val['cfg']['zeroshot_classes'])
    print()

In [32]:
char('U+00B1')

NameError: name 'char' is not defined

In [34]:
'\u00b1'

'±'