# Summarize results

This notebook presents the details of our test results (`scores/*.csv`) and shows how we summarized them.

In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import re
import json
import seaborn as sns
import warnings; warnings.simplefilter('ignore')

# Render floats in pandas output with three decimal places.
pd.options.display.float_format = '{:.3f}'.format

## Basic results on Table II

In [2]:
def get_model_name(repr, ptconf):
    """Return the model name used to group rows in the summary tables.

    Args:
        repr: Value of the `representation` column of a score row.
        ptconf: Pre-training configuration name taken from the weight path.
            Accepted for interface compatibility; currently unused.

    Returns:
        `repr`, unchanged.
    """
    return repr

def read_scores(scorefiles=('scores/circor-scores-rulebased.csv',)):
    """Load score CSV files and average the metrics per model and pre-training config.

    Args:
        scorefiles: Iterable of CSV paths. Each file must provide the columns
            `weight_file`, `representation`, `task`, `wacc`, `uar`, `r_Present`,
            `r_Unknown` and `r_Absent`.

    Returns:
        A tuple `(df, scoredf)`:
          * `df` -- the concatenated rows, extended with the columns `model`, `seed`
            and `ptconf`, sorted by `ptconf` and restricted to the tasks
            `circor1`, `circor4` and `circor5`.
          * `scoredf` -- mean of each metric per `(model, ptconf)` group, plus a `count`
            column holding the number of non-null `uar` values in the group.

    Side effects:
        Prints the number of groups and the number of retained rows.
    """
    # ignore_index gives every row a unique label; without it, rows coming from
    # different files share labels and cannot be told apart afterwards.
    df = pd.concat([pd.read_csv(f) for f in scorefiles], ignore_index=True)

    weight_paths = df['weight_file'].astype(str)
    # The pre-training config name is the directory that directly holds the weight file.
    ptconfs = [path.split('/')[-2] for path in weight_paths]
    # The seed is parsed from a "'seed': <digits>" fragment of the path when present.
    seed_matches = [re.search(r"'seed': (\d+)", path) for path in weight_paths]

    df['model'] = [get_model_name(rep, conf) for rep, conf in zip(df['representation'], ptconfs)]
    df['seed'] = [m.group(1) if m is not None else None for m in seed_matches]
    df['ptconf'] = ptconfs

    df = df.sort_values('ptconf')
    #### EXCLUDE ####
    # Only these tasks are kept; rows of every other task are dropped.
    df = df[df.task.isin(['circor1', 'circor4', 'circor5'])]

    metric_columns = ['wacc', 'uar', 'r_Present', 'r_Unknown', 'r_Absent']
    grouped = df.groupby(['model', 'ptconf'])
    scoredf = grouped[metric_columns].mean()
    scoredf['count'] = grouped['uar'].count()
    print(len(scoredf), len(df))
    return df, scoredf

# Summarize the default score file (`scores/circor-scores-rulebased.csv`).
rawscoredf, scoredf = read_scores()
# Print the LaTeX source of the summary table.
print(scoredf.to_latex())
# Last expression of the cell: displayed by the notebook as a table.
scoredf

4 60
\begin{tabular}{llrrrrrr}
\toprule
    &                                      &  wacc &   uar &  r\_Present &  r\_Unknown &  r\_Absent &  count \\
model & ptconf &       &       &            &            &           &        \\
\midrule
AST & pretrained\_models & 0.654 & 0.672 &      0.744 &      0.769 &     0.505 &     15 \\
BYOLA & pretrained\_weights & 0.556 & 0.556 &      0.590 &      0.573 &     0.507 &     15 \\
Cnn14 & external & 0.582 & 0.548 &      0.750 &      0.506 &     0.388 &     15 \\
M2D & m2d\_vit\_base-80x608p16x16-220930-mr7 & 0.832 & 0.713 &      0.911 &      0.361 &     0.868 &     15 \\
\bottomrule
\end{tabular}



Unnamed: 0_level_0,Unnamed: 1_level_0,wacc,uar,r_Present,r_Unknown,r_Absent,count
model,ptconf,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AST,pretrained_models,0.654,0.672,0.744,0.769,0.505,15
BYOLA,pretrained_weights,0.556,0.556,0.59,0.573,0.507,15
Cnn14,external,0.582,0.548,0.75,0.506,0.388,15
M2D,m2d_vit_base-80x608p16x16-220930-mr7,0.832,0.713,0.911,0.361,0.868,15


## The ablation results on Table IV (a)

In [3]:
# Summarize `scores/circor-scores-normal.csv`; this rebinds `rawscoredf` and `scoredf`.
rawscoredf, scoredf = read_scores(scorefiles=['scores/circor-scores-normal.csv'])
# Print the LaTeX source of the summary table.
print(scoredf.to_latex())
# Last expression of the cell: displayed by the notebook as a table.
scoredf

4 60
\begin{tabular}{llrrrrrr}
\toprule
    &                                      &  wacc &   uar &  r\_Present &  r\_Unknown &  r\_Absent &  count \\
model & ptconf &       &       &            &            &           &        \\
\midrule
AST & pretrained\_models & 0.673 & 0.705 &      0.579 &      0.769 &     0.766 &     15 \\
BYOLA & pretrained\_weights & 0.569 & 0.598 &      0.409 &      0.627 &     0.759 &     15 \\
Cnn14 & external & 0.611 & 0.604 &      0.544 &      0.553 &     0.715 &     15 \\
M2D & m2d\_vit\_base-80x608p16x16-220930-mr7 & 0.796 & 0.683 &      0.794 &      0.314 &     0.940 &     15 \\
\bottomrule
\end{tabular}



Unnamed: 0_level_0,Unnamed: 1_level_0,wacc,uar,r_Present,r_Unknown,r_Absent,count
model,ptconf,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AST,pretrained_models,0.673,0.705,0.579,0.769,0.766,15
BYOLA,pretrained_weights,0.569,0.598,0.409,0.627,0.759,15
Cnn14,external,0.611,0.604,0.544,0.553,0.715,15
M2D,m2d_vit_base-80x608p16x16-220930-mr7,0.796,0.683,0.794,0.314,0.94,15


## The ensemble results on Table III

In [5]:
# Load the ensemble results; each row carries a comma-separated `combinations` string.
df = pd.read_csv('scores/ensemble-circor-results.csv')

# Split the first row's `combinations` string into its comma-separated entries.
# NOTE(review): `comb` and `models` are not used by the cells visible here -- confirm
# whether a later cell reads them before removing.
comb = df.combinations.values[0]
models = comb.split(',')
def model_name(model):
    """Map one entry of a `combinations` string to its short model name.

    The entry is matched by substring against the known identifiers, in the
    order listed below; the first identifier found decides the name.

    Args:
        model: One comma-separated entry of a `combinations` string.

    Returns:
        One of 'BYOL-A', 'M2D', 'AST' or 'Cnn14'.

    Raises:
        ValueError: If the entry contains none of the known identifiers.
    """
    known_models = (
        ('ar_byola.AR_BYOLA', 'BYOL-A'),
        ('ar_m2d.AR_M2D', 'M2D'),  # TODO 0.6 ? 0.7 ?
        ('ar_ast.AR_AST', 'AST'),
        ('ar_cnn14.AR_Cnn14', 'Cnn14'),
    )
    for identifier, name in known_models:
        if identifier in model:
            return name
    # An explicit exception (rather than `assert False`) still fires under
    # `python -O` and reports which entry could not be mapped.
    raise ValueError('Unknown model in combination entry: {!r}'.format(model))
# Label each row with the sorted, '-'-joined short names of its members (e.g. 'AST-M2D').
df['models'] = df.combinations.apply(lambda t: '-'.join(sorted([model_name(m) for m in t.split(',')])))
# Display the ten rows with the highest `wacc`.
df.sort_values(['wacc'], ascending=False)[:10]

Unnamed: 0,task,wacc,uar,r_Present,r_Unknown,r_Absent,combinations,models
81,circor1,0.88,0.788,0.978,0.529,0.856,-lab-physionet2022_dl-evar-logs-circor1_ar_ast...,AST-M2D
118,circor1,0.871,0.826,0.933,0.706,0.839,-lab-physionet2022_dl-evar-logs-circor1_ar_cnn...,Cnn14-M2D
218,circor4,0.869,0.769,0.933,0.471,0.902,-lab-physionet2022_dl-evar-logs-circor4_ar_ast...,AST-M2D
350,circor4,0.869,0.767,0.956,0.471,0.874,-lab-physionet2022_dl-evar-logs-circor4_ar_ast...,AST-M2D
35,circor1,0.869,0.826,0.911,0.706,0.862,-lab-physionet2022_dl-evar-logs-circor1_ar_cnn...,Cnn14-M2D
87,circor1,0.867,0.822,0.933,0.706,0.828,-lab-physionet2022_dl-evar-logs-circor1_ar_cnn...,Cnn14-M2D
31,circor1,0.867,0.795,0.933,0.588,0.862,-lab-physionet2022_dl-evar-logs-circor1_ar_ast...,AST-M2D
79,circor1,0.867,0.808,0.933,0.647,0.845,-lab-physionet2022_dl-evar-logs-circor1_ar_cnn...,Cnn14-M2D
176,circor1,0.867,0.795,0.933,0.588,0.862,-lab-physionet2022_dl-evar-logs-circor1_ar_ast...,AST-M2D
187,circor1,0.867,0.808,0.933,0.647,0.845,-lab-physionet2022_dl-evar-logs-circor1_ar_ast...,AST-M2D


In [6]:
# Average the metrics per model pair, sorted by ascending `wacc`.
# `numeric_only=True` restricts the mean to numeric columns: the frame also holds
# string columns (`task`, `combinations`), which make a plain `.mean()` raise a
# TypeError on pandas >= 2.0.
tmpdf = df.groupby('models').mean(numeric_only=True).sort_values('wacc')
# Print the LaTeX source of the summary table.
print(tmpdf.to_latex())
# Last expression of the cell: displayed by the notebook as a table.
tmpdf

\begin{tabular}{lrrrrr}
\toprule
{} &  wacc &   uar &  r\_Present &  r\_Unknown &  r\_Absent \\
models        &       &       &            &            &           \\
\midrule
BYOL-A-BYOL-A & 0.575 & 0.581 &      0.619 &      0.616 &     0.508 \\
Cnn14-Cnn14   & 0.587 & 0.558 &      0.745 &      0.529 &     0.399 \\
BYOL-A-Cnn14  & 0.587 & 0.580 &      0.652 &      0.582 &     0.505 \\
AST-BYOL-A    & 0.630 & 0.645 &      0.676 &      0.712 &     0.546 \\
AST-Cnn14     & 0.648 & 0.652 &      0.734 &      0.700 &     0.523 \\
AST-AST       & 0.682 & 0.699 &      0.750 &      0.782 &     0.565 \\
BYOL-A-M2D    & 0.817 & 0.721 &      0.870 &      0.432 &     0.862 \\
Cnn14-M2D     & 0.829 & 0.719 &      0.898 &      0.391 &     0.868 \\
AST-M2D       & 0.832 & 0.733 &      0.899 &      0.438 &     0.862 \\
M2D-M2D       & 0.837 & 0.716 &      0.918 &      0.355 &     0.875 \\
\bottomrule
\end{tabular}



Unnamed: 0_level_0,wacc,uar,r_Present,r_Unknown,r_Absent
models,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
BYOL-A-BYOL-A,0.575,0.581,0.619,0.616,0.508
Cnn14-Cnn14,0.587,0.558,0.745,0.529,0.399
BYOL-A-Cnn14,0.587,0.58,0.652,0.582,0.505
AST-BYOL-A,0.63,0.645,0.676,0.712,0.546
AST-Cnn14,0.648,0.652,0.734,0.7,0.523
AST-AST,0.682,0.699,0.75,0.782,0.565
BYOL-A-M2D,0.817,0.721,0.87,0.432,0.862
Cnn14-M2D,0.829,0.719,0.898,0.391,0.868
AST-M2D,0.832,0.733,0.899,0.438,0.862
M2D-M2D,0.837,0.716,0.918,0.355,0.875


## The ablation results on Table IV (b) and (c)

In [7]:
# Summarize `scores/circor-scores-ablations.csv`; this rebinds `rawscoredf` and `scoredf`.
rawscoredf, scoredf = read_scores(scorefiles=['scores/circor-scores-ablations.csv'])
# Print the LaTeX source of the summary table.
print(scoredf.to_latex())
# Last expression of the cell: displayed by the notebook as a table.
scoredf

5 75
\begin{tabular}{llrrrrrr}
\toprule
    &                                      &  wacc &   uar &  r\_Present &  r\_Unknown &  r\_Absent &  count \\
model & ptconf &       &       &            &            &           &        \\
\midrule
AST & pretrained\_models & 0.670 & 0.617 &      0.772 &      0.490 &     0.590 &     15 \\
BYOLA & pretrained\_weights & 0.536 & 0.524 &      0.630 &      0.522 &     0.420 &     15 \\
Cnn14 & external & 0.374 & 0.374 &      0.550 &      0.447 &     0.126 &     15 \\
M2D & m2d\_vit\_base-80x208p16x16-random & 0.595 & 0.536 &      0.547 &      0.325 &     0.737 &     15 \\
    & m2d\_vit\_base-80x608p16x16-220930-mr7 & 0.832 & 0.713 &      0.911 &      0.361 &     0.868 &     15 \\
\bottomrule
\end{tabular}



Unnamed: 0_level_0,Unnamed: 1_level_0,wacc,uar,r_Present,r_Unknown,r_Absent,count
model,ptconf,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
AST,pretrained_models,0.67,0.617,0.772,0.49,0.59,15
BYOLA,pretrained_weights,0.536,0.524,0.63,0.522,0.42,15
Cnn14,external,0.374,0.374,0.55,0.447,0.126,15
M2D,m2d_vit_base-80x208p16x16-random,0.595,0.536,0.547,0.325,0.737,15
M2D,m2d_vit_base-80x608p16x16-220930-mr7,0.832,0.713,0.911,0.361,0.868,15
