### Dependencies

In [26]:
# Base Dependencies
import os
import pickle
import warnings
warnings.filterwarnings('ignore')

# LinAlg / Stats / Plotting Dependencies
import numpy as np
import pandas as pd
pd.set_option("display.precision", 3)
from tqdm import tqdm

# Scikit-Learn Imports
import sklearn
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.utils._testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning
from sklearn.model_selection import cross_val_score, StratifiedKFold

# Utils
from patch_evaluation_utils import kendalltau_bpq

### ICIAR-BACH (without SN)

In [15]:
# ICIAR-BACH (raw images): for every encoder, load its pre-extracted region
# embeddings and score them with a KNN classifier under 10-fold stratified
# cross-validation. Each row of the final table holds the one-vs-rest AUC of
# every class followed by the multi-class OvR AUC.
iciarbach_raw_aucs_all = {}
models = ['resnet50trunc_mean256', 'hipt_4k_mean256', 'hipt_4k_cls4k', 'hipt_4k_mean256_cls4k']

model_names = ['ImageNet (Mean)',
               'ViT-256 (Mean)',
               'ViT-4k',
               'ViT-256-4k'
              ]

for enc in models:
    train_fname = os.path.join('./embeddings_region_lib/', 'iciarbach_raw_train_%s.pkl' % enc)
    if not os.path.isfile(train_fname):
        # Report the skip explicitly: the encoder is left out of the table
        # instead of triggering a KeyError when the table is assembled.
        print('Skipping %s: %s not found' % (enc, train_fname))
        continue
    # NOTE(review): pickle.load can execute arbitrary code -- only load
    # embedding files that were produced locally / by a trusted source.
    with open(train_fname, 'rb') as handle:
        asset_dict = pickle.load(handle)
    train_embeddings, train_labels = asset_dict['embeddings'], asset_dict['labels']

    # Encode labels as integers 0..n_classes-1 (LabelEncoder sorts the classes).
    le = LabelEncoder().fit(train_labels)
    train_labels = le.transform(train_labels)

    clf = KNeighborsClassifier()
    skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)

    aucs = []
    for label in np.unique(train_labels):
        # Binarise: current class vs. everything else.
        label_class = np.array(train_labels == label, int)
        scores = cross_val_score(clf, train_embeddings, label_class, cv=skf, scoring='roc_auc')
        aucs.append(scores.mean())
    # Last entry: multi-class one-vs-rest AUC over all classes at once.
    scores = cross_val_score(clf, train_embeddings, train_labels, cv=skf, scoring='roc_auc_ovr')
    aucs.append(scores.mean())
    iciarbach_raw_aucs_all[enc] = aucs

# One row per evaluated encoder (insertion order follows `models`).
# NOTE(review): the column names assume the sorted label order is
# Benign, InSitu, Invasive, Normal -- confirm against the labels in the pickles.
aucs_df = pd.DataFrame.from_dict(iciarbach_raw_aucs_all, orient='index',
                                 columns=['Benign', 'InSitu', 'Invasive', 'Normal', 'All'])
# Map encoder ids to display names by key, so skipped encoders cannot shift names.
aucs_df = aucs_df.rename(index=dict(zip(models, model_names)))
aucs_df = aucs_df[['Normal', 'Benign', 'InSitu', 'Invasive', 'All']]
iciarbach_raw = aucs_df.copy()
iciarbach_raw

Unnamed: 0,Normal,Benign,InSitu,Invasive,All
ImageNet (Mean),0.935,0.875,0.885,0.911,0.905
ViT-256 (Mean),0.868,0.837,0.85,0.896,0.865
ViT-4k,0.952,0.917,0.94,0.949,0.944
ViT-256-4k,0.933,0.914,0.934,0.933,0.934


### ICIAR-BACH (with SN)

In [17]:
# ICIAR-BACH ("sn" embedding files): same protocol as the other benchmark
# cells -- KNN on pre-extracted region embeddings, 10-fold stratified
# cross-validation, one-vs-rest AUC per class plus the multi-class OvR AUC.
iciarbach_sn_aucs_all = {}
models = ['resnet50trunc_mean256', 'hipt_4k_mean256', 'hipt_4k_cls4k', 'hipt_4k_mean256_cls4k']

model_names = ['ImageNet (Mean)',
               'ViT-256 (Mean)',
               'ViT-4k',
               'ViT-256-4k'
              ]

for enc in models:
    train_fname = os.path.join('./embeddings_region_lib/', 'iciarbach_sn_train_%s.pkl' % enc)
    if not os.path.isfile(train_fname):
        # Same missing-file guard as the other benchmark cells; the skip is
        # reported and the encoder is left out of the final table.
        print('Skipping %s: %s not found' % (enc, train_fname))
        continue
    # NOTE(review): pickle.load can execute arbitrary code -- only load
    # embedding files that were produced locally / by a trusted source.
    with open(train_fname, 'rb') as handle:
        asset_dict = pickle.load(handle)
    train_embeddings, train_labels = asset_dict['embeddings'], asset_dict['labels']

    # Encode labels as integers 0..n_classes-1 (LabelEncoder sorts the classes).
    le = LabelEncoder().fit(train_labels)
    train_labels = le.transform(train_labels)

    clf = KNeighborsClassifier()
    skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)

    aucs = []
    for label in np.unique(train_labels):
        # Binarise: current class vs. everything else.
        label_class = np.array(train_labels == label, int)
        scores = cross_val_score(clf, train_embeddings, label_class, cv=skf, scoring='roc_auc')
        aucs.append(scores.mean())
    # Last entry: multi-class one-vs-rest AUC over all classes at once.
    scores = cross_val_score(clf, train_embeddings, train_labels, cv=skf, scoring='roc_auc_ovr')
    aucs.append(scores.mean())
    iciarbach_sn_aucs_all[enc] = aucs

# One row per evaluated encoder (insertion order follows `models`).
# NOTE(review): the column names assume the sorted label order is
# Benign, InSitu, Invasive, Normal -- confirm against the labels in the pickles.
aucs_df = pd.DataFrame.from_dict(iciarbach_sn_aucs_all, orient='index',
                                 columns=['Benign', 'InSitu', 'Invasive', 'Normal', 'All'])
# Map encoder ids to display names by key, so skipped encoders cannot shift names.
aucs_df = aucs_df.rename(index=dict(zip(models, model_names)))
aucs_df = aucs_df[['Normal', 'Benign', 'InSitu', 'Invasive', 'All']]
iciarbach_sn = aucs_df.copy()
iciarbach_sn

Unnamed: 0,Normal,Benign,InSitu,Invasive,All
ImageNet (Mean),0.93,0.843,0.865,0.889,0.895
ViT-256 (Mean),0.887,0.804,0.827,0.897,0.854
ViT-4k,0.968,0.899,0.928,0.934,0.938
ViT-256-4k,0.958,0.875,0.913,0.92,0.925


### DigestPath

In [19]:
# DigestPath (lesion segmentation embeddings): KNN on pre-extracted region
# embeddings, 10-fold stratified cross-validation. Each row holds the
# one-vs-rest AUC of every class followed by the multi-class OvR AUC; columns
# are left as integer positions because the class names are not set here.
digestpath_lesionseg_aucs_all = {}
models = ['resnet50trunc_mean256', 'hipt_4k_mean256', 'hipt_4k_cls4k', 'hipt_4k_mean256_cls4k']

model_names = ['ImageNet (Mean)',
               'ViT-256 (Mean)',
               'ViT-4k',
               'ViT-256-4k'
              ]

for enc in models:
    train_fname = os.path.join('./embeddings_region_lib/', 'digestpath_lesionseg_train_%s.pkl' % enc)
    if not os.path.isfile(train_fname):
        # Report the skip explicitly: the encoder is left out of the table
        # instead of triggering a KeyError when the table is assembled.
        print('Skipping %s: %s not found' % (enc, train_fname))
        continue
    # NOTE(review): pickle.load can execute arbitrary code -- only load
    # embedding files that were produced locally / by a trusted source.
    with open(train_fname, 'rb') as handle:
        asset_dict = pickle.load(handle)
    train_embeddings, train_labels = asset_dict['embeddings'], asset_dict['labels']

    # Encode labels as integers 0..n_classes-1 (LabelEncoder sorts the classes).
    le = LabelEncoder().fit(train_labels)
    train_labels = le.transform(train_labels)

    clf = KNeighborsClassifier()
    skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)

    aucs = []
    for label in np.unique(train_labels):
        # Binarise: current class vs. everything else.
        label_class = np.array(train_labels == label, int)
        scores = cross_val_score(clf, train_embeddings, label_class, cv=skf, scoring='roc_auc')
        aucs.append(scores.mean())
    # Last entry: multi-class one-vs-rest AUC over all classes at once.
    scores = cross_val_score(clf, train_embeddings, train_labels, cv=skf, scoring='roc_auc_ovr')
    aucs.append(scores.mean())
    digestpath_lesionseg_aucs_all[enc] = aucs

# One row per evaluated encoder (insertion order follows `models`).
aucs_df = pd.DataFrame.from_dict(digestpath_lesionseg_aucs_all, orient='index')
# Map encoder ids to display names by key, so skipped encoders cannot shift names.
aucs_df = aucs_df.rename(index=dict(zip(models, model_names)))
# Stored under its own name: writing to `iciarbach_raw` here would overwrite
# the ICIAR-BACH results computed in an earlier cell.
digestpath_lesionseg = aucs_df.copy()
digestpath_lesionseg

Unnamed: 0,0,1,2
ImageNet (Mean),0.947,0.947,0.947
ViT-256 (Mean),0.896,0.896,0.896
ViT-4k,0.992,0.992,0.992
ViT-256-4k,0.976,0.976,0.976


### Lymphoma

In [None]:
# Lymphoma TMA: compare three classifiers (KNN, logistic regression, random
# forest) on each encoder's pre-extracted region embeddings using 10-fold
# stratified cross-validation. `all_aucs` collects one single-column frame per
# classifier, ready to be concatenated side by side.
models = ['resnet50trunc_mean256', 'hipt_4k_mean256', 'hipt_4k_cls4k', 'hipt_4k_mean256_cls4k']

model_names = ['ImageNet (Mean)',
               'ViT-256 (Mean)',
               'ViT-4k',
               'ViT-256-4k'
              ]

clfs = [KNeighborsClassifier(), 
        LogisticRegression(random_state=0), 
        RandomForestClassifier(random_state=0)]

# Load and encode every encoder's data once; it does not depend on the classifier.
lymphomaTMA_data = {}
for enc in models:
    train_fname = os.path.join('./embeddings_region_lib/', 'lymphomaTMA_train_%s.pkl' % enc)
    if not os.path.isfile(train_fname):
        # Report the skip explicitly: the encoder is left out of the tables
        # instead of triggering a KeyError when they are assembled.
        print('Skipping %s: %s not found' % (enc, train_fname))
        continue
    # NOTE(review): pickle.load can execute arbitrary code -- only load
    # embedding files that were produced locally / by a trusted source.
    with open(train_fname, 'rb') as handle:
        asset_dict = pickle.load(handle)
    train_embeddings, train_labels = asset_dict['embeddings'], asset_dict['labels']
    # Labels are stored as a 2-D array; only the first column is used as the target.
    train_labels = train_labels[:,0]

    # Encode labels as integers 0..n_classes-1 (LabelEncoder sorts the classes).
    le = LabelEncoder().fit(train_labels)
    lymphomaTMA_data[enc] = (train_embeddings, le.transform(train_labels))

all_aucs = []

for clf in clfs:
    lymphomaTMA_aucs_all = {}

    for enc, (train_embeddings, train_labels) in lymphomaTMA_data.items():
        skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)

        aucs = []
        for label in np.unique(train_labels):
            # Binarise: current class vs. everything else.
            label_class = np.array(train_labels == label, int)
            scores = cross_val_score(clf, train_embeddings, label_class, cv=skf, scoring='roc_auc')
            aucs.append(scores.mean())
        # Last entry: multi-class one-vs-rest AUC over all classes at once.
        scores = cross_val_score(clf, train_embeddings, train_labels, cv=skf, scoring='roc_auc_ovr')
        aucs.append(scores.mean())
        lymphomaTMA_aucs_all[enc] = aucs

    # One row per evaluated encoder (insertion order follows `models`).
    aucs_df = pd.DataFrame.from_dict(lymphomaTMA_aucs_all, orient='index')
    # Map encoder ids to display names by key, so skipped encoders cannot shift names.
    aucs_df = aucs_df.rename(index=dict(zip(models, model_names)))
    # Keep only the first column (one-vs-rest AUC of the first encoded class)
    # and label it with the classifier's class name so that the columns of the
    # concatenated table can be told apart.
    lymphomaTMA_raw = aucs_df.iloc[:, :1].rename(columns={0: type(clf).__name__})
    all_aucs.append(lymphomaTMA_raw)

In [36]:
# Place the per-classifier AUC frames side by side (one column block per entry of `all_aucs`).
pd.concat(objs=all_aucs, axis='columns')

Unnamed: 0,0,0.1,0.2
ImageNet (Mean),0.606,0.606,0.606
ViT-256 (Mean),0.594,0.594,0.594
ViT-4k,0.64,0.64,0.64
ViT-256-4k,0.632,0.632,0.632
