In [1]:
import pandas as pd
from sklearn import metrics

from sklearn import metrics
from scipy import stats
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import scipy.stats
# Raise pandas' display limits so frames with up to 500 columns / 200 rows
# are rendered in full instead of being elided.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 200)
# Plot styling: seaborn "whitegrid" theme and a 10x10 default figure size.
sns.set_theme(style='whitegrid')
plt.rcParams["figure.figsize"] = (10,10)


def format_e(n):
    """Render ``n`` in scientific notation with a trimmed mantissa.

    The number is formatted with ``'%2e'`` (the ``2`` is a minimum field
    width, not a precision, so the mantissa initially carries six decimals).
    Trailing zeros of the mantissa, and a then-dangling decimal point, are
    removed; the exponent part is kept as produced by the formatter.

    Example: ``format_e(12345)`` -> ``'1.2345e+04'``.
    """
    pieces = ('%2e' % n).split('e')
    mantissa = pieces[0].rstrip('0').rstrip('.')
    return mantissa + 'e' + pieces[1]

def mean_confidence_interval(data, confidence=0.95, rounding=4):
    """Return the sample mean with a Student-t confidence interval.

    Parameters
    ----------
    data : array-like of numbers
        Observations.
    confidence : float, default 0.95
        Two-sided confidence level.
    rounding : int, default 4
        Number of decimals every returned value is rounded to.

    Returns
    -------
    tuple
        ``(mean, lower, upper)`` where ``lower``/``upper`` are
        ``mean -/+ sem * t_quantile`` with ``len(data) - 1`` degrees of
        freedom.
    """
    samples = 1.0 * np.array(data)
    dof = len(samples) - 1
    center = np.mean(samples)
    std_err = scipy.stats.sem(samples)
    # Half-width of the interval: standard error scaled by the t quantile.
    half_width = std_err * scipy.stats.t.ppf((1 + confidence) / 2., dof)
    bounds = (center, center - half_width, center + half_width)
    return tuple(round(value, rounding) for value in bounds)

def confidence_interval(data, size=10000, func=np.mean, seed=42):
    """Bootstrap a 95% percentile confidence interval for a statistic.

    ``size`` resamples of ``data`` are drawn with replacement (each as long
    as ``data``), ``func`` is evaluated on every resample, and the 2.5th and
    97.5th percentiles of those replicates are returned.

    Parameters
    ----------
    data : 1-D array-like
        Observations to resample.
    size : int, default 10000
        Number of bootstrap replicates.
    func : callable, default ``np.mean``
        Statistic evaluated on each resample; must return a scalar.
    seed : int, default 42
        Seed of the private random generator used for resampling.

    Returns
    -------
    numpy.ndarray
        Two-element array ``[lower, upper]`` (not the replicates themselves).
    """
    # A private legacy RandomState produces exactly the draws that
    # ``np.random.seed(seed)`` + ``np.random.choice`` would, but it does not
    # reseed the process-wide NumPy generator as a hidden side effect.
    rng = np.random.RandomState(seed)
    n_obs = len(data)

    bs_replicates = np.empty(size)
    for i in range(size):
        # One bootstrap resample -> one replicate of the statistic.
        bs_replicates[i] = func(rng.choice(data, size=n_obs))

    return np.percentile(bs_replicates, [2.5, 97.5])


def confidence_interval_accuracy(data, size=10000, seed=42):
    """Bootstrap a 95% percentile confidence interval of balanced accuracy.

    Rows of ``data`` are resampled with replacement ``size`` times (each
    resample has ``len(data)`` rows). For every resample the balanced
    accuracy of column ``'prediction'`` against column ``'benign_malignant'``
    is computed, and the 2.5th / 97.5th percentiles of those scores are
    returned.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``'benign_malignant'`` (passed as ``y_true``)
        and ``'prediction'`` (passed as ``y_pred``).
    size : int, default 10000
        Number of bootstrap replicates.
    seed : int, default 42
        Seed of the private random generator used for resampling.

    Returns
    -------
    numpy.ndarray
        Two-element array ``[lower, upper]`` (not the replicates themselves).
    """
    # ``DataFrame.sample`` falls back to the global NumPy RNG when no
    # ``random_state`` is given. Handing it one private RandomState, reused
    # across iterations, yields the same resamples as seeding the global
    # generator once, without clobbering global random state.
    rng = np.random.RandomState(seed)
    n_rows = len(data)

    bs_replicates = np.empty(size)
    for i in range(size):
        bs_sample = data.sample(n_rows, replace=True, random_state=rng)
        bs_replicates[i] = metrics.balanced_accuracy_score(
            bs_sample['benign_malignant'], bs_sample['prediction']
        )

    return np.percentile(bs_replicates, [2.5, 97.5])


def confidence_interval_accuracy_baseline(data, size=10000, seed=42):
    """Bootstrap a 95% percentile confidence interval of balanced accuracy.

    Variant for frames that use the column names ``'true'`` and ``'pred'``.
    Rows of ``data`` are resampled with replacement ``size`` times (each
    resample has ``len(data)`` rows); the balanced accuracy of ``'pred'``
    against ``'true'`` is computed per resample and the 2.5th / 97.5th
    percentiles of those scores are returned.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``'true'`` (passed as ``y_true``) and
        ``'pred'`` (passed as ``y_pred``).
    size : int, default 10000
        Number of bootstrap replicates.
    seed : int, default 42
        Seed of the private random generator used for resampling.

    Returns
    -------
    numpy.ndarray
        Two-element array ``[lower, upper]`` (not the replicates themselves).
    """
    # ``DataFrame.sample`` falls back to the global NumPy RNG when no
    # ``random_state`` is given. Handing it one private RandomState, reused
    # across iterations, yields the same resamples as seeding the global
    # generator once, without clobbering global random state.
    rng = np.random.RandomState(seed)
    n_rows = len(data)

    bs_replicates = np.empty(size)
    for i in range(size):
        bs_sample = data.sample(n_rows, replace=True, random_state=rng)
        bs_replicates[i] = metrics.balanced_accuracy_score(
            bs_sample['true'], bs_sample['pred']
        )

    return np.percentile(bs_replicates, [2.5, 97.5])

In [2]:
%%time 

def create_row(result, architecture, baseline=False):
    """Build one summary row for ``architecture``.

    Parameters
    ----------
    result : mapping
        Maps architecture name -> prediction frame.
    architecture : str
        Key into ``result``; also the first element of the returned row.
    baseline : bool, default False
        If true, the frame is read through the ``'true'``/``'pred'`` columns
        and ``confidence_interval_accuracy_baseline``; otherwise through
        ``'benign_malignant'``/``'prediction'`` and
        ``confidence_interval_accuracy``.

    Returns
    -------
    list
        ``[architecture, balanced_accuracy, ci, balanced_accuracy - ci[0]]``
        where both bounds of ``ci`` have been rounded to 3 decimals.
    """
    frame = result[architecture]

    # The two result layouts differ only in column names and in which
    # bootstrap helper understands them.
    if baseline:
        truth_col, guess_col = 'true', 'pred'
        interval_fn = confidence_interval_accuracy_baseline
    else:
        truth_col, guess_col = 'benign_malignant', 'prediction'
        interval_fn = confidence_interval_accuracy

    score = metrics.balanced_accuracy_score(frame[truth_col], frame[guess_col])
    bounds = interval_fn(frame)
    for position in (0, 1):
        bounds[position] = np.round(bounds[position], 3)

    return [architecture, score, bounds, score - bounds[0]]
    

# Frames read from ../data/classifier/result_val_<architecture>
# (no resnet18 / resnet101 files are loaded for this split).
result_val = {
    arch: pd.read_csv("../data/classifier/result_val_" + arch)
    for arch in (
        'densenet121', 'densenet161',
        'efficientnet_b1', 'efficientnet_b3',
        'resnet34', 'resnet50',
    )
}

# Frames read from ../data/classifier/result_test_<architecture>.
result_test = {
    arch: pd.read_csv("../data/classifier/result_test_" + arch)
    for arch in (
        'densenet121', 'densenet161',
        'efficientnet_b1', 'efficientnet_b3',
        'resnet18', 'resnet34', 'resnet50', 'resnet101',
    )
}

# Frames read from ../data/baseline/result_test_<architecture>, same keys and
# order as result_test. The EfficientNet files on disk are spelled without
# the underscore (efficientnetb1 / efficientnetb3), hence the replace().
baseline_test = {
    arch: pd.read_csv(
        "../data/baseline/result_test_" + arch.replace('efficientnet_', 'efficientnet')
    )
    for arch in result_test
}

# Validation summary is currently disabled:
#val_df = pd.DataFrame([create_row(result_val, architecture) for architecture in result_val.keys()], columns=['Architecture', 'Balanced Accuracy', 'CI', 'Std'])

# One row per architecture; the 'Std' column receives the fourth element of
# create_row's result (balanced accuracy minus the lower CI bound).
test_df = pd.DataFrame(
    [create_row(result_test, arch) for arch in result_test],
    columns=['Architecture', 'Balanced Accuracy', 'CI', 'Std'],
)
baseline_test_df = pd.DataFrame(
    [create_row(baseline_test, arch, baseline=True) for arch in baseline_test],
    columns=['Architecture', 'Balanced Accuracy', 'CI', 'Std'],
)

CPU times: user 2min 11s, sys: 5.9 ms, total: 2min 11s
Wall time: 2min 11s


In [4]:
# Join the baseline and classifier summaries on architecture name; pandas
# suffixes the overlapping columns with _x (baseline) and _y (classifier).
df = baseline_test_df.merge(test_df, on='Architecture')
df

Unnamed: 0,Architecture,Balanced Accuracy_x,CI_x,Std_x,Balanced Accuracy_y,CI_y,Std_y
0,densenet121,0.785,"[0.726, 0.84]",0.059,0.755,"[0.694, 0.814]",0.061
1,densenet161,0.775,"[0.715, 0.831]",0.06,0.79,"[0.733, 0.845]",0.057
2,efficientnet_b1,0.72,"[0.657, 0.781]",0.063,0.74,"[0.677, 0.8]",0.063
3,efficientnet_b3,0.74,"[0.679, 0.8]",0.061,0.78,"[0.721, 0.835]",0.059
4,resnet18,0.785,"[0.725, 0.84]",0.06,0.785,"[0.728, 0.841]",0.057
5,resnet34,0.765,"[0.705, 0.822]",0.06,0.77,"[0.711, 0.827]",0.059
6,resnet50,0.805,"[0.748, 0.858]",0.057,0.81,"[0.756, 0.863]",0.054
7,resnet101,0.8,"[0.742, 0.854]",0.058,0.805,"[0.748, 0.858]",0.057
