# Model predictions for the tube detection model

In [1]:
# Imports
import pandas as pd
import numpy as np
import ast
import re
import statistics
from sklearn.metrics import roc_auc_score

In [2]:
# Loading all the data

# Model predictions
TD_preds = pd.read_csv('Predictions/TD_preds.csv', index_col=0)
TD_preds_CXR14 = pd.read_csv('Predictions/TD_preds_CXR14.csv', index_col=0)

# True labels
true_labels = pd.read_csv('../Data/Data_splits/tube_detection-test.csv', index_col=0)
true_labels_CXR14 = pd.read_csv('../Data/Data_splits/tube_detection-CXR14_test.csv', index_col=0)

## Area Under the ROC Curve (AUC)

In [6]:
# Function for reading the predictions, which are strings, as numpy arrays
def str2array(s):
    # Remove space after [
    s=re.sub('\[ +', '[', s.strip())
    # Replace commas and spaces
    s=re.sub('[,\s]+', ', ', s)
    return np.array(ast.literal_eval(s))

# Function for printing the average accuracy and auc (with std) for tube detection task for chest drains
def get_preds_binary(df, y_true_anns, y_true_padchest):
    all_preds = []

    for row_number in range(len(df)):
        #print(row_number)
        preds = [[str2array(i["Preds_model1"]) for idx, i in df.iterrows()][row_number][:,1],
                [str2array(i["Preds_model2"]) for idx, i in df.iterrows()][row_number][:,1],
                [str2array(i["Preds_model3"]) for idx, i in df.iterrows()][row_number][:,1]]
        all_preds.append(preds)
        
        
    #print(len(all_preds[0]))
    #print(all_preds[0][0][:10])
    
    # Constructing a df with the preds and 'true' labels
    for idx, row in df.reset_index(drop=True).iterrows():
        #print('DF ROW: ', idx)
        print('Model: ', row['Model_name'])
        #print('Val data: ', row['Val_data'])
        preds_model1 = all_preds[idx][0]
        preds_model2 = all_preds[idx][1]
        preds_model3 = all_preds[idx][2]
        
        # Getting rid of the damn -1 anns
        preds_df = pd.DataFrame(list(zip(y_true_anns,
                                         y_true_padchest,
                                         list(preds_model1),
                                         list(preds_model2),
                                         list(preds_model3))),
                                columns =['Anns', 'PadChest', 'preds_model1', 'preds_model2', 'preds_model3'])
        
        preds_df = preds_df[preds_df['Anns'] != -1]

        # Computing the performance scores
        auc_with_anns = [roc_auc_score(preds_df['Anns'], preds_df['preds_model1']), roc_auc_score(preds_df['Anns'], preds_df['preds_model2']), roc_auc_score(preds_df['Anns'], preds_df['preds_model3'])]
        auc_with_padchest = [roc_auc_score(preds_df['PadChest'], preds_df['preds_model1']), roc_auc_score(preds_df['PadChest'], preds_df['preds_model2']), roc_auc_score(preds_df['PadChest'], preds_df['preds_model3'])]
        
        print("Annotations Average auc:", round(sum(auc_with_anns)/len(auc_with_anns)*100, 5), "with standard deviation:", round(statistics.stdev(auc_with_anns)*100,5))
        print("PadChest Average auc:", round(sum(auc_with_padchest)/len(auc_with_padchest)*100, 5), "with standard deviation:", round(statistics.stdev(auc_with_padchest)*100,5))
        print()
        
        # Printing a confusion matrix
        #print(confusion_matrix(preds_df['Anns'], preds_df['preds_model1']))  # Cannot, due to non-integer probs


# Function to arrange preds nicely in a df
def get_perf_only_chd(orig_pred_df, true_labels_df, print_auc=True):
    tube_types = ['Chest_drain_tube', 'NSG_tube', 'Endotracheal_tube', 'Tracheostomy_tube']
    all_preds = []
    
    for row_number in range(len(orig_pred_df)):
        for idx, tube in enumerate(tube_types):
            preds = [[str2array(i["Preds_model1"]) for idx, i in orig_pred_df.iterrows()][row_number][:,idx],
                     [str2array(i["Preds_model2"]) for idx, i in orig_pred_df.iterrows()][row_number][:,idx],
                     [str2array(i["Preds_model3"]) for idx, i in orig_pred_df.iterrows()][row_number][:,idx]]
            all_preds.append(preds)
            
    # Constructing a df with the preds and 'true' labels
    preds_df = pd.DataFrame(list(zip(list(true_labels_df['Chest_drain_Ann']),
                                     list(all_preds[0][0]),
                                     list(all_preds[0][1]),
                                     list(all_preds[0][2]))),
                            columns = ['Chest_drain_Ann',
                                       'preds_CheD_model1', 'preds_CheD_model2', 'preds_CheD_model3'])

    if print_auc:
        # Computing the auc for each tube separately
        print('CHEST DRAIN TUBE')
        auc_with_anns = [roc_auc_score(preds_df['Chest_drain_Ann'], preds_df['preds_CheD_model1']), roc_auc_score(preds_df['Chest_drain_Ann'], preds_df['preds_CheD_model2']), roc_auc_score(preds_df['Chest_drain_Ann'], preds_df['preds_CheD_model3'])]
        print("Annotations average auc:", round(sum(auc_with_anns)/len(auc_with_anns)*100, 5), "with standard deviation:", round(statistics.stdev(auc_with_anns)*100,5))
    
    return preds_df


# Function to arrange preds nicely in a df
def get_preds_multiclass(orig_pred_df, true_labels_df, print_auc=True):
    tube_types = ['Chest_drain_tube', 'NSG_tube', 'Endotracheal_tube', 'Tracheostomy_tube']
    all_preds = []
    
    for row_number in range(len(orig_pred_df)):
        for idx, tube in enumerate(tube_types):
            preds = [[str2array(i["Preds_model1"]) for idx, i in orig_pred_df.iterrows()][row_number][:,idx],
                     [str2array(i["Preds_model2"]) for idx, i in orig_pred_df.iterrows()][row_number][:,idx],
                     [str2array(i["Preds_model3"]) for idx, i in orig_pred_df.iterrows()][row_number][:,idx]]
            all_preds.append(preds)
            
    # Constructing a df with the preds and 'true' labels
    preds_df = pd.DataFrame(list(zip(list(true_labels_df['Chest_drain_Ann']),
                                     list(true_labels_df['NSG_tube_Ann']),
                                     list(true_labels_df['Endotracheal_tube_Ann']),
                                     list(true_labels_df['Tracheostomy_tube_Ann']),
                                     list(true_labels_df['Chest_drain_tube']),
                                     list(true_labels_df['NSG_tube']),
                                     list(true_labels_df['Endotracheal_tube']),
                                     list(true_labels_df['Tracheostomy_tube']),
                                     list(all_preds[0][0]),
                                     list(all_preds[0][1]),
                                     list(all_preds[0][2]),
                                     list(all_preds[1][0]),
                                     list(all_preds[1][1]),
                                     list(all_preds[1][2]),
                                     list(all_preds[2][0]),
                                     list(all_preds[2][1]),
                                     list(all_preds[2][2]),
                                     list(all_preds[3][0]),
                                     list(all_preds[3][1]),
                                     list(all_preds[3][2]))),
                            columns = ['Chest_drain_Ann', 'NSG_tube_Ann', 'Endotracheal_tube_Ann', 'Tracheostomy_tube_Ann',
                                       'Chest_drain_tube_PadChest', 'NSG_tube_PadChest', 'Endotracheal_tube_PadChest', 'Tracheostomy_tube_PadChest',
                                       'preds_CheD_model1', 'preds_CheD_model2', 'preds_CheD_model3',
                                       'preds_NSG_model1', 'preds_NSG_model2', 'preds_NSG_model3',
                                       'preds_Endo_model1', 'preds_Endo_model2', 'preds_Endo_model3',
                                       'preds_Trach_model1', 'preds_Trach_model2', 'preds_Trach_model3',])


    ## From here, one can return the preds_df if you want to see the predictions nicely
    
    if print_auc:
        # Computing the auc for each tube separately
        print('CHEST DRAIN TUBE')
        preds_df_tube = preds_df[preds_df['Chest_drain_Ann'] != -1]
        auc_with_anns = [roc_auc_score(preds_df_tube['Chest_drain_Ann'], preds_df_tube['preds_CheD_model1']), roc_auc_score(preds_df_tube['Chest_drain_Ann'], preds_df_tube['preds_CheD_model2']), roc_auc_score(preds_df_tube['Chest_drain_Ann'], preds_df_tube['preds_CheD_model3'])]
        auc_with_padchest = [roc_auc_score(preds_df_tube['Chest_drain_tube_PadChest'], preds_df_tube['preds_CheD_model1']), roc_auc_score(preds_df_tube['Chest_drain_tube_PadChest'], preds_df_tube['preds_CheD_model2']), roc_auc_score(preds_df_tube['Chest_drain_tube_PadChest'], preds_df_tube['preds_CheD_model3'])]
        print("Annotations Average auc:", round(sum(auc_with_anns)/len(auc_with_anns)*100, 5), "with standard deviation:", round(statistics.stdev(auc_with_anns)*100,5))
        print("PadChest Average auc:", round(sum(auc_with_padchest)/len(auc_with_padchest)*100, 5), "with standard deviation:", round(statistics.stdev(auc_with_padchest)*100,5))
        #print(auc_with_anns)
        #print(auc_with_padchest)
        print()

        print('NSG TUBE')
        preds_df_tube = preds_df[preds_df['NSG_tube_Ann'] != -1]
        auc_with_anns = [roc_auc_score(preds_df_tube['NSG_tube_Ann'], preds_df_tube['preds_NSG_model1']), roc_auc_score(preds_df_tube['NSG_tube_Ann'], preds_df_tube['preds_NSG_model2']), roc_auc_score(preds_df_tube['NSG_tube_Ann'], preds_df_tube['preds_NSG_model3'])]
        auc_with_padchest = [roc_auc_score(preds_df_tube['NSG_tube_PadChest'], preds_df_tube['preds_NSG_model1']), roc_auc_score(preds_df_tube['NSG_tube_PadChest'], preds_df_tube['preds_NSG_model2']), roc_auc_score(preds_df_tube['NSG_tube_PadChest'], preds_df_tube['preds_NSG_model3'])]
        print("Annotations Average auc:", round(sum(auc_with_anns)/len(auc_with_anns)*100, 5), "with standard deviation:", round(statistics.stdev(auc_with_anns)*100,5))
        print("PadChest Average auc:", round(sum(auc_with_padchest)/len(auc_with_padchest)*100, 5), "with standard deviation:", round(statistics.stdev(auc_with_padchest)*100,5))
        #print(auc_with_anns)
        #print(auc_with_padchest)
        print()

        print('ENDOTRACHEAL TUBE')
        preds_df_tube = preds_df[preds_df['Endotracheal_tube_Ann'] != -1]
        auc_with_anns = [roc_auc_score(preds_df_tube['Endotracheal_tube_Ann'], preds_df_tube['preds_Endo_model1']), roc_auc_score(preds_df_tube['Endotracheal_tube_Ann'], preds_df_tube['preds_Endo_model2']), roc_auc_score(preds_df_tube['Endotracheal_tube_Ann'], preds_df_tube['preds_Endo_model3'])]
        auc_with_padchest = [roc_auc_score(preds_df_tube['Endotracheal_tube_PadChest'], preds_df_tube['preds_Endo_model1']), roc_auc_score(preds_df_tube['Endotracheal_tube_PadChest'], preds_df_tube['preds_Endo_model2']), roc_auc_score(preds_df_tube['Endotracheal_tube_PadChest'], preds_df_tube['preds_Endo_model3'])]
        print("Annotations Average auc:", round(sum(auc_with_anns)/len(auc_with_anns)*100, 5), "with standard deviation:", round(statistics.stdev(auc_with_anns)*100,5))
        print("PadChest Average auc:", round(sum(auc_with_padchest)/len(auc_with_padchest)*100, 5), "with standard deviation:", round(statistics.stdev(auc_with_padchest)*100,5))
        #print(auc_with_anns)
        #print(auc_with_padchest)
        print()

        print('TRACHEOSTOMY TUBE')
        preds_df_tube = preds_df[preds_df['Tracheostomy_tube_Ann'] != -1]
        auc_with_anns = [roc_auc_score(preds_df_tube['Tracheostomy_tube_Ann'], preds_df_tube['preds_Trach_model1']), roc_auc_score(preds_df_tube['Tracheostomy_tube_Ann'], preds_df_tube['preds_Trach_model2']), roc_auc_score(preds_df_tube['Tracheostomy_tube_Ann'], preds_df_tube['preds_Trach_model3'])]
        auc_with_padchest = [roc_auc_score(preds_df_tube['Tracheostomy_tube_PadChest'], preds_df_tube['preds_Trach_model1']), roc_auc_score(preds_df_tube['Tracheostomy_tube_PadChest'], preds_df_tube['preds_Trach_model2']), roc_auc_score(preds_df_tube['Tracheostomy_tube_PadChest'], preds_df_tube['preds_Trach_model3'])]
        print("Annotations Average auc:", round(sum(auc_with_anns)/len(auc_with_anns)*100, 5), "with standard deviation:", round(statistics.stdev(auc_with_anns)*100,5))
        print("PadChest Average auc:", round(sum(auc_with_padchest)/len(auc_with_padchest)*100, 5), "with standard deviation:", round(statistics.stdev(auc_with_padchest)*100,5))
        #print(auc_with_anns)
        #print(auc_with_padchest)
        print()
    
    return preds_df


# Function for printing the average accuracy and auc (with std) for tube detection task for chest drains
def get_preds_multiclass_one_model(df, true_labels_df, print_auc=True):
    tube_types = ['Chest_drain_tube', 'NSG_tube', 'Endotracheal_tube', 'Tracheostomy_tube']
    all_preds = []
    
    for row_number in range(len(df)):
        for t_idx, tube in enumerate(tube_types):
            preds = [[str2array(i["Preds_model1"]) for idx, i in df.iterrows()][row_number][:,t_idx]]
            all_preds.append(preds)
    
    
    # Constructing a df with the preds and 'true' labels
    preds_df = pd.DataFrame(list(zip(list(true_labels_df['Chest_drain_Ann']),
                                     list(true_labels_df['NSG_tube_Ann']),
                                     list(true_labels_df['Endotracheal_tube_Ann']),
                                     list(true_labels_df['Tracheostomy_tube_Ann']),
                                     list(true_labels_df['Chest_drain_tube']),
                                     list(true_labels_df['NSG_tube']),
                                     list(true_labels_df['Endotracheal_tube']),
                                     list(true_labels_df['Tracheostomy_tube']),
                                     list(all_preds[0][0]),
                                     list(all_preds[1][0]),
                                     list(all_preds[2][0]),
                                     list(all_preds[3][0]))),
                            columns = ['Chest_drain_Ann', 'NSG_tube_Ann', 'Endotracheal_tube_Ann', 'Tracheostomy_tube_Ann',
                                       'Chest_drain_tube_PadChest', 'NSG_tube_PadChest', 'Endotracheal_tube_PadChest', 'Tracheostomy_tube_PadChest',
                                       'preds_CheD_model1', 'preds_NSG_model1',
                                       'preds_Endo_model1', 'preds_Trach_model1'])

    
    ## From here, one can return the preds_df if you want to see the predictions nicely
    
    if print_auc:
        # Computing the auc for each tube separately
        print('CHEST DRAIN TUBE')
        preds_df_tube = preds_df[preds_df['Chest_drain_Ann'] != -1]
        print("Annotations auc:", round(roc_auc_score(preds_df_tube['Chest_drain_Ann'], preds_df_tube['preds_CheD_model1'])*100, 5))
        print("PadChest auc:", round(roc_auc_score(preds_df_tube['Chest_drain_tube_PadChest'], preds_df_tube['preds_CheD_model1'])*100, 5))
        print()

        print('NSG TUBE')
        preds_df_tube = preds_df[preds_df['NSG_tube_Ann'] != -1]
        print("Annotations auc:", round(roc_auc_score(preds_df_tube['NSG_tube_Ann'], preds_df_tube['preds_NSG_model1'])*100, 5))
        print("PadChest auc:", round(roc_auc_score(preds_df_tube['NSG_tube_PadChest'], preds_df_tube['preds_NSG_model1'])*100, 5))
        print()

        print('ENDOTRACHEAL TUBE')
        print('NSG TUBE')
        preds_df_tube = preds_df[preds_df['Endotracheal_tube_Ann'] != -1]
        print("Annotations auc:", round(roc_auc_score(preds_df_tube['Endotracheal_tube_Ann'], preds_df_tube['preds_Endo_model1'])*100, 5))
        print("PadChest auc:", round(roc_auc_score(preds_df_tube['Endotracheal_tube_PadChest'], preds_df_tube['preds_Endo_model1'])*100, 5))
        print()

        print('TRACHEOSTOMY TUBE')
        preds_df_tube = preds_df[preds_df['Tracheostomy_tube_Ann'] != -1]
        print("Annotations auc:", round(roc_auc_score(preds_df_tube['Tracheostomy_tube_Ann'], preds_df_tube['preds_Trach_model1'])*100, 5))
        print("PadChest auc:", round(roc_auc_score(preds_df_tube['Tracheostomy_tube_PadChest'], preds_df_tube['preds_Trach_model1'])*100, 5))
        print()
    
    return preds_df

### Multiclass: DenseNet121 fine-tuned on PadChest, detecting 4 tubes

In [7]:
preds_df = get_preds_multiclass(TD_preds, true_labels)
#preds_df

CHEST DRAIN TUBE
Annotations Average auc: 83.73617 with standard deviation: 4.5467
PadChest Average auc: 77.79 with standard deviation: 3.46581

NSG TUBE
Annotations Average auc: 68.36736 with standard deviation: 1.11814
PadChest Average auc: 74.83157 with standard deviation: 1.26568

ENDOTRACHEAL TUBE
Annotations Average auc: 74.86049 with standard deviation: 0.41856
PadChest Average auc: 75.73033 with standard deviation: 0.06881

TRACHEOSTOMY TUBE
Annotations Average auc: 87.5752 with standard deviation: 2.04351
PadChest Average auc: 86.43756 with standard deviation: 2.5167



### Single class: DenseNet121 fine-tuned on PadChest, evaluated on CXR14, detecting only chest drains

In [81]:
preds_df = get_perf_only_chd(TD_preds_CXR14, true_labels_CXR14)
#preds_df

CHEST DRAIN TUBE
Annotations average auc: 48.42036 with standard deviation: 1.77075


## Implementation of Class-Wise Calibration Error (CWCE)

Binary Expected Calibration Error:
$$ \text{binary-ECE}  = \sum_{i=1}^M \frac{|B_{i}|}{N} |
        \bar{y}(B_{i}) - \bar{p}(B_{i})| $$

Class-wise Expected Calibration Error:
$$ \text{class-$j$-ECE}  = \sum_{i=1}^M \frac{|B_{i,j}|}{N}
        |\bar{y}_j(B_{i,j}) - \bar{p}_j(B_{i,j})|,
        \text{classwise-ECE}  = \frac{1}{K}\sum_{j=1}^K \text{class-$j$-ECE} $$

In [88]:
def binary_ECE(y_true, probs, power=1, bins=10):
    r"""
    Binary Expected Calibration Error
    
    Parameters
    ----------
    y_true : indicator vector (n_samples, )
        True labels.
    probs : matrix (n_samples, )
        Predicted probabilities for positive class.
        
    Returns
    -------
    score : float
    """

    create_bins = np.linspace(start=0, stop=1, num=bins + 1)   # Returns 'num' evenly spaced samples, calculated over the interval [start, stop]
    #print('bins created: ', create_bins)
    idx_bins = np.digitize(x=probs, bins=create_bins)   # Return the indices of the bins to which each value in input array belongs
    idx_bins -= 1   # Need to subtract 1 from the bin indices to start at 0
    
    
    # Function for computing the ECE for one bin
    def bin_func(y, p, idx_bins):
        probs_bin_mean = np.mean(p[idx_bins])   # Mean of probs in bin i
        true_bin_mean = np.mean(y[idx_bins])   # Mean of true values in bin i
        diff = np.abs(probs_bin_mean - true_bin_mean)   # Absolute difference between the two bin means
        diff_power = diff ** power   # Raising the diff according to the L_p calibration error specified, typically power = 1
        ece = diff_power * np.sum(idx_bins) / len(p)   # Multiplying by the fraction of probs in that bin
        return ece
        
    # Computing the binary ECE for each bin and summing them
    ece = 0
    
    for i in np.unique(idx_bins):   # Looping through the unique bins (len(bins))
        ece += bin_func(y_true, probs, idx_bins == i)   # Summing the error for each bin

    return ece


def classwise_ECE(y_true, probs, classes_list, power=1, bins=10, print_ece=False):
    r"""Classwise Expected Calibration Error
    
    Parameters
    ----------
    y_true : label indicator matrix (n_samples, n_classes)
        True labels.
    probs : matrix (n_samples, n_classes)
        Predicted probabilities.
        
    Returns
    -------
    score : float
    """

    n_classes = len(classes_list)
    
    # Computing the binary ECE for each class
    class_eces = []
    for c in range(n_classes):   # Looping through the classes
        binary_ece = binary_ECE(y_true[:, c], probs[:, c], power=power, bins=bins)
        if print_ece:
            print('ECE for {}: {}'.format(classes_list[c], round(binary_ece, 3)))
        class_eces.append(binary_ece)
    
    #if print_ece:
        #print()
        #print('Average Class-Wise ECE: ', round(np.mean(class_eces), 3))
    
    return class_eces
    # Right now, not printing the average class-wise calibration error

    
def classwise_ECE_three_models(df_orig, df_y_true, classes_list, power=1, bins=10):
        
    # Creating the preds df
    preds_df = get_preds_multiclass(df_orig, df_y_true, print_auc=False)
    all_model_eces_ann = []
    all_model_eces_pad = []
    
    for i in range(3):
        probs_model_df = preds_df[['preds_CheD_model'+str(i+1), 'preds_NSG_model'+str(i+1), 'preds_Endo_model'+str(i+1), 'preds_Trach_model'+str(i+1)]]
        y_true_ann_df = preds_df[['Chest_drain_Ann', 'NSG_tube_Ann', 'Endotracheal_tube_Ann', 'Tracheostomy_tube_Ann']]
        y_true_pad_df = preds_df[['Chest_drain_tube_PadChest', 'NSG_tube_PadChest', 'Endotracheal_tube_PadChest', 'Tracheostomy_tube_PadChest']]
        
        class_eces_ann = classwise_ECE(y_true_ann_df.to_numpy(), probs_model_df.to_numpy(), classes_list=classes_list, power=power, bins=bins)
        all_model_eces_ann.append(class_eces_ann)
        
        class_eces_pad = classwise_ECE(y_true_pad_df.to_numpy(), probs_model_df.to_numpy(), classes_list=classes_list, power=power, bins=bins)
        all_model_eces_pad.append(class_eces_pad)
        
    #print(all_model_eces_ann)
    #print(all_model_eces_pad)
    
    for c_idx, c in enumerate(classes_list):
        print('Class: ', c)
        print('Average CWCE Ann: ', round(sum([all_model_eces_ann[i][c_idx] for i in range(3)]) / 3, 5), 'with standard deviation: ', round(statistics.stdev([all_model_eces_ann[i][c_idx] for i in range(3)]), 5))
#        print('Average CWCE Ann: ', sum([all_model_eces_ann[i][c_idx] for i in range(3)]) / 3, 'with standard deviation: ', statistics.stdev([all_model_eces_ann[i][c_idx] for i in range(3)]))
        print('Average CWCE Pad: ', round(sum([all_model_eces_pad[i][c_idx] for i in range(3)]) / 3, 5), 'with standard deviation: ', round(statistics.stdev([all_model_eces_pad[i][c_idx] for i in range(3)]), 5))
        print()
    
    return preds_df
    # Right now, not printing the average class-wise calibration error


def classwise_ECE_three_models_CXR14(df_orig_preds, df_y_true, classes_list, power=1, bins=10):
        
    # Creating the preds df
    preds_df = get_perf_only_chd(df_orig_preds, df_y_true, print_auc=False)
    all_model_eces = []
    
    for i in range(3):
        probs_model_df = preds_df[['preds_CheD_model'+str(i+1)]]
        y_true_ann_df = preds_df[['Chest_drain_Ann']]

        class_eces = classwise_ECE(y_true_ann_df.to_numpy(), probs_model_df.to_numpy(), classes_list=classes_list, power=power, bins=bins)
        all_model_eces.append(class_eces)
        
    #print(all_model_eces)
    
    for c_idx, c in enumerate(classes_list):
        print('Class: ', c)
        print('Average CWCE Ann: ', round(sum([all_model_eces[i][c_idx] for i in range(3)]) / 3, 5), 'with standard deviation: ', round(statistics.stdev([all_model_eces[i][c_idx] for i in range(3)]), 5))
#        print('Average CWCE Ann: ', sum([all_model_eces[i][c_idx] for i in range(3)]) / 3, 'with standard deviation: ', statistics.stdev([all_model_eces[i][c_idx] for i in range(3)]))
        print()
    
    return preds_df
    # Right now, not printing the average class-wise calibration error
    

### Multiclass: DenseNet121 fine-tuned on PadChest, detecting 4 tubes

In [89]:
tube_types = ['Chest_drain_tube', 'NSG_tube', 'Endotracheal_tube', 'Tracheostomy_tube']
preds_df = classwise_ECE_three_models(TD_preds, true_labels, classes_list=tube_types, power=1, bins=10)

Class:  Chest_drain_tube
Average CWCE Ann:  0.05564 with standard deviation:  0.00089
Average CWCE Pad:  0.01045 with standard deviation:  0.00181

Class:  NSG_tube
Average CWCE Ann:  0.52769 with standard deviation:  0.00778
Average CWCE Pad:  0.04563 with standard deviation:  0.00973

Class:  Endotracheal_tube
Average CWCE Ann:  0.27189 with standard deviation:  0.01031
Average CWCE Pad:  0.08022 with standard deviation:  0.00929

Class:  Tracheostomy_tube
Average CWCE Ann:  0.11031 with standard deviation:  0.00506
Average CWCE Pad:  0.10173 with standard deviation:  0.00428



### Single class: DenseNet121 fine-tuned on PadChest, evaluated on CXR14, detecting only chest drains

In [90]:
tube_types = ['Chest_drain_tube']
preds_df = classwise_ECE_three_models_CXR14(TD_preds_CXR14, true_labels_CXR14, classes_list=tube_types)

Class:  Chest_drain_tube
Average CWCE Ann:  0.29346 with standard deviation:  0.0242

