# Model predictions for the pathology detection model

In [2]:
# Imports
import pandas as pd
import numpy as np
import ast
import re
import statistics
from sklearn.metrics import roc_auc_score

In [4]:
## Loading all the data

def _read_indexed_csv(csv_path):
    """Load one CSV file, using its first column as the row index."""
    return pd.read_csv(csv_path, index_col=0)


# Model predictions (the variable names encode the fine-tuning data and the data predicted on)
PD_preds_finePad_PredsPadchest = _read_indexed_csv('Predictions/PD_preds.csv')
PD_preds_finePad_PredsCXR14 = _read_indexed_csv('Predictions/PD_preds_CXR14.csv')

PD_preds_fineCXR14_PredsPadchest_5epochs = _read_indexed_csv('Predictions/PD_fine_CXR14_preds_padchest.csv')
PD_preds_fineCXR14_PredsCXR14 = _read_indexed_csv('Predictions/PD_fine_CXR14_preds_CXR14.csv')

# True labels
test_padchest = _read_indexed_csv('../Data/Data_splits/pathology_detection-test.csv')
annotations = _read_indexed_csv('../Data/Annotations/Annotations_aggregated.csv')
# Concatenating the tube and pathology test sets (rows of `annotations` are appended after `test_padchest`)
padchest_test_labels_ALL = pd.concat([test_padchest, annotations])

# Images predicted on
CXR14_pred_images = _read_indexed_csv('../Data/Data_splits/pathology_detection-CXR14_test.csv')
CXR14_test_combined_labels = _read_indexed_csv('../Data/Data_splits/pathology_detection-CXR14_test_combined.csv')
GCS16l = _read_indexed_csv("../Data/Data_splits/GCS16l.csv")
Bbox = _read_indexed_csv("../Data/Data_splits/Bbox.csv")
GCS4l = _read_indexed_csv("../Data/Data_splits/GCS4l.csv")
RSNA = _read_indexed_csv("../Data/Data_splits/RSNA.csv")

## Area Under the ROC Curve (AUC)

In [10]:
# Function for reading the predictions, which are strings, as numpy arrays
def str2array(s):
    """Parse the text form of a (possibly nested) numeric array into a numpy array.

    The text may separate numbers with whitespace, newlines and/or commas,
    e.g. ``"[[0.1 0.2]\\n [0.3 0.4]]"``. Separators are normalised to ``", "``
    so that the text becomes a Python list literal, which is then evaluated
    with ``ast.literal_eval`` (no arbitrary code is executed).

    Parameters
    ----------
    s : str
        Text representation of the array.

    Returns
    -------
    numpy.ndarray
        The parsed array.

    Raises
    ------
    ValueError, SyntaxError
        Propagated from ``ast.literal_eval`` when the normalised text is not
        a valid Python literal.
    """
    # Raw strings: '\[' and '\s' are regex escapes, not Python string escapes.
    # Remove space after [
    s = re.sub(r'\[ +', '[', s.strip())
    # Replace every run of commas and/or whitespace with a single ", "
    s = re.sub(r'[,\s]+', ', ', s)
    return np.array(ast.literal_eval(s))


# Function to arrange preds nicely in a df
def get_preds_multiclass(orig_pred_df, true_labels_df, print_auc=True):
    """Arrange the true labels and the three models' predictions in one DataFrame.

    Parameters
    ----------
    orig_pred_df : pandas.DataFrame
        Must contain the columns 'Preds_model1', 'Preds_model2' and
        'Preds_model3'. Only the first row is read. Each of these cells is
        parsed with ``str2array`` and indexed as ``[:, k]``, where ``k`` is the
        position of the pathology in the list defined below.
    true_labels_df : pandas.DataFrame
        Must contain 'ImagePath' and one column per pathology. Its rows are
        paired positionally with the rows of the parsed prediction arrays.
    print_auc : bool, default True
        When True, print for every pathology the mean and the sample standard
        deviation of the three models' ROC AUC (in percent, one decimal).
        When False, no AUC is computed at all.

    Returns
    -------
    pandas.DataFrame
        Columns: 'ImagePath', the five pathology label columns, then
        'preds_<pathology>_model<k>' for every pathology and k in 1..3.

    Raises
    ------
    IndexError
        If ``orig_pred_df`` has no rows.
    """
    import warnings

    pathologies = ['Effusion', 'Pneumothorax', 'Atelectasis', 'Cardiomegaly', 'Pneumonia']
    model_numbers = (1, 2, 3)

    # Parse each model's prediction text exactly once instead of re-parsing
    # the whole frame for every (row, pathology) combination.
    first_row = orig_pred_df.iloc[0]
    preds_by_model = {m: str2array(first_row['Preds_model' + str(m)]) for m in model_numbers}

    # Collect the columns in their final order: path, labels, then predictions
    columns = {'ImagePath': list(true_labels_df['ImagePath'])}
    for p in pathologies:
        columns[p] = list(true_labels_df[p])
    for p_idx, p in enumerate(pathologies):
        for m in model_numbers:
            columns['preds_' + p + '_model' + str(m)] = list(preds_by_model[m][:, p_idx])

    # Labels and predictions are matched purely by position. If their lengths
    # differ, keep the common prefix but make the mismatch visible.
    lengths = {len(values) for values in columns.values()}
    n_rows = min(lengths)
    if len(lengths) > 1:
        warnings.warn(
            "Labels and predictions have different lengths {}; "
            "only the first {} rows are used.".format(sorted(lengths), n_rows)
        )

    # Constructing a df with the preds and 'true' labels
    preds_df = pd.DataFrame({name: values[:n_rows] for name, values in columns.items()})

    # Computing the auc for each pathology separately
    if print_auc:
        for p in pathologies:
            print(p)
            auc_list = [
                roc_auc_score(preds_df[p], preds_df['preds_' + str(p) + '_model' + str(m)])
                for m in model_numbers
            ]
            print("Average auc:", round(sum(auc_list)/len(auc_list)*100, 1), "with standard deviation:", round(statistics.stdev(auc_list)*100,1))

            print()

    return preds_df


### Evaluating on PadChest

#### Fine-tuned on PadChest

In [11]:
# AUC of the PadChest-fine-tuned models on the PadChest test labels
preds_df = get_preds_multiclass(
    orig_pred_df=PD_preds_finePad_PredsPadchest,
    true_labels_df=padchest_test_labels_ALL,
)

Effusion
Average auc: 94.2 with standard deviation: 0.1

Pneumothorax
Average auc: 81.4 with standard deviation: 2.3

Atelectasis
Average auc: 86.9 with standard deviation: 0.1

Cardiomegaly
Average auc: 89.2 with standard deviation: 0.2

Pneumonia
Average auc: 81.0 with standard deviation: 0.3



#### Fine-tuned on PadChest AND then CXR14 for 5 epochs

In [12]:
# AUC of the models additionally fine-tuned on CXR14, on the PadChest test labels
preds_df = get_preds_multiclass(
    orig_pred_df=PD_preds_fineCXR14_PredsPadchest_5epochs,
    true_labels_df=padchest_test_labels_ALL,
)

Effusion
Average auc: 92.0 with standard deviation: 0.1

Pneumothorax
Average auc: 76.9 with standard deviation: 0.1

Atelectasis
Average auc: 84.8 with standard deviation: 0.1

Cardiomegaly
Average auc: 85.6 with standard deviation: 0.1

Pneumonia
Average auc: 77.6 with standard deviation: 0.1



### Evaluating on ChestX-ray14 (CXR14)

In [6]:
# Function to arrange preds nicely in a df
def get_preds_multiclass_CXR14(orig_pred_df, true_labels_df):
    """Arrange the three models' predictions next to the predicted image paths.

    Unlike ``get_preds_multiclass`` this does not attach pathology labels and
    does not compute any metric; the labels are merged in afterwards.

    Parameters
    ----------
    orig_pred_df : pandas.DataFrame
        Must contain the columns 'Preds_model1', 'Preds_model2' and
        'Preds_model3'. Only the first row is read. Each of these cells is
        parsed with ``str2array`` and indexed as ``[:, k]``, where ``k`` is the
        position of the pathology in the list defined below.
    true_labels_df : pandas.DataFrame
        Must contain the columns 'ImagePath' and 'DummyValue'. Its rows are
        paired positionally with the rows of the parsed prediction arrays.

    Returns
    -------
    pandas.DataFrame
        Columns: 'ImagePath', 'DummyValue', then 'preds_<pathology>_model<k>'
        for every pathology and k in 1..3.

    Raises
    ------
    IndexError
        If ``orig_pred_df`` has no rows.
    """
    import warnings

    pathologies = ['Effusion', 'Pneumothorax', 'Atelectasis', 'Cardiomegaly', 'Pneumonia']
    model_numbers = (1, 2, 3)

    # Parse each model's prediction text exactly once instead of re-parsing
    # the whole frame for every (row, pathology) combination.
    first_row = orig_pred_df.iloc[0]
    preds_by_model = {m: str2array(first_row['Preds_model' + str(m)]) for m in model_numbers}

    # Collect the columns in their final order: path, dummy value, predictions
    columns = {
        'ImagePath': list(true_labels_df['ImagePath']),
        'DummyValue': list(true_labels_df['DummyValue']),
    }
    for p_idx, p in enumerate(pathologies):
        for m in model_numbers:
            columns['preds_' + p + '_model' + str(m)] = list(preds_by_model[m][:, p_idx])

    # Image paths and predictions are matched purely by position. If their
    # lengths differ, keep the common prefix but make the mismatch visible.
    lengths = {len(values) for values in columns.values()}
    n_rows = min(lengths)
    if len(lengths) > 1:
        warnings.warn(
            "Image list and predictions have different lengths {}; "
            "only the first {} rows are used.".format(sorted(lengths), n_rows)
        )

    # Constructing a df with the preds and the image paths
    preds_df = pd.DataFrame({name: values[:n_rows] for name, values in columns.items()})

    return preds_df

def print_pathology_perf(preds_df, pathologies=['Effusion', 'Pneumothorax', 'Atelectasis', 'Cardiomegaly', 'Pneumonia']):
    """Print the mean and spread of the three models' ROC AUC for each pathology.

    Parameters
    ----------
    preds_df : pandas.DataFrame
        Must contain, for every requested pathology, the label column
        '<pathology>' and the score columns 'preds_<pathology>_model1' to
        'preds_<pathology>_model3'.
    pathologies : list of str
        Pathologies to report. The default list is only read, never modified.

    Returns
    -------
    None
        For each pathology the name is printed, followed by the average AUC
        and the sample standard deviation over the three models (both in
        percent, rounded to five decimals) and an empty line.
    """
    for pathology in pathologies:
        print(pathology)

        # One AUC per model, using the same label column for all three
        labels = preds_df[pathology]
        scores = []
        for model_no in (1, 2, 3):
            score_column = 'preds_' + str(pathology) + '_model' + str(model_no)
            scores.append(roc_auc_score(labels, preds_df[score_column]))

        mean_pct = sum(scores) / len(scores) * 100
        std_pct = statistics.stdev(scores) * 100
        print("Average auc:", round(mean_pct, 5), "with standard deviation:", round(std_pct, 5))

        print()
        

#### Fine-tuned on PadChest, evaluating on the combined/aggregated CXR14 test set

In [None]:
# Prediction table of the PadChest-fine-tuned models for the CXR14 images predicted on
preds_df = get_preds_multiclass_CXR14(
    orig_pred_df=PD_preds_finePad_PredsCXR14,
    true_labels_df=CXR14_pred_images,
)

# The last 16 characters of each image path are used as the 'Image Index' merge key
preds_df['Image Index'] = [image_path[-16:] for image_path in preds_df['ImagePath']]

In [53]:
# Left-join the predictions onto the combined CXR14 labels; the two printed
# lengths show whether the join changed the number of rows
print(len(CXR14_test_combined_labels))
CXR14_test_combined_preds = pd.merge(
    CXR14_test_combined_labels,
    preds_df,
    how='left',
    on=['ImagePath', 'Image Index'],
)
print(len(CXR14_test_combined_preds))

print_pathology_perf(preds_df=CXR14_test_combined_preds)

1664
1664
Effusion
Average auc: 80.71155 with standard deviation: 0.48461

Pneumothorax
Average auc: 56.49444 with standard deviation: 2.95551

Atelectasis
Average auc: 65.37522 with standard deviation: 0.67667

Cardiomegaly
Average auc: 81.36595 with standard deviation: 0.7267

Pneumonia
Average auc: 67.54732 with standard deviation: 1.01999



#### Fine-tuned on PadChest, evaluating on the BBox test set

In [52]:
# Left-join the current predictions onto the BBox labels by 'Image Index';
# the two printed lengths show whether the join changed the number of rows
print(len(Bbox))
Bbox_preds = pd.merge(Bbox, preds_df, how='left', on='Image Index')
print(len(Bbox_preds))

print_pathology_perf(preds_df=Bbox_preds)

880
880
Effusion
Average auc: 74.01594 with standard deviation: 0.36982

Pneumothorax
Average auc: 50.90427 with standard deviation: 1.61266

Atelectasis
Average auc: 51.59444 with standard deviation: 0.80441

Cardiomegaly
Average auc: 77.66259 with standard deviation: 1.55262

Pneumonia
Average auc: 60.27449 with standard deviation: 1.77085



#### Fine-tuned on PadChest, evaluating on the GCS16L test set

In [55]:
# Left-join the current predictions onto the GCS16l labels by 'Image Index';
# the two printed lengths show whether the join changed the number of rows
print(len(GCS16l))
GCS16l_preds = pd.merge(GCS16l, preds_df, how='left', on='Image Index')
print(len(GCS16l_preds))

print_pathology_perf(preds_df=GCS16l_preds)

810
810
Effusion
Average auc: 90.32332 with standard deviation: 0.38269

Pneumothorax
Average auc: 63.21966 with standard deviation: 5.21036

Atelectasis
Average auc: 83.18568 with standard deviation: 0.75017

Cardiomegaly
Average auc: 86.07879 with standard deviation: 0.51087

Pneumonia
Average auc: 87.60314 with standard deviation: 6.83117



#### Fine-tuned on PadChest, evaluating on the GCS4L test set

In [51]:
# Left-join the current predictions onto the GCS4l labels by 'Image Index';
# the two printed lengths show whether the join changed the number of rows
print(len(GCS4l))
GCS4l_preds = pd.merge(GCS4l, preds_df, how='left', on='Image Index')
print(len(GCS4l_preds))

# Only Pneumothorax is evaluated on this set
print_pathology_perf(preds_df=GCS4l_preds, pathologies=['Pneumothorax'])

4376
4376
Pneumothorax
Average auc: 54.83099 with standard deviation: 2.16114



#### Fine-tuned on PadChest, evaluating on the RSNA test set

In [57]:
# Left-join the current predictions onto the RSNA labels by 'Image Index';
# the two printed lengths show whether the join changed the number of rows
print(len(RSNA))
RSNA_preds = pd.merge(RSNA, preds_df, how='left', on='Image Index')
print(len(RSNA_preds))

# Only Pneumonia is evaluated on this set
print_pathology_perf(preds_df=RSNA_preds, pathologies=['Pneumonia'])

26684
26684
Pneumonia
Average auc: 70.82156 with standard deviation: 0.76744



#### Fine-tuned on PadChest AND then CXR14 for 5 epochs, evaluating on the combined/aggregated CXR14 test set

In [None]:
# Prediction table of the models additionally fine-tuned on CXR14, for the CXR14 images predicted on
preds_df = get_preds_multiclass_CXR14(
    orig_pred_df=PD_preds_fineCXR14_PredsCXR14,
    true_labels_df=CXR14_pred_images,
)

# The last 16 characters of each image path are used as the 'Image Index' merge key
preds_df['Image Index'] = [image_path[-16:] for image_path in preds_df['ImagePath']]

In [59]:
# Left-join the predictions onto the combined CXR14 labels; the two printed
# lengths show whether the join changed the number of rows
print(len(CXR14_test_combined_labels))
CXR14_test_combined_preds = pd.merge(
    CXR14_test_combined_labels,
    preds_df,
    how='left',
    on=['ImagePath', 'Image Index'],
)
print(len(CXR14_test_combined_preds))

print_pathology_perf(preds_df=CXR14_test_combined_preds)

1664
1664
Effusion
Average auc: 79.96185 with standard deviation: 0.03593

Pneumothorax
Average auc: 54.81671 with standard deviation: 0.18866

Atelectasis
Average auc: 66.86635 with standard deviation: 0.10262

Cardiomegaly
Average auc: 80.05033 with standard deviation: 0.46381

Pneumonia
Average auc: 63.47249 with standard deviation: 0.29401



#### Fine-tuned on PadChest AND then CXR14 for 5 epochs, evaluating on the BBox test set

In [60]:
# Left-join the current predictions onto the BBox labels by 'Image Index';
# the two printed lengths show whether the join changed the number of rows
print(len(Bbox))
Bbox_preds = pd.merge(Bbox, preds_df, how='left', on='Image Index')
print(len(Bbox_preds))

print_pathology_perf(preds_df=Bbox_preds)

880
880
Effusion
Average auc: 74.30003 with standard deviation: 0.03841

Pneumothorax
Average auc: 50.58284 with standard deviation: 0.13234

Atelectasis
Average auc: 55.0328 with standard deviation: 0.11796

Cardiomegaly
Average auc: 78.62871 with standard deviation: 0.50841

Pneumonia
Average auc: 57.6451 with standard deviation: 0.12534



#### Fine-tuned on PadChest AND then CXR14 for 5 epochs, evaluating on the GCS16L test set

In [61]:
# Left-join the current predictions onto the GCS16l labels by 'Image Index';
# the two printed lengths show whether the join changed the number of rows
print(len(GCS16l))
GCS16l_preds = pd.merge(GCS16l, preds_df, how='left', on='Image Index')
print(len(GCS16l_preds))

print_pathology_perf(preds_df=GCS16l_preds)

810
810
Effusion
Average auc: 87.56465 with standard deviation: 0.06872

Pneumothorax
Average auc: 60.7774 with standard deviation: 0.25046

Atelectasis
Average auc: 83.47123 with standard deviation: 0.09706

Cardiomegaly
Average auc: 83.94949 with standard deviation: 0.19262

Pneumonia
Average auc: 88.92327 with standard deviation: 0.10718



#### Fine-tuned on PadChest AND then CXR14 for 5 epochs, evaluating on the GCS4L test set

In [62]:
# Left-join the current predictions onto the GCS4l labels by 'Image Index';
# the two printed lengths show whether the join changed the number of rows
print(len(GCS4l))
GCS4l_preds = pd.merge(GCS4l, preds_df, how='left', on='Image Index')
print(len(GCS4l_preds))

# Only Pneumothorax is evaluated on this set
print_pathology_perf(preds_df=GCS4l_preds, pathologies=['Pneumothorax'])

4376
4376
Pneumothorax
Average auc: 54.19752 with standard deviation: 0.18769



#### Fine-tuned on PadChest AND then CXR14 for 5 epochs, evaluating on the RSNA test set

In [63]:
# Left-join the current predictions onto the RSNA labels by 'Image Index';
# the two printed lengths show whether the join changed the number of rows
print(len(RSNA))
RSNA_preds = pd.merge(RSNA, preds_df, how='left', on='Image Index')
print(len(RSNA_preds))

# Only Pneumonia is evaluated on this set
print_pathology_perf(preds_df=RSNA_preds, pathologies=['Pneumonia'])

26684
26684
Pneumonia
Average auc: 64.38903 with standard deviation: 0.55552



## Implementation of Class-Wise Calibration Error (CWCE)

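Binary Expected Calibration Error, where the $N$ predictions are grouped into $M$ equal-width probability bins $B_i$, $\bar{y}(B_i)$ is the mean true label (fraction of positives) in bin $i$, and $\bar{p}(B_i)$ is the mean predicted probability in bin $i$:
$$ \text{binary-ECE}  = \sum_{i=1}^M \frac{|B_{i}|}{N} |
        \bar{y}(B_{i}) - \bar{p}(B_{i})| $$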

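Class-wise Expected Calibration Error, i.e. the binary ECE computed separately for each of the $K$ classes (with $B_{i,j}$ the $i$-th bin of the class-$j$ probabilities) and then averaged over the classes:
$$ \text{class-$j$-ECE}  = \sum_{i=1}^M \frac{|B_{i,j}|}{N}
        |\bar{y}_j(B_{i,j}) - \bar{p}_j(B_{i,j})|,
        \qquad
        \text{classwise-ECE}  = \frac{1}{K}\sum_{j=1}^K \text{class-$j$-ECE} $$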

In [15]:
def binary_ECE(y_true, probs, power=1, bins=10):
    r"""
    Binary Expected Calibration Error

    The interval [0, 1] is split into ``bins`` equal-width bins. For every
    non-empty bin, the absolute difference between the mean predicted
    probability and the mean true label is raised to ``power`` and weighted by
    the fraction of samples in that bin; the weighted values are summed.

    Parameters
    ----------
    y_true : indicator vector (n_samples, )
        True labels. Any array-like is accepted.
    probs : vector (n_samples, )
        Predicted probabilities for positive class. Any array-like is accepted.
    power : number, default 1
        Exponent applied to each bin's absolute difference.
    bins : int, default 10
        Number of equal-width bins over [0, 1]; must be at least 1.

    Returns
    -------
    score : float
        The integer 0 is returned when there are no samples.
    """
    y_true = np.asarray(y_true)
    probs = np.asarray(probs)

    # 'bins + 1' evenly spaced edges over [0, 1] delimit 'bins' intervals
    bin_edges = np.linspace(start=0, stop=1, num=bins + 1)

    # np.digitize returns 1-based interval indices, so shift them to start at 0.
    # A probability of exactly 1.0 (or anything outside [0, 1]) would fall
    # beyond the last/first edge and create an extra bin, so clip it into the
    # nearest valid bin to keep exactly 'bins' bins.
    bin_ids = np.clip(np.digitize(x=probs, bins=bin_edges) - 1, 0, bins - 1)

    n_samples = len(probs)
    ece = 0

    # Only bins that actually contain samples are visited
    for bin_id in np.unique(bin_ids):
        in_bin = bin_ids == bin_id                      # Boolean mask of the samples in this bin
        probs_bin_mean = np.mean(probs[in_bin])         # Mean of probs in the bin
        true_bin_mean = np.mean(y_true[in_bin])         # Mean of true values in the bin
        diff_power = np.abs(probs_bin_mean - true_bin_mean) ** power
        ece += diff_power * np.sum(in_bin) / n_samples  # Weight by the fraction of samples in the bin

    return ece


def classwise_ECE(y_true, probs, classes_list, power=1, bins=10, print_ece=False):
    r"""Per-class Expected Calibration Errors

    Calls ``binary_ECE`` on column ``c`` of ``y_true`` and ``probs`` for every
    position ``c`` in ``classes_list``. The per-class values are returned
    individually; they are not averaged here.

    Parameters
    ----------
    y_true : label indicator matrix (n_samples, n_classes)
        True labels.
    probs : matrix (n_samples, n_classes)
        Predicted probabilities.
    classes_list : sequence of str
        Class names; its length sets how many columns are evaluated and its
        entries are used when printing.
    power, bins :
        Passed on unchanged to ``binary_ECE``.
    print_ece : bool, default False
        When True, print each class's ECE rounded to three decimals.

    Returns
    -------
    class_eces : list
        One ``binary_ECE`` result per class, in the order of ``classes_list``.
    """
    class_eces = []

    for class_idx in range(len(classes_list)):
        labels_column = y_true[:, class_idx]
        probs_column = probs[:, class_idx]
        class_ece = binary_ECE(labels_column, probs_column, power=power, bins=bins)

        if print_ece:
            print('ECE for {}: {}'.format(classes_list[class_idx], round(class_ece, 3)))

        class_eces.append(class_ece)

    return class_eces

    
def classwise_ECE_three_models(df_orig, df_y_true, classes_list, power=1, bins=10):
    """Print the class-wise calibration error averaged over the three models.

    The prediction table is built with ``get_preds_multiclass`` (without
    printing AUCs). For every model the per-class ECE is computed with
    ``classwise_ECE``; for every class the mean and the sample standard
    deviation over the three models are printed, rounded to five decimals.

    Parameters
    ----------
    df_orig : pandas.DataFrame
        Raw prediction frame, passed to ``get_preds_multiclass``.
    df_y_true : pandas.DataFrame
        True-label frame, passed to ``get_preds_multiclass``.
    classes_list : sequence of str
        Classes to evaluate. The label column '<class>' and the score columns
        'preds_<class>_model<k>' are selected for exactly these classes, so a
        subset or a different order is reported under the correct names.
    power, bins :
        Passed on unchanged to ``classwise_ECE``.

    Returns
    -------
    pandas.DataFrame
        The prediction table produced by ``get_preds_multiclass``.
    """
    # A list is required for selecting several DataFrame columns at once
    classes_list = list(classes_list)
    model_numbers = (1, 2, 3)

    # Creating the preds df
    preds_df = get_preds_multiclass(df_orig, df_y_true, print_auc=False)

    # The true labels are identical for all models, so extract them once
    y_true = preds_df[classes_list].to_numpy()

    all_model_eces = []
    for m in model_numbers:
        prob_columns = ['preds_' + c + '_model' + str(m) for c in classes_list]
        probs = preds_df[prob_columns].to_numpy()
        all_model_eces.append(classwise_ECE(y_true, probs, classes_list=classes_list, power=power, bins=bins))

    for c_idx, c in enumerate(classes_list):
        eces = [model_eces[c_idx] for model_eces in all_model_eces]   # This class's ECE for each model
        print('Class: ', c)
        print('Average CWCE: ', round(sum(eces) / len(eces), 5), 'with standard deviation: ', round(statistics.stdev(eces), 5))
        print()

    # The average over the classes is not printed here
    return preds_df
        

### Evaluating on PadChest

#### Fine-tuned on PadChest

In [16]:
# Class-wise calibration error of the PadChest-fine-tuned models on the PadChest test labels
pathologies = ('Effusion', 'Pneumothorax', 'Atelectasis', 'Cardiomegaly', 'Pneumonia')
preds_df = classwise_ECE_three_models(
    df_orig=PD_preds_finePad_PredsPadchest,
    df_y_true=padchest_test_labels_ALL,
    classes_list=pathologies,
    power=1,
    bins=10,
)

Class:  Effusion
Average CWCE:  0.00728 with standard deviation:  0.00104

Class:  Pneumothorax
Average CWCE:  0.00033 with standard deviation:  5e-05

Class:  Atelectasis
Average CWCE:  0.00362 with standard deviation:  0.00057

Class:  Cardiomegaly
Average CWCE:  0.00911 with standard deviation:  0.0028

Class:  Pneumonia
Average CWCE:  0.00731 with standard deviation:  0.0015



#### Fine-tuned on PadChest AND then CXR14 for 5 epochs

In [17]:
# Class-wise calibration error of the models additionally fine-tuned on CXR14, on the PadChest test labels
pathologies = ('Effusion', 'Pneumothorax', 'Atelectasis', 'Cardiomegaly', 'Pneumonia')
preds_df = classwise_ECE_three_models(
    df_orig=PD_preds_fineCXR14_PredsPadchest_5epochs,
    df_y_true=padchest_test_labels_ALL,
    classes_list=pathologies,
    power=1,
    bins=10,
)

Class:  Effusion
Average CWCE:  0.04411 with standard deviation:  0.0002

Class:  Pneumothorax
Average CWCE:  0.00319 with standard deviation:  1e-05

Class:  Atelectasis
Average CWCE:  0.01252 with standard deviation:  0.00076

Class:  Cardiomegaly
Average CWCE:  0.05667 with standard deviation:  0.00225

Class:  Pneumonia
Average CWCE:  0.04163 with standard deviation:  3e-05



### Evaluating on ChestX-ray14 (CXR14)

In [23]:
def classwise_ECE_three_models_CXR14(df_orig_preds, df_y_true, classes_list, power=1, bins=10, pred_images_df=None):
    """Print the class-wise calibration error on a CXR14 label set, averaged over the three models.

    The prediction table is built with ``get_preds_multiclass_CXR14``, given an
    'Image Index' key (the last 16 characters of 'ImagePath'), and left-joined
    onto ``df_y_true`` on that key. For every model the per-class ECE is
    computed with ``classwise_ECE``; for every class the mean and the sample
    standard deviation over the three models are printed, rounded to five
    decimals.

    Parameters
    ----------
    df_orig_preds : pandas.DataFrame
        Raw prediction frame, passed to ``get_preds_multiclass_CXR14``.
    df_y_true : pandas.DataFrame
        Label frame containing 'Image Index' and one column per class in
        ``classes_list``.
    classes_list : sequence of str
        Classes to evaluate; any sequence (list or tuple) is accepted.
    power, bins :
        Passed on unchanged to ``classwise_ECE``.
    pred_images_df : pandas.DataFrame, optional
        Frame listing the images the predictions were made on, passed to
        ``get_preds_multiclass_CXR14``. Defaults to the notebook-level
        ``CXR14_pred_images``.

    Returns
    -------
    pandas.DataFrame
        The prediction table (with the added 'Image Index' column), not the
        merged evaluation frame.
    """
    # A list is required for selecting several DataFrame columns at once;
    # a tuple would be interpreted as one single column key.
    classes_list = list(classes_list)
    model_numbers = (1, 2, 3)

    if pred_images_df is None:
        pred_images_df = CXR14_pred_images

    # Creating the preds df
    preds_df = get_preds_multiclass_CXR14(df_orig_preds, pred_images_df)

    # Adding a column with the image index taken from the image path column
    preds_df['Image Index'] = [image_path[-16:] for image_path in preds_df['ImagePath']]

    # Merging the preds df with the true labels df (every label row is kept)
    eval_df = df_y_true.merge(preds_df, how='left', on=['Image Index'])

    # The true labels are identical for all models, so extract them once
    y_true = eval_df[classes_list].to_numpy()

    all_model_eces = []
    for m in model_numbers:
        prob_columns = ['preds_' + c + '_model' + str(m) for c in classes_list]
        probs = eval_df[prob_columns].to_numpy()
        all_model_eces.append(classwise_ECE(y_true, probs, classes_list=classes_list, power=power, bins=bins))

    for c_idx, c in enumerate(classes_list):
        eces = [model_eces[c_idx] for model_eces in all_model_eces]   # This class's ECE for each model
        print('Class: ', c)
        print('Average CWCE: ', round(sum(eces) / len(eces), 5), 'with standard deviation: ', round(statistics.stdev(eces), 5))
        print()

    # The average over the classes is not printed here
    return preds_df

#### Fine-tuned on PadChest, evaluating on the combined/aggregated CXR14 test set

In [19]:
# CWCE of the PadChest-fine-tuned models against the combined CXR14 labels
pathologies = ['Effusion', 'Pneumothorax', 'Atelectasis', 'Cardiomegaly', 'Pneumonia']
preds = classwise_ECE_three_models_CXR14(
    df_orig_preds=PD_preds_finePad_PredsCXR14,
    df_y_true=CXR14_test_combined_labels,
    classes_list=pathologies,
    power=1,
    bins=10,
)

Class:  Effusion
Average CWCE:  0.08058 with standard deviation:  0.0093

Class:  Pneumothorax
Average CWCE:  0.13676 with standard deviation:  0.00065

Class:  Atelectasis
Average CWCE:  0.23772 with standard deviation:  0.008

Class:  Cardiomegaly
Average CWCE:  0.08451 with standard deviation:  0.00229

Class:  Pneumonia
Average CWCE:  0.06021 with standard deviation:  0.00242



#### Fine-tuned on PadChest, evaluating on the BBox test set

In [18]:
# CWCE of the PadChest-fine-tuned models against the BBox labels
pathologies = ['Effusion', 'Pneumothorax', 'Atelectasis', 'Cardiomegaly', 'Pneumonia']
preds = classwise_ECE_three_models_CXR14(
    df_orig_preds=PD_preds_finePad_PredsCXR14,
    df_y_true=Bbox,
    classes_list=pathologies,
    power=1,
    bins=10,
)

Class:  Effusion
Average CWCE:  0.11262 with standard deviation:  0.01193

Class:  Pneumothorax
Average CWCE:  0.10439 with standard deviation:  0.00084

Class:  Atelectasis
Average CWCE:  0.15474 with standard deviation:  0.00317

Class:  Cardiomegaly
Average CWCE:  0.10789 with standard deviation:  0.00326

Class:  Pneumonia
Average CWCE:  0.06286 with standard deviation:  0.00875



#### Fine-tuned on PadChest, evaluating on the GCS16L test set

In [28]:
# CWCE of the PadChest-fine-tuned models against the GCS16l labels
pathologies = ['Effusion', 'Pneumothorax', 'Atelectasis', 'Cardiomegaly', 'Pneumonia']
preds = classwise_ECE_three_models_CXR14(
    df_orig_preds=PD_preds_finePad_PredsCXR14,
    df_y_true=GCS16l,
    classes_list=pathologies,
    power=1,
    bins=10,
)

Class:  Effusion
Average CWCE:  0.1184 with standard deviation:  0.01817

Class:  Pneumothorax
Average CWCE:  0.16239 with standard deviation:  0.00045

Class:  Atelectasis
Average CWCE:  0.33576 with standard deviation:  0.00781

Class:  Cardiomegaly
Average CWCE:  0.05882 with standard deviation:  0.00099

Class:  Pneumonia
Average CWCE:  0.07698 with standard deviation:  0.00824



#### Fine-tuned on PadChest, evaluating on the GCS4L test set

In [24]:
# CWCE of the PadChest-fine-tuned models against the GCS4l labels (Pneumothorax only)
pathologies = ['Pneumothorax']
preds = classwise_ECE_three_models_CXR14(
    df_orig_preds=PD_preds_finePad_PredsCXR14,
    df_y_true=GCS4l,
    classes_list=pathologies,
    power=1,
    bins=10,
)

Class:  Pneumothorax
Average CWCE:  0.04751 with standard deviation:  0.00092



#### Fine-tuned on PadChest, evaluating on the RSNA test set

In [29]:
# CWCE of the PadChest-fine-tuned models against the RSNA labels (Pneumonia only)
pathologies = ['Pneumonia']
preds = classwise_ECE_three_models_CXR14(
    df_orig_preds=PD_preds_finePad_PredsCXR14,
    df_y_true=RSNA,
    classes_list=pathologies,
    power=1,
    bins=10,
)

Class:  Pneumonia
Average CWCE:  0.14441 with standard deviation:  0.00697



#### Fine-tuned on PadChest AND then CXR14 for 5 epochs, evaluating on the combined/aggregated CXR14 test set

In [30]:
# CWCE of the models additionally fine-tuned on CXR14, against the combined CXR14 labels
pathologies = ['Effusion', 'Pneumothorax', 'Atelectasis', 'Cardiomegaly', 'Pneumonia']
preds = classwise_ECE_three_models_CXR14(
    df_orig_preds=PD_preds_fineCXR14_PredsCXR14,
    df_y_true=CXR14_test_combined_labels,
    classes_list=pathologies,
    power=1,
    bins=10,
)

Class:  Effusion
Average CWCE:  0.15555 with standard deviation:  0.00126

Class:  Pneumothorax
Average CWCE:  0.14271 with standard deviation:  1e-05

Class:  Atelectasis
Average CWCE:  0.2105 with standard deviation:  0.00161

Class:  Cardiomegaly
Average CWCE:  0.11509 with standard deviation:  0.00125

Class:  Pneumonia
Average CWCE:  0.13471 with standard deviation:  5e-05



#### Fine-tuned on PadChest AND then CXR14 for 5 epochs, evaluating on the BBox test set

In [31]:
# CWCE of the models additionally fine-tuned on CXR14, against the BBox labels
pathologies = ['Effusion', 'Pneumothorax', 'Atelectasis', 'Cardiomegaly', 'Pneumonia']
preds = classwise_ECE_three_models_CXR14(
    df_orig_preds=PD_preds_fineCXR14_PredsCXR14,
    df_y_true=Bbox,
    classes_list=pathologies,
    power=1,
    bins=10,
)

Class:  Effusion
Average CWCE:  0.09945 with standard deviation:  0.00243

Class:  Pneumothorax
Average CWCE:  0.11101 with standard deviation:  1e-05

Class:  Atelectasis
Average CWCE:  0.1223 with standard deviation:  0.00203

Class:  Cardiomegaly
Average CWCE:  0.14431 with standard deviation:  0.00137

Class:  Pneumonia
Average CWCE:  0.13384 with standard deviation:  4e-05



#### Fine-tuned on PadChest AND then CXR14 for 5 epochs, evaluating on the GCS16L test set

In [32]:
# CWCE of the models additionally fine-tuned on CXR14, against the GCS16l labels
pathologies = ['Effusion', 'Pneumothorax', 'Atelectasis', 'Cardiomegaly', 'Pneumonia']
preds = classwise_ECE_three_models_CXR14(
    df_orig_preds=PD_preds_fineCXR14_PredsCXR14,
    df_y_true=GCS16l,
    classes_list=pathologies,
    power=1,
    bins=10,
)

Class:  Effusion
Average CWCE:  0.21922 with standard deviation:  0.00112

Class:  Pneumothorax
Average CWCE:  0.16762 with standard deviation:  1e-05

Class:  Atelectasis
Average CWCE:  0.31464 with standard deviation:  0.00148

Class:  Cardiomegaly
Average CWCE:  0.08258 with standard deviation:  0.00114

Class:  Pneumonia
Average CWCE:  0.0004 with standard deviation:  5e-05



#### Fine-tuned on PadChest AND then CXR14 for 5 epochs, evaluating on the GCS4L test set

In [33]:
# CWCE of the models additionally fine-tuned on CXR14, against the GCS4l labels (Pneumothorax only)
pathologies = ['Pneumothorax']
preds = classwise_ECE_three_models_CXR14(
    df_orig_preds=PD_preds_fineCXR14_PredsCXR14,
    df_y_true=GCS4l,
    classes_list=pathologies,
    power=1,
    bins=10,
)

Class:  Pneumothorax
Average CWCE:  0.05404 with standard deviation:  1e-05



#### Fine-tuned on PadChest AND then CXR14 for 5 epochs, evaluating on the RSNA test set

In [34]:
# CWCE of the models additionally fine-tuned on CXR14, against the RSNA labels (Pneumonia only)
pathologies = ['Pneumonia']
preds = classwise_ECE_three_models_CXR14(
    df_orig_preds=PD_preds_fineCXR14_PredsCXR14,
    df_y_true=RSNA,
    classes_list=pathologies,
    power=1,
    bins=10,
)

Class:  Pneumonia
Average CWCE:  0.22334 with standard deviation:  4e-05

