# Model predictions for the pathology detection model using segmented images

In [5]:
# Imports
import pandas as pd
import numpy as np
import ast
import re
import statistics
from sklearn.metrics import roc_auc_score

In [20]:
## Loading all the data

def _read_indexed_csv(path):
    """Read a CSV file, using its first column as the DataFrame index."""
    return pd.read_csv(path, index_col=0)


# Predictions of the models trained on the original PadChest images
S_preds_PadChest_PredsNoLungs = _read_indexed_csv('../Task-Pathology_detection/Predictions/PD_preds_nolungs.csv')
S_preds_PadChest_PredsOnlyLungs = _read_indexed_csv('../Task-Pathology_detection/Predictions/PD_preds_onlylungs.csv')

# Predictions of the models trained on images with no lungs
S_preds_nolungs_PredsPadChest = _read_indexed_csv('Predictions/PD_SEG_nolungs_preds_padchest.csv')
S_preds_nolungs_PredsNoLungs = _read_indexed_csv('Predictions/PD_SEG_nolungs_preds_nolungs.csv')
S_preds_nolungs_PredsOnlyLungs = _read_indexed_csv('Predictions/PD_SEG_nolungs_preds_onlylungs.csv')

# Predictions of the models trained on images with only lungs
S_preds_onlylungs_PredsPadChest = _read_indexed_csv('Predictions/PD_SEG_onlylungs_preds_padchest.csv')
S_preds_onlylungs_PredsNoLungs = _read_indexed_csv('Predictions/PD_SEG_onlylungs_preds_nolungs.csv')
S_preds_onlylungs_PredsOnlyLungs = _read_indexed_csv('Predictions/PD_SEG_onlylungs_preds_onlylungs.csv')

# True labels: the pathology-detection test split and the aggregated annotations,
# stacked row-wise into a single frame (original indices are kept as-is)
test_padchest = _read_indexed_csv('../Data/Data_splits/pathology_detection-test.csv')
annotations = _read_indexed_csv('../Data/Annotations/Annotations_aggregated.csv')
padchest_test_labels_ALL = pd.concat([test_padchest, annotations])

## Area Under the ROC Curve (AUC)

In [11]:
# Function for reading the predictions, which are strings, as numpy arrays
def str2array(s):
    """Parse the printed (string) form of a numpy array back into a numpy array.

    The string is normalised into a valid Python list literal (elements
    separated by ', ') and then evaluated with ``ast.literal_eval``.

    Parameters
    ----------
    s : str
        Text such as ``"[[0.1 0.2]\\n [0.3 0.4]]"`` or ``"[1, 2, 3]"``.

    Returns
    -------
    numpy.ndarray
        Array built from the parsed (possibly nested) list.

    Raises
    ------
    ValueError, SyntaxError
        Propagated from ``ast.literal_eval`` when the normalised text is not a
        valid Python literal.
    """
    # Raw strings are used for the patterns: '\[' and '\s' are not valid Python
    # string escapes and trigger a warning when written in a normal string.
    # Remove space after [
    s = re.sub(r'\[ +', '[', s.strip())
    # Replace every run of commas and/or whitespace by a single ', ' separator
    s = re.sub(r'[,\s]+', ', ', s)
    return np.array(ast.literal_eval(s))


# Function to arrange preds nicely in a df
def get_preds_multiclass(orig_pred_df, true_labels_df, print_auc=True):
    """Arrange the true labels and the predictions of the three models in one DataFrame.

    Parameters
    ----------
    orig_pred_df : pandas.DataFrame
        Predictions as loaded from CSV. Only the FIRST row is used: its
        'Preds_model1', 'Preds_model2' and 'Preds_model3' cells must each hold
        the string form of a 2-D array with one row per sample and one column
        per pathology, in the order of `pathologies` below.
    true_labels_df : pandas.DataFrame
        Must contain the columns 'Effusion', 'Pneumothorax', 'Atelectasis',
        'Cardiomegaly' and 'Pneumonia'. Its rows are paired positionally with
        the rows of the prediction arrays.
    print_auc : bool, default True
        If True, print for every pathology the AUC averaged over the three
        models (in percent) together with its standard deviation.

    Returns
    -------
    pandas.DataFrame
        Columns: the five label columns followed by
        'preds_<pathology>_model<k>' for every pathology and k = 1..3.
        If the labels and the predictions differ in length, the frame is
        truncated to the shortest of them (behaviour of ``zip``).

    Raises
    ------
    IndexError
        If `orig_pred_df` has no rows.
    """
    pathologies = ['Effusion', 'Pneumothorax', 'Atelectasis', 'Cardiomegaly', 'Pneumonia']
    n_models = 3

    # Parse each model's prediction string exactly once. Only the first row of
    # the prediction frame ends up in the result, so nothing else is parsed.
    first_row = orig_pred_df.iloc[0]
    model_preds = [str2array(first_row['Preds_model' + str(m + 1)]) for m in range(n_models)]

    # Column names and values are built in parallel: labels first, then the
    # predictions grouped by pathology and ordered by model.
    column_names = list(pathologies)
    column_values = [list(true_labels_df[p]) for p in pathologies]
    for p_idx, p in enumerate(pathologies):
        for m in range(n_models):
            column_names.append('preds_' + p + '_model' + str(m + 1))
            column_values.append(list(model_preds[m][:, p_idx]))

    # Constructing a df with the preds and 'true' labels
    preds_df = pd.DataFrame(list(zip(*column_values)), columns=column_names)

    if print_auc:
        # Computing the auc for each pathology separately
        for p in pathologies:
            print(p)
            auc_list = [
                roc_auc_score(preds_df[p], preds_df['preds_' + p + '_model' + str(m + 1)])
                for m in range(n_models)
            ]
            print("Average auc:", round(sum(auc_list)/len(auc_list)*100, 1), "with standard deviation:", round(statistics.stdev(auc_list)*100,1))

            print()

    return preds_df


### Trained on: Original PadChest images

#### Predictions on: Images with no lungs

In [14]:
# AUC of the models trained on the original PadChest images, evaluated on the no-lungs images
preds_df = get_preds_multiclass(
    orig_pred_df=S_preds_PadChest_PredsNoLungs,
    true_labels_df=padchest_test_labels_ALL,
    print_auc=True,
)

Effusion
Average auc: 89.4 with standard deviation: 0.6

Pneumothorax
Average auc: 66.3 with standard deviation: 5.2

Atelectasis
Average auc: 82.7 with standard deviation: 1.2

Cardiomegaly
Average auc: 82.0 with standard deviation: 1.1

Pneumonia
Average auc: 65.9 with standard deviation: 2.1



#### Predictions on: Images with only lungs

In [15]:
# AUC of the models trained on the original PadChest images, evaluated on the only-lungs images
preds_df = get_preds_multiclass(
    orig_pred_df=S_preds_PadChest_PredsOnlyLungs,
    true_labels_df=padchest_test_labels_ALL,
    print_auc=True,
)

Effusion
Average auc: 70.0 with standard deviation: 3.1

Pneumothorax
Average auc: 46.0 with standard deviation: 5.9

Atelectasis
Average auc: 67.5 with standard deviation: 4.3

Cardiomegaly
Average auc: 63.0 with standard deviation: 2.9

Pneumonia
Average auc: 58.9 with standard deviation: 1.1



### Trained on: Images with no lungs

#### Predictions on: Original PadChest images

In [16]:
# AUC of the models trained on the no-lungs images, evaluated on the original PadChest images
preds_df = get_preds_multiclass(
    orig_pred_df=S_preds_nolungs_PredsPadChest,
    true_labels_df=padchest_test_labels_ALL,
    print_auc=True,
)

Effusion
Average auc: 76.4 with standard deviation: 1.3

Pneumothorax
Average auc: 74.7 with standard deviation: 4.0

Atelectasis
Average auc: 80.7 with standard deviation: 1.3

Cardiomegaly
Average auc: 73.5 with standard deviation: 3.4

Pneumonia
Average auc: 73.9 with standard deviation: 0.6



#### Predictions on: Images with no lungs

In [17]:
# AUC of the models trained on the no-lungs images, evaluated on the no-lungs images
preds_df = get_preds_multiclass(
    orig_pred_df=S_preds_nolungs_PredsNoLungs,
    true_labels_df=padchest_test_labels_ALL,
    print_auc=True,
)

Effusion
Average auc: 94.4 with standard deviation: 0.1

Pneumothorax
Average auc: 82.2 with standard deviation: 1.4

Atelectasis
Average auc: 87.0 with standard deviation: 0.6

Cardiomegaly
Average auc: 90.5 with standard deviation: 0.1

Pneumonia
Average auc: 79.0 with standard deviation: 0.1



#### Predictions on: Images with only lungs

In [18]:
# AUC of the models trained on the no-lungs images, evaluated on the only-lungs images
preds_df = get_preds_multiclass(
    orig_pred_df=S_preds_nolungs_PredsOnlyLungs,
    true_labels_df=padchest_test_labels_ALL,
    print_auc=True,
)

Effusion
Average auc: 65.2 with standard deviation: 8.4

Pneumothorax
Average auc: 68.6 with standard deviation: 6.3

Atelectasis
Average auc: 60.7 with standard deviation: 7.2

Cardiomegaly
Average auc: 56.4 with standard deviation: 7.4

Pneumonia
Average auc: 61.9 with standard deviation: 1.3



### Trained on: Images with only lungs

#### Predictions on: Original PadChest images

In [21]:
# AUC of the models trained on the only-lungs images, evaluated on the original PadChest images
preds_df = get_preds_multiclass(
    orig_pred_df=S_preds_onlylungs_PredsPadChest,
    true_labels_df=padchest_test_labels_ALL,
    print_auc=True,
)

Effusion
Average auc: 52.4 with standard deviation: 5.9

Pneumothorax
Average auc: 56.9 with standard deviation: 11.3

Atelectasis
Average auc: 58.4 with standard deviation: 2.9

Cardiomegaly
Average auc: 54.9 with standard deviation: 4.1

Pneumonia
Average auc: 56.5 with standard deviation: 0.8



#### Predictions on: Images with no lungs

In [22]:
# AUC of the models trained on the only-lungs images, evaluated on the no-lungs images
preds_df = get_preds_multiclass(
    orig_pred_df=S_preds_onlylungs_PredsNoLungs,
    true_labels_df=padchest_test_labels_ALL,
    print_auc=True,
)

Effusion
Average auc: 56.2 with standard deviation: 12.6

Pneumothorax
Average auc: 72.3 with standard deviation: 4.0

Atelectasis
Average auc: 59.2 with standard deviation: 8.1

Cardiomegaly
Average auc: 52.7 with standard deviation: 6.6

Pneumonia
Average auc: 59.6 with standard deviation: 1.5



#### Predictions on: Images with only lungs

In [23]:
# AUC of the models trained on the only-lungs images, evaluated on the only-lungs images
preds_df = get_preds_multiclass(
    orig_pred_df=S_preds_onlylungs_PredsOnlyLungs,
    true_labels_df=padchest_test_labels_ALL,
    print_auc=True,
)

Effusion
Average auc: 93.1 with standard deviation: 0.0

Pneumothorax
Average auc: 80.5 with standard deviation: 1.2

Atelectasis
Average auc: 86.4 with standard deviation: 0.2

Cardiomegaly
Average auc: 90.1 with standard deviation: 0.1

Pneumonia
Average auc: 79.3 with standard deviation: 0.2



## Implementation of Class-Wise Calibration Error (CWCE)

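Binary Expected Calibration Error, where the $N$ predictions are grouped by predicted probability into $M$ equal-width bins $B_1, \dots, B_M$, $\bar{y}(B_i)$ is the mean true label in bin $i$ and $\bar{p}(B_i)$ is the mean predicted probability in bin $i$:
$$ \text{binary-ECE}  = \sum_{i=1}^M \frac{|B_{i}|}{N} |
        \bar{y}(B_{i}) - \bar{p}(B_{i})| $$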

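Class-wise Expected Calibration Error, i.e. the binary ECE computed separately for each of the $K$ classes $j$ and then averaged over the classes:
$$ \text{class-$j$-ECE}  = \sum_{i=1}^M \frac{|B_{i,j}|}{N}
        |\bar{y}_j(B_{i,j}) - \bar{p}_j(B_{i,j})|, \qquad
        \text{classwise-ECE}  = \frac{1}{K}\sum_{j=1}^K \text{class-$j$-ECE} $$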

In [24]:
def binary_ECE(y_true, probs, power=1, bins=10):
    r"""
    Binary Expected Calibration Error

    The probabilities are grouped into `bins` equal-width bins on [0, 1]. For
    every non-empty bin, the absolute difference between the mean predicted
    probability and the mean true label is raised to `power` and weighted by
    the fraction of samples that fall into the bin; the weighted terms are
    summed.

    Parameters
    ----------
    y_true : indicator vector (n_samples, )
        True labels.
    probs : vector (n_samples, )
        Predicted probabilities for positive class.
    power : int or float, default 1
        Exponent applied to the per-bin absolute difference.
    bins : int, default 10
        Number of equal-width bins on [0, 1].

    Returns
    -------
    score : float
        The calibration error; 0 for empty input.
    """
    # Accept lists / pandas Series as well: boolean-mask indexing needs arrays
    y_true = np.asarray(y_true)
    probs = np.asarray(probs)

    bin_edges = np.linspace(start=0, stop=1, num=bins + 1)   # bins + 1 evenly spaced edges over [0, 1]
    idx_bins = np.digitize(x=probs, bins=bin_edges) - 1   # 0-based index of the bin each prob falls into
    # np.digitize uses half-open intervals [lo, hi): a probability of exactly
    # 1.0 would get index `bins` (an extra bin) and a value below 0 index -1.
    # Clip so that such values are counted in the last / first regular bin.
    idx_bins = np.clip(idx_bins, 0, bins - 1)

    n_samples = len(probs)
    ece = 0.0

    for i in np.unique(idx_bins):   # Only non-empty bins contribute
        in_bin = idx_bins == i   # Boolean mask of the samples in bin i
        probs_bin_mean = np.mean(probs[in_bin])   # Mean of probs in bin i
        true_bin_mean = np.mean(y_true[in_bin])   # Mean of true values in bin i
        diff = np.abs(probs_bin_mean - true_bin_mean)   # Absolute difference between the two bin means
        # Weight by the fraction of samples in the bin (np.sum of the mask = bin size)
        ece += (diff ** power) * np.sum(in_bin) / n_samples

    return ece


def classwise_ECE(y_true, probs, classes_list, power=1, bins=10, print_ece=False):
    r"""Per-class Expected Calibration Error

    Computes the binary ECE of every class (column) separately.

    Parameters
    ----------
    y_true : label indicator matrix (n_samples, n_classes)
        True labels.
    probs : matrix (n_samples, n_classes)
        Predicted probabilities.
    classes_list : list
        Class names; entry k names column k of `y_true` and `probs`.
    power : exponent passed on to `binary_ECE`
    bins : number of bins passed on to `binary_ECE`
    print_ece : bool
        If True, print the ECE of each class rounded to 3 decimals.

    Returns
    -------
    class_eces : list
        One binary ECE per class, in the order of `classes_list`.
        The average over the classes is not computed here.
    """
    class_eces = []

    # Column k of both matrices belongs to the k-th class name
    for col, class_name in enumerate(classes_list):
        score = binary_ECE(y_true[:, col], probs[:, col], power=power, bins=bins)
        if print_ece:
            print('ECE for {}: {}'.format(class_name, round(score, 3)))
        class_eces.append(score)

    return class_eces

    
def classwise_ECE_three_models(df_orig, df_y_true, classes_list, power=1, bins=10):
    """Print, per class, the calibration error averaged over the three models.

    Parameters
    ----------
    df_orig : pandas.DataFrame
        Raw predictions, passed on to `get_preds_multiclass`.
    df_y_true : pandas.DataFrame
        True labels, passed on to `get_preds_multiclass`.
    classes_list : list of str
        Classes to evaluate. Each name must be a label column of the frame
        returned by `get_preds_multiclass` and have matching
        'preds_<class>_model<k>' columns for k = 1..3.
    power, bins :
        Passed on to `classwise_ECE`.

    Returns
    -------
    pandas.DataFrame
        The frame built by `get_preds_multiclass` (labels and predictions).
        The average over the classes is not printed.
    """
    n_models = 3
    classes_list = list(classes_list)

    # Creating the preds df
    preds_df = get_preds_multiclass(df_orig, df_y_true, print_auc=False)

    # Columns are selected from `classes_list` (not from a fixed list) so that
    # the ECE at position k always belongs to the class name at position k,
    # also when a subset or a different order of classes is requested.
    y_true = preds_df[classes_list].to_numpy()   # Same for every model, so built once

    all_model_eces = []
    for m in range(1, n_models + 1):
        prob_cols = ['preds_' + str(c) + '_model' + str(m) for c in classes_list]
        class_eces = classwise_ECE(y_true, preds_df[prob_cols].to_numpy(), classes_list=classes_list, power=power, bins=bins)
        all_model_eces.append(class_eces)

    for c_idx, c in enumerate(classes_list):
        per_model = [model_eces[c_idx] for model_eces in all_model_eces]   # ECE of class c for each model
        print('Class: ', c)
        print('Average CWCE: ', round(sum(per_model) / n_models, 5), 'with standard deviation: ', round(statistics.stdev(per_model), 5))
        print()

    return preds_df
    

### Trained on: Original PadChest images

#### Predictions on: Images with no lungs

In [25]:
# Calibration of the models trained on the original PadChest images, evaluated on the no-lungs images
pathologies = ['Effusion', 'Pneumothorax', 'Atelectasis', 'Cardiomegaly', 'Pneumonia']
preds_df = classwise_ECE_three_models(
    df_orig=S_preds_PadChest_PredsNoLungs,
    df_y_true=padchest_test_labels_ALL,
    classes_list=pathologies,
    power=1,
    bins=10,
)

Class:  Effusion
Average CWCE:  0.01437 with standard deviation:  0.00459

Class:  Pneumothorax
Average CWCE:  0.00101 with standard deviation:  0.00099

Class:  Atelectasis
Average CWCE:  0.00733 with standard deviation:  0.00374

Class:  Cardiomegaly
Average CWCE:  0.02867 with standard deviation:  0.00577

Class:  Pneumonia
Average CWCE:  0.01756 with standard deviation:  0.00222



#### Predictions on: Images with only lungs

In [26]:
# Calibration of the models trained on the original PadChest images, evaluated on the only-lungs images
pathologies = ['Effusion', 'Pneumothorax', 'Atelectasis', 'Cardiomegaly', 'Pneumonia']
preds_df = classwise_ECE_three_models(
    df_orig=S_preds_PadChest_PredsOnlyLungs,
    df_y_true=padchest_test_labels_ALL,
    classes_list=pathologies,
    power=1,
    bins=10,
)

Class:  Effusion
Average CWCE:  0.04373 with standard deviation:  0.01177

Class:  Pneumothorax
Average CWCE:  0.00335 with standard deviation:  0.00417

Class:  Atelectasis
Average CWCE:  0.01138 with standard deviation:  0.00183

Class:  Cardiomegaly
Average CWCE:  0.05804 with standard deviation:  0.01903

Class:  Pneumonia
Average CWCE:  0.01518 with standard deviation:  0.00519



### Trained on: Images with no lungs

#### Predictions on: Original PadChest images

In [27]:
# Calibration of the models trained on the no-lungs images, evaluated on the original PadChest images
pathologies = ['Effusion', 'Pneumothorax', 'Atelectasis', 'Cardiomegaly', 'Pneumonia']
preds_df = classwise_ECE_three_models(
    df_orig=S_preds_nolungs_PredsPadChest,
    df_y_true=padchest_test_labels_ALL,
    classes_list=pathologies,
    power=1,
    bins=10,
)

Class:  Effusion
Average CWCE:  0.14421 with standard deviation:  0.08054

Class:  Pneumothorax
Average CWCE:  0.00082 with standard deviation:  0.00081

Class:  Atelectasis
Average CWCE:  0.02237 with standard deviation:  0.01814

Class:  Cardiomegaly
Average CWCE:  0.132 with standard deviation:  0.05563

Class:  Pneumonia
Average CWCE:  0.01292 with standard deviation:  0.00285



#### Predictions on: Images with no lungs

In [28]:
# Calibration of the models trained on the no-lungs images, evaluated on the no-lungs images
pathologies = ['Effusion', 'Pneumothorax', 'Atelectasis', 'Cardiomegaly', 'Pneumonia']
preds_df = classwise_ECE_three_models(
    df_orig=S_preds_nolungs_PredsNoLungs,
    df_y_true=padchest_test_labels_ALL,
    classes_list=pathologies,
    power=1,
    bins=10,
)

Class:  Effusion
Average CWCE:  0.00944 with standard deviation:  0.00118

Class:  Pneumothorax
Average CWCE:  0.00046 with standard deviation:  0.00011

Class:  Atelectasis
Average CWCE:  0.00401 with standard deviation:  0.00033

Class:  Cardiomegaly
Average CWCE:  0.00791 with standard deviation:  0.00199

Class:  Pneumonia
Average CWCE:  0.00473 with standard deviation:  0.0018



#### Predictions on: Images with only lungs

In [29]:
# Calibration of the models trained on the no-lungs images, evaluated on the only-lungs images
pathologies = ['Effusion', 'Pneumothorax', 'Atelectasis', 'Cardiomegaly', 'Pneumonia']
preds_df = classwise_ECE_three_models(
    df_orig=S_preds_nolungs_PredsOnlyLungs,
    df_y_true=padchest_test_labels_ALL,
    classes_list=pathologies,
    power=1,
    bins=10,
)

Class:  Effusion
Average CWCE:  0.05312 with standard deviation:  0.00932

Class:  Pneumothorax
Average CWCE:  0.00165 with standard deviation:  0.00028

Class:  Atelectasis
Average CWCE:  0.01726 with standard deviation:  0.00099

Class:  Cardiomegaly
Average CWCE:  0.0755 with standard deviation:  0.00761

Class:  Pneumonia
Average CWCE:  0.01808 with standard deviation:  0.00705



### Trained on: Images with only lungs

#### Predictions on: Original PadChest images

In [30]:
# Calibration of the models trained on the only-lungs images, evaluated on the original PadChest images
pathologies = ['Effusion', 'Pneumothorax', 'Atelectasis', 'Cardiomegaly', 'Pneumonia']
preds_df = classwise_ECE_three_models(
    df_orig=S_preds_onlylungs_PredsPadChest,
    df_y_true=padchest_test_labels_ALL,
    classes_list=pathologies,
    power=1,
    bins=10,
)

Class:  Effusion
Average CWCE:  0.06419 with standard deviation:  0.01725

Class:  Pneumothorax
Average CWCE:  0.00179 with standard deviation:  0.0016

Class:  Atelectasis
Average CWCE:  0.01115 with standard deviation:  0.00476

Class:  Cardiomegaly
Average CWCE:  0.0848 with standard deviation:  0.00128

Class:  Pneumonia
Average CWCE:  0.24187 with standard deviation:  0.0627



#### Predictions on: Images with no lungs

In [31]:
# Calibration of the models trained on the only-lungs images, evaluated on the no-lungs images
pathologies = ['Effusion', 'Pneumothorax', 'Atelectasis', 'Cardiomegaly', 'Pneumonia']
preds_df = classwise_ECE_three_models(
    df_orig=S_preds_onlylungs_PredsNoLungs,
    df_y_true=padchest_test_labels_ALL,
    classes_list=pathologies,
    power=1,
    bins=10,
)

Class:  Effusion
Average CWCE:  0.05485 with standard deviation:  0.00127

Class:  Pneumothorax
Average CWCE:  0.00181 with standard deviation:  0.00088

Class:  Atelectasis
Average CWCE:  0.01833 with standard deviation:  0.00058

Class:  Cardiomegaly
Average CWCE:  0.07709 with standard deviation:  0.00406

Class:  Pneumonia
Average CWCE:  0.05909 with standard deviation:  0.02955



#### Predictions on: Images with only lungs

In [32]:
# Calibration of the models trained on the only-lungs images, evaluated on the only-lungs images
pathologies = ['Effusion', 'Pneumothorax', 'Atelectasis', 'Cardiomegaly', 'Pneumonia']
preds_df = classwise_ECE_three_models(
    df_orig=S_preds_onlylungs_PredsOnlyLungs,
    df_y_true=padchest_test_labels_ALL,
    classes_list=pathologies,
    power=1,
    bins=10,
)

Class:  Effusion
Average CWCE:  0.00792 with standard deviation:  0.00124

Class:  Pneumothorax
Average CWCE:  0.0006 with standard deviation:  0.00018

Class:  Atelectasis
Average CWCE:  0.00278 with standard deviation:  0.00079

Class:  Cardiomegaly
Average CWCE:  0.01024 with standard deviation:  0.00066

Class:  Pneumonia
Average CWCE:  0.00474 with standard deviation:  0.00151

