In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import load_model
from sklearn.metrics import classification_report, multilabel_confusion_matrix

# Custom objects required for loading CheXpert multi-label classification models
import sys 
sys.path.append('..')
from loss import (
    set_binary_crossentropy_weighted_loss
)

2021-11-14 13:52:57.527976: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


### 1- Loading validation set

In [2]:
val_df = pd.read_csv(
    filepath_or_buffer="../labels/validation_u-zeroes.csv",
    dtype={  # Setting labels to type np.float32 was necessary for conversion to tf.Tensor object
        "Path": str,
        "Atelectasis": np.float32,
        "Cardiomegaly": np.float32,
        "Consolidation": np.float32,
        "Edema": np.float32,
        "Pleural Effusion": np.float32,
        "Pleural Other": np.float32,
        "Pneumonia": np.float32,
        "Pneumothorax": np.float32,
        "Enlarged Cardiomediastinum": np.float32,
        "Lung Opacity": np.float32,
        "Lung Lesion": np.float32,
        "Fracture": np.float32,
        "Support Devices": np.float32,
        "No Finding": np.float32
    }
)

In [3]:
list_columns = list(val_df.columns)
y_cols = list_columns[1::]  # First column is 'Path' column

In [4]:
val_datagen = ImageDataGenerator(
    featurewise_center=True,  # Mean and standard deviation values of the training set will be loaded to the object
    featurewise_std_normalization=True
)

In [5]:
training_dataset_mean = np.load("../calculated_chexpert_training_dataset_mean_and_std_values/CheXpert_training_set_mean.npy")
training_dataset_std = np.load("../calculated_chexpert_training_dataset_mean_and_std_values/CheXpert_training_set_std.npy")

In [6]:
val_datagen.mean = training_dataset_mean
val_datagen.std = training_dataset_std

val_datagenerator = val_datagen.flow_from_dataframe(
    dataframe=val_df,
    directory="../dataset/",
    x_col='Path',
    y_col=y_cols,
    weight_col=None,
    target_size=(512, 512),
    color_mode='grayscale',
    class_mode='raw',
    batch_size=16,
    validate_filenames=True,
    shuffle=False
)

Found 234 validated image filenames.


### 2- Loading models

In [7]:
densenet201_model = load_model(
    "../trained_models/densenet201.h5",
    custom_objects={
        "binary_crossentropy_weighted_loss": set_binary_crossentropy_weighted_loss
    }
)

inceptionresnetv2_model = load_model(
    "../trained_models/inceptionresnetv2.h5",
    custom_objects={
        "binary_crossentropy_weighted_loss": set_binary_crossentropy_weighted_loss
    }
)

resnet152_model = load_model(
    "../trained_models/resnet152.h5",
    custom_objects={
        "binary_crossentropy_weighted_loss": set_binary_crossentropy_weighted_loss
    }
)

ensemble_model = load_model(
    "../trained_models/ensemble-model.h5",
    custom_objects={
        "binary_crossentropy_weighted_loss": set_binary_crossentropy_weighted_loss
    }
)

2021-11-14 13:52:58.847929: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-11-14 13:52:58.848421: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2021-11-14 13:52:58.896053: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-11-14 13:52:58.896582: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: NVIDIA TITAN RTX computeCapability: 7.5
coreClock: 1.77GHz coreCount: 72 deviceMemorySize: 23.65GiB deviceMemoryBandwidth: 625.94GiB/s
2021-11-14 13:52:58.896597: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
2021-11-14 13:52:58.897616: I tensorflow/stream_executor/platform/default/d



### 3- Predictions

In [8]:
weights = np.load("../calculated_chexpert_class_weights/chexpert_positive_weights.npy")

In [9]:
weights

array([0.85060918, 0.87914813, 0.93383136, 0.76614715, 0.6142274 ,
       0.98423107, 0.97296946, 0.91295084, 0.9516682 , 0.52741995,
       0.95888351, 0.959537  , 0.48078008, 0.89982275])

In [10]:
weights.shape

(14,)

In [11]:
predictions_densenet201_model = densenet201_model.predict(
    x=val_datagenerator,
    steps=len(val_datagenerator),
    workers=4,
    verbose=1
)

2021-11-14 13:53:18.886722: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2021-11-14 13:53:18.906774: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 3600000000 Hz
2021-11-14 13:53:20.405949: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10
2021-11-14 13:53:20.520533: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.7
2021-11-14 13:53:21.112904: W tensorflow/stream_executor/gpu/asm_compiler.cc:63] Running ptxas --version returned 256
2021-11-14 13:53:21.152086: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] Internal: ptxas exited with non-zero error code 256, output: 
Relying on driver to perform ptx compilation. 
Modify $PATH to customize ptxas location.
This message will be only logged once.




In [12]:
predictions_densenet201_model.shape

(234, 14)

In [13]:
predictions_densenet201_model

array([[0.7459604 , 0.14031884, 0.0972696 , ..., 0.6975152 , 0.38334316,
        0.72488225],
       [0.29283   , 0.00853846, 0.07513779, ..., 0.45244417, 0.33490708,
        0.9645697 ],
       [0.30019215, 0.02147888, 0.06813031, ..., 0.39322576, 0.80349684,
        0.9746482 ],
       ...,
       [0.4286321 , 0.25964716, 0.06964832, ..., 0.53382134, 0.8830686 ,
        0.6317557 ],
       [0.469888  , 0.40690485, 0.04010934, ..., 0.45533997, 0.27731776,
        0.8914092 ],
       [0.7704874 , 0.28287017, 0.29875913, ..., 0.6266965 , 0.6503533 ,
        0.16895783]], dtype=float32)

In [14]:
predictions_densenet201_model[0] > 0.5

array([ True, False, False, False, False, False, False,  True,  True,
       False,  True,  True, False,  True])

In [15]:
val_datagenerator.labels

array([[0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       ...,
       [0., 1., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [16]:
(predictions_densenet201_model[0] >= 0.5).astype(np.float32)

array([1., 0., 0., 0., 0., 0., 0., 1., 1., 0., 1., 1., 0., 1.],
      dtype=float32)

In [17]:
(predictions_densenet201_model[0] >= 0.5).astype(np.float32)

array([1., 0., 0., 0., 0., 0., 0., 1., 1., 0., 1., 1., 0., 1.],
      dtype=float32)

In [18]:
val_datagenerator.labels[0]

array([0., 1., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0.],
      dtype=float32)

In [19]:
(predictions_densenet201_model[1] >= 0.5).astype(np.float32)

array([0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1.],
      dtype=float32)

In [20]:
val_datagenerator.labels[1]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
      dtype=float32)

In [21]:
(predictions_densenet201_model[2] >= 0.5).astype(np.float32)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1.],
      dtype=float32)

In [22]:
val_datagenerator.labels[2]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
      dtype=float32)

In [23]:
(predictions_densenet201_model[3] >= 0.5).astype(np.float32)

array([0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 1., 1., 0.],
      dtype=float32)

In [24]:
val_datagenerator.labels[3]

array([0., 0., 0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0.],
      dtype=float32)

### All CheXpert labels (14 labels)

In [25]:
predictions_densenet201_model.shape

(234, 14)

In [26]:
report_densenet201_model = classification_report(
    y_true=val_datagenerator.labels.astype(np.uint8),
    y_pred=predictions_densenet201_model.astype(np.uint8),
    digits=4,
    target_names=y_cols
)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [27]:
print(report_densenet201_model)

                            precision    recall  f1-score   support

               Atelectasis     0.0000    0.0000    0.0000        80
              Cardiomegaly     0.0000    0.0000    0.0000        68
             Consolidation     0.0000    0.0000    0.0000        33
                     Edema     0.0000    0.0000    0.0000        45
          Pleural Effusion     0.0000    0.0000    0.0000        67
             Pleural Other     0.0000    0.0000    0.0000         1
                 Pneumonia     0.0000    0.0000    0.0000         8
              Pneumothorax     0.0000    0.0000    0.0000         8
Enlarged Cardiomediastinum     0.0000    0.0000    0.0000       109
              Lung Opacity     0.0000    0.0000    0.0000       126
               Lung Lesion     0.0000    0.0000    0.0000         1
                  Fracture     0.0000    0.0000    0.0000         0
           Support Devices     0.0000    0.0000    0.0000       107
                No Finding     0.0000    0.0000

In [28]:
multilabel_confusion_matrix(
    y_true=val_datagenerator.labels.astype(np.uint8),
    y_pred=predictions_densenet201_model.astype(np.uint8)
)

array([[[154,   0],
        [ 80,   0]],

       [[166,   0],
        [ 68,   0]],

       [[201,   0],
        [ 33,   0]],

       [[189,   0],
        [ 45,   0]],

       [[167,   0],
        [ 67,   0]],

       [[233,   0],
        [  1,   0]],

       [[226,   0],
        [  8,   0]],

       [[226,   0],
        [  8,   0]],

       [[125,   0],
        [109,   0]],

       [[108,   0],
        [126,   0]],

       [[233,   0],
        [  1,   0]],

       [[234,   0],
        [  0,   0]],

       [[127,   0],
        [107,   0]],

       [[196,   0],
        [ 38,   0]]])

### CheXpert competition labels (first 5 labels)

In [29]:
predictions_densenet201_model[:, :5].shape

(234, 5)

In [30]:
report_densenet201_model = classification_report(
    y_true=val_datagenerator.labels[:, :5].astype(np.uint8),
    y_pred=predictions_densenet201_model[:, :5].astype(np.uint8),
    digits=4,
    target_names=y_cols[:5]
)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [31]:
print(report_densenet201_model)

                  precision    recall  f1-score   support

     Atelectasis     0.0000    0.0000    0.0000        80
    Cardiomegaly     0.0000    0.0000    0.0000        68
   Consolidation     0.0000    0.0000    0.0000        33
           Edema     0.0000    0.0000    0.0000        45
Pleural Effusion     0.0000    0.0000    0.0000        67

       micro avg     0.0000    0.0000    0.0000       293
       macro avg     0.0000    0.0000    0.0000       293
    weighted avg     0.0000    0.0000    0.0000       293
     samples avg     0.0000    0.0000    0.0000       293



In [32]:
multilabel_confusion_matrix(
    y_true=val_datagenerator.labels[:, :5].astype(np.uint8),
    y_pred=predictions_densenet201_model[:, :5].astype(np.uint8)
)

array([[[154,   0],
        [ 80,   0]],

       [[166,   0],
        [ 68,   0]],

       [[201,   0],
        [ 33,   0]],

       [[189,   0],
        [ 45,   0]],

       [[167,   0],
        [ 67,   0]]])