Let's try using a transfer learning model that was specifically **trained on chest x-ray images!**

#### CheXNet - Keras

* CheXNet is based on Densenet 121, which was itself pretrained on imagenet, before being finetuned on ChestX-ray14, which contained 112,120 frontal view greyscale X-rays from 30,805 patients. 
    * For more about CheXnet, check out the original article or github with the trained model: https://github.com/brucechou1983/CheXNet-Keras
* Loading the model naively won't work, but I provide a workaround here.
* Keras - for ease of use! :) 
    
* Data loading code copied from the kernel [Baseline: Transfer Learning+RandomForest](https://www.kaggle.com/titericz/baseline-transfer-learning-randomforest-gpu/) 
* Transfer learning best practices applied - frozen base model and tuning of the output layer, followed by unfreezing all layers and gentler finetuning.
    * Removing the added dense layer at the end may improve things (just be sure to handle the logits)
* Note that this is just a starter kernel - there's lots more that could be done to improve the model, the transfer learning, etc.
* In this initial simple notebook we'll just use chexnet as a static feature extractor, and see how it does vs imagenet pretrained models (.736)
    * Note - the Random Forest model used assumes (erroneously) that this is multiclass, whilst it is actually a multilabel problem!
    
V3: Fixed global pooling to take average pooling of last convolutional block, instead of dense layer. Modified densenet extraction. Added controllable fast run settings. 

In [None]:
import os
import gc
import cv2
import cuml
import glob
import numpy as np
import pandas as pd
from numba import cuda
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score
from sklearn.ensemble import RandomForestClassifier
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.utils import plot_model
from tensorflow.keras.applications import densenet
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import GlobalAveragePooling2D, GlobalMaxPooling2D, AveragePooling2D

In [None]:
# Notebook-wide configuration constants.
NUM_CLASSES = 11 # number of target/output classes

# Side length (pixels) of the square network input; used below as
# input_shape=(IMG_SIZE, IMG_SIZE, 3) when the DenseNet121 backbone is built.
IMG_SIZE =  512# alternatives noted by the author: 224 (imagenet default), 512 (alt reshape)
IMG_CHANNELS = 3 

## size of the pooled output layer from the model; it sizes the embedding arrays
## and is asserted against the extractor's output width further down.
POOLED_OUTPUT_SIZE = 1024 # 1024 for densenet 121, 2048 for mobilenet? 

# Location of the CheXNet weight file (Kaggle dataset path).
chexnet_weights_path = "../input/chexnet-keras-weights/brucechou1983_CheXNet_Keras_0.3.0_weights.h5"

FAST_RUN = False # use only a few rows, for fast debugging
FAST_RUN_SAMPLES = 150 # num rows kept from train, test and annotations when FAST_RUN is True

In [None]:
### if you don't want to depend on kaggle datasets, download the CheXNet weights. You cannot simply load them as inbuilt weights, as in TF!

# !wget --no-check-certificate \
#         "https://storage.googleapis.com/kaggle-datasets/66426/130851/brucechou1983_CheXNet_Keras_0.3.0_weights.h5.zip?GoogleAccessId=web-data@kaggle-161607.iam.gserviceaccount.com&Expires=1561942557&Signature=LgBs0ZzvkJ2Re%2BSuUX1JSq4%2B8DhKC1Ur4evO5L%2F4ArqEzSE2BuRj%2BrfNBOMKedVevNZNr2tuljEzE7frleWdq2yRuim2eRygRcAlpauT1wsfOc9i%2BqE%2BiFLDM03CJWV14cURqf%2FS6h64yCNvTqB%2BywEs2rjKEmZykp%2FWhHVEurINTQp1%2FntTO2rK%2BQMawClqAvo2SVayh4CVNnzzDKeyxm9R0w51FoIL%2BoYQhCVnMJLKk3KeOG8lcreKED5vR7D62KrnJy4ft1Hz2%2BO2pkP0OdDP0QZ4D%2F66bdaN6xi3OJg1g9OizWpkzct3OnLBVuivd344CUKlr25KhRS85JuZ2A%3D%3D"\
#         -O "/tmp/CheXNet_Keras_0.3.0_weights.h5.zip"

# local_zip='/tmp/CheXNet_Keras_0.3.0_weights.h5.zip'
# zip_ref=zipfile.ZipFile(local_zip,'r')
# zip_ref.extractall('/tmp/CheXNet_Keras_0.3.0_weights.h5')
# zip_ref.close()

In [None]:
# build a chexnet model with pretrained weights
class chexnet(object):
    """Factory for a DenseNet121 classifier initialised from CheXNet weights."""

    @staticmethod
    def build(weights_path=chexnet_weights_path, out_size=11, embedding_size=64, activation_type='sigmoid',
              input_shape=(224, 224, 3),not_frozen=False, embedding_only= False):
        """Build a transfer-learning model on top of the CheXNet backbone.

        Args:
            weights_path: path to the CheXNet Keras ``.h5`` weight file.
            out_size: number of units in the final output layer.
            embedding_size: width of the hidden ReLU layer placed before the output.
            activation_type: activation of the final output layer.
            input_shape: (height, width, channels) of the input images.
            not_frozen: when True the backbone weights are trainable,
                when False (default) they are frozen.
            embedding_only: currently unused; kept for interface compatibility.

        Returns:
            A ``tf.keras.Model`` mapping images to ``out_size`` activations.
        """
        backbone = densenet.DenseNet121(weights=None,
                                        include_top=False,
                                        input_shape=input_shape,
                                        pooling="avg")
        # Workaround: the weight file includes CheXNet's 14-way head, so a
        # matching Dense layer is attached only for loading. The wrapper model
        # shares its layers with `backbone`, so loading through it fills the
        # backbone weights as well.
        chexnet_head = tf.keras.layers.Dense(14, activation='sigmoid', name='predictions')(backbone.output)
        tf.keras.Model(inputs=backbone.input, outputs=chexnet_head).load_weights(weights_path)
        print("CheXNet loaded")

        # Continue from the pooled backbone features. `Model.layers.pop()` does
        # not detach the head in tf.keras, so the previous code ended up feeding
        # the 14 CheXNet predictions into the new classifier.
        base_model = backbone
        base_model.trainable = not_frozen  # frozen unless not_frozen=True
        inputs = tf.keras.Input(shape=input_shape)

        # Always call the backbone with training=False so that it stays in
        # inference mode even after it is unfrozen for fine-tuning.
        x = base_model(inputs, training=False)
        # Small classifier head: dropout -> embedding layer -> output layer.
        x = tf.keras.layers.Dropout(0.25)(x)  # regularise with dropout
        x = tf.keras.layers.Dense(embedding_size, activation='relu')(x)
        outputs = tf.keras.layers.Dense(out_size, activation=activation_type)(x)
        full_model = tf.keras.Model(inputs, outputs)
        return full_model
    
def rgb2gray(rgb):
    """Collapse the colour axis of an image array into a single grey value.

    The first three entries of the last axis are combined as a weighted sum
    with weights 0.2989, 0.5870 and 0.1140; any further channels are ignored.
    The result has the input's shape without its last axis.
    """
    channel_weights = [0.2989, 0.5870, 0.1140]
    colour_channels = rgb[..., :3]
    return np.dot(colour_channels, channel_weights)


## note: it would be more efficient to apply tf.keras.layers.experimental transforms https://www.tensorflow.org/tutorials/images/data_augmentation
## we could also wrap this inside the data generator, but keep it separate for now.
## https://www.tensorflow.org/tutorials/images/data_augmentation 

# Augmentation generator: small random rotations, shifts and shear.
img_aug = ImageDataGenerator(
    rotation_range=9,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.05,
    # brightness jitter is left disabled: brightness_range=(0.1,0.9)
)

In [None]:
!ls ../input/ranzcr-clip-catheter-line-classification

# Load train and test as DataFrames

In [None]:
# Read the label table and the submission template that serves as the test frame.
train = pd.read_csv('../input/ranzcr-clip-catheter-line-classification/train.csv')
test = pd.read_csv('../input/ranzcr-clip-catheter-line-classification/sample_submission.csv')

# Report both frame sizes and the number of distinct patients.
print(train.shape, test.shape, sep="\n")
n_patients = train["PatientID"].nunique()
print("# unique patients", n_patients)
train.head(7)

# Check distribution of labels in train

In [None]:
train.sum(numeric_only = True)

In [None]:
train.sum(numeric_only = True,axis=1).describe()

In [None]:
# Share of rows carrying at least one "normal" / at least one "abnormal" label.
normal_cols = ['ETT - Normal', 'NGT - Normal', 'CVC - Normal', 'Swan Ganz Catheter Present']
abnormal_cols = ['ETT - Abnormal', 'ETT - Borderline', 'NGT - Abnormal', 'NGT - Borderline',
                 'NGT - Incompletely Imaged', 'CVC - Abnormal', 'CVC - Borderline']

normal_counts = train[normal_cols].sum(axis=1)
# Scale to percent before rounding: `100*round(x, 4)` brings float noise back
# into the printed value (e.g. 93.47999999999999).
print(round(100*(normal_counts>0).sum()/train.shape[0], 2), "% of rows have a normal label")

abnormal_counts = train[abnormal_cols].sum(axis=1)
print(round(100*(abnormal_counts>0).sum()/train.shape[0], 2), "% of rows have abnormal labels")
print("abnormal counts/labels distribution:\n",abnormal_counts.describe())


While the problem is multilabel, we see that it's relatively rare for there to be multiple labels in a given case. It behaves more like a multiclass problem; presumably there aren't often multiple catheters/lines/problems simultaneously.
Still, there are a number of cases with multiple issues/abnormalities at once. And we see there's an overlap between being normal for some things and abnormal for others!

In [None]:
# Mean of each numeric column. numeric_only=True matches the sums above and
# avoids a TypeError on the string id columns in recent pandas versions.
train.mean(numeric_only=True).round(3)

# Check first image in train

In [None]:
# Load the image of the first training row from disk and display it.
first_uid = train.StudyInstanceUID.values[0]
img = cv2.imread('../input/ranzcr-clip-catheter-line-classification/train/' + first_uid + '.jpg')
plt.imshow(img)

In [None]:
import ast

# Read the annotation table (one row per annotated line) and preview it.
annot = pd.read_csv('../input/ranzcr-clip-catheter-line-classification/train_annotations.csv')
print(annot.shape)
annot.head()

In [None]:
## If `FAST_RUN` - use only a few rows for fast image/data loading & debugging
if FAST_RUN:
    # Keep just the first FAST_RUN_SAMPLES rows of every frame.
    train = train.head(FAST_RUN_SAMPLES)
    test = test.head(FAST_RUN_SAMPLES)
    annot = annot.head(FAST_RUN_SAMPLES)
    print("Fast run")

# Process average of catheter position to be used as a mask.

In [None]:
# Average, over all annotations, of the drawn catheter/line polylines on a 512x512 grid.
RES = np.zeros( (512,512) )
annot_uids = annot.StudyInstanceUID.values
annot_points = annot.data.values
for uid, raw_points in tqdm(zip(annot_uids, annot_points), total=annot.shape[0]):
    # The image is read only to get a canvas of the right size; it is blanked below.
    img = cv2.imread('../input/ranzcr-clip-catheter-line-classification/train/'+uid+'.jpg')
    img = np.zeros_like(img)
    # literal_eval parses the stored list of points without executing arbitrary
    # code, unlike eval().
    data = ast.literal_eval(raw_points)
    # Connect consecutive points with thick white segments.
    # NOTE(review): each point is passed to cv2.line as (p[1], p[0]); confirm this
    # matches the coordinate order used in the annotation file.
    for start, end in zip(data[:-1], data[1:]):
        img = cv2.line(img, (start[1],start[0]), (end[1],end[0]), (255,255,255), 20 )
    img = cv2.resize(img,(512,512))
    RES += img[:,:,0]

# Guard against an empty annotation table (would otherwise divide by zero).
RES /= max(annot.shape[0], 1)

In [None]:
plt.imshow(np.clip(RES,0,1))

In [None]:
# Threshold the averaged annotation map at 0.5 into a 0/1 uint8 plane,
# then replicate that plane across three channels.
mask = (RES > 0.5).astype(np.uint8)
mask = np.repeat(mask[:, :, np.newaxis], 3, axis=2)

# The float accumulator is no longer needed.
del RES
gc.collect()
plt.imshow(mask)

# Let's extract features from the images using transfer learning from pretrained Imagenet models.

In [None]:
import keras
# from keras.applications.mobilenet import preprocess_input
from keras.applications.densenet import preprocess_input
# dir(keras.applications)

In [None]:
# !ls ../input/keras-pretrained-models/

* Instead of mobilenet or the like, use densenet/chexnet
    * Note. - model may be improved by scaling colour channels to those expected in imagenet, prior to rgb dummy channels creation

In [None]:
# base = keras.applications.Xception( weights=None,  include_top=True)
# # Load pretrained imagenet weights
# base.load_weights('../input/keras-pretrained-models/Xception_Top_ImageNet.h5')
# base.trainable = False
# model = keras.Model(inputs=base.input, outputs=base.get_layer('avg_pool').output)
# model.summary()

In [None]:
# Instantiate the CheXNet model with pretrained weights, then expose globally
# average-pooled convolutional features as the extractor output.
from keras.models import Model

base = densenet.DenseNet121(weights=None,
                            include_top=False,
                            input_shape=(IMG_SIZE,IMG_SIZE,3)
                           )
## workaround - attach a 14-unit head named 'predictions' so the layer layout matches
## the pretrained weight file, then load the weights through the combined model
predictions = tf.keras.layers.Dense(14, activation='sigmoid', name='predictions')(base.output)
## ,by_name=True - could save on workaround, but don't know if names will necessarily match + how to validate? - https://github.com/keras-team/keras/issues/5397
base = tf.keras.Model(inputs=base.input, outputs=predictions)
base.load_weights(chexnet_weights_path)
print("CheXNet loaded")
base.trainable = False  # pure feature extraction: freeze every layer

# The former `base.layers.pop()` and `base.training=False` were removed: in tf.keras
# `Model.layers` returns a fresh list, so pop() never detached the head, and
# `training` is not an attribute Keras reads. Layer indices below are unchanged.

### https://stackoverflow.com/questions/41668813/how-to-add-and-remove-new-layers-in-keras-after-loading-weights
# Tap the layer four positions from the end (counting the 'predictions' head) and
# average over the spatial axes; its width is checked against POOLED_OUTPUT_SIZE
# in the following cell.
new_model = GlobalAveragePooling2D()(base.layers[-4].output)

# Use tf.keras throughout, consistent with the layers above.
model = tf.keras.Model(base.input, new_model)
# model.summary()

# # model = keras.Model(inputs=base.input, outputs=base.get_layer('avg_pool').output)

In [None]:
model.output

In [None]:
# Sanity check: the extractor's feature width must equal POOLED_OUTPUT_SIZE,
# because the embedding arrays below are allocated with that many columns.
assert model.output.shape[-1] == POOLED_OUTPUT_SIZE

# Inefficient, but easy to understand for loop to extract features from train images

In [None]:
train_path = '../input/ranzcr-clip-catheter-line-classification/train/'

### original code - learns just static embeddings - we can later try to improve by finetuning our DL model
# One row of POOLED_OUTPUT_SIZE features per training image.
emb_train = np.zeros( (train.shape[0],POOLED_OUTPUT_SIZE), dtype=np.float32 )
for n, filename in tqdm(enumerate(train.StudyInstanceUID.values), total=train.shape[0]): # ORIG
    img = cv2.imread(train_path+filename+'.jpg')
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None;
        # fail with a clear message instead of an opaque cv2.resize error.
        raise FileNotFoundError("Could not read image: " + train_path+filename+'.jpg')
    img = cv2.resize(img,(512,512))
    img *= mask  # zero out pixels outside the averaged catheter region
    img = preprocess_input(img)[np.newaxis]  # normalise and add the batch axis
    # verbose=0 avoids one progress bar per image on TF versions that print by default.
    emb_train[n] = model.predict(img, verbose=0)[0]

gc.collect()

Future notebook - retrain the model
* Load train data
* Fit on it (in 2 stages for transfer learning mode unfreezing/finetuning) + augment images

# Extract features from test images

In [None]:
test_path = '../input/ranzcr-clip-catheter-line-classification/test/'

# One row of POOLED_OUTPUT_SIZE features per test image.
emb_test = np.zeros( (test.shape[0],POOLED_OUTPUT_SIZE), dtype=np.float32 )
for n, filename in tqdm(enumerate(test.StudyInstanceUID.values), total=test.shape[0]): # ORIG
    img = cv2.imread(test_path+filename+'.jpg')
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None;
        # fail with a clear message instead of an opaque cv2.resize error.
        raise FileNotFoundError("Could not read image: " + test_path+filename+'.jpg')
    img = cv2.resize(img,(512,512))
    img *= mask  # zero out pixels outside the averaged catheter region
    img = preprocess_input(img)[np.newaxis]  # normalise and add the batch axis
    # verbose=0 avoids one progress bar per image on TF versions that print by default.
    emb_test[n] = model.predict(img, verbose=0)[0]

gc.collect()

# Delete model and release memory

In [None]:
# The embeddings are computed, so the extractor is no longer needed:
# drop the reference, reset the Keras backend session, and run the
# garbage collector before and after.
del model
gc.collect()
keras.backend.clear_session() 
gc.collect()

# I found this trick to clear all Keras allocated memory in GPU.

In [None]:
# numba CUDA reset on device 0: select the device, close it, then select it again.
# Per the note above this is meant to free GPU memory still held by Keras
# (presumably so the cuML models below can use it - not verifiable from this file).
cuda.select_device(0)
cuda.close()
cuda.select_device(0)

# Check labels names

In [None]:
train.head()
# Label columns = every column except the first and the last.
# NOTE(review): assumes the first column is the image id and the last one is
# PatientID - confirm against train.csv.
targets = train.columns[1:-1]
print(targets)

# Split train and valid set: 95%/5%

* Todo: better train/test split - e.g. groupwise split. 
* sklearn crossval predict

In [None]:
# Deterministic hold-out: rows whose position modulo 20 equals 7 form the
# validation set, all other rows are used for training.
row_bucket = np.arange(emb_train.shape[0]) % 20
in_validation = row_bucket == 7
train_index = np.flatnonzero(~in_validation)
valid_index = np.flatnonzero(in_validation)
len(train_index), len(valid_index)

# Fit each label and predict test using the embeddings features

In [None]:
# Fit one random forest per label on the training embeddings, score it on the
# hold-out rows and write its test probabilities into the submission frame.
ytarget = train[targets].values[valid_index]
ypred = np.zeros( (len(valid_index), len(targets)) )

for n, target in tqdm(enumerate(targets), total=len(targets)):

    y_fit = train[target].values[train_index]

    if np.unique(y_fit).size < 2:
        # Degenerate label (likely with FAST_RUN): only one class in the training
        # rows, so there is no positive-class probability column to read. Predict
        # the constant instead of failing on `[:,1]`.
        constant = float(y_fit[0]) if y_fit.size else 0.0
        ypred[:,n] = constant
        test[target] = constant
    else:
        rf = cuml.ensemble.RandomForestClassifier(n_estimators=300, max_features=450, n_bins=16, output_type='numpy')

        rf.fit( emb_train[train_index], y_fit )

        # Column 1 holds the probability of the positive class.
        ypred[:,n] = rf.predict_proba(emb_train[valid_index])[:,1]
        test[target] = rf.predict_proba(emb_test)[:,1]

        del rf
        gc.collect()

    # roc_auc_score raises ValueError when y_true contains a single class;
    # report NaN for such labels rather than aborting the whole loop.
    if np.unique(ytarget[:,n]).size > 1:
        label_auc = roc_auc_score( ytarget[:,n], ypred[:,n] )
    else:
        label_auc = float('nan')
    print(n, label_auc, target )

# Pooled AUC over all (row, label) pairs of the hold-out set.
print('Final AUC:', roc_auc_score( ytarget.flatten(), ypred.flatten() ) )

In [None]:
test.head()

# Check test predictions distribution

In [None]:
# Mean predicted probability per label. numeric_only=True skips the string id
# column, which raises a TypeError in recent pandas versions.
test.mean(numeric_only=True)

# Submit

In [None]:
# Write the filled-in test frame as the submission file, without the index column.
test.to_csv('submission.csv', index=False)