In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
import matplotlib.pyplot as plt
from collections import Counter
import cv2
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# # detect and init the TPU
# tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()

# # instantiate a distribution strategy
# tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)

In [None]:
# Show the installed TensorFlow version as the cell output.
tf.__version__

In [None]:
# Initial batch size. The trailing expression is a leftover from the disabled
# TPU setup; BATCH_SIZE is reassigned further down before the generators use it.
BATCH_SIZE = 256 #16 * tpu_strategy.num_replicas_in_sync

In [None]:
# Load the training table and discard the two identifier columns.
# Disabled option: .drop_duplicates('Patient ID','last') before dropping the
# identifiers. With de-duplication only these classes remain usable:
# Effusion, Infiltration, Mass, Nodule, Atelectasis.
vanilla_df = pd.read_csv('../input/chestxray8-dataframe/train_df.csv')
vanilla_df = vanilla_df.drop(columns=['Image Index', 'Patient ID'])

In [None]:
# Preview the first rows of the loaded table.
vanilla_df.head()

In [None]:
# Keep only the rows that are not flagged 'No Finding'. The explicit .copy()
# makes illness_df an independent frame, so adding a column below does not
# raise pandas' SettingWithCopyWarning on a filtered slice of vanilla_df.
illness_df = vanilla_df[vanilla_df['No Finding'] != 1].copy()
# Mirror the 'No Finding' flag under the label name 'Normal' used later on.
illness_df['Normal'] = illness_df['No Finding']

In [None]:
# Discard every row flagged with Hernia.
illness_df = illness_df.loc[illness_df['Hernia'] != 1]

In [None]:
# Discard every row flagged with Pneumonia.
illness_df = illness_df.loc[illness_df['Pneumonia'] != 1]

In [None]:
# Summary statistics of the filtered frame.
illness_df.describe()

In [None]:
# Column totals of the filtered frame.
illness_df.sum()

In [None]:
# Remove the label columns that are no longer needed in illness_df.
illness_df = illness_df.drop(columns=['No Finding', 'Hernia', 'Pneumonia'])

In [None]:
# Remove the Hernia and Pneumonia label columns from the full table as well.
vanilla_df = vanilla_df.drop(columns=['Hernia', 'Pneumonia'])

In [None]:
# Preview illness_df after the column clean-up.
illness_df.head()

In [None]:
# Expose the 'No Finding' flag under the name 'Normal' (appended as the last
# column) and drop the original column.
vanilla_df = (
    vanilla_df
    .assign(Normal=vanilla_df['No Finding'])
    .drop(columns=['No Finding'])
)

In [None]:
# 'Normal' rows whose index label lies between 0 and 7200. The .loc slice is
# label-based, so it bounds the index range rather than the number of rows.
normal_df = vanilla_df.loc[vanilla_df['Normal'] == 1].loc[0:7200, :]
normal_df

In [None]:
# 'Effusion' rows whose index label lies between 0 and 40000. The .loc slice is
# label-based, so it bounds the index range rather than the number of rows.
effusion_df = vanilla_df.loc[vanilla_df['Effusion'] == 1].loc[0:40000, :]
effusion_df

In [None]:
# 'Infiltration' rows whose index label lies between 0 and 22000. The .loc
# slice is label-based, so it bounds the index range rather than the row count.
infiltration_df = vanilla_df.loc[vanilla_df['Infiltration'] == 1].loc[0:22000, :]
infiltration_df

In [None]:
# 'Atelectasis' rows whose index label lies between 0 and 33000. The .loc slice
# is label-based, so it bounds the index range rather than the number of rows.
atelectasis_df = vanilla_df.loc[vanilla_df['Atelectasis'] == 1].loc[0:33000, :]
atelectasis_df

In [None]:
# Column totals before the over-represented findings are removed.
illness_df.sum()

In [None]:
# Remove every Effusion row from illness_df; a capped Effusion subset
# (effusion_df) is added back when the balanced frame is assembled.
illness_df = illness_df.loc[illness_df['Effusion'] != 1]

In [None]:
# Column totals after removing the Effusion rows.
illness_df.sum()

## That's what we want now: to remove the excess rows


In [None]:
# Remove every Infiltration and Atelectasis row from illness_df; capped subsets
# of both findings are added back when the balanced frame is assembled.
illness_df = illness_df.loc[
    (illness_df['Infiltration'] != 1) & (illness_df['Atelectasis'] != 1)
]

In [None]:
# Column totals after removing the Infiltration and Atelectasis rows.
illness_df.sum()

In [None]:
# Stack the remaining illness rows with the capped per-class subsets.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0; the row order and the kept index labels are the same.
balanced_df = pd.concat(
    [illness_df, normal_df, atelectasis_df, infiltration_df, effusion_df]
)
path = balanced_df['FilePath']
balanced_df.head()

In [None]:
# Summary statistics of the assembled frame.
balanced_df.describe()

In [None]:
# Label columns fed to the model, in output order (13 classes).
col = [
    'Cardiomegaly',
    'Emphysema',
    'Effusion',
    'Infiltration',
    'Mass',
    'Nodule',
    'Atelectasis',
    'Pneumothorax',
    'Pleural_Thickening',
    'Fibrosis',
    'Edema',
    'Consolidation',
    'Normal',
]

In [None]:
# Column totals before duplicate file paths are removed.
balanced_df.sum()

In [None]:
# An image with several findings appears in more than one subset; keep the
# first occurrence of each file path.
balanced_df = balanced_df.drop_duplicates(subset='FilePath')

In [None]:
# Column totals after duplicate file paths are removed.
balanced_df.sum()

## Great, now that our dataset is roughly balanced, we can proceed

In [None]:
# Shuffle the rows with a fixed seed. The generators below split train and
# validation by row position, so an unseeded shuffle gave a different
# partition on every run. That also let a reloaded, partially trained model
# see former training images as "validation" data.
balanced_df = balanced_df.sample(frac=1, random_state=42)

## Reducing the batch size, as SGD consumes a large chunk of memory

In [None]:
# Batch size actually used by the data generators below (replaces the earlier 256).
BATCH_SIZE=64

In [None]:
# Training generator: per-image normalisation plus random augmentation.
# zca_whitening=True was removed. It needs statistics from tmp_gen.fit(...),
# which is never called, so Keras skipped the whitening and only emitted a
# warning for every image.
tmp_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    samplewise_center=True,
    samplewise_std_normalization=True,
    rotation_range=0.2,  # expressed in degrees
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.0,
    zoom_range=0.2,
    horizontal_flip=True,
    rescale=1/255.,
    validation_split=0.1)

# Validation generator: identical normalisation but no random augmentation,
# so validation metrics are measured on unmodified images. It uses the same
# validation_split, hence the same row partition of balanced_df as tmp_gen.
val_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    samplewise_center=True,
    samplewise_std_normalization=True,
    rescale=1/255.,
    validation_split=0.1)

# Arguments shared by both flows. 'raw' class mode yields the label columns
# as a numeric array, one multi-hot row per image.
flow_kwargs = dict(
    dataframe=balanced_df,
    directory=None,
    x_col='FilePath',
    y_col=col,
    class_mode="raw",
    batch_size=BATCH_SIZE,
    shuffle=True,
    target_size=(224, 224),
)

train_data = tmp_gen.flow_from_dataframe(subset="training", **flow_kwargs)

val_data = val_gen.flow_from_dataframe(subset='validation', **flow_kwargs)

In [None]:
from tensorflow.keras.models import Model, load_model
from tensorflow.keras import layers
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.optimizers import Adam, RMSprop, Adadelta, Adagrad
import seaborn as sns

In [None]:
# creating index to class dictionary
# Map each output index to its class name, following the order of `col`.
idx_class = dict(enumerate(col))

In [None]:
def get_CAM(processed_image, actual_label, layer_name='conv2d_127'):
    """Compute a gradient-weighted activation heat map for one image.

    The notebook-level `model` is wrapped so that it returns both the output
    of `layer_name` and the final predictions. The binary cross-entropy
    between the predictions and `actual_label` is differentiated with respect
    to that layer's output, and the per-channel mean gradient weights the
    channels.

    Args:
        processed_image: Input passed unchanged to the model; expected to be
            a batch holding a single image.
        actual_label: Ground-truth label vector for that image, one entry per
            model output. It is cast to float32.
        layer_name: Name of the layer of `model` whose output is visualised.
            The layer output is assumed to be (batch, height, width, channels).

    Returns:
        A 2-D numpy array (height, width of the layer output) with
        non-negative values, scaled to [0, 1]. It is all zeros when the
        weighted activations have no positive entry.
    """
    model_grad = Model([model.inputs], [model.get_layer(layer_name).output, model.output])

    with tf.GradientTape() as tape:
        conv_output_values, predictions = model_grad(processed_image)

        # watch the conv_output_values
        tape.watch(conv_output_values)

        # make sure actual_label is a float, like the rest of the loss calculation
        actual_label = tf.cast(actual_label, dtype=tf.float32)

        # add a tiny value to avoid log of 0
        smoothing = 0.00001

        # Element-wise binary cross entropy over ALL output classes. The
        # previous version compared only output column 1 against every entry
        # of the label vector, a leftover from a two-class example.
        loss = -1 * (actual_label * tf.math.log(predictions + smoothing)
                     + (1 - actual_label) * tf.math.log(1 - predictions + smoothing))
        print(f"binary loss: {loss}")

    # gradient of the loss with respect to the outputs of the chosen layer
    grads_values = tape.gradient(loss, conv_output_values)
    # mean over batch, height and width leaves one weight per channel
    grads_values = tf.keras.backend.mean(grads_values, axis=(0, 1, 2)).numpy()

    # Drop only the batch axis. np.squeeze would also remove a spatial axis
    # of size 1 and break the channel indexing below.
    conv_output_values = conv_output_values.numpy()[0]
    print(conv_output_values.shape)

    # Weight every channel by its gradient. Broadcasting over the last axis
    # handles any channel count instead of a hard-coded 128.
    weighted_channels = conv_output_values * grads_values
    heatmap = np.mean(weighted_channels, axis=-1)  # mean over channels --> (h, w)

    heatmap = np.maximum(heatmap, 0)  # keep only the positive contributions
    peak = heatmap.max()
    if peak > 0:  # avoid 0/0 when nothing is positive
        heatmap = heatmap / peak

    return heatmap

In [None]:
def show_sample():
    """Plot one validation image with its prediction, a feature map and its CAM.

    Draws the next batch from the notebook-level `val_data` and takes its
    first image. The image is run through `vis_model` and `model`, a heat map
    is built with `get_CAM`, and a 2x2 figure is shown: the image, one channel
    of the first visualised layer, the heat map, and the heat map blended
    over the image.

    Returns:
        The list of per-layer outputs from `vis_model.predict`, unmodified.
    """
    images, labels = next(val_data)
    sample_image = images[0]   # first image of the batch, without the batch axis
    sample_label = labels[0]   # matching label vector

    sample_image_processed = np.expand_dims(sample_image, axis=0)  # add the batch axis back

    activations = vis_model.predict(sample_image_processed)  # output of every visualised layer

    pred_label = np.argmax(model.predict(sample_image_processed), axis=-1)[0]
    pred_label = idx_class[pred_label]

    print(activations[0].shape)
    # Last channel of the first visualised layer for this image. It is copied
    # so the normalisation below does not modify the returned `activations`.
    sample_activation = activations[0][0, :, :, -1].copy()

    sample_activation -= sample_activation.mean()
    activation_std = sample_activation.std()
    if activation_std > 0:  # a constant feature map would otherwise give 0/0
        sample_activation /= activation_std

    sample_activation *= 255
    sample_activation = np.clip(sample_activation, 0, 255).astype(np.uint8)

    heatmap = get_CAM(sample_image_processed, sample_label)
    # cv2.resize expects the target size as (width, height)
    heatmap = cv2.resize(heatmap, (sample_image.shape[1], sample_image.shape[0]))
    heatmap = heatmap * 255
    heatmap = np.clip(heatmap, 0, 255).astype(np.uint8)
    heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_HOT)
    # OpenCV colour maps are BGR while matplotlib displays RGB
    heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)
    converted_img = sample_image
    super_imposed_image = cv2.addWeighted(converted_img, 0.8, heatmap.astype('float32'), 2e-3, 0.0)

    sample_label = idx_class[np.argmax(sample_label)]

    f, ax = plt.subplots(2, 2, figsize=(15, 8))

    ax[0, 0].imshow(sample_image)
    ax[0, 0].set_title(f"True label: {sample_label} \n Predicted label: {pred_label}")
    ax[0, 0].axis('off')

    ax[0, 1].imshow(sample_activation)
    ax[0, 1].set_title("Random feature map")
    ax[0, 1].axis('off')

    ax[1, 0].imshow(heatmap)
    ax[1, 0].set_title("Class Activation Map")
    ax[1, 0].axis('off')

    ax[1, 1].imshow(super_imposed_image)
    ax[1, 1].set_title("Activation map superimposed")
    ax[1, 1].axis('off')
    plt.tight_layout()
    plt.show()

    return activations

## As we can see, I'm refining only the last layer: 14k params to train

In [None]:
def build_model():
    """Build and compile the multi-label classifier on top of the saved encoder.

    Loads the encoder from '../input/chet-xray-encoder-model/encoder_model.h5',
    freezes all of its layers, and stacks Flatten -> BatchNormalization ->
    Dense(64, relu) -> Dropout(0.4) -> Dense(len(col), sigmoid) on its output.
    The model is compiled with binary cross-entropy, the 'adam' optimizer and
    an AUC metric, and its summary is printed.

    Returns:
        The compiled Keras model.
    """
    # load the pretrained encoder used as the feature extractor
    base_model = load_model('../input/chet-xray-encoder-model/encoder_model.h5')

    # freeze every encoder layer; only the new head below is trained
    for layer in base_model.layers:
        layer.trainable = False

    # classification head on top of the encoder output
    output = layers.Flatten()(base_model.output)
    output = layers.BatchNormalization()(output)
    output = layers.Dense(64, activation='relu')(output)
    output = layers.Dropout(0.4)(output)
    # one sigmoid unit per label column
    output = layers.Dense(len(col), activation='sigmoid')(output)

    # set the inputs and outputs of the model
    model = Model(base_model.input, output)

    # The metric gets an explicit name so the history keys stay 'auc' and
    # 'val_auc' even when this cell is run again in the same session. Keras
    # would otherwise name later instances 'auc_1', 'auc_2', ...
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=[tf.keras.metrics.AUC(name='auc')])

    # display the summary
    model.summary()

    return model

# Build the classifier and bind it to the notebook-level `model` used by the cells below.
model=build_model()

# ***Let's plot the outputs before model training***

In [None]:
# select all the layers for which you want to visualize the outputs and store it in a list
outputs = [ layer.output for layer in model.layers[1:] ] # all layers except the input layer

# Define a new model that generates the above output
vis_model = Model(model.input , outputs)

# store the layer names we are interested in
layer_names = []
for layer in outputs:
    layer_names.append( layer.name.split("/")[0] )

    
print("Layers that will be used for visualization: ")
print(layer_names)
# Choose an image index to show, or leave it as None to get a random image
activations = show_sample()

# Training the model

In [None]:
# defining our call backs
# Checkpoint callback writing to "my_model.h5"; with save_best_only the file is
# only overwritten when the monitored quantity improves.
cb = tf.keras.callbacks.ModelCheckpoint(
    filepath="my_model.h5",
    save_best_only=True,
)

In [None]:
# Train for 50 epochs with the checkpoint callback attached.
history = model.fit(
    train_data,
    validation_data=val_data,
    epochs=50,
    callbacks=[cb],
)

# Using a partially trained model to continue the training

In [None]:
# instantiating the model in the strategy scope creates the model on the TPU
# Replace `model` with the previously saved 13-class model loaded from disk.
# (No distribution strategy scope is active here; the TPU setup is commented out.)
model = load_model('../input/nih-13classes-pretrained-model/13-class-model.h5')


In [None]:
# Continue training the loaded model for 15 epochs.
history = model.fit(
    train_data,
    validation_data=val_data,
    epochs=15,
    callbacks=[cb],
)

In [None]:
# Plot the loss and AUC curves recorded by the latest model.fit(...) call.
hist = model.history.history
# One x position per recorded epoch. The former hard-coded range(12) raised a
# shape-mismatch error whenever the run did not last exactly 12 epochs.
epochs_run = range(len(hist['loss']))
# Keras may suffix the metric name (e.g. 'auc_1'), so find the key by prefix.
auc_key = next((key for key in hist if key.startswith('auc')), 'auc')

fig, (ax1, ax2) = plt.subplots(2, 1)

fig.suptitle('Train vs Valid')

ax1.plot(epochs_run, hist['loss'], color='b', label='loss')
ax1.plot(epochs_run, hist['val_loss'], color='r', label='val_loss')
ax1.set_ylabel('Loss')
ax1.legend()

ax2.plot(epochs_run, hist[auc_key], label='auc')
ax2.plot(epochs_run, hist['val_' + auc_key], label='val_auc')
ax2.set_ylabel('auc')
ax2.set_xlabel('Epochs')
ax2.legend()

ax2.set_ylim([0,1])
ax1.set_ylim([0,1])

## Let's train all the layers to see if there is an improvement


In [None]:
# Layer and parameter overview before the layers are unfrozen.
model.summary()

In [None]:
# Unfreeze every layer so the whole network is fine-tuned.
for l in model.layers:
    l.trainable = True

# Keras fixes the set of trainable weights when compile() is called, so a
# change to `trainable` is ignored by fit() until the model is compiled
# again. The settings mirror build_model().
# NOTE(review): this creates a fresh 'adam' optimizer with the default
# learning rate; lower it if full fine-tuning turns out unstable.
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=[tf.keras.metrics.AUC(name='auc')])

In [None]:
# Layer and parameter overview after every layer was marked trainable.
model.summary()

In [None]:
# Train for 15 more epochs after marking all layers trainable.
history = model.fit(
    train_data,
    validation_data=val_data,
    epochs=15,
    callbacks=[cb],
)

In [None]:
# Wrap the same inputs and outputs in a new Model object (the layers are
# shared with `model`) and compile it with SGD plus momentum.
sgd_model = Model(inputs=model.inputs, outputs=model.output)

sgd_optimizer = tf.keras.optimizers.SGD(0.005, 0.9)  # learning rate, momentum
sgd_model.compile(
    loss='binary_crossentropy',
    optimizer=sgd_optimizer,
    metrics=[tf.keras.metrics.AUC(), 'accuracy'],
)

In [None]:
# Layer and parameter overview of the SGD-compiled model.
sgd_model.summary()

In [None]:
# Save the model before the SGD run. This is the same file name that the
# checkpoint callback `cb` writes to.
sgd_model.save('./my_model.h5')

In [None]:
# Train the SGD-compiled model for 20 epochs.
history = sgd_model.fit(
    train_data,
    validation_data=val_data,
    epochs=20,
    callbacks=[cb],
)

In [None]:
# Save the final state of the SGD-trained model. This overwrites the file
# that the checkpoint callback `cb` writes to.
sgd_model.save('./my_model.h5')

# Plotting after going back and forth between training runs, which will show nothing useful

In [None]:
# Plot the curves of the most recent training run. `history` is the object
# returned by the latest fit call, whereas `model.history` still holds the
# earlier run, because the SGD training was done on `sgd_model`.
hist = history.history
# One x position per recorded epoch. The former hard-coded range(12) raised a
# shape-mismatch error whenever the run did not last exactly 12 epochs.
epochs_run = range(len(hist['loss']))
# Keras may suffix the metric name (e.g. 'auc_1'), so find the key by prefix.
auc_key = next((key for key in hist if key.startswith('auc')), 'auc')

fig, (ax1, ax2) = plt.subplots(2, 1)

fig.suptitle('Train vs Valid')

ax1.plot(epochs_run, hist['loss'], color='b', label='loss')
ax1.plot(epochs_run, hist['val_loss'], color='r', label='val_loss')
ax1.set_ylabel('Loss')
ax1.legend()

ax2.plot(epochs_run, hist[auc_key], label='auc')
ax2.plot(epochs_run, hist['val_' + auc_key], label='val_auc')
ax2.set_ylabel('auc')
ax2.set_xlabel('Epochs')
ax2.legend()

ax2.set_ylim([0,1])
ax1.set_ylim([0,1])

In [None]:
# select all the layers for which you want to visualize the outputs and store it in a list
outputs = [ layer.output for layer in model.layers[1:] ] # all layers except the input layer

# Define a new model that generates the above output
vis_model = Model(model.input , outputs)

# store the layer names we are interested in
layer_names = []
for layer in outputs:
    layer_names.append( layer.name.split("/")[0] )

    
print("Layers that will be used for visualization: ")
print(layer_names)
# Choose an image index to show, or leave it as None to get a random image
activations = show_sample()

In [None]:
# Confusion matrix on one validation batch
from sklearn.metrics import confusion_matrix
import sklearn
import seaborn as sns

# One (images, labels) batch from the validation generator
test_generator = next(val_data)

# Reduce the multi-hot labels and the sigmoid outputs to a single class index
# each (argmax). Only the strongest finding per image is compared.
y_true = np.argmax(test_generator[1], axis=1)
y_predict = np.argmax(model.predict(test_generator[0]), axis=1)

# Pin the label set so the matrix is always len(col) x len(col). Without it,
# classes absent from this batch are dropped and the tick labels below no
# longer line up with the rows and columns.
matrix = confusion_matrix(y_true, y_predict, labels=list(range(len(col))))
print(matrix)

# Confusion matrix plotting: rows are true classes, columns are predictions
ax = sns.heatmap(matrix, annot=True, xticklabels=col, yticklabels=col, cmap='Blues')
ax.set_xlabel('Predicted')
ax.set_ylabel('True')

In [None]:
from tensorflow.keras.preprocessing import image

# Path of the image to classify
path = '../input/nawwar/1.jpeg'

# Load it at the input size the model expects
img = image.load_img(path, target_size=(224, 224))

x = image.img_to_array(img)
# Apply the same rescaling and per-image normalisation as the training
# generator. The raw 0-255 pixels were previously fed to the model directly,
# which does not match what it was trained on.
x = tmp_gen.standardize(x)
x = np.expand_dims(x, axis=0)  # add the batch axis
images = np.vstack([x])

plt.imshow(img)  # show the image
# Predict and report the class with the highest score
print('Class is: ', idx_class[np.argmax(model.predict(x))])

In [None]:
def classify(image):
    """Predict the class of one image and plot it next to a feature map.

    Args:
        image: A single image without batch axis, as a PIL image or an
            array-like of shape (height, width, channels). It must already
            have the size the model expects.

    Returns:
        The list of per-layer outputs from the notebook-level `vis_model`,
        unmodified.
    """
    sample_image = image

    # Apply the normalisation of the training generator on a float copy. The
    # raw image was previously fed to the model as-is. The copy also keeps the
    # caller's array untouched.
    # NOTE(review): an image that is already standardised should come out
    # nearly unchanged, since per-image centring and scaling is
    # scale-invariant; confirm for your inputs.
    model_input = tmp_gen.standardize(np.array(sample_image, dtype='float32'))
    sample_image_processed = np.expand_dims(model_input, axis=0)  # add the batch axis

    activations = vis_model.predict(sample_image_processed)  # output of every visualised layer

    pred_label = np.argmax(model.predict(sample_image_processed), axis=-1)[0]
    pred_label = idx_class[pred_label]

    print(activations[0].shape)
    # First three channels of the first visualised layer, shown as an RGB
    # image. They are copied so the normalisation below does not modify the
    # returned `activations`.
    sample_activation = activations[0][0, :, :, :3].copy()

    sample_activation -= sample_activation.mean()
    activation_std = sample_activation.std()
    if activation_std > 0:  # a constant feature map would otherwise give 0/0
        sample_activation /= activation_std

    sample_activation *= 255
    sample_activation = np.clip(sample_activation, 0, 255).astype(np.uint8)

    f, ax = plt.subplots(1, 2, figsize=(15, 8))

    ax[0].imshow(sample_image)
    ax[0].set_title(f"Predicted label: {pred_label}")
    ax[0].axis('off')

    ax[1].imshow(sample_activation)
    ax[1].set_title("Random feature map")
    ax[1].axis('off')

    plt.tight_layout()
    plt.show()

    return activations

In [None]:
from tensorflow.keras.preprocessing import image

# Image to classify, loaded at the input size the model expects.
path = '../input/nawwar/1.jpeg'
img = image.load_img(path, target_size=(224, 224))

# Array form of the image (kept for inspection; classify receives `img`).
x = image.img_to_array(img)

# Predict, plot, and keep the per-layer activations.
c = classify(img)

In [None]:
# NOTE(review): from_keras_model(...) appears to return a TFLiteConverter, not a
# converted model, and no .convert() call is visible in this notebook. Confirm
# whether a conversion step is missing here.
lite_model=tf.lite.TFLiteConverter.from_keras_model(model)

In [None]:
# Display the object created in the previous cell.
lite_model

In [None]:
# Shell command: run the tflite_convert CLI on the saved 13-class .h5 model and
# write the result to ./litemodel.tflite.
!tflite_convert  --keras_model_file=../input/nih-13classes-pretrained-model/13-class-model.h5  --output_file=./litemodel.tflite