In [4]:
# from google.colab import drive
# drive.mount('/content/drive')

Importing necessary libraries

In [5]:
import tensorflow as tf
import cv2
import numpy as np
import os
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Dense,Flatten,Conv2D,Activation,Dropout
from keras import backend as K
import keras
from keras.models import Sequential, Model
from keras.models import load_model
from keras.optimizers import SGD
from keras.callbacks import EarlyStopping,ModelCheckpoint
from keras.layers import MaxPool2D
from google.colab.patches import cv2_imshow
import pandas as pd
from keras import losses
from keras.losses import BinaryCrossentropy

**Reading training and test datasets**

In [6]:
df = pd.read_csv(r'/content/drive/MyDrive/Machine Learning project/Balanced_training_set/Balanced/balanced_training_data.csv')
df.head(5)

FileNotFoundError: ignored

In [None]:
df['binary_label'] = np.where(df['label']== 'CE', 0, 1)  #CE is 0, LAA is 1

In [None]:
df.head()

In [None]:
testdf = pd.read_csv(r'/content/drive/MyDrive/Machine Learning project/Balanced_training_set/Balanced/balanced_testing_data.csv')
testdf['binary_label'] = np.where(testdf['label']== 'CE', 0, 1)

testdf.head()

**Image Generators**

In [None]:
# Making transformations on the images - data augmentation to train the model

train_datagen = ImageDataGenerator(zoom_range=0.15,width_shift_range=0.2,height_shift_range=0.2,shear_range=0.15)
test_datagen = ImageDataGenerator()

train_generator = train_datagen.flow_from_dataframe(dataframe=df, directory ='/content/drive/MyDrive/Machine Learning project/Balanced_training_set/Balanced/balanced_train',
                                                    x_col='image_id', y_col='label',
                                                    target_size=(224, 224),batch_size=32,shuffle=True,class_mode='binary')

test_generator = test_datagen.flow_from_dataframe(dataframe = testdf, directory= '/content/drive/MyDrive/Machine Learning project/Balanced_training_set/Balanced/balanced_test',
                                                  x_col = 'image_id', y_col = 'label',
                                                  target_size=(224,224),batch_size=32,shuffle=False,class_mode='binary')

**Creating VGG16 model**

In [None]:
# Creating the VGG16 architechture with convolutional layers (Conv2D), max pooling layers (MaxPool2D), and fully connected layers (Dense). A total of 16 layers

def VGG16():

  model = Sequential()

  model.add(Conv2D(input_shape=(224,224,3),filters=64,kernel_size=(3,3),padding='same', activation='relu'))
  model.add(Conv2D(filters=64,kernel_size=(3,3),padding='same', activation='relu'))
  model.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))
  model.add(Conv2D(filters=128, kernel_size=(3,3), padding='same', activation='relu'))
  model.add(Conv2D(filters=128, kernel_size=(3,3), padding='same', activation='relu'))
  model.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))
  model.add(Conv2D(filters=256, kernel_size=(3,3), padding='same', activation='relu'))
  model.add(Conv2D(filters=256, kernel_size=(3,3), padding='same', activation='relu'))
  model.add(Conv2D(filters=256, kernel_size=(3,3), padding='same', activation='relu'))
  model.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))
  model.add(Conv2D(filters=512, kernel_size=(3,3), padding='same', activation='relu'))
  model.add(Conv2D(filters=512, kernel_size=(3,3), padding='same', activation='relu'))
  model.add(Conv2D(filters=512, kernel_size=(3,3), padding='same', activation='relu'))
  model.add(MaxPool2D(pool_size=(2,2),strides=(2,2)))
  model.add(Conv2D(filters=512, kernel_size=(3,3), padding='same', activation='relu'))
  model.add(Conv2D(filters=512, kernel_size=(3,3), padding='same', activation='relu'))
  model.add(Conv2D(filters=512, kernel_size=(3,3), padding='same', activation='relu'))
  model.add(MaxPool2D(pool_size=(2,2),strides=(2,2),name='vgg16'))
  model.add(Flatten(name='flatten'))
  model.add(Dense(256, activation='relu', name='fc1'))
  model.add(Dense(128, activation='relu', name='fc2'))
  model.add(Dense(1, activation='sigmoid', name='output'))
  

  return model

In [None]:
# Showing the parameters of the built model, with the number of nodes in each layer

model=VGG16()
model.summary()

In [None]:
# Loading the weights of the Image Net pretrained model to save time, and only fine-tunning our models last layers.

Vgg16 = Model(inputs=model.input, outputs=model.get_layer('vgg16').output)
Vgg16.load_weights('/content/drive/MyDrive/Machine Learning project/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5')

In [None]:
# Creating a criteria of early stopping to prevent the model from over-training 

es=EarlyStopping(monitor='val_accuracy', mode='max', verbose=1, patience=20)

In [None]:
# Our model will use Stochastic Gradient Descent with a learning rate of 1e-6 and momentum of 0.9, and the loss function is a binary cross entropy since our target is only two classes

opt = SGD(learning_rate=1e-6, momentum=0.9)
model.compile(loss=tf.keras.losses.binary_crossentropy, optimizer=opt, metrics=['accuracy', 'AUC', 'Precision', 'Recall']) # tf.keras.losses.BinaryCrossentropy(from_logits=True)

In [None]:
# Locking the layers we are not training for the weights to remain the same 

for layer in Vgg16.layers:
  layer.trainable = False

for layer in model.layers:
  print(layer, layer.trainable)

In [None]:
mc = ModelCheckpoint('/content/drive/MyDrive/vgg16_best_model_balanced2.h5', monitor='val_accuracy', mode='max', save_best_only=True)

In [None]:
# Training our model 

H = model.fit_generator(train_generator,epochs=30,verbose=1,callbacks=[mc,es])

In [None]:
#plotting the precision and recall for test and train
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(10,8))
plt.plot(H.history['recall'])
plt.plot(H.history['precision'])
plt.title('Model Performance')
plt.ylabel('Score')
plt.xlabel('Epoch')
plt.legend(['Recall','Precision'])
plt.show()

In [None]:
#plotting the AUC for test and train

fig, ax = plt.subplots(figsize=(10,8))
plt.plot(H.history['auc'])
# plt.plot(H.history['val_auc'])
plt.title('AUC score')
plt.ylabel('AUC score')
plt.xlabel('Epoch')
plt.legend(['AUC'])
plt.show()

In [None]:
# plotting the accuracy and loss

fig, ax = plt.subplots(figsize=(10,8))
plt.plot(H.history['accuracy'])
plt.plot(H.history['loss'])
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Accuracy','Loss'])
plt.show()

### **Metrics for model**

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
model.load_weights('/content/drive/MyDrive/vgg16_best_model_balanced2.h5')

In [None]:
y_test = np.array(testdf['binary_label'])

print(y_test)

In [None]:
# Creating a list with all the paths for the test images to later read each one of them 

test_path = '/content/drive/MyDrive/Machine Learning project/Balanced_training_set/Balanced/balanced_test/'  # Folder path 

x_test =  np.array(testdf['image_id'])
final_test_path = []

for i in x_test:
  path = test_path + i
  final_test_path.append(path)

In [None]:
#Making predictions for all test images, the returned predictions are probabiblities of the image belonging to the positivie class
from keras.applications.vgg16 import preprocess_input, decode_predictions

y_pred = []

for path in final_test_path:
  img = tf.keras.utils.load_img(path, target_size=(224, 224))
  img_data = tf.keras.utils.img_to_array(img)
  img_data = np.expand_dims(img_data, axis=0)
  img_data = preprocess_input(img_data)
  preds = model.predict(img_data)
  y_pred.append(preds[0][0])

In [None]:
# Function to get the predictions on binary format based on treshold and print precision, recall, f1 score, accuracy 
from sklearn.metrics import classification_report, confusion_matrix

def class_report(y_pred, treshold):
  y_pred = np.array(y_pred)
  y_pred_binary = np.where(y_pred > treshold, 1,0)
  print(classification_report(y_test, y_pred_binary))

  return y_pred_binary

In [None]:
# Function to plot a confusion matrix of the test data, with CE = 0 and LAA = 1

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure

def conf_matrix(y_test, y_pred_binary):
  cm = confusion_matrix(y_test, y_pred_binary)
  disp = ConfusionMatrixDisplay(confusion_matrix=cm,display_labels=['CE', 'LAA'])
  fig, ax = plt.subplots(figsize=(8,8))
  disp.plot(ax=ax)
  plt.show()

**Treshold = 0.8**

In [None]:
# CE = 0   LAA = 1
y_pred_binary = class_report(y_pred, 0.8)

In [None]:
conf_matrix(y_test, y_pred_binary)

**Treshold = 0.65**

In [None]:
y_pred_binary = class_report(y_pred, 0.65)

In [None]:
conf_matrix(y_test, y_pred_binary)

**Treshold = 0.5**

In [None]:
y_pred_binary = class_report(y_pred, 0.5)

In [None]:
conf_matrix(y_test, y_pred_binary)

**Treshold = 0.3**

In [None]:
y_pred_binary = class_report(y_pred, 0.3)

In [None]:
conf_matrix(y_test, y_pred_binary)

**ROC Curve**

In [None]:
from sklearn import metrics

fpr, tpr, _ = metrics.roc_curve(y_test,  y_pred)

#create ROC curve
plt.plot(fpr,tpr)
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()

In [None]:
# Getting the Area Under Curve score

from sklearn.metrics import roc_auc_score
print(roc_auc_score(y_test, y_pred))

# Model Evaluation

In [None]:
# Test Metrics

results = model.evaluate(x = test_generator, batch_size=32, verbose = 1, return_dict = True)