In [1]:
#!pip install opencv-python

In [2]:
# TRANSFER LEARNING FINE TUNING -  MASS CLASSIFICATION USING SEGMENTATION MODEL OUTPUT BCDR MASS LESION
import numpy as np
import tensorflow.keras
import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
#from tensorflow.keras.utils import np_utils
#%matplotlib inline
# Configure the image data format on the same Keras that the model is built
# with (tensorflow.keras). The standalone `keras` package is not guaranteed to
# be the same installation in every environment, in which case the setting
# would be applied to a backend the model never uses.
tensorflow.keras.backend.set_image_data_format('channels_last')
import tensorflow as tf
#import pandas as pd
import os
#import matplotlib.pyplot as plt
#from tensorflow.python.keras.preprocessing import image
from tensorflow.keras.optimizers import Adam
from sklearn import metrics
from tensorflow.keras.layers import ReLU
from tensorflow.keras.layers import Input, Flatten, Dense, Dropout, Convolution2D, Conv2D, MaxPooling2D, Lambda, GlobalMaxPooling2D, GlobalAveragePooling2D, BatchNormalization, Activation
from tensorflow.keras import Sequential, Model

# Run/version identifier. Its last character is the cross-validation fold
# number; other cells read it as vid[-1] to build directory and file names.
vid = 'v002_5'

In [3]:
# Create CNN: ImageNet-pretrained VGG16 convolutional base (all of its layers
# frozen) followed by a small fully connected head with one sigmoid output.

IMG_SIZE = 224  # height and width of the square 3-channel network input

from tensorflow.keras.applications.vgg16 import VGG16


def _dense_relu(tensor, units):
    """Apply a Dense layer with `units` outputs, then a separate ReLU layer."""
    tensor = Dense(units)(tensor)
    return ReLU()(tensor)


base_model = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
)

# Freeze every layer of the pretrained base.
for pretrained_layer in base_model.layers:
    pretrained_layer.trainable = False

# Head: global average pooling -> Dense(256)+ReLU -> Dropout(0.5)
#       -> Dense(128)+ReLU -> Dense(1, sigmoid)
x = GlobalAveragePooling2D()(base_model.output)
x = _dense_relu(x, 256)
x = Dropout(0.5)(x)
x = _dense_relu(x, 128)
preds = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=preds)

In [4]:
# Print the layer-by-layer table (layer type, output shape, parameter count) of the assembled model.
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [5]:
# Total number of layers in `model`; "Training stage 2" derives its freeze boundary from this count.
len(model.layers)

26

In [6]:
# Initial compilation: Adam with learning rate 1e-3, binary cross-entropy loss
# for the single sigmoid output, accuracy reported as the metric.
model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=1e-3),
    metrics=['accuracy'],
)

In [7]:
# Image preprocessing and data augmentation
import cv2

batch_size = 8

# Random transforms applied to training images only. Every image is also
# multiplied by 1/255 (rescale).
augmentation_options = dict(
    rescale=1./255,
    rotation_range=90,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode="constant",
)
train_datagen = ImageDataGenerator(**augmentation_options)

# Validation images are only rescaled, never augmented.
valid_datagen = ImageDataGenerator(rescale=1./255)

# Settings shared by the training and validation directory iterators.
flow_options = dict(
    target_size=(IMG_SIZE, IMG_SIZE),
    color_mode='rgb',
    batch_size=batch_size,
    class_mode='binary',
    shuffle=True,
)

# Directory names end with the fold number taken from `vid`.
fold_suffix = vid[-1]
train_generator = train_datagen.flow_from_directory('train0' + fold_suffix, **flow_options)
valid_generator = valid_datagen.flow_from_directory('valid0' + fold_suffix, **flow_options)


Found 184 images belonging to 2 classes.
Found 44 images belonging to 2 classes.


In [8]:
#Callbacks
def get_callbacks(name_weights, patience_lr, patience):
    """Build the callback list shared by the training stages.

    Args:
        name_weights: File path passed to ModelCheckpoint; with
            save_best_only=True the file is only written when 'val_loss'
            improves.
        patience_lr: `patience` argument of ReduceLROnPlateau, which watches
            the training 'loss' and scales the learning rate by 0.1.
        patience: `patience` argument of EarlyStopping, which watches
            'val_loss' and restores the best weights.

    Returns:
        list: [ModelCheckpoint, ReduceLROnPlateau, EarlyStopping], in that order.
    """
    min_improvement = 1e-4  # min_delta shared by the two plateau-based callbacks

    checkpoint = ModelCheckpoint(
        name_weights,
        monitor='val_loss',
        mode='min',
        save_best_only=True,
    )

    plateau_options = dict(
        monitor='loss',
        mode='min',
        factor=0.1,
        patience=patience_lr,
        min_delta=min_improvement,
        verbose=2,
    )
    lr_reducer = ReduceLROnPlateau(**plateau_options)

    stopper = EarlyStopping(
        monitor='val_loss',
        mode='auto',
        patience=patience,
        min_delta=min_improvement,
        restore_best_weights=True,
        verbose=2,
    )

    return [checkpoint, lr_reducer, stopper]


In [9]:
#Training stage 1: train with the model as compiled above (pretrained VGG16 layers frozen)

# Steps per epoch are rounded UP so the final partial batch is included.
# Floor division dropped the remainder: with a shuffled validation generator
# the val_loss that drives ModelCheckpoint / EarlyStopping could then be
# computed on a different subset of images each epoch, and a generator holding
# fewer images than one batch would produce 0 steps.
step_size_train = (train_generator.n + train_generator.batch_size - 1) // train_generator.batch_size
step_size_valid = (valid_generator.n + valid_generator.batch_size - 1) // valid_generator.batch_size

# Best weights (lowest val_loss) are saved to this file; the name ends with the fold number.
name_weights = "mass_class_VGG16_v002_"+vid[-1]+".h5"

# patience_lr=10 is larger than the 3 epochs run here, so the learning rate
# is not reduced during this stage.
callbacks = get_callbacks(name_weights=name_weights, patience_lr=10, patience=10)

# Model.fit accepts generators directly; Model.fit_generator is deprecated.
model.fit(train_generator,
          steps_per_epoch=step_size_train,
          validation_data=valid_generator,
          validation_steps=step_size_valid,
          epochs=3,
          verbose=1,
          callbacks=callbacks)

  model.fit_generator(generator=train_generator,


Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x226736a4af0>

In [10]:
#Training stage 2: continue training with the last ~25% of the layers trainable, at learning rate 1e-4

# Index of the first trainable layer: the first 75% of the layers stay frozen.
# NOTE(review): for the 26-layer model reported earlier in this notebook this
# evaluates to 20. That boundary presumably falls after the last VGG16 layer,
# which would leave only the new head trainable, the same set as in stage 1.
# Confirm against model.summary() that this is the intended split.
top_layer_num=round(len(model.layers)*0.75)

for layer in model.layers[:top_layer_num]:
    layer.trainable=False
for layer in model.layers[top_layer_num:]:  #last ~25% layers are trainable
    layer.trainable=True

# Recompile after changing the `trainable` flags so the change is taken into
# account, and lower the learning rate for fine-tuning.
model.compile(optimizer=tensorflow.keras.optimizers.Adam(learning_rate=1e-4), loss='binary_crossentropy', metrics=['accuracy'])

# patience_lr=10 with only 10 epochs: the learning rate is not reduced during this stage.
callbacks = get_callbacks(name_weights = name_weights, patience_lr=10, patience=10)

# Model.fit accepts generators directly; Model.fit_generator is deprecated.
# step_size_train / step_size_valid and name_weights come from the stage-1 cell.
model.fit(train_generator,
          steps_per_epoch=step_size_train,
          validation_data=valid_generator,
          validation_steps=step_size_valid,
          epochs=10,
          verbose=1,
          callbacks=callbacks)

  model.fit_generator(generator=train_generator,


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2266bfdb760>

In [None]:
#Training stage 3: fine-tune the whole network at learning rate 1e-5
for layer in model.layers:  #All layers are trainable
    layer.trainable=True

# Recompile after changing the `trainable` flags so the change is taken into
# account; the learning rate is lowered again for full fine-tuning.
model.compile(optimizer=tensorflow.keras.optimizers.Adam(learning_rate=1e-5), loss='binary_crossentropy', metrics=['accuracy'])

# Shorter patience than in the earlier stages: with up to 100 epochs both the
# learning-rate reduction and early stopping can trigger here.
callbacks = get_callbacks(name_weights = name_weights, patience_lr=5, patience=5)

# Model.fit accepts generators directly; Model.fit_generator is deprecated.
# step_size_train / step_size_valid and name_weights come from the stage-1 cell.
model.fit(train_generator,
          steps_per_epoch=step_size_train,
          validation_data=valid_generator,
          validation_steps=step_size_valid,
          epochs=100,
          verbose=1,
          callbacks=callbacks)

  model.fit_generator(generator=train_generator,


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100

In [None]:
# Count the files in the two class folders of the current fold's validation directory.
import os, os.path


def _count_files(folder):
    """Return the number of entries in `folder` for which os.path.isfile is true."""
    candidate_paths = (os.path.join(folder, entry) for entry in os.listdir(folder))
    return sum(1 for path in candidate_paths if os.path.isfile(path))
# Ref:  https://stackoverflow.com/questions/2632205/how-to-count-the-number-of-files-in-a-directory-using-python


_valid_root = 'valid0' + vid[-1]
VDIR0 = _valid_root + '/ben'
VDIR1 = _valid_root + '/mal'

valid0 = _count_files(VDIR0)
valid1 = _count_files(VDIR1)

num_of_valid_samples = valid0 + valid1

In [None]:
# Validation accuracy and validation data confusion matrix

import sklearn

# Re-create the validation iterator with shuffle=False so the order of the
# predictions matches the order of `valid_generator.classes`.
valid_generator = valid_datagen.flow_from_directory(
    'valid0' + vid[-1],
    shuffle=False,
    class_mode='binary',
    batch_size=batch_size,
    color_mode='rgb',
    target_size=(IMG_SIZE, IMG_SIZE),
)

# Ref: https://gist.github.com/RyanAkilos/3808c17f79e77c4117de35aa68447045 accessed on 5 Feb 2020
from sklearn.metrics import precision_score, recall_score, confusion_matrix, accuracy_score, roc_auc_score, f1_score, matthews_corrcoef

Y_pred = model.predict(valid_generator)   # raw sigmoid outputs of the model
y_pred = (Y_pred > 0.5).astype(int)       # hard 0/1 labels at a 0.5 threshold
valid_labels = valid_generator.classes    # ground-truth class indices

# ROC AUC uses the raw scores; the remaining metrics use the thresholded labels.
ras = roc_auc_score(valid_labels, Y_pred)
prec = precision_score(valid_labels, y_pred, pos_label=1)
rec = recall_score(valid_labels, y_pred, pos_label=1)
f1s = f1_score(valid_labels, y_pred, pos_label=1)
mcc = matthews_corrcoef(valid_labels, y_pred)

print('Confusion Matrix:')
print(confusion_matrix(valid_labels, y_pred))
print('Accuracy:', accuracy_score(valid_labels, y_pred))
for metric_label, metric_value in (
    ('ROC AUC score:', ras),
    ('Precision score:', prec),
    ('Recall score:', rec),
    ('F1 score:', f1s),
    ('MCC score:', mcc),
):
    print(metric_label, metric_value)

In [None]:
# Predicted object delete threshold = 30
# NOTE(review): the threshold above is not referenced by any code in this cell;
# presumably it describes how the images in 'test_upp_xcept' were produced. Confirm.

# Test images are only rescaled, exactly like the validation images.
test_datagen = ImageDataGenerator(rescale=1./255)

# shuffle=False keeps the prediction order aligned with `test_generator.classes`.
test_generator = test_datagen.flow_from_directory(
    'test_upp_xcept',
    shuffle=False,
    class_mode='binary',
    batch_size=batch_size,
    color_mode='rgb',
    target_size=(IMG_SIZE, IMG_SIZE),
)

# Ref: https://gist.github.com/RyanAkilos/3808c17f79e77c4117de35aa68447045 accessed on 5 Feb 2020
from sklearn.metrics import precision_score, recall_score, confusion_matrix, accuracy_score, roc_auc_score, f1_score, matthews_corrcoef

Y_pred = model.predict(test_generator)   # raw sigmoid outputs of the model
y_pred = (Y_pred > 0.5).astype(int)      # hard 0/1 labels at a 0.5 threshold
test_labels = test_generator.classes     # ground-truth class indices

# ROC AUC uses the raw scores; the remaining metrics use the thresholded labels.
ras = roc_auc_score(test_labels, Y_pred)
prec = precision_score(test_labels, y_pred, pos_label=1)
rec = recall_score(test_labels, y_pred, pos_label=1)
f1s = f1_score(test_labels, y_pred, pos_label=1)
mcc = matthews_corrcoef(test_labels, y_pred)

print('Confusion Matrix:')
print(confusion_matrix(test_labels, y_pred))
print('Accuracy:', accuracy_score(test_labels, y_pred))
for metric_label, metric_value in (
    ('ROC AUC score:', ras),
    ('Precision score:', prec),
    ('Recall score:', rec),
    ('F1 score:', f1s),
    ('MCC score:', mcc),
):
    print(metric_label, metric_value)