## Define VGG16 Learning Model

### Setup virtual environment

In [None]:
!pip install --upgrade pip
!pip install --upgrade tensorflow
!pip install ipykernel
# register a Python kernel for the virtual environment
!python -m ipykernel install --user --name venv --display-name "Python (venv)"

In [None]:
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import os, tempfile

# Default figure size applied to every plot created in this notebook.
mpl.rcParams['figure.figsize'] = (12, 10)
# Colours of the active matplotlib property cycle; the plotting helpers index
# into this list so that each curve gets a distinct, consistent colour.
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

## Define Train, Validation and Test data

In [None]:
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image_dataset_from_directory

# Directories holding the train / validation / test images, one sub-folder per class.
train_dir = './data_kaggle_train/'
valid_dir = './data_kaggle_valid/'
test_dir = './data_kaggle_test/'

batch_size = 32
target_size = (224, 224)

# Pixel values are scaled by 1/255 in every split; only the training split is
# additionally augmented (shear, zoom, horizontal flip).
train_gen = ImageDataGenerator(
    rescale=1.0/255.0,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
valid_gen = ImageDataGenerator(rescale=1.0/255.0)
test_gen = ImageDataGenerator(rescale=1.0/255.0)

# Options shared by all three directory iterators.
flow_options = {'class_mode': 'binary', 'target_size': target_size}

# Training batches are shuffled; validation and test keep directory order.
train_generator = train_gen.flow_from_directory(
    train_dir, batch_size=batch_size, shuffle=True, **flow_options)

valid_generator = valid_gen.flow_from_directory(
    valid_dir, batch_size=batch_size, shuffle=False, **flow_options)

# The test split is served one image at a time.
test_generator = test_gen.flow_from_directory(
    test_dir, batch_size=1, shuffle=False, **flow_options)

## Define the Model and Metrics

In [None]:
from keras.optimizers import SGD
from keras.metrics import TruePositives, TrueNegatives, FalsePositives, FalseNegatives, BinaryAccuracy, Precision, Recall, AUC
from keras.models import Sequential
from keras.layers import Conv2D, Activation, MaxPooling2D
from keras.layers import Flatten, Dense, Dropout
from keras.initializers import Constant
from keras.applications import VGG16
from keras.models import Model

# Training hyper-parameters and model geometry.
# `epochs` is passed to model.fit and `input_shape` to VGG16 in make_model.
# NOTE(review): kernel_size, pool_size and n_filters are not referenced in the
# cells visible here — confirm whether they are still needed.
epochs = 50
kernel_size = (3,3)
pool_size = (2,2)
input_shape = (224,224,3)
n_filters = 32


# Metrics handed to model.compile by make_model. The `name` of each metric is
# the key under which it appears in the training history (with a `val_`
# prefix for validation), which plot_metrics relies on.
METRICS = [
    TruePositives(name='tp'),
    FalsePositives(name='fp'),
    TrueNegatives(name='tn'),
    FalseNegatives(name='fn'), 
    BinaryAccuracy(name='accuracy'),
    Precision(name='precision'),
    Recall(name='recall'),
    AUC(name='auc'),
    AUC(name='prc', curve='PR'), # precision-recall curve
]

def make_model(metrics= METRICS, output_bias= None, full_train= False):
    """Build and compile a VGG16-based binary classifier for fine-tuning.

    An ImageNet-pretrained VGG16 convolutional base (without its dense top)
    is combined with a small dense head ending in a single sigmoid unit.
    The head's weights are loaded from the file named by the module-level
    variable ``top_model_weights_path``, which must be defined before this
    function is called.

    Args:
        metrics: Keras metrics passed to ``compile``.
        output_bias: Optional initial value for the output unit's bias. The
            head weights are loaded from file afterwards, so a bias stored
            in that file replaces this initial value.
        full_train: If True, every layer stays trainable. If False, the
            VGG16 layers before the last convolutional block (``block5_*``)
            are frozen, so only that block and the dense head are updated.

    Returns:
        The compiled Keras ``Model``.
    """
    # Convolutional base: ImageNet-pretrained VGG16 without its dense top.
    base_model = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)
    # Only override the bias initializer when a value was supplied; otherwise
    # use Dense's documented default ('zeros') instead of passing None.
    bias_initializer = 'zeros' if output_bias is None else Constant(output_bias)
    # Classifier head placed on top of the convolutional base.
    top_model = Sequential([
        Flatten(),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid', bias_initializer=bias_initializer)
    ])
    # Calling top_model on the base output builds its weights.
    model = Model(inputs=base_model.input, outputs=top_model(base_model.output))
    # Fine-tuning must start from an already-trained classifier head,
    # so load the previously trained head weights.
    top_model.load_weights(top_model_weights_path)
    if not full_train:
        # Freeze everything up to (not including) the last conv block. The
        # previous `model.layers[:25]` slice was longer than the combined
        # model (VGG16 without top has 19 layers, plus the head), so it froze
        # every layer, head included. Keras names the VGG16 layers
        # `block<N>_conv<M>` / `block<N>_pool`, which is what this relies on.
        for layer in base_model.layers:
            if not layer.name.startswith('block5'):
                layer.trainable = False
    # `learning_rate` replaces the deprecated `lr` argument, which newer
    # Keras releases no longer accept.
    model.compile(optimizer=SGD(learning_rate=0.001, momentum=0.9),
                  loss='binary_crossentropy',
                  metrics=metrics)
    return model

## Define Callbacks for Model

In [None]:
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

# Early stopping on validation loss; restore_best_weights=True reloads the
# weights of the best epoch once training finishes.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Weights-only checkpoint, written only when the monitored value improves.
# The epoch number and validation loss are embedded in the file name.
checkpoint = ModelCheckpoint(
    filepath="./models/checkpoint-{epoch:02d}-{val_loss:.2f}.hdf5",
    save_weights_only=True,
    save_best_only=True,
)

# Halve the learning rate when validation loss plateaus for 5 epochs,
# never going below 1e-7.
reduce_plateau = ReduceLROnPlateau(
    monitor="val_loss",
    mode="min",
    factor=0.5,
    patience=5,
    min_lr=1e-7,
    verbose=1,
)

## Transfer Model

### Train Model

In [None]:
# Build the VGG16 transfer model and train it on the training generator,
# validating on the validation generator after every epoch.
# model = make_model(output_bias=inital_bias)
# model.load_weights(initial_weights)
# make_model reads this module-level path to load the pre-trained head weights,
# so it has to be set before make_model is called.
top_model_weights_path = 'bottleneck_fc_model.h5'
# full_train=True leaves every layer trainable.
model = make_model(full_train=True)
# Floor division means a final partial batch is not counted in the step totals.
vgg16_history = model.fit(train_generator,
                             steps_per_epoch = train_generator.samples // batch_size,
                             validation_data = valid_generator,
                             validation_steps = valid_generator.samples // batch_size,
                             epochs=epochs,
                             callbacks=[early_stopping, checkpoint, reduce_plateau],
                             verbose=2)

### Save Model

In [None]:
# Ask which persistence step to run; the next cell compares the answer
# against the exact strings 'Save', 'Load' and 'Check'.
action = input('Action: Save / Load / Check ')
print('Action: ', action)

In [None]:
from keras.models import load_model

model_name = 'transfer'
# Single location used by all three actions.
model_path = './models/' + model_name + '.h5'

if action == 'Save':
    # Persist the in-memory model.
    print('Saving Model....')
    model.save(model_path)
elif action == 'Load':
    # Replace the in-memory model with the saved one.
    print('Loading Model...')
    model = load_model(model_path)
elif action == 'Check':
    # Reload the saved model and verify that it predicts the same values
    # as the in-memory model on the test generator.
    print('Checking Model...')
    reconstructed_model = load_model(model_path)
    current_predictions = model.predict(test_generator, verbose=1)
    reloaded_predictions = reconstructed_model.predict(test_generator, verbose=1)
    np.testing.assert_allclose(current_predictions, reloaded_predictions)
else:
    print('No Action')

### Check training history

In [None]:
def plot_metrics(history, metrics=None):
    """Plot training and validation curves for a set of metrics.

    Each metric gets its own subplot, laid out two per row, with the training
    curve drawn solid and the validation curve dashed.

    Args:
        history: Object returned by ``model.fit``; its ``epoch`` list and its
            ``history`` dict (keys ``<metric>`` and ``val_<metric>``) are read.
        metrics: Names of the metrics to plot. Defaults to
            ``['loss', 'prc', 'precision', 'recall']``.
    """
    if metrics is None:
        metrics = ['loss', 'prc', 'precision', 'recall']
    n_cols = 2
    # Enough rows to fit every requested metric (2 x 2 for the default list).
    n_rows = (len(metrics) + n_cols - 1) // n_cols
    for n, metric in enumerate(metrics):
        name = metric.replace("_", " ").capitalize()
        plt.subplot(n_rows, n_cols, n + 1)
        plt.plot(history.epoch, history.history[metric], color=colors[0], label='Train')
        plt.plot(history.epoch, history.history['val_' + metric],
                 color=colors[1], linestyle="--", label='Val')
        plt.xlabel('Epoch')
        plt.ylabel(name)
        if metric == 'loss':
            # Loss is unbounded above: anchor the axis at 0, keep the auto upper limit.
            plt.ylim([0, plt.ylim()[1]])
        elif metric == 'auc':
            # Zoom in on the upper range for AUC.
            plt.ylim([0.8, 1])
        else:
            plt.ylim([0, 1])
        plt.legend()

In [None]:
# Plot the train/validation curves stored in the history returned by model.fit.
plot_metrics(vgg16_history)

### Evaluate metrics

In [None]:
# Predict on a deterministic, un-augmented pass over the training images.
# `train_generator` shuffles (shuffle=True) and augments its batches, so
# predictions made by iterating it would not line up with
# `train_generator.labels`, which the ROC/PRC cells compare them against.
# This iterator reads the same directory with shuffle=False and rescaling only.
train_eval_generator = valid_gen.flow_from_directory(train_dir,
                                                     batch_size=batch_size,
                                                     class_mode='binary',
                                                     shuffle=False,
                                                     target_size=target_size)

# The generators already produce batches, so `batch_size` is not passed to predict.
train_predictions_vgg16 = model.predict(train_eval_generator, verbose=1)
valid_predictions_vgg16 = model.predict(valid_generator, verbose=1)
test_predictions_vgg16 = model.predict(test_generator, verbose=1)

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns

def plot_confusion_matrix(labels, predictions, p=0.5):
    """Plot count and row-normalised confusion matrices and print a summary.

    Predictions are thresholded at ``p`` and compared with ``labels``. Class 0
    is reported as benign (negative) and class 1 as malignant (positive).

    Args:
        labels: True binary labels, encoded as 0 / 1.
        predictions: Predicted scores; values strictly greater than ``p``
            count as class 1. May be 1-D or a single-column 2-D array.
        p: Decision threshold.
    """
    # Flatten so (N, 1) model outputs and (N,) arrays are treated alike.
    predicted = (np.ravel(predictions) > p).astype(int)
    # Pin the class order so the matrix is always 2x2, even when one class
    # is missing from the data; the cm[i][j] indexing below depends on it.
    cm = confusion_matrix(labels, predicted, labels=[0, 1])
    fig, ax = plt.subplots(1, 2, figsize=(10, 5))
    # Left panel: raw counts.
    sns.heatmap(cm, annot=True, fmt="d", ax=ax[0])
    ax[0].set_title('Confusion matrix @{:.2f}'.format(p))
    ax[0].set_ylabel('Actual label')
    ax[0].set_xlabel('Predicted label')
    # Right panel: fraction of each actual class. A class with no samples
    # yields a row of zeros instead of a division by zero.
    row_totals = np.maximum(cm.sum(axis=1, keepdims=True), 1)
    cmp = cm.astype('float') / row_totals
    sns.heatmap(cmp, annot=True, fmt=".3f",
                xticklabels=[f"pred_{c}" for c in train_generator.class_indices],
                yticklabels=[f"true_{c}" for c in train_generator.class_indices],
                cmap="Blues",
                ax=ax[1])
    ax[1].set_title('Confusion matrix @{:.2f}'.format(p))
    ax[1].set_ylabel('Actual')
    ax[1].set_xlabel('Predicted')

    # Rows are actual classes, columns are predicted classes.
    print('Benign Correctly Detected (True Negatives): ', cm[0][0])
    print('Benign Incorrectly Flagged as Malignant (False Positives): ', cm[0][1])
    print('Malignant Correctly Detected (True Positives): ', cm[1][1])
    print('Malignant Missed (False Negatives): ', cm[1][0])
    print('Total Incorrectly Detected: ', cm[0][1] + cm[1][0], ' out of ', np.sum(cm))

In [None]:
# Evaluate the compiled metrics on the validation generator. Floor division
# means a final partial batch is not included in the evaluated steps.
vgg16_results = model.evaluate(valid_generator, steps=valid_generator.samples // batch_size, verbose=1)

# Print each metric name next to its value.
for name, value in zip(model.metrics_names, vgg16_results):
    print(name, ': ', value)
print()
# Note: the confusion matrix is drawn from the *test* predictions, whereas the
# metrics printed above come from the *validation* generator.
plot_confusion_matrix(test_generator.labels, test_predictions_vgg16, 0.5)

### Plot the ROC

In [None]:
from sklearn.metrics import roc_curve, auc

def plot_roc(name, labels, predictions, **kwargs):
    """Draw one ROC curve, in percent, on the current matplotlib axes.

    Args:
        name: Legend label prefix; the area under the curve is appended.
        labels: True binary labels.
        predictions: Predicted scores for the positive class.
        **kwargs: Extra styling options forwarded to ``plt.plot``.
    """
    false_pos_rate, true_pos_rate, _thresholds = roc_curve(labels, predictions)
    area = auc(false_pos_rate, true_pos_rate)
    curve_label = name + f" (area = {area:0.2f})"
    plt.plot(100*false_pos_rate, 100*true_pos_rate,
             label=curve_label, linewidth=2, **kwargs)
    plt.xlabel('False positives [%]')
    plt.ylabel('True positives [%]')
    # Optional zoom on the top-left corner (disabled):
    #     plt.xlim([-0.5,20])
    #     plt.ylim([80,100.5])
    plt.grid(True)
    # Same scale on both axes.
    plt.gca().set_aspect('equal')

In [None]:
# Overlay the ROC curves of the three splits on one figure.
# Each curve is only meaningful if its predictions were produced in the same
# order as the generator's `.labels`.
plot_roc("Train VGG16", train_generator.labels, train_predictions_vgg16, color=colors[0])
plot_roc("Validation VGG16", valid_generator.labels, valid_predictions_vgg16, color=colors[1], linestyle='dotted')
plot_roc("Test VGG16", test_generator.labels, test_predictions_vgg16, color=colors[2], linestyle='--')
plt.legend(loc='lower right')

### Plot the PRC

In [None]:
from sklearn.metrics import precision_recall_curve

def plot_prc(name, labels, predictions, **kwargs):
    """Draw one precision-recall curve on the current matplotlib axes.

    Recall is plotted on the x-axis and precision on the y-axis, matching
    the axis labels.

    Args:
        name: Legend label for the curve.
        labels: True binary labels.
        predictions: Predicted scores for the positive class.
        **kwargs: Extra styling options forwarded to ``plt.plot``.
    """
    precision, recall, _ = precision_recall_curve(labels, predictions)

    # x = recall, y = precision. The data used to be passed as
    # (precision, recall), which contradicted the axis labels below.
    plt.plot(recall, precision, label=name, linewidth=2, **kwargs)
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.grid(True)
    ax = plt.gca()
    ax.set_aspect('equal')

In [None]:
# Overlay the precision-recall curves of the three splits on one figure.
# Each curve is only meaningful if its predictions were produced in the same
# order as the generator's `.labels`.
plot_prc("Train VGG16", train_generator.labels, train_predictions_vgg16, color=colors[0])
plot_prc("Validation VGG16", valid_generator.labels, valid_predictions_vgg16, color=colors[1], linestyle='dotted')
plot_prc("Test VGG16", test_generator.labels, test_predictions_vgg16, color=colors[2], linestyle='--')
plt.legend(loc='upper right')

### Classification Report

In [None]:
from sklearn.metrics import classification_report

def show_classification_report(labels, predictions, p=0.5, **kwargs):
    """Print sklearn's classification report for thresholded predictions.

    Args:
        labels: True labels.
        predictions: Predicted scores; values strictly greater than ``p``
            are treated as the positive class.
        p: Decision threshold.
        **kwargs: Forwarded unchanged to ``classification_report``.
    """
    thresholded = predictions > p
    report = classification_report(labels, thresholded, **kwargs)
    print(report)

In [None]:
# Per-class precision / recall / F1 on the test split at a 0.5 threshold;
# target_names labels class 0 as 'benign' and class 1 as 'malignant'.
show_classification_report(test_generator.labels,test_predictions_vgg16,p=0.5,target_names=['benign','malignant'])