**Author**: Sujan Tamang

## Breast Cancer Predictor
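This notebook fine-tunes an ImageNet-pretrained VGG16 network to classify mammogram images into two classes, using the mammogram image dataset from [mini-MIAS](http://peipa.essex.ac.uk/info/mias.html).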

In [1]:
import os

import matplotlib.pyplot as plt
from keras.applications import VGG16
from keras import models
from keras import layers
from keras import optimizers
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.preprocessing.image import ImageDataGenerator
from PIL import Image


# --- Notebook configuration -------------------------------------------------
# Side length (pixels) of the square input images; it has to agree with the
# model's input shape and with the generators' target_size.
image_size = 224
test_batchsize = 16    # Batch size for the test set (not referenced in the cells shown here)
train_batchsize = 8   # Training batch size; change it according to your system RAM
val_batchsize = 8     # Validation batch size
epochs = 20            # Number of epochs passed to model.fit
# NOTE(review): these two flags are strings, not booleans. Any non-empty
# string (including "FALSE") is truthy in Python, so code reading them must
# compare against the literal text rather than test truthiness. They are not
# referenced in the cells shown here.
show_errors = "TRUE"
show_correct_predictions = "FALSE"

# Image dataset directories (relative to the notebook's working directory).
# flow_from_directory reports two classes for the train and valid folders.
train_dir = "dataset/train/"
validation_dir = "dataset/valid/"
test_dir = "dataset/test/"   # not referenced in the cells shown here


def vgg16_finetuned(input_shape=None, num_classes=2, trainable_layers=4,
                    dense_units=1024, dropout_rate=0.8):
    """Build a VGG16-based classifier ready for fine-tuning.

    The VGG16 convolutional base is loaded with ImageNet weights and without
    its original fully-connected head. All but the last ``trainable_layers``
    layers of the base are frozen, and a new head
    (Flatten -> Dense/ReLU -> Dropout -> Dense/softmax) is stacked on top.
    The returned model is NOT compiled.

    Args:
        input_shape: ``(height, width, channels)`` of the input images.
            Defaults to ``(image_size, image_size, 3)`` using the
            notebook-level ``image_size`` setting.
        num_classes: Number of units in the final softmax layer.
        trainable_layers: How many layers at the end of the VGG16 base stay
            trainable; ``0`` freezes the whole base.
        dense_units: Width of the hidden fully-connected layer.
        dropout_rate: Dropout rate applied after the hidden layer.

    Returns:
        A ``keras.models.Sequential`` model.

    Raises:
        ValueError: If ``trainable_layers`` is negative.
    """
    if input_shape is None:
        # Looked up at call time so the notebook's image_size setting is honoured.
        input_shape = (image_size, image_size, 3)
    if trainable_layers < 0:
        raise ValueError("trainable_layers must be non-negative")

    # Load the VGG convolutional base (no classifier head)
    vgg_conv = VGG16(weights="imagenet", include_top=False, input_shape=input_shape)

    # Freeze every layer except the last `trainable_layers` ones. The cut-off
    # is computed from the front because a slice such as layers[:-0] would be
    # empty and freeze nothing.
    n_frozen = max(len(vgg_conv.layers) - trainable_layers, 0)
    for layer in vgg_conv.layers[:n_frozen]:
        layer.trainable = False

    # Stack the new classification head on top of the convolutional base
    model = models.Sequential()
    model.add(vgg_conv)
    model.add(layers.Flatten())
    model.add(layers.Dense(dense_units, activation="relu"))
    model.add(layers.Dropout(dropout_rate))
    model.add(layers.Dense(num_classes, activation="softmax"))

    return model

In [2]:
def show_graphs(history):
    """Plot, save and display the training/validation accuracy and loss curves.

    Two figures are produced and written to the working directory with
    ``plt.savefig`` under the names "Training and validation accuracy" and
    "Training and validation loss" before being shown.

    Args:
        history: Object whose ``history`` attribute is a dict of per-epoch
            metric lists (as returned by ``model.fit``). It must contain
            ``loss`` and ``val_loss`` plus the accuracy metric under either
            ``acc``/``val_acc`` or ``accuracy``/``val_accuracy``.

    Raises:
        KeyError: If a required metric is missing from ``history.history``.
    """
    logs = history.history

    def _first_available(*names):
        # The accuracy key depends on how the metric was named at compile
        # time ("acc" vs "accuracy"), so accept either spelling.
        for name in names:
            if name in logs:
                return logs[name]
        raise KeyError(
            "none of {} found in history (available: {})".format(names, sorted(logs))
        )

    acc = _first_available("acc", "accuracy")
    val_acc = _first_available("val_acc", "val_accuracy")
    loss = logs["loss"]
    val_loss = logs["val_loss"]

    epochs1 = range(len(acc))

    # Start from a fresh figure so the curves never land on a stale one.
    plt.figure()
    plt.plot(epochs1, acc, "b", label="Training acc")
    plt.plot(epochs1, val_acc, "r", label="Validation acc")
    plt.title("Training and validation accuracy")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.legend()
    plt.savefig("Training and validation accuracy")

    plt.figure()
    plt.plot(epochs1, loss, "b", label="Training loss")
    plt.plot(epochs1, val_loss, "r", label="Validation loss")
    plt.title("Training and validation loss")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    plt.savefig("Training and validation loss")
    plt.show()

In [3]:
# Both generators resize images to the configured square size so that it
# cannot drift away from the image_size setting.
target_size = (image_size, image_size)

# Training data: rescale pixel values by 1/255 and apply random augmentation
# (rotation, shifts, horizontal flips).
train_datagen = ImageDataGenerator(rescale=1. / 255,
                                   rotation_range=20,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   horizontal_flip=True,
                                   fill_mode="nearest")
train_generator = train_datagen.flow_from_directory(train_dir,
                                                    target_size=target_size,
                                                    batch_size=train_batchsize,
                                                    class_mode="categorical")

# Validation data: rescaling only, no augmentation, and no shuffling so the
# batches come in a fixed order.
validation_datagen = ImageDataGenerator(rescale=1. / 255)
validation_generator = validation_datagen.flow_from_directory(validation_dir,
                                                              target_size=target_size,
                                                              batch_size=val_batchsize,
                                                              class_mode="categorical",
                                                              shuffle=False)

Found 72 images belonging to 2 classes.
Found 16 images belonging to 2 classes.


In [4]:
# Create the VGG16-based model; it is still uncompiled at this point
# (model.compile is called in the training cell).
model = vgg16_finetuned()

In [None]:
# Make sure the checkpoint directory exists before Keras tries to write to it.
checkpoint_path = "trained_models/vgg16_1.h5"
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# Save the full model (not just the weights) whenever validation accuracy
# improves. Checkpointing happens once per epoch, which is the default, so
# the deprecated `period` argument is not needed.
checkpoint = ModelCheckpoint(checkpoint_path,
                             monitor="val_acc",
                             verbose=1,
                             save_best_only=True,
                             save_weights_only=False,
                             mode="auto")

# NOTE(review): with patience=20 and epochs=20 this callback can never stop
# training early; lower `patience` (or raise `epochs`) to make it effective.
early_stopping = EarlyStopping(monitor="val_acc",
                               min_delta=0,
                               patience=20,
                               verbose=1,
                               mode="auto")

# The metric is named "acc" on purpose: the callbacks monitor "val_acc" and
# show_graphs reads the "acc"/"val_acc" history entries.
model.compile(loss="categorical_crossentropy",
              optimizer=optimizers.Adam(learning_rate=1e-5),
              metrics=["acc"])

# validation_steps matches the number of batches in one pass over the
# validation set, so every validation image is evaluated exactly once per
# epoch instead of requesting more batches than the generator holds.
history = model.fit(train_generator,
                    validation_data=validation_generator,
                    validation_steps=len(validation_generator),
                    epochs=epochs,
                    callbacks=[checkpoint, early_stopping])

Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.37500, saving model to trained_models/vgg16_1.h5
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20

In [None]:
# Plot, save and display the accuracy/loss curves recorded by model.fit.
show_graphs(history)