In [1]:
import os
import pathlib
from time import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (Dense, Conv2D, Flatten, Dropout,
                                     MaxPooling2D, Activation, BatchNormalization)
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input

### Load data

In [2]:
PATH = '../input/train-val-test-tcga-coad-msi-mss/tcga_coad_msi_mss/'
train_dir = os.path.join(PATH, 'train')
val_dir = os.path.join(PATH, 'val')
test_dir = os.path.join(PATH, 'test')

In [3]:
# Set up variables for pre-processing
batch_size = 64
epochs = 5
IMG_HEIGHT = 224
IMG_WIDTH = 224

In [4]:
model = VGG16(include_top=False)

model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, None, None, 3)]   0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, None, None, 64)    1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, None, None, 64)    36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, None, None, 64)    0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, None, None, 128)   73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, None, None, 128)  

### Data Augmentation

In [5]:
train_image_generator = ImageDataGenerator(rescale = 1./255,
                                           rotation_range = 45,
                                           width_shift_range = 0.20,
                                           height_shift_range = 0.20,
                                           horizontal_flip = True,
                                           zoom_range = 0.5)

val_image_generator = ImageDataGenerator(rescale = 1./255)

train_data_gen = train_image_generator.flow_from_directory(batch_size = batch_size,
                                                           directory = train_dir,
                                                           class_mode = 'binary')

val_data_gen = val_image_generator.flow_from_directory(batch_size = batch_size,
                                                       directory = val_dir,
                                                       class_mode = 'binary')

Found 153849 images belonging to 2 classes.
Found 19230 images belonging to 2 classes.


### Convolutional Neural Network

In [6]:
def create_model(input_shape, optimizer='adam', fine_tune=2):
    """
    Compiles a model integrated with VGG16 pretrained layers
    
    input_shape: tuple - the shape of input images (width, height, channels)
    n_classes: int - number of classes for the output layer
    optimizer: string - instantiated optimizer to use for training. Defaults to 'RMSProp'
    fine_tune: int - The number of pre-trained layers to unfreeze.
                If set to 0, all pretrained layers will freeze during training
    """
    
    conv_base = VGG16(include_top=False,
                     weights='imagenet', 
                     input_shape=input_shape)
    
 
    if fine_tune > 0:
        for layer in conv_base.layers[:-fine_tune]:
            layer.trainable = False
    else:
        for layer in conv_base.layers:
            layer.trainable = False


    top_model = conv_base.output
    top_model = Flatten(name="flatten")(top_model)
    top_model = Dense(4096, activation='relu')(top_model)
    top_model = Dense(1072, activation='relu')(top_model)
    top_model = Dropout(0.2)(top_model)
    output_layer = Dense(1, activation='sigmoid')(top_model)
    
    model = Model(inputs=conv_base.input, outputs=output_layer)

    model.compile(optimizer='adam', 
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    
    return model

In [7]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model

In [8]:
input_shape = (224, 224, 3)
optim_1 = Adam(learning_rate=0.0001)
vgg_model = create_model(input_shape, optim_1, fine_tune=2)

### Model Training

In [9]:
history = vgg_model.fit(train_data_gen,
              batch_size=64,
              epochs=20,
              validation_data=val_data_gen,
              steps_per_epoch=50,
              validation_steps=val_data_gen.samples // 64)

Epoch 1/20


InvalidArgumentError:  Input to reshape is a tensor with 2097152 values, but the requested shape requires a multiple of 25088
	 [[node model/flatten/Reshape (defined at <ipython-input-9-bc4c9aab5065>:6) ]] [Op:__inference_train_function_1845]

Function call stack:
train_function


### Visualize Loss and Accuracy

In [None]:
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

epochs_range = range(5)

plt.figure(figsize = (8, 8))
plt.subplot(2, 1, 1)
plt.plot(epochs_range, acc, label = 'Training Accuracy')
plt.plot(epochs_range, val_acc, label = 'Validation Accuracy')
plt.legend(loc = 'lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(2, 1, 2)
plt.plot(epochs_range, loss, label = 'Training Loss')
plt.plot(epochs_range, val_loss, label = 'Validation Loss')
plt.legend(loc = 'upper right')
plt.title('Training and Validation Loss')
plt.show()

### Evaluation

In [None]:
test_image_generator = ImageDataGenerator(rescale = 1./255)
test_data_gen = test_image_generator.flow_from_directory(batch_size = batch_size,
                                                         directory = test_dir,
                                                         shuffle = False,
                                                         target_size = (IMG_HEIGHT, IMG_WIDTH),
                                                         class_mode = 'binary')

result = model.evaluate(test_data_gen)
print('Test Loss: ', result[0])
print('Test Accuracy: ', result[4])

In [None]:
test_image, test_label = next(test_data_gen)

predicted_batch = model.predict(test_image)
predicted_id = np.argmax(predicted_batch, axis = -1)
predicted_label_batch = class_names[predicted_id]

plt.figure(figsize = (10, 10))
plt.subplots_adjust(hspace = 0.5)
for n in range(30):
    plt.subplot(6, 5, n + 1)
    plt.imshow(test_image[n])
    color = "blue" if predicted_id[n] == test_label[n] else "red"
    plt.title(predicted_label_batch[n], color = color)
    plt.axis('off')
_ = plt.suptitle("CNN Predictions (blue: correct, red: incorrect)")