In [4]:
"""
Robert E Ruzzo III
Resnet50.ipynb

This notebook implements the ResNet50 model on the
Histopathologic Cancer Detection dataset obtained from Kaggle.com

"""
import numpy as np
np.random.seed(42)
import tensorflow as tf
tf.set_random_seed(42)
import pandas as pd
from keras.models import Model
from keras.layers import Dense, Conv2D, BatchNormalization, MaxPooling2D, Flatten, Dropout, Input,GlobalAveragePooling2D
from keras.callbacks import TensorBoard, ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
import os
from keras import metrics
from PIL import Image 
from keras import applications

In [5]:
"""
Configuration
Used to hold variable values making them easier to change if needed.

    Args: 
        None

Variables:
    batch_size (int): The batch processing size
    epochs (int):  The number epoch iterations to run on the model
    data (pandas DataFrame): The training set labels, loaded from the labels csv
    data_dir (string): The directory which contains the subdirectories of the photos to be analyzed.
        For this notebook to work correctly the pictures have to be divided into subdirectories based on their class.
    image_width (int): Expected width of the pictures (enforced by the training and validation generators)
    image_height (int): Expected height of the pictures (enforced by the training and validation generators)
    center_crop_width (int): The desired width of the image after it has been cropped, used in the cropping generator
    center_crop_height (int): The desired height of the image after it has been cropped, used in the cropping generator
    name (string): The name of the model for both TensorBoard callbacks and saving of weights

"""
class Configuration:
    """Run settings for this notebook, gathered in one place.

    Every argument defaults to the value that used to be hard-coded, so
    ``Configuration()`` behaves exactly as before. Pass keyword arguments to
    point the notebook at another dataset location or to change
    hyper-parameters without editing the class.

    Args:
        batch_size (int): Number of images per batch.
        epochs (int): Number of epochs to train for.
        labels_csv (string): Path of the training set labels csv.
        data_dir (string): Directory containing one sub-directory of images
            per class.
        image_width (int): Expected width of the images in pixels.
        image_height (int): Expected height of the images in pixels.
        name (string): Name of the model, used for the TensorBoard log
            directory and as the file name when the model is saved.

    Attributes:
        data (pandas DataFrame): The contents of ``labels_csv``.
        batch_size, epochs, data_dir, image_width, image_height, name:
            Stored unchanged from the arguments of the same name.

    Raises:
        Whatever ``pandas.read_csv`` raises when ``labels_csv`` cannot be read.
    """
    def __init__(self,
                 batch_size=128,
                 epochs=20,
                 labels_csv='D:\\Datasets\\histopathologic-cancer-detection\\train_labels.csv',
                 data_dir='D:\\Datasets\\histopathologic-cancer-detection\\train\\',
                 image_width=96,
                 image_height=96,
                 name='ResNet50_20_LL_B128'):
        self.batch_size = batch_size
        self.epochs = epochs
        # The labels table is loaded eagerly, so a bad path fails here rather than mid-run.
        self.data = pd.read_csv(labels_csv)
        self.data_dir = data_dir
        self.image_width = image_width
        self.image_height = image_height
        self.name = name
        

In [6]:
#Create the notebook-wide settings object (constructing it also reads the labels csv into config.data)
config = Configuration()

In [8]:
#Drop the label row of the one image that throws exceptions on opening
#NOTE(review): the generators created later read files straight from config.data_dir and never look at
#config.data, so presumably the image file itself also has to be absent from that directory - confirm
config.data = config.data.loc[config.data['id'].ne('b44ceb87f4fb92169ec928c652d6e1209b48135c')]

In [9]:
#Build the ResNet50 model structure: a randomly initialised, headless ResNet50 backbone
#followed by global average pooling and a softmax classification layer
num_classes=2
#Use the canonical class name Adam rather than the lowercase alias
adm = optimizers.Adam(lr=0.0001)
#Keras image inputs are ordered (height, width, channels), so height goes first
model = applications.resnet50.ResNet50(weights=None, include_top=False, input_shape=(config.image_height, config.image_width, 3))
x = model.output
x = GlobalAveragePooling2D()(x)
predictions = Dense(num_classes, activation='softmax')(x)
#model_50 is the complete network (backbone + head) and the one that gets compiled;
#`model` is only the backbone
model_50 = Model(inputs=model.input, outputs=predictions)
model_50.compile(optimizer=adm, loss='categorical_crossentropy', metrics=["accuracy"])

Instructions for updating:
Colocations handled automatically by placer.




In [10]:
def create_callbacks(name):
    """Build the list of callbacks used to monitor training on TensorBoard.

    Args:
        name (string) : run name; logs are written to
            <current working directory>/tensorboard_log/<name>

    Returns:
        list : a single TensorBoard callback (graph written, gradient histograms off)
    """
    log_dir = os.path.join(os.getcwd(), "tensorboard_log", name)
    callbacks = []
    callbacks.append(TensorBoard(log_dir=log_dir, write_graph=True, write_grads=False))
    return callbacks

In [11]:
"""setup_data - Function creates the generators which add data variance and cropping capabilities
    Note: This function has to return the number of items in the iterables to ensure functionality with the 
    fit function.

    Args:
        train_data_dir (string) : directory that the training and validation data are located
        batch_size (int) : size of the batches (count)

    Returns:
        train_generator (iterable image generator) : batches of rescaled, augmented training images
        validation_generator (iterable image generator) : batches of images from the held-out validation split
        train_generator.n (int) : The number of items in the training generator iterable
        validation_generator.n (int): The number of items in the validation generator iterable

    """
def setup_data(train_data_dir, batch_size, target_size=(96, 96), validation_split=0.1):
    """Create the training and validation image generators for a class-per-folder directory.

    Both generators read from the same directory with the same
    ``validation_split``, one taking the 'training' subset and the other the
    'validation' subset. Random augmentation (shear, zoom, horizontal flip) is
    applied to the training subset only; validation images are only rescaled,
    so validation metrics are measured on unmodified images.

    Args:
        train_data_dir (string) : directory holding one sub-directory of images per class
        batch_size (int) : number of images per batch
        target_size (tuple of int) : size every image is resized to, as (height, width)
        validation_split (float) : fraction of the images reserved for validation.
            NOTE(review): the comment that used to precede this function described an
            80/20 split while the code used 0.1; the code's 0.1 is kept as the default -
            pass 0.2 if 80/20 is what is wanted.

    Returns:
        train_generator (iterable image generator) : augmented training batches
        validation_generator (iterable image generator) : rescaled-only validation batches
        train_generator.n (int) : number of images in the training subset
        validation_generator.n (int) : number of images in the validation subset
    """
    pixel_scale = 1.0/255

    train_datagen = ImageDataGenerator(rescale=pixel_scale,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        validation_split=validation_split)

    # Same rescale and same split fraction, but no random transforms.
    validation_datagen = ImageDataGenerator(rescale=pixel_scale,
        validation_split=validation_split)

    # Options that must be identical for both subsets.
    flow_options = dict(batch_size=batch_size,
        target_size=target_size,
        class_mode='categorical')

    train_generator = train_datagen.flow_from_directory(
        train_data_dir,
        subset='training',
        **flow_options)

    validation_generator = validation_datagen.flow_from_directory(
        train_data_dir,
        subset='validation',
        **flow_options)

    return train_generator, validation_generator, train_generator.n, validation_generator.n

In [12]:
"""fit_model : This function uses the iterable generators to build and train the model, as well as return the 
    output of the trained model.

    Args:
        model (Keras/TensorFlow model object) : The model created with build_model function
        train_generator (iterable image generator object) : The iterable training generator from the setup_data function
        val_generator (iterable image generator object) : The iterable validation generator from the setup_data function
        batch_size (int) : The batch size, or number of objects processed with each batch iteration.
        epochs (int) : The number of total iterations through the data
        name (string) : The name of the model for TensorBoard Callbacks
        
    Returns:
        model (model object) : A tensorflow / Keras model definition with weights and structure data included

    """
def fit_model(model, train_generator, val_generator, batch_size, epochs, name):
    """Train `model` from the image generators and return it.

    The number of batches per epoch is derived from each generator's own
    sample count (its ``n`` attribute), so the function depends only on its
    arguments and not on notebook-level variables.

    Args:
        model (Keras/TensorFlow model object) : compiled model to train
        train_generator (iterable image generator object) : training batches; must expose ``n``
        val_generator (iterable image generator object) : validation batches; must expose ``n``
        batch_size (int) : number of images per batch
        epochs (int) : number of passes over the training data
        name (string) : model name handed to create_callbacks for the TensorBoard callback

    Returns:
        model (model object) : the same model object that was passed in, after training
    """
    # Ceiling division: one pass covers every sample exactly once, with no
    # extra batch when the sample count is an exact multiple of batch_size.
    steps_per_epoch = (train_generator.n + batch_size - 1) // batch_size
    validation_steps = (val_generator.n + batch_size - 1) // batch_size

    model.fit_generator(
        train_generator,
        steps_per_epoch=steps_per_epoch,
        epochs=epochs,
        validation_data=val_generator,
        validation_steps=validation_steps,
        callbacks=create_callbacks(name=name),
        verbose=1)
    return model

In [13]:
"""eval_model : This function uses the output of fit_model to evaluate the model after training is complete,
    and shows validation accuracy and validation loss as parameters.

    Args:
        model (Keras/TensorFlow model object) : The trained model output from fit_model
        val_generator (iterable image generator object) : The iterable validation generator from the setup_data function
        batch_size (int) : The batch size, or number of objects processed with each batch iteration.
        
    Returns:
        None, output is printed

    """
def eval_model(model, val_generator, batch_size):
    """Evaluate a trained model on the validation generator and print loss and accuracy.

    The number of evaluation batches is derived from the generator's own
    sample count (its ``n`` attribute), so the function depends only on its
    arguments and not on notebook-level variables.

    Args:
        model (Keras/TensorFlow model object) : compiled, trained model
        val_generator (iterable image generator object) : validation batches; must expose ``n``
        batch_size (int) : number of images per batch

    Returns:
        None, output is printed
    """
    # Ceiling division: every validation sample is scored exactly once, with
    # no extra batch when the sample count is an exact multiple of batch_size.
    steps = (val_generator.n + batch_size - 1) // batch_size
    scores = model.evaluate_generator(val_generator, steps=steps)
    print("Loss: " + str(scores[0]) + " Accuracy: " + str(scores[1]))

In [14]:
#Create the training and validation generators, and keep the number of images in each subset
train_generator, val_generator, training_n, val_n= setup_data(config.data_dir, batch_size=config.batch_size)

Found 176020 images belonging to 2 classes.
Found 44004 images belonging to 2 classes.


In [15]:
#Print the model summary (uncomment to view)
#print (model_50.summary())

## Model Training

In [16]:
#Train on the first GPU; fit_model hands back the model object it was given
device_name="/gpu:0"
with tf.device(device_name):
    model_out = fit_model(
        model=model_50,
        train_generator=train_generator,
        val_generator=val_generator,
        batch_size=config.batch_size,
        epochs=config.epochs,
        name=config.name)

Instructions for updating:
Use tf.cast instead.
Epoch 1/20
Epoch 2/20

KeyboardInterrupt: 

In [None]:
# Evaluate the trained network.
# model_50 is the compiled backbone + classification head; `model` is only the
# uncompiled headless ResNet50 backbone and cannot be evaluated against the labels.
device_name="/gpu:0"
with tf.device(device_name):
    eval_model(model_50, val_generator, config.batch_size)

In [None]:
# Save the trained model under the configured name.
# model_50 is saved rather than `model` so the classification head is included;
# `model` is only the headless ResNet50 backbone.
model_50.save(config.name)