In [None]:
"""
Robert E Ruzzo III
DoubleLayer.ipynb

This notebook prototypes various model structures for processing the
Histopathologic Cancer Detection dataset obtained from Kaggle.com

"""
import numpy as np
np.random.seed(42)
import tensorflow as tf
tf.set_random_seed(42)
import pandas as pd
from keras.models import Model
from keras.layers import Dense, Conv2D, BatchNormalization, MaxPooling2D, Flatten, Dropout, Input
from keras.callbacks import TensorBoard, ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
import os
from keras import metrics 

In [None]:
class Configuration:
    """Container for the tunable settings used throughout the notebook.

    Takes no arguments; every value is fixed in ``__init__`` so that it can
    be changed in a single place.

    Attributes:
        batch_size (int): Number of images processed per batch.
        epochs (int): Number of epochs to train the model for.
        data (pandas.DataFrame): Training labels, read from the labels CSV.
        data_dir (str): Directory holding the image sub-directories. The
            pictures must be sorted into one sub-directory per class.
        image_width (int): Expected width of the input pictures, in pixels.
        image_height (int): Expected height of the input pictures, in pixels.
        center_crop_width (int): Width of an image after centre cropping.
        center_crop_height (int): Height of an image after centre cropping.
        model_name (str): Run name, used for the TensorBoard log directory
            and for the saved model file.
    """

    def __init__(self):
        # Run identity and training schedule
        self.model_name = "DoubleLayers_Crop96_40"
        self.epochs = 40
        self.batch_size = 128

        # Data locations (the labels CSV is read eagerly, at construction time)
        self.data_dir = '/floyd/input/cancer_histo/train'
        labels_csv = '/floyd/input/cancer_histo/train_labels.csv'
        self.data = pd.read_csv(labels_csv)

        # Image geometry: source size and centre-crop size
        self.image_width = self.image_height = 96
        self.center_crop_width = self.center_crop_height = 96
        

In [None]:
# Instantiate the shared settings object. Note that Configuration.__init__
# also reads the training-labels CSV into config.data, so this cell does I/O.
config = Configuration()

In [None]:
# Sanity check: display the first rows of the labels table to confirm the CSV loaded
config.data.head()


In [None]:
# Drop the label row for the one image id that is known to cause an exception.
# NOTE(review): this only filters the labels DataFrame. In the cells visible in
# this notebook the image generators read files straight from config.data_dir
# and never consult config.data, so confirm the offending file is also excluded
# on disk.
config.data=config.data[config.data.id != 'b44ceb87f4fb92169ec928c652d6e1209b48135c']

In [None]:
def build_model():
    """Build and compile the convolutional classifier.

    The network stacks three convolution stages (two 256-filter layers, two
    128-filter layers and one 64-filter layer; all 3x3 kernels with ReLU,
    each stage followed by 2x2 max pooling), a 1024-unit dense layer and a
    2-way softmax output. The input size is read from the module-level
    ``config`` object.

    Args:
        None

    Returns:
        keras.models.Model: The model, compiled with the Adam optimizer
        (learning rate 0.001), categorical cross-entropy loss and the
        accuracy metric.
    """
    # Keras image tensors are (rows, cols, channels) with the default
    # channels_last data format, i.e. (height, width, channels).
    inputs = Input(shape=(config.center_crop_height, config.center_crop_width, 3), name="input")

    # Convolution stage 1
    conv1 = Conv2D(256, kernel_size=(3, 3), activation="relu", name="conv_1")(inputs)
    conv2 = Conv2D(256, kernel_size=(3, 3), activation="relu", name="conv_2")(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2), name="pool_1")(conv2)

    # Convolution stage 2 (layer names skip "conv_3"; kept so saved models stay compatible)
    conv4 = Conv2D(128, kernel_size=(3, 3), activation="relu", name="conv_4")(pool1)
    conv5 = Conv2D(128, kernel_size=(3, 3), activation="relu", name="conv_5")(conv4)
    pool2 = MaxPooling2D(pool_size=(2, 2), name="pool_2")(conv5)

    # Convolution stage 3
    conv6 = Conv2D(64, kernel_size=(3, 3), activation="relu", name="conv_6")(pool2)
    pool3 = MaxPooling2D(pool_size=(2, 2), name="pool_3")(conv6)

    # Fully connected layer
    flatten = Flatten()(pool3)
    fc1 = Dense(1024, activation="relu", name="fc_1")(flatten)

    # Output: one probability per class
    output = Dense(2, activation="softmax", name="softmax")(fc1)

    # Adam optimizer with a learning rate of 0.001. The class name is used
    # because the lowercase ``adam`` alias is not available in every Keras release.
    adm = optimizers.Adam(lr=0.001)

    # Finalize and compile
    model = Model(inputs=inputs, outputs=output)
    model.compile(optimizer=adm, loss='categorical_crossentropy', metrics=["accuracy"])
    return model

In [None]:
def create_callbacks(name):
    """Create the Keras callbacks used to monitor training via TensorBoard.

    Args:
        name (string): Run name; logs are written to
            ``<current working directory>/tensorboard_log/<name>``.

    Returns:
        list: A single-element list holding the TensorBoard callback.
    """
    log_dir = os.path.join(os.getcwd(), "tensorboard_log", name)
    return [TensorBoard(log_dir=log_dir, write_graph=True, write_grads=False)]

In [None]:
def center_crop(img, crop_size_x, crop_size_y):
    """Return the centred window of an image.

    Args:
        img (array): Image indexed as (height, width, ...); any trailing axes
            (e.g. channels) are kept unchanged.
        crop_size_x (int): Width of the cropped image.
        crop_size_y (int): Height of the cropped image.

    Returns:
        The slice of ``img`` that is ``crop_size_y`` rows tall and
        ``crop_size_x`` columns wide, centred in the image. When the margin
        is odd, the extra pixel is left on the bottom/right side.

    Raises:
        ValueError: If a crop size is negative or larger than the image.
            Without this check a negative start index would silently produce
            a wrong-sized slice.
    """
    height, width = img.shape[0], img.shape[1]
    if not (0 <= crop_size_x <= width and 0 <= crop_size_y <= height):
        raise ValueError(
            "crop size (%s x %s) must lie within the image size (%s x %s)"
            % (crop_size_x, crop_size_y, width, height)
        )
    crop_x_start = (width - crop_size_x) // 2
    crop_y_start = (height - crop_size_y) // 2
    return img[crop_y_start:crop_y_start + crop_size_y,
               crop_x_start:crop_x_start + crop_size_x]

def crop_generator(batches, crop_size_x, crop_size_y):
    """Wrap a batch iterator and centre-crop every image in memory.

    Args:
        batches (iterator): Yields ``(batch_x, batch_y)`` pairs, where
            ``batch_x`` is an array of images indexed as
            (sample, height, width, channel).
        crop_size_x (int): As defined in the config class, width of final image.
        crop_size_y (int): As defined in the config class, height of final image.

    Yields:
        tuple: ``(batch_cropped, batch_y)`` where ``batch_cropped`` is a new
        float64 array of shape (samples, crop_size_y, crop_size_x, 3) and
        ``batch_y`` is the second element of the upstream pair, passed
        through unchanged.
    """
    while True:
        try:
            batch_x, batch_y = next(batches)
        except StopIteration:
            # A finite upstream iterator ends this generator cleanly instead
            # of leaking StopIteration out of the generator body.
            return
        # Rows (height) come before columns (width), matching what
        # center_crop returns; this matters for non-square crops.
        batch_cropped = np.zeros((batch_x.shape[0], crop_size_y, crop_size_x, 3))
        for i in range(batch_x.shape[0]):
            batch_cropped[i] = center_crop(batch_x[i], crop_size_x, crop_size_y)
        yield (batch_cropped, batch_y)

In [None]:
def setup_data(train_data_dir, batch_size):
    """Create the training and validation generators (80/20 split).

    Training images are rescaled to [0, 1] and augmented with up to 20%
    shear, up to 20% zoom and random horizontal flips. Validation images are
    only rescaled, so validation metrics are measured on unaugmented data.
    Both streams are then centre-cropped to the size given by the
    module-level ``config`` object.

    Note: This function also returns the number of items in each iterable,
    which is needed to compute step counts for the fit function.

    Args:
        train_data_dir (string): Directory in which the training and
            validation data are located, one sub-directory per class.
        batch_size (int): Size of the batches (count).

    Returns:
        training_cropped (iterable image generator): Cropped and augmented
            training images with their labels.
        validation_cropped (iterable image generator): Cropped validation
            images with their labels.
        train_generator.n (int): The number of items in the training generator iterable.
        validation_generator.n (int): The number of items in the validation generator iterable.
    """
    # Every image is resized to this (height, width) when it is loaded.
    target_size = (config.image_height, config.image_width)

    train_datagen = ImageDataGenerator(rescale=1.0/255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        validation_split=0.2) # set validation split

    # Same rescaling and the same split fraction, but no augmentation.
    # NOTE(review): this relies on the training/validation subsets being
    # chosen from the file order (not randomly), so two generator objects
    # with the same validation_split see disjoint subsets -- confirm for the
    # installed Keras version.
    validation_datagen = ImageDataGenerator(rescale=1.0/255,
        validation_split=0.2)

    train_generator = train_datagen.flow_from_directory(
        train_data_dir,
        batch_size=batch_size,
        target_size=target_size,
        class_mode='categorical', #class mode will make a separate class determination for each subdirectory
        subset='training')

    validation_generator = validation_datagen.flow_from_directory(
        train_data_dir,
        batch_size=batch_size,
        target_size=target_size,
        class_mode='categorical', #class mode will make a separate class determination for each subdirectory
        subset='validation')

    training_cropped = crop_generator(train_generator, config.center_crop_width, config.center_crop_height)
    validation_cropped = crop_generator(validation_generator, config.center_crop_width, config.center_crop_height)

    return training_cropped, validation_cropped, train_generator.n, validation_generator.n

In [None]:
def fit_model(model, train_generator, val_generator, batch_size, epochs, name,
              train_samples=None, val_samples=None):
    """Train the model from the iterable generators and return it.

    Args:
        model (Keras/TensorFlow model object): The model created with the build_model function.
        train_generator (iterable image generator object): The iterable training generator from the setup_data function.
        val_generator (iterable image generator object): The iterable validation generator from the setup_data function.
        batch_size (int): The batch size, or number of objects processed with each batch iteration.
        epochs (int): The number of total iterations through the data.
        name (string): The name of the model for TensorBoard callbacks.
        train_samples (int, optional): Number of training images. Defaults to
            the module-level ``training_n`` set by the data-setup cell.
        val_samples (int, optional): Number of validation images. Defaults to
            the module-level ``val_n`` set by the data-setup cell.

    Returns:
        model (model object): The same model object, after training.
    """
    if train_samples is None:
        train_samples = training_n
    if val_samples is None:
        val_samples = val_n

    # Ceiling division: one pass over the data per epoch, without the extra
    # step that "n // batch_size + 1" adds when n is an exact multiple of
    # batch_size. At least one step is always run.
    steps_per_epoch = max(1, (train_samples + batch_size - 1) // batch_size)
    validation_steps = max(1, (val_samples + batch_size - 1) // batch_size)

    model.fit_generator(
        train_generator,
        steps_per_epoch=steps_per_epoch,
        epochs=epochs,
        validation_data=val_generator,
        validation_steps=validation_steps,
        callbacks=create_callbacks(name=name),
        verbose=1)
    return model

In [None]:
def eval_model(model, val_generator, batch_size, val_samples=None):
    """Evaluate a trained model and print its validation loss and accuracy.

    Args:
        model (Keras/TensorFlow model object): The trained model output from fit_model.
        val_generator (iterable image generator object): The iterable validation generator from the setup_data function.
        batch_size (int): The batch size, or number of objects processed with each batch iteration.
        val_samples (int, optional): Number of validation images. Defaults to
            the module-level ``val_n`` set by the data-setup cell.

    Returns:
        None, output is printed.
    """
    if val_samples is None:
        val_samples = val_n

    # Ceiling division: cover the validation set once, without the extra
    # batch that "n // batch_size + 1" adds when n is an exact multiple of
    # batch_size. At least one step is always run.
    steps = max(1, (val_samples + batch_size - 1) // batch_size)

    scores = model.evaluate_generator(val_generator, steps=steps)
    print("Loss: " + str(scores[0]) + " Accuracy: " + str(scores[1]))

In [None]:
# Build the cropped training/validation generators. training_n and val_n are
# kept at module level because fit_model and eval_model read them to compute
# their step counts.
train_generator, val_generator, training_n, val_n = setup_data(
    config.data_dir,
    batch_size=config.batch_size,
)

In [None]:
# Create a model object using the architecture defined in build_model, then
# show its layer summary. model.summary() prints the table itself and returns
# None, so it is not wrapped in print() (which would add a stray "None").
model = build_model()
model.summary()

## Model Training

In [None]:
# Train the model inside a tf.device scope that requests the first GPU
device_name = "/gpu:0"
with tf.device(device_name):
    model = fit_model(
        model,
        train_generator,
        val_generator,
        batch_size=config.batch_size,
        epochs=config.epochs,
        name=config.model_name,
    )

In [None]:
# Evaluate the trained model on the validation generator, again requesting the first GPU
device_name = "/gpu:0"
with tf.device(device_name):
    eval_model(
        model,
        val_generator,
        batch_size=config.batch_size,
    )

In [None]:
# Save the trained model to a file named after config.model_name.
# Note: per the Keras documentation, model.save() stores the whole model
# (architecture, weights and optimizer state), not only the weights.
model.save(config.model_name)