In [15]:
"""
Robert E Ruzzo III
ModelValidity.ipynb

The purpose of this notebook is to utilize several methods to determine model fitness

"""
import numpy as np
np.random.seed(42)
import tensorflow as tf
tf.set_random_seed(42)
import pandas as pd
from keras.models import Model
from keras.layers import Dense, Conv2D, BatchNormalization, MaxPooling2D, Flatten, Dropout, Input
from keras.callbacks import TensorBoard, ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
import os
import keras
from sklearn.metrics import classification_report, confusion_matrix, auc, roc_curve
from keras import metrics
from keras import backend as b
import pandas as pd

In [16]:
class Configuration:
    """Hold the notebook's tunable values in one place so they are easy to change.

    Every constructor argument is optional; calling ``Configuration()`` with no
    arguments reproduces the original hard-coded settings.

    Args:
        data_path (str): Location of the training-set labels CSV.
        data_dir (str): Directory which contains the sub-directories of the
            photos to be analyzed. For this notebook to work correctly the
            pictures have to be divided into sub-directories based on their class.
        batch_size (int): The batch processing size.
        image_width (int): Expected width of the image being processed.
        image_height (int): Expected height of the image being processed.

    Attributes:
        batch_size (int): Value of ``batch_size``.
        data (pandas.DataFrame): Contents of ``data_path`` as read by ``pd.read_csv``.
        data_dir (str): Value of ``data_dir``.
        image_width (int): Value of ``image_width``.
        image_height (int): Value of ``image_height``.
    """

    # Default locations (local Windows dataset copy). Alternative paths that
    # were previously used here:
    #   '/floyd/input/cancer_histo/train_labels.csv'
    #   '/floyd/input/cancer_histo/train'
    DEFAULT_DATA_PATH = 'D:\\Datasets\\histopathologic-cancer-detection\\train_labels.csv'
    DEFAULT_DATA_DIR = 'D:\\Datasets\\histopathologic-cancer-detection\\train\\'

    def __init__(self,
                 data_path=DEFAULT_DATA_PATH,
                 data_dir=DEFAULT_DATA_DIR,
                 batch_size=128,
                 image_width=96,
                 image_height=96):
        self.batch_size = batch_size
        # The labels CSV is read eagerly, so a missing file fails here rather
        # than later in the notebook.
        self.data = pd.read_csv(data_path)
        self.data_dir = data_dir
        self.image_width = image_width
        self.image_height = image_height

In [17]:
# Instantiate the settings object; constructing it also reads the labels CSV
# into config.data.
config = Configuration()

In [18]:
# Preview the first rows of the labels table loaded by Configuration.
config.data.head()


Unnamed: 0,id,label
0,f38a6374c348f90b587e046aac6079959adf3835,0
1,c18f2d887b7ae4f6742ee445113fa1aef383ed77,1
2,755db6279dae599ebb4d39a9123cce439965282d,0
3,bc3f0c64fb968ff4a8bd33af6971ecae77c75e08,0
4,068aba587a4950175d04c680d38943fd488d6a9d,0


In [19]:
def auc(y_true, y_pred):
    """Custom AUC metric built on ``tf.metrics.auc``.

    This was used for custom-metric AUC generation and has to be defined in the
    notebook whenever a model that uses it is loaded.

    Args:
        y_true: The base-truth class of the image.
        y_pred: The predicted class of the image.

    Returns:
        The second element of the pair returned by ``tf.metrics.auc``
        (the update op in the TF 1.x API).

    Note:
        Defining this function rebinds the notebook-level name ``auc``, which
        the import cell also binds to ``sklearn.metrics.auc``.
    """
    metric_ops = tf.metrics.auc(y_true, y_pred)
    auc_update = metric_ops[1]
    # Initialize TensorFlow's local variables in the Keras session once the
    # metric has been built.
    session = b.get_session()
    session.run(tf.local_variables_initializer())
    return auc_update

In [6]:
#Uncomment if using a custom metric, make sure the custom metric is defined in this notebook
#keras.losses.custom_metrics = auc

In [7]:
#Exclude the label row of the image that throws exceptions when loaded
# NOTE(review): none of the later cells visible in this notebook read
# config.data, so this only affects the labels table — confirm the image file
# itself is also absent from config.data_dir.
corrupt_image_id = 'b44ceb87f4fb92169ec928c652d6e1209b48135c'
config.data = config.data[config.data.id != corrupt_image_id]

In [24]:
#Setup the validation data: the 20% slice of the 80/20 training/validation split
def setup_data(train_data_dir, batch_size, target_size=(96, 96)):
    """Create the validation image generator used to assess the model.

    Note: This function has to return the number of items in the iterable so
    that callers can work out how many batches to request.

    Args:
        train_data_dir (string) : directory that the training and validation data are located
        batch_size (int) : size of the batches (count)
        target_size (tuple of int) : (height, width) the images are resized to;
            defaults to the original hard-coded (96, 96)

    Returns:
        validation_generator (iterable image generator) : validation images,
            yielded in a fixed, non-shuffled order
        validation_generator.n (int): The number of items in the validation generator iterable
    """
    # NOTE(review): shear/zoom/flip augmentation is also applied to the
    # validation subset because a single ImageDataGenerator is used; kept as in
    # the original — confirm this is intended for evaluation.
    datagen = ImageDataGenerator(rescale=1.0/255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        validation_split=0.2) # set validation split

    validation_generator = datagen.flow_from_directory(
        train_data_dir,
        batch_size=batch_size,
        target_size=target_size,
        class_mode='categorical',
        # flow_from_directory shuffles by default. The notebook compares
        # predictions against validation_generator.classes, which is in file
        # order, so batches must come out in that same order.
        shuffle=False,
        subset='validation')

    return validation_generator, validation_generator.n

In [25]:
# Load the saved model from 'DoubleLayers_Crop96_40' (a path relative to the
# current working directory).
histo_model = keras.models.load_model('DoubleLayers_Crop96_40')

In [26]:
#histo_model = keras.models.load_model('double_no_crop_auc_10', custom_objects={'auc': auc})

In [27]:
def eval_model(model, val_generator, batch_size):
    """Evaluate a trained model on the validation generator and print loss and accuracy.

    Args:
        model (Keras/TensorFlow model object) : The trained model to evaluate
        val_generator (iterable image generator object) : The iterable validation
            generator from the setup_data function; its ``n`` attribute gives
            the number of validation items
        batch_size (int) : The batch size, or number of objects processed with each batch iteration.

    Returns:
        scores : the value returned by ``model.evaluate_generator``; element 0
            is printed as the loss and element 1 as the accuracy
    """
    # Take the sample count from the generator itself instead of relying on a
    # notebook-level variable, so the function only depends on its arguments.
    n_samples = val_generator.n
    # Ceiling division: exactly enough batches to cover every sample once.
    # (n // batch_size + 1 would request one batch too many whenever n is an
    # exact multiple of batch_size.)
    steps = (n_samples + batch_size - 1) // batch_size
    scores = model.evaluate_generator(val_generator, steps=steps)
    print("Loss: " + str(scores[0]) + " Accuracy: " + str(scores[1]))
    return scores

In [None]:
#Create the validation generator and record how many images it holds
val_generator, val_n = setup_data(train_data_dir=config.data_dir,
                                  batch_size=config.batch_size)

## Model Evaluation

In [None]:
# Evaluate the loaded model on the validation generator, placed on GPU 0.
device_name = "/gpu:0"
with tf.device(device_name):
    score = eval_model(model=histo_model,
                       val_generator=val_generator,
                       batch_size=config.batch_size)

In [None]:
#Confusion Matrix and Classification Report
# Ceiling division: request exactly enough batches to cover val_n samples.
# (val_n // batch_size + 1 would yield one extra batch when val_n is an exact
# multiple of the batch size, making Y_pred longer than val_generator.classes.)
pred_steps = (val_n + config.batch_size - 1) // config.batch_size
# NOTE(review): the row order of Y_pred matches val_generator.classes only if
# the generator does not shuffle — verify the flow_from_directory settings in
# setup_data.
Y_pred = histo_model.predict_generator(val_generator, pred_steps)
# Predicted class = index of the highest score in each row.
y_pred = np.argmax(Y_pred, axis=1)


In [None]:
#Print out the confusion matrix and the per-class classification report
# NOTE(review): assumes class index 0 is the no-tumor directory and index 1 the
# tumor directory — TODO confirm against val_generator.class_indices.
target_names = ['No_Tumor', 'Tumor']
true_classes = val_generator.classes
print('Confusion Matrix')
print(confusion_matrix(true_classes, y_pred))
print('Classification Report')
print(classification_report(true_classes, y_pred, target_names=target_names))
