This notebook will use IFCB images to train a convolutional neural network and have it classify images.

In [None]:
from keras.models import Sequential, Model, load_model
from keras.layers import Dense, Activation, Conv2D, MaxPooling2D, Flatten, Dropout, BatchNormalization, ZeroPadding2D, Input
from keras.layers import concatenate
from keras.preprocessing import image as keras_image
from keras.optimizers import Adam
from keras.backend import tf as ktf
from keras.constraints import maxnorm
from keras.layers import Add, Multiply, Concatenate, Average

import keras.backend as K
import numpy as np
import cv2 as cv2
from collections import Counter

import os
import skimage.transform as ski_transform
import skimage.io as ski_io
from skimage import img_as_float


from PIL import Image as PIL_Image


from sklearn.metrics import classification_report, confusion_matrix

import pandas as pd
import seaborn as sns
from scipy.io import loadmat, savemat

import ROI_image_reader_stitched as ROI
import shutil
import pickle

import matplotlib.pyplot as plt
%pylab inline
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')


In [None]:
#show the installed Keras version; the cells below use the generator-based API
#(fit_generator / predict_generator), so the version matters when re-running this notebook
import keras
keras.__version__

In [None]:
#root folder of the project data; edit this for your machine
home_path = 'F:/IFCB/'
#training images (read with flow_from_directory further down)
out_folder_of_images = home_path + 'Training_sets/'
#validation images; the cells below move files between this folder and the training folder
folder_of_images_validation = home_path + 'validation_sets/'
#width of the final softmax layer of the network
number_of_categories = 112

In [None]:
def eliminate_background(image):
    """Normalise an image against its middle row and invert it.

    The mean and standard deviation of the middle row (index ``shape[0] // 2``
    along the first axis) are used as the background estimate. The image is
    shifted by that mean, divided by that standard deviation (plus 0.001 so a
    perfectly flat row cannot cause a division by zero) and multiplied by -1.

    :param image: array-like with at least two dimensions; it is NOT modified
    :return: a new floating-point array with the same shape as ``image``
    """
    pixels = np.asarray(image)
    # Work on a float copy: the caller's array stays untouched and integer
    # images (e.g. uint8) no longer fail on the in-place arithmetic below.
    pixels = pixels.astype(np.result_type(pixels.dtype, np.float32), copy=True)

    mid = int(pixels.shape[0] / 2)
    # Both statistics are taken before the array is modified.
    bkgd_mean = pixels[mid, :].mean()
    bkgd_std = pixels[mid, :].std()

    pixels -= bkgd_mean
    pixels /= (bkgd_std + 0.001)
    pixels *= -1
    return pixels

In [None]:
#side length in pixels of the square every image is resized to; used as target_size
#of the generators and as the input shape of the network
image_size = 300  #an X by X size square

In [None]:
#return every validation image to the folder of the same name in the training set,
#so that a fresh validation split can be drawn by the next cell
photos = os.walk(folder_of_images_validation)

for folder, _subfolders, pictures in photos:
    print(folder)
    #last path component = class folder name ('' for the validation root itself)
    outdir = folder.split('/')[-1]
    destination = out_folder_of_images + outdir + '/'
    for picture in pictures:
        #only image files are moved; anything else stays where it is
        if picture[-3:] in ('png', 'tif'):
            shutil.move(folder + '/' + picture, destination + picture)

In [None]:
#create a validation dataset: every 5th image of each training folder (offset 0)
#is moved into the folder of the same name in the validation set
#(shutil is already imported in the first cell, so it is not re-imported here)
photos = os.walk(out_folder_of_images)

num_photo = 0  #number of images moved into the validation set
for files in photos:
    print(files[0])
    outdir = files[0].split('/')[-1]  #'' for the training root itself
    validation_dir = folder_of_images_validation + outdir
    #makedirs also creates the validation root when it does not exist yet;
    #the isdir test avoids re-listing the validation folder for every class
    if outdir != '' and not os.path.isdir(validation_dir):
        os.makedirs(validation_dir)
    #os.walk yields file names in arbitrary order; sorting makes the split reproducible
    for picture in sorted(files[2])[0::5]:
        if picture[-3:] == 'png' or picture[-3:] == 'tif':
            num_photo += 1
            shutil.move(files[0]+'/'+picture, validation_dir + '/' + picture)

In [None]:
#training data generator
#random horizontal shifts and flips give the network extra variants of every training image
#NOTE(review): width_shift_range=5 is an integer >= 1; recent Keras versions read that as pixels,
#older ones as a fraction of the width -- confirm against the installed version
#NOTE(review): Keras applies `rescale` after `preprocessing_function`, so the background-normalised
#values are additionally multiplied by 1/255 -- confirm this is intended
#options that were tried and are currently switched off: rotation_range=10, height_shift_range=5,
#cval=128, samplewise_std_normalization, zoom_range=0.1, featurewise_center, featurewise_std_normalization
input_photos = keras_image.ImageDataGenerator(
    preprocessing_function=eliminate_background,
    rescale=1/255.,
    width_shift_range=5,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',  #how pixels uncovered by a shift are filled
)

#stream the training images from disk (shuffled, which is the flow_from_directory default)
photos = input_photos.flow_from_directory(
    out_folder_of_images,
    target_size=(image_size, image_size),  #squish/stretch images to this size
    color_mode='grayscale',                #all ifcb images are grayscale
    class_mode='categorical',              #one-hot labels, more than two classes
    batch_size=16,
)
                                          

In [None]:
#validation data generator
#same preprocessing as the training generator but without shifts; note that the random
#horizontal/vertical flips are still applied to the validation images
#options currently switched off: rotation_range=10, width_shift_range=5, height_shift_range=5,
#cval=128, zoom_range=0.1
input_photos_validation = keras_image.ImageDataGenerator(
    preprocessing_function=eliminate_background,
    rescale=1/255.,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
)

#stream the validation images from disk
photos_validation = input_photos_validation.flow_from_directory(
    folder_of_images_validation,
    target_size=(image_size, image_size),  #squish/stretch images to this size
    color_mode='grayscale',                #all ifcb images are grayscale
    class_mode='categorical',              #one-hot labels, more than two classes
    batch_size=16,
)

In [None]:
#confusion_matrix data generator (training folder)
#no augmentation: images are only background-normalised and rescaled, and shuffle=False
#keeps the order of the predictions aligned with photos_confusion.classes
input_photos_confusion = keras_image.ImageDataGenerator(
    preprocessing_function=eliminate_background,
    rescale=1/255.,
    fill_mode='nearest',
)

#stream the training images in a fixed order for evaluation
photos_confusion = input_photos_confusion.flow_from_directory(
    out_folder_of_images,
    target_size=(image_size, image_size),  #squish/stretch images to this size
    color_mode='grayscale',                #all ifcb images are grayscale
    class_mode='categorical',              #one-hot labels, more than two classes
    batch_size=16,
    shuffle=False,
)

In [None]:
#confusion_matrix data generator (validation folder)
#no augmentation: images are only background-normalised and rescaled, and shuffle=False
#keeps the order of the predictions aligned with photos_validation_confusion.classes
input_photos_validation_confusion = keras_image.ImageDataGenerator(
    preprocessing_function=eliminate_background,
    rescale=1/255.,
    fill_mode='nearest',
)

#stream the validation images in a fixed order for evaluation
#(the flow is built from the generator defined just above; it was previously built from
#input_photos_confusion, which left input_photos_validation_confusion unused)
photos_validation_confusion = input_photos_validation_confusion.flow_from_directory(
    folder_of_images_validation,
    target_size=(image_size, image_size),  #squish/stretch images to this size
    color_mode='grayscale',                #all ifcb images are grayscale
    class_mode='categorical',              #one-hot labels, more than two classes
    batch_size=16,
    shuffle=False,
)

#Okay the images are all ready to be loaded and will be resized to a 300x300 image
#(the size comes from image_size, passed as target_size to flow_from_directory).

In [None]:
#Functional API version of the model
#since Keras update broke the easy way of replacing model layers it seems more  prudent to use the functional way
#instead of the sequential api
#
#Spatial sizes quoted in the comments below assume image_size = 300 and the Keras defaults
#for MaxPooling2D (stride = pool size, no padding) -- TODO confirm with model.summary()

#stem: strided 7x7 convolution followed by pooling (300 -> 100 -> 50)
inputs = Input((image_size, image_size, 1))
conv1 = Conv2D(64, (7,7), padding='same', strides=3)(inputs)
conv1 = BatchNormalization()(conv1)
conv1 = Activation('relu')(conv1)
conv1 = MaxPooling2D(pool_size=(2,2))(conv1)

#two 3x3 convolutions at constant resolution (50x50, 128 channels)
conv2 = Conv2D(128, (3,3), padding='same', strides=1)(conv1)
conv2 = BatchNormalization()(conv2)
conv2 = Activation('relu')(conv2)

conv3 = Conv2D(128, (3,3), padding='same', strides=1)(conv2)
conv3 = BatchNormalization()(conv3)
conv3 = Activation('relu')(conv3)

#three parallel branches start from conv3; each one first reduces to 32 channels with a 1x1 convolution
#branch 1: 1x1 (no activation) -> 5x5 convolution -> pooling
conv_3a = Conv2D(32, (1,1), padding='same', strides=1, name='conv3a')(conv3)
conv_3a = BatchNormalization()(conv_3a)

conv_3b = Conv2D(128, (5,5), padding='same', name='conv_3b')(conv_3a)
conv_3b = BatchNormalization()(conv_3b)
conv_3b = Activation('relu')(conv_3b)
conv_3b = MaxPooling2D(pool_size=(2,2))(conv_3b)

#branch 2: 1x1 (no activation) -> 3x3 convolution -> pooling
conv_4a = Conv2D(32, (1,1), padding='same', strides=1, name='conv4a')(conv3)
conv_4a = BatchNormalization()(conv_4a)

conv_4b = Conv2D(128, (3,3), padding='same', name='conv_4b')(conv_4a)
conv_4b = BatchNormalization()(conv_4b)
conv_4b = Activation('relu')(conv_4b)
conv_4b = MaxPooling2D(pool_size=(2,2))(conv_4b)

#branch 3: like branch 2, but with a ReLU after the 1x1 convolution
conv4 = Conv2D(32, (1,1), padding='same')(conv3)
conv4 = BatchNormalization()(conv4)
conv4 = Activation('relu')(conv4)

conv5 = Conv2D(128, (3,3), padding='same')(conv4)
conv5 = BatchNormalization()(conv5)
conv5 = Activation('relu')(conv5)
conv5 = MaxPooling2D(pool_size=(2,2))(conv5)

#join the three branches along the last (channel) axis: 3 x 128 = 384 channels at 25x25
merged = concatenate([conv5, conv_3b, conv_4b])

#conv/pool stack on the merged features (25 -> 12)
conv7 = Conv2D(256, (3,3), padding='same')(merged)
conv7 = BatchNormalization()(conv7)
conv7 = Activation('relu')(conv7)
conv7 = MaxPooling2D(pool_size=(2,2))(conv7)

#'valid' padding trims the border here (12 -> 10), pooling halves it (10 -> 5)
conv9 = Conv2D(512, (3,3), padding='valid')(conv7)
conv9 = BatchNormalization()(conv9)
conv9 = Activation('relu')(conv9)
conv9 = MaxPooling2D(pool_size=(2,2))(conv9)

#the name conv9 is reused for the last convolution block (5 -> 2 after pooling)
conv9 = Conv2D(1024, (3,3), padding='same')(conv9)
conv9 = BatchNormalization()(conv9)
conv9 = Activation('relu')(conv9)
conv9 = MaxPooling2D(pool_size=(2,2))(conv9)

#classifier head: two fully connected layers with dropout
flat = Flatten()(conv9)
flat = Dense(1000)(flat)
flat = Activation('relu')(flat)
flat = Dropout(0.25)(flat)
flat = Dense(250)(flat)
flat = Activation('relu')(flat)


#output layer: one softmax probability per category
finish = Dropout(0.25)(flat)
finish = Dense(number_of_categories)(finish)
finish = Activation('softmax')(finish)

model = Model(inputs=[inputs], outputs=[finish])

In [None]:
#Adam optimizer: initial learning rate 1e-3 with a per-update decay of 1e-5
adam = Adam(decay=1e-5, lr=1e-3)

In [None]:
#multi-class problem with one-hot labels -> categorical cross-entropy; accuracy is tracked during training
model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
#total number of parameters in the model
print(model.count_params())
#summary() prints the layer table itself and returns None, so wrapping it in print()
#only added a stray "None" line to the output
model.summary()

In [None]:
#number of training images found by the training generator (one class label per image);
#used below to work out how many batches make up one epoch
num_images = len(photos.classes)
num_images

In [None]:
#class weights to counter the imbalance between classes
#adapted from: https://github.com/cbaziotis/keras-utilities/blob/master/kutilities/helpers/data_preparation.py

def get_class_weights(y, smooth_factor=0):
    """
    Compute one weight per class, inversely proportional to how often the class occurs.

    The most frequent class gets weight 1.0; every other class gets
    (largest count) / (its own count). With a positive ``smooth_factor`` the
    quantity ``smooth_factor * largest count`` is first added to every count,
    which pulls very uneven weights closer together.

    :param y: iterable of true labels (the labels must be hashable)
    :param smooth_factor: factor that smooths extremely uneven weights (0 = no smoothing)
    :return: dictionary mapping each label to its weight
    """
    frequencies = dict(Counter(y))

    if smooth_factor > 0:
        offset = max(frequencies.values()) * smooth_factor
        frequencies = {label: n + offset for label, n in frequencies.items()}

    largest = float(max(frequencies.values()))
    return {label: largest / n for label, n in frequencies.items()}

In [None]:
#per-class weights from the training labels, with 10% smoothing
temp_class_weights = get_class_weights(photos.classes, smooth_factor=0.1)

In [None]:
#train the network for 10 epochs
hist = model.fit_generator(photos,
                    #whole number of batches for one pass over all training images; taken from the
                    #generator itself so it cannot go stale when num_images is overwritten by another
                    #cell, and it is an int on both Python 2 and Python 3
                    steps_per_epoch=int(np.ceil(len(photos.classes) / float(photos.batch_size))),
                    epochs=10,
                    initial_epoch=0,
                    validation_data = photos_validation,
                    #NOTE(review): fixed number of validation batches per epoch, independent of the
                    #size of the validation set -- confirm this is intended
                    validation_steps = 600,
                    class_weight=temp_class_weights,  #this is to help with the unbalanced class issue
                          )

In [None]:
#use these lines to save the model to a file; change the filename to whatever you want
#the model is written to the location the load cell below reads from (home_path + 'models/'),
#so the notebook runs top to bottom without editing a placeholder path
model.save(home_path + 'models/CNN_model_mdl1.mdl')

#model weights are included in the model file itself so this isn't entirely necessary
model.save_weights(home_path + 'models/CNN_model_weights_mdl1.wts')

In [None]:
#reload the first model from disk (replaces the in-memory `model`)
#you might see a lot of warning for this one, but it should load fine
model = load_model(home_path + 'models/CNN_model_mdl1.mdl')

In [None]:
#generate a confusion matrix based on the training data
photos_confusion.reset()
#count the images on the generator that is actually being predicted on
num_images = len(photos_confusion.classes)
#whole number of batches needed to see every image exactly once (int on Python 2 and 3)
num_batches = int(np.ceil(num_images / float(photos_confusion.batch_size)))
Y_pred = model.predict_generator(photos_confusion, num_batches)
y_pred = np.argmax(Y_pred, axis=1)

#class names ordered by their integer label, so they line up with the rows/columns of the matrix
target_names = sorted(photos_confusion.class_indices, key=photos_confusion.class_indices.get)
check_answer = target_names
#pin the label set so the matrix keeps one row/column per class even when a class
#never occurs among the true or predicted labels
class_labels = list(range(len(target_names)))
conf_counts = confusion_matrix(photos_confusion.classes, y_pred[:num_images], labels=class_labels)

print('Confusion Matrix')
print(conf_counts)
print('Classification Report')
print(classification_report(photos_confusion.classes, y_pred[:num_images],
                            labels=class_labels, target_names=target_names))
#rows = true class, columns = predicted class
conf_mat = pd.DataFrame(conf_counts, columns=target_names, index=target_names)
conf_mat

In [None]:
#plot a heatmap of the confusion matrix
#every column (predicted class) is divided by its column total; the +1 avoids 0/0 for empty columns
figsize(20, 20)
column_totals = conf_mat.sum() + 1
sns.heatmap(conf_mat.divide(column_totals), vmax=1, cmap='binary')

In [None]:
#confusion matrix for validation data set
photos_validation_confusion.reset()
#count the validation images BEFORE working out the number of batches; previously num_images
#still held the size of the training set here, so far more batches than needed were predicted
num_images = len(photos_validation_confusion.classes)
#whole number of batches needed to see every image exactly once (int on Python 2 and 3)
num_batches = int(np.ceil(num_images / float(photos_validation_confusion.batch_size)))
Y_pred = model.predict_generator(photos_validation_confusion, num_batches)
y_pred = np.argmax(Y_pred, axis=1)

#class names ordered by their integer label, so they line up with the rows/columns of the matrix
target_names = sorted(photos_validation_confusion.class_indices,
                      key=photos_validation_confusion.class_indices.get)
check_answer = target_names
#pin the label set so the matrix keeps one row/column per class even when a class
#never occurs among the true or predicted labels
class_labels = list(range(len(target_names)))
conf_counts = confusion_matrix(photos_validation_confusion.classes, y_pred[:num_images], labels=class_labels)

print('Confusion Matrix')
print(conf_counts)
print('Classification Report')
print(classification_report(photos_validation_confusion.classes, y_pred[:num_images],
                            labels=class_labels, target_names=target_names))
#rows = true class, columns = predicted class
conf_mat = pd.DataFrame(conf_counts, columns=target_names, index=target_names)
conf_mat

In [None]:
#plot heatmap of confusion matrix for validation data
#every column (predicted class) is divided by its column total; the +1 avoids 0/0 for empty columns
figsize(20, 20)
column_totals = conf_mat.sum() + 1
sns.heatmap(conf_mat.divide(column_totals), vmax=1, cmap='binary')

In [None]:
#before training the next model: return every validation image to the folder of the
#same name in the training set, so that a different validation split can be drawn
photos = os.walk(folder_of_images_validation)

for folder, _subfolders, pictures in photos:
    print(folder)
    #last path component = class folder name ('' for the validation root itself)
    outdir = folder.split('/')[-1]
    destination = out_folder_of_images + outdir + '/'
    for picture in pictures:
        #only image files are moved; anything else stays where it is
        if picture[-3:] in ('png', 'tif'):
            shutil.move(folder + '/' + picture, destination + picture)

In [None]:
#create a second validation dataset: every 5th image of each training folder, starting
#with the 2nd one (offset 1), is moved into the folder of the same name in the validation set
photos = os.walk(out_folder_of_images)
num_photo = 0  #number of images moved into the validation set
for files in photos:
    print(files[0])
    outdir = files[0].split('/')[-1]  #'' for the training root itself
    validation_dir = folder_of_images_validation + outdir
    #makedirs also creates the validation root when it does not exist yet;
    #the isdir test avoids re-listing the validation folder for every class
    if outdir != '' and not os.path.isdir(validation_dir):
        os.makedirs(validation_dir)
    #os.walk yields file names in arbitrary order; sorting makes the split reproducible
    #and keeps this offset-1 fold distinct from folds drawn with other offsets
    for picture in sorted(files[2])[1::5]:
        if picture[-3:] == 'png' or picture[-3:] == 'tif':
            num_photo += 1
            shutil.move(files[0]+'/'+picture, validation_dir + '/' + picture)

In [None]:
#training data generator
#random horizontal shifts and flips give the network extra variants of every training image
#NOTE(review): width_shift_range=5 is an integer >= 1; recent Keras versions read that as pixels,
#older ones as a fraction of the width -- confirm against the installed version
#NOTE(review): Keras applies `rescale` after `preprocessing_function`, so the background-normalised
#values are additionally multiplied by 1/255 -- confirm this is intended
#options that were tried and are currently switched off: rotation_range=10, height_shift_range=5,
#cval=128, samplewise_std_normalization, zoom_range=0.1, featurewise_center, featurewise_std_normalization
input_photos = keras_image.ImageDataGenerator(
    preprocessing_function=eliminate_background,
    rescale=1/255.,
    width_shift_range=5,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',  #how pixels uncovered by a shift are filled
)

#stream the training images from disk (shuffled, which is the flow_from_directory default)
photos = input_photos.flow_from_directory(
    out_folder_of_images,
    target_size=(image_size, image_size),  #squish/stretch images to this size
    color_mode='grayscale',                #all ifcb images are grayscale
    class_mode='categorical',              #one-hot labels, more than two classes
    batch_size=16,
)
                                          

In [None]:
#validation data generator
#same preprocessing as the training generator but without shifts; note that the random
#horizontal/vertical flips are still applied to the validation images
input_photos_validation = keras_image.ImageDataGenerator(
    preprocessing_function=eliminate_background,
    rescale=1/255.,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
)

#stream the validation images from disk
photos_validation = input_photos_validation.flow_from_directory(
    folder_of_images_validation,
    target_size=(image_size, image_size),  #squish/stretch images to this size
    color_mode='grayscale',                #all ifcb images are grayscale
    class_mode='categorical',              #one-hot labels, more than two classes
    batch_size=16,
)


#confusion_matrix data generator (training folder)
#no augmentation: images are only background-normalised and rescaled, and shuffle=False
#keeps the order of the predictions aligned with photos_confusion.classes
input_photos_confusion = keras_image.ImageDataGenerator(
    preprocessing_function=eliminate_background,
    rescale=1/255.,
    fill_mode='nearest',
)

#stream the training images in a fixed order for evaluation
photos_confusion = input_photos_confusion.flow_from_directory(
    out_folder_of_images,
    target_size=(image_size, image_size),  #squish/stretch images to this size
    color_mode='grayscale',                #all ifcb images are grayscale
    class_mode='categorical',              #one-hot labels, more than two classes
    batch_size=16,
    shuffle=False,
)

In [None]:
#confusion_matrix data generator (validation folder)
#no augmentation: images are only background-normalised and rescaled, and shuffle=False
#keeps the order of the predictions aligned with photos_validation_confusion.classes
input_photos_validation_confusion = keras_image.ImageDataGenerator(
    preprocessing_function=eliminate_background,
    rescale=1/255.,
    fill_mode='nearest',
)

#stream the validation images in a fixed order for evaluation
#(the flow is built from the generator defined just above; it was previously built from
#input_photos_confusion, which left input_photos_validation_confusion unused)
photos_validation_confusion = input_photos_validation_confusion.flow_from_directory(
    folder_of_images_validation,
    target_size=(image_size, image_size),  #squish/stretch images to this size
    color_mode='grayscale',                #all ifcb images are grayscale
    class_mode='categorical',              #one-hot labels, more than two classes
    batch_size=16,
    shuffle=False,
)

In [None]:
#Functional API version of the model
#since Keras update broke the easy way of replacing model layers it seems more  prudent to use the functional way
#instead of the sequential api
#
#Second model: the layer stack matches the first model except that the first convolution
#uses a 5x5 kernel instead of 7x7.
#Spatial sizes quoted in the comments below assume image_size = 300 and the Keras defaults
#for MaxPooling2D (stride = pool size, no padding) -- TODO confirm with model.summary()

#stem: strided 5x5 convolution followed by pooling (300 -> 100 -> 50)
inputs = Input((image_size, image_size, 1))
conv1 = Conv2D(64, (5,5), padding='same', strides=3)(inputs)
conv1 = BatchNormalization()(conv1)
conv1 = Activation('relu')(conv1)
conv1 = MaxPooling2D(pool_size=(2,2))(conv1)

#two 3x3 convolutions at constant resolution (50x50, 128 channels)
conv2 = Conv2D(128, (3,3), padding='same', strides=1)(conv1)
conv2 = BatchNormalization()(conv2)
conv2 = Activation('relu')(conv2)

conv3 = Conv2D(128, (3,3), padding='same', strides=1)(conv2)
conv3 = BatchNormalization()(conv3)
conv3 = Activation('relu')(conv3)

#three parallel branches start from conv3; each one first reduces to 32 channels with a 1x1 convolution
#branch 1: 1x1 (no activation) -> 5x5 convolution -> pooling
conv_3a = Conv2D(32, (1,1), padding='same', strides=1, name='conv3a')(conv3)
conv_3a = BatchNormalization()(conv_3a)

conv_3b = Conv2D(128, (5,5), padding='same', name='conv_3b')(conv_3a)
conv_3b = BatchNormalization()(conv_3b)
conv_3b = Activation('relu')(conv_3b)
conv_3b = MaxPooling2D(pool_size=(2,2))(conv_3b)

#branch 2: 1x1 (no activation) -> 3x3 convolution -> pooling
conv_4a = Conv2D(32, (1,1), padding='same', strides=1, name='conv4a')(conv3)
conv_4a = BatchNormalization()(conv_4a)

conv_4b = Conv2D(128, (3,3), padding='same', name='conv_4b')(conv_4a)
conv_4b = BatchNormalization()(conv_4b)
conv_4b = Activation('relu')(conv_4b)
conv_4b = MaxPooling2D(pool_size=(2,2))(conv_4b)

#branch 3: like branch 2, but with a ReLU after the 1x1 convolution
conv4 = Conv2D(32, (1,1), padding='same')(conv3)
conv4 = BatchNormalization()(conv4)
conv4 = Activation('relu')(conv4)

conv5 = Conv2D(128, (3,3), padding='same')(conv4)
conv5 = BatchNormalization()(conv5)
conv5 = Activation('relu')(conv5)
conv5 = MaxPooling2D(pool_size=(2,2))(conv5)

#join the three branches along the last (channel) axis: 3 x 128 = 384 channels at 25x25
merged = concatenate([conv5, conv_3b, conv_4b])

#conv/pool stack on the merged features (25 -> 12)
conv7 = Conv2D(256, (3,3), padding='same')(merged)
conv7 = BatchNormalization()(conv7)
conv7 = Activation('relu')(conv7)
conv7 = MaxPooling2D(pool_size=(2,2))(conv7)

#'valid' padding trims the border here (12 -> 10), pooling halves it (10 -> 5)
conv9 = Conv2D(512, (3,3), padding='valid')(conv7)
conv9 = BatchNormalization()(conv9)
conv9 = Activation('relu')(conv9)
conv9 = MaxPooling2D(pool_size=(2,2))(conv9)

#the name conv9 is reused for the last convolution block (5 -> 2 after pooling)
conv9 = Conv2D(1024, (3,3), padding='same')(conv9)
conv9 = BatchNormalization()(conv9)
conv9 = Activation('relu')(conv9)
conv9 = MaxPooling2D(pool_size=(2,2))(conv9)
#conv9 = MaxPooling2D(pool_size=(2,2))(conv9)

#classifier head: two fully connected layers with dropout
flat = Flatten()(conv9)
flat = Dense(1000)(flat)
flat = Activation('relu')(flat)
flat = Dropout(0.25)(flat)
flat = Dense(250)(flat)
flat = Activation('relu')(flat)


#output layer: one softmax probability per category
finish = Dropout(0.25)(flat)
finish = Dense(number_of_categories)(finish)
finish = Activation('softmax')(finish)

model = Model(inputs=[inputs], outputs=[finish])

In [None]:
#optimizer and loss for the second model (same settings as for the first one)
adam = Adam(lr=0.001, decay=.00001)
model.compile(loss='categorical_crossentropy',
              optimizer=adam,
              metrics=['accuracy'],
             )

#total number of parameters in the model
print(model.count_params())
#summary() prints the layer table itself and returns None, so wrapping it in print()
#only added a stray "None" line to the output
model.summary()

#class weights and image count are recomputed because the training set changed with the new split
temp_class_weights = get_class_weights(photos.classes, 0.1)
num_images = len(photos.classes)

In [None]:
#train the second network for 10 epochs
hist = model.fit_generator(photos,
                    #whole number of batches for one pass over all training images; taken from the
                    #generator itself so it cannot go stale when num_images is overwritten by another
                    #cell, and it is an int on both Python 2 and Python 3
                    steps_per_epoch=int(np.ceil(len(photos.classes) / float(photos.batch_size))),
                    epochs=10,
                    initial_epoch=0,
                    validation_data = photos_validation,
                    #NOTE(review): fixed number of validation batches per epoch, independent of the
                    #size of the validation set -- confirm this is intended
                    validation_steps = 600,
                    class_weight=temp_class_weights,  #this is to help with the unbalanced class issue
                          )

In [None]:
#save the second model where the load cell below reads it from (home_path + 'models/'),
#so the notebook runs top to bottom without editing a placeholder path
model.save(home_path + 'models/CNN_model_mdl2.mdl')
#the weights are also written separately
model.save_weights(home_path + 'models/CNN_model_weights_mdl2.wts')

In [None]:
#reload the second model from disk (replaces the in-memory `model`)
model = load_model(home_path + 'models/CNN_model_mdl2.mdl')

In [None]:
#confusion matrix of the second model on the training data
photos_confusion.reset()
#count the images on the generator that is actually being predicted on
num_images = len(photos_confusion.classes)
#whole number of batches needed to see every image exactly once (int on Python 2 and 3)
num_batches = int(np.ceil(num_images / float(photos_confusion.batch_size)))
Y_pred = model.predict_generator(photos_confusion, num_batches)
y_pred = np.argmax(Y_pred, axis=1)

#class names ordered by their integer label, so they line up with the rows/columns of the matrix
target_names = sorted(photos_confusion.class_indices, key=photos_confusion.class_indices.get)
check_answer = target_names
#pin the label set so the matrix keeps one row/column per class even when a class
#never occurs among the true or predicted labels
class_labels = list(range(len(target_names)))
conf_counts = confusion_matrix(photos_confusion.classes, y_pred[:num_images], labels=class_labels)

print('Confusion Matrix')
print(conf_counts)
print('Classification Report')
print(classification_report(photos_confusion.classes, y_pred[:num_images],
                            labels=class_labels, target_names=target_names))
#rows = true class, columns = predicted class
conf_mat = pd.DataFrame(conf_counts, columns=target_names, index=target_names)
conf_mat

In [None]:
#heatmap of the training confusion matrix of the second model
#every column (predicted class) is divided by its column total; the +1 avoids 0/0 for empty columns
figsize(20, 20)
column_totals = conf_mat.sum() + 1
sns.heatmap(conf_mat.divide(column_totals), vmax=1, cmap='binary')

In [None]:
#confusion matrix of the second model on the validation data
photos_validation_confusion.reset()
#count the validation images BEFORE working out the number of batches; previously num_images
#still held the size of the training set here, so far more batches than needed were predicted
num_images = len(photos_validation_confusion.classes)
#whole number of batches needed to see every image exactly once (int on Python 2 and 3)
num_batches = int(np.ceil(num_images / float(photos_validation_confusion.batch_size)))
Y_pred = model.predict_generator(photos_validation_confusion, num_batches)
y_pred = np.argmax(Y_pred, axis=1)

#class names ordered by their integer label, so they line up with the rows/columns of the matrix
target_names = sorted(photos_validation_confusion.class_indices,
                      key=photos_validation_confusion.class_indices.get)
check_answer = target_names
#pin the label set so the matrix keeps one row/column per class even when a class
#never occurs among the true or predicted labels
class_labels = list(range(len(target_names)))
conf_counts = confusion_matrix(photos_validation_confusion.classes, y_pred[:num_images], labels=class_labels)

print('Confusion Matrix')
print(conf_counts)
print('Classification Report')
print(classification_report(photos_validation_confusion.classes, y_pred[:num_images],
                            labels=class_labels, target_names=target_names))
#rows = true class, columns = predicted class
conf_mat = pd.DataFrame(conf_counts, columns=target_names, index=target_names)
conf_mat

In [None]:
#heatmap of the validation confusion matrix of the second model
#every column (predicted class) is divided by its column total; the +1 avoids 0/0 for empty columns
figsize(20, 20)
column_totals = conf_mat.sum() + 1
sns.heatmap(conf_mat.divide(column_totals), vmax=1, cmap='binary')

In [None]:
#return every validation image to the folder of the same name in the training set,
#so that the next validation split can be drawn
photos = os.walk(folder_of_images_validation)

for folder, _subfolders, pictures in photos:
    print(folder)
    #last path component = class folder name ('' for the validation root itself)
    outdir = folder.split('/')[-1]
    destination = out_folder_of_images + outdir + '/'
    for picture in pictures:
        #only image files are moved; anything else stays where it is
        if picture[-3:] in ('png', 'tif'):
            shutil.move(folder + '/' + picture, destination + picture)

In [None]:
#create a third validation dataset: every 5th image of each training folder, starting
#with the 3rd one (offset 2), is moved into the folder of the same name in the validation set
#(shutil is already imported in the first cell, so it is not re-imported here)
photos = os.walk(out_folder_of_images)

num_photo = 0  #number of images moved into the validation set
for files in photos:
    print(files[0])
    outdir = files[0].split('/')[-1]  #'' for the training root itself
    validation_dir = folder_of_images_validation + outdir
    #makedirs also creates the validation root when it does not exist yet;
    #the isdir test avoids re-listing the validation folder for every class
    if outdir != '' and not os.path.isdir(validation_dir):
        os.makedirs(validation_dir)
    #os.walk yields file names in arbitrary order; sorting makes the split reproducible
    #and keeps this offset-2 fold distinct from folds drawn with other offsets
    for picture in sorted(files[2])[2::5]:
        if picture[-3:] == 'png' or picture[-3:] == 'tif':
            num_photo += 1
            shutil.move(files[0]+'/'+picture, validation_dir + '/' + picture)

In [None]:
#training data generator
#random horizontal shifts and flips give the network extra variants of every training image
#NOTE(review): width_shift_range=5 is an integer >= 1; recent Keras versions read that as pixels,
#older ones as a fraction of the width -- confirm against the installed version
#NOTE(review): Keras applies `rescale` after `preprocessing_function`, so the background-normalised
#values are additionally multiplied by 1/255 -- confirm this is intended
#options that were tried and are currently switched off: rotation_range=10, height_shift_range=5,
#cval=128, samplewise_std_normalization, zoom_range=0.1, featurewise_center, featurewise_std_normalization
input_photos = keras_image.ImageDataGenerator(
    preprocessing_function=eliminate_background,
    rescale=1/255.,
    width_shift_range=5,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',  #how pixels uncovered by a shift are filled
)

#stream the training images from disk (shuffled, which is the flow_from_directory default)
photos = input_photos.flow_from_directory(
    out_folder_of_images,
    target_size=(image_size, image_size),  #squish/stretch images to this size
    color_mode='grayscale',                #all ifcb images are grayscale
    class_mode='categorical',              #one-hot labels, more than two classes
    batch_size=16,
)
                                          

In [None]:
#validation data generator
# Same preprocessing as the training generator, without the width shift.
# NOTE(review): horizontal/vertical flips are still enabled here, so the
# validation images are augmented too -- confirm this is intended.
input_photos_validation = keras_image.ImageDataGenerator(
    rescale=1/255.,  #multiply pixel values by 1/255
    preprocessing_function=eliminate_background,  #per-image background removal
    horizontal_flip=True,  #flip images horizontally
    vertical_flip=True,  #flip images vertically
    fill_mode='nearest',  #one of constant, wrap, reflect, nearest
)

#start the flow of validation images (shuffle left at its default)
photos_validation = input_photos_validation.flow_from_directory(
    folder_of_images_validation,
    target_size=(image_size, image_size),  #squish/stretch images to this size
    color_mode='grayscale',  #all ifcb images are grayscale
    class_mode='categorical',  #there are multiple classes of images (i.e. > 2)
    batch_size=16,  #how many images per batch
)


#confusion_matrix data generator (training images)
# No shift or flip options are set: only the background removal and the 1/255
# rescale are applied.
input_photos_confusion = keras_image.ImageDataGenerator(
    rescale=1/255.,  #multiply pixel values by 1/255
    preprocessing_function=eliminate_background,  #per-image background removal
    fill_mode='nearest',  #one of constant, wrap, reflect, nearest
)

#read the training images in a fixed order (shuffle=False) so predictions can
#be compared position-by-position with photos_confusion.classes
photos_confusion = input_photos_confusion.flow_from_directory(
    out_folder_of_images,
    target_size=(image_size, image_size),  #squish/stretch images to this size
    color_mode='grayscale',  #all ifcb images are grayscale
    class_mode='categorical',  #there are multiple classes of images (i.e. > 2)
    batch_size=16,  #how many images per batch
    shuffle=False,
)

In [None]:
#confusion_matrix data generator (validation images)

# No shift or flip options are set: only the background removal and the 1/255
# rescale are applied.
input_photos_validation_confusion = keras_image.ImageDataGenerator(
                                        fill_mode='nearest', #one of constant, wrap, reflect, nearest
                                        preprocessing_function=eliminate_background,
                                        rescale = 1/255.,
                                        )
#read the validation images in a fixed order (shuffle=False) so predictions can
#be compared position-by-position with photos_validation_confusion.classes.
#The flow is built from the generator configured in this cell, so the cell
#does not depend on input_photos_confusion having been defined first.
photos_validation_confusion = input_photos_validation_confusion.flow_from_directory(folder_of_images_validation,
                                          shuffle=False,
                                          color_mode='grayscale', #all ifcb images are grayscale
                                          class_mode='categorical', #there are multiple classes of images (i.e. > 2)
                                          target_size=(image_size,image_size),  #squish/stretch images to this size
                                          batch_size=16 #how many images per batch
                                        )

In [None]:
#Functional API version of the model

def _conv_bn_relu(tensor, filters, kernel_size, **conv_kwargs):
    """Apply Conv2D ('same' padding) -> BatchNormalization -> ReLU to `tensor`.

    Extra keyword arguments (e.g. strides) are forwarded to Conv2D unchanged.
    """
    tensor = Conv2D(filters, kernel_size, padding='same', **conv_kwargs)(tensor)
    tensor = BatchNormalization()(tensor)
    return Activation('relu')(tensor)


inputs = Input((image_size, image_size, 1))

# Convolutional trunk: the strides=2 blocks do the down-sampling; there is no
# pooling layer until after the last block.
conv1 = _conv_bn_relu(inputs, 64, (5,5), strides=2)
conv2 = _conv_bn_relu(conv1, 64, (3,3), strides=1)
conv3 = _conv_bn_relu(conv2, 64, (3,3), strides=2)
conv4 = _conv_bn_relu(conv3, 128, (3,3))
conv5 = _conv_bn_relu(conv4, 128, (3,3), strides=2)
conv6 = _conv_bn_relu(conv5, 256, (3,3))
conv7 = _conv_bn_relu(conv6, 256, (3,3), strides=2)
conv8 = _conv_bn_relu(conv7, 512, (3,3))
conv9 = _conv_bn_relu(conv8, 512, (3,3), strides=2)
conv10 = _conv_bn_relu(conv9, 1024, (3,3), strides=2)
conv10 = MaxPooling2D(pool_size=(2,2))(conv10)

# Fully connected head: 1000 -> 250 units, with dropout after each ReLU.
flat = Flatten()(conv10)
flat = Dense(1000)(flat)
flat = Activation('relu')(flat)
flat = Dropout(0.35)(flat)
flat = Dense(250)(flat)
flat = Activation('relu')(flat)

# Output layer: one softmax probability per category.
finish = Dropout(0.35)(flat)
finish = Dense(number_of_categories)(finish)
finish = Activation('softmax')(finish)

model = Model(inputs=[inputs], outputs=[finish])

In [None]:
# Compile the network for multi-class classification with the Adam optimizer.
adam = Adam(lr=0.001, decay=.00001)
model.compile(loss='categorical_crossentropy',
              optimizer=adam,
              metrics=['accuracy'],
             )

print(model.count_params())
# summary() prints the layer table itself and returns None, so it is called
# directly instead of being wrapped in print().
model.summary()

# Per-class weights for the training images, passed to fit_generator below.
# NOTE(review): get_class_weights is not defined in this part of the notebook;
# the meaning of its second argument (0.1) should be confirmed there.
temp_class_weights = get_class_weights(photos.classes, 0.1)
num_images = len(photos.classes)  # number of training images

In [None]:
# One epoch = one pass over the training images. The step count is an integer
# derived from the training generator itself, so it does not depend on
# `num_images`, which the confusion-matrix cells overwrite.
train_steps = int(np.ceil(len(photos.classes) / float(photos.batch_size)))

hist = model.fit_generator(photos,
                    steps_per_epoch=train_steps,
                    epochs=10,
                    initial_epoch=0,
                    validation_data = photos_validation,
                    # fixed number of validation batches per epoch
                    # NOTE(review): not tied to the validation-set size -- confirm
                    validation_steps = 600,
                    class_weight=temp_class_weights,  #this is to help with the unbalanced class issue
                          )

In [None]:
# Save the trained model under home_path + 'models/', the location the
# load_model cell reads 'CNN_model_mdl3.mdl' from, creating the folder if needed.
model_dir = home_path + 'models/'
if not os.path.isdir(model_dir):
    os.makedirs(model_dir)

model.save(model_dir + 'CNN_model_mdl3.mdl')  # architecture + weights + optimizer state
model.save_weights(model_dir + 'CNN_model_weights_mdl3.wts')  # weights only

In [None]:
# Replace the in-memory model with the copy stored on disk.
saved_model_path = home_path + 'models/CNN_model_mdl3.mdl'
model = load_model(saved_model_path)

In [None]:
# Score the model on the training images (unshuffled generator) and build the
# confusion matrix and per-class classification report.
photos_confusion.reset()

# Size the prediction run from THIS generator rather than from whatever value
# `num_images` was left holding by a previously executed cell.
num_images = len(photos_confusion.classes)
num_steps = int(np.ceil(num_images / float(photos_confusion.batch_size)))

Y_pred = model.predict_generator(photos_confusion, num_steps)
y_pred = np.argmax(Y_pred, axis=1)  # most probable class index per image

# Class names ordered by the integer index the generator assigned to them.
class_indices = photos_confusion.class_indices
check_answer = np.array(sorted(class_indices, key=class_indices.get))
target_names = check_answer

# Passing every class index explicitly keeps the matrix square (one row and
# column per class) even when a class has no images and is never predicted.
class_labels = np.arange(len(target_names))
training_cm = confusion_matrix(photos_confusion.classes,
                               y_pred[:num_images],
                               labels=class_labels)

print('Confusion Matrix')
print(training_cm)
print('Classification Report')
print(classification_report(photos_confusion.classes,
                            y_pred[:num_images],
                            labels=class_labels,
                            target_names=target_names))

# Rows are the true classes, columns the predicted classes.
conf_mat = pd.DataFrame(training_cm, columns=target_names, index=target_names)
conf_mat

In [None]:
# Draw the confusion matrix as a heat map. conf_mat.sum() gives one total per
# column; every column is divided by its own total (+1 keeps the divisor
# non-zero for an empty column), so shading is relative within a column.
figsize(20, 20)
sns.heatmap(conf_mat / (conf_mat.sum() + 1), vmax=1, cmap='binary')

In [None]:
# Score the model on the validation images (unshuffled generator) and build
# the confusion matrix and per-class classification report.
photos_validation_confusion.reset()

# Size the prediction run from THIS generator rather than from whatever value
# `num_images` was left holding by a previously executed cell.
num_images = len(photos_validation_confusion.classes)
num_steps = int(np.ceil(num_images / float(photos_validation_confusion.batch_size)))

Y_pred = model.predict_generator(photos_validation_confusion, num_steps)
y_pred = np.argmax(Y_pred, axis=1)  # most probable class index per image

# Class names ordered by the integer index the generator assigned to them.
class_indices = photos_validation_confusion.class_indices
check_answer = np.array(sorted(class_indices, key=class_indices.get))
target_names = check_answer

# Passing every class index explicitly keeps the matrix square (one row and
# column per class) even when a class has no validation images and is never
# predicted.
class_labels = np.arange(len(target_names))
validation_cm = confusion_matrix(photos_validation_confusion.classes,
                                 y_pred[:num_images],
                                 labels=class_labels)

print('Confusion Matrix')
print(validation_cm)
print('Classification Report')
print(classification_report(photos_validation_confusion.classes,
                            y_pred[:num_images],
                            labels=class_labels,
                            target_names=target_names))

# Rows are the true classes, columns the predicted classes.
conf_mat = pd.DataFrame(validation_cm, columns=target_names, index=target_names)
conf_mat

In [None]:
# Draw the confusion matrix as a heat map. conf_mat.sum() gives one total per
# column; every column is divided by its own total (+1 keeps the divisor
# non-zero for an empty column), so shading is relative within a column.
figsize(20, 20)
sns.heatmap(conf_mat / (conf_mat.sum() + 1), vmax=1, cmap='binary')