In [1]:
import glob
import os
from random import shuffle
import time
import numpy as np
import random
import cv2 as cv
import matplotlib.pyplot as plt
import keras
import keras.backend as K
from keras.models import Model, load_model
from keras.layers import Activation, Input, Dense, Conv2D, Dropout, Flatten, BatchNormalization
from keras.regularizers import l1
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, TerminateOnNaN, TensorBoard
from keras.utils import np_utils

Using TensorFlow backend.


# Define utility and training functions

In [5]:
def get_image_files(root_dir):
    full_paths = [x for x in os.walk(root_dir)]
    imgs = [os.path.join(ds,f) for ds,_,fs in full_paths for f in fs if f]    
    return imgs

def get_dimensions(files):
    min_height, min_width = 10000, 10000
    max_height, max_width = 0, 0
    for f in files:
        img = cv.imread(f)
        h,w = img.shape[:2]
        if h < min_height:
            min_height = h
        if h > max_height:
            max_height = h
        if w < min_width:
            min_width = w
        if w > max_width:
            max_width = w
    return min_height, min_width, max_height, max_width

def make_labels(files):
    # Assume input is a list of complete file paths.
    # Count the number of unique directory names that are immediate parent of the files.
    # Order the directory names alphabetically from a-z, and associate labels accordingly.
    dirs = []
    for f in files:
        dirs.append(f.split('\\')[-2])
    unique_dirs = list(set(dirs))
    unique_dirs.sort()
    label_map = {}
    for i,d in enumerate(unique_dirs):
        label_map[d] = i
    return label_map

def make_train_val(files, label_map):
    train = []
    val = []    
    for k in label_map.keys():
        imgs = [f for f in files if k in f]
        num_files = len(imgs)
        train.extend(imgs[:int(.6*num_files)])
        val.extend(imgs[int(.6*num_files):])
    return train, val

def get_batches(files, label_map, batch_size, resize_size, num_color_channels, augment=False, predict=False):
    shuffle(files)
    count = 0
    num_files = len(files)
    num_classes = len(label_map)
    batch_out = np.zeros((batch_size, resize_size[0], resize_size[1], num_color_channels), dtype=np.uint8)
    labels_out = np.zeros((batch_size,num_classes))    
    while True:
        f = files[count%num_files]
        img = cv.imread(f)        
        res = cv.resize(img, resize_size)
        
        # Augmentation 
        if augment:            
            rows,cols = res.shape[:2]
            M = cv.getRotationMatrix2D((cols/2,rows/2),np.random.uniform(0.0,360.0,1),1) # random rotation
            res = cv.warpAffine(res,M,(cols,rows))

        if num_color_channels == 1:
            res = cv.cvtColor(res, cv.COLOR_BGR2GRAY)
            res = res[...,None]
        batch_out[count%batch_size,...] = res
        for k in label_map.keys():
            if k in f:
                labels_out[count%batch_size,:] = np_utils.to_categorical(label_map[k],num_classes)
                break
        count += 1
        if count%batch_size == 0:
            if predict:
                yield batch_out.astype(np.float)/255.
            else:
                yield batch_out.astype(np.float)/255., labels_out
            
# Convnet classifier
class classifier():
    def __init__(self,
                 input_shape,
                 n_classes,
                 n_enc_conv_layers=2,
                 n_enc_conv_filters=[32]*2, # individually customizable
                 enc_kernel_size=[(3,2)]*2, # list of integers or tuples
                 n_dense_layers=1,
                 dense_units=[32],
                 dropout=[0.0]*3, # individually customizable
                 strides=[(2,1)]*2,
                 activation='relu',
                 kernel_initializer='glorot_uniform',
                 l1_reg=0.0,
                 lr=0.001
                ):

        if len(n_enc_conv_filters) == 1:
            n_enc_conv_filters = n_enc_conv_filters*n_enc_conv_layers

        if len(enc_kernel_size) == 1:
            enc_kernel_size = enc_kernel_size*n_enc_conv_layers
            
        if len(dense_units) == 1:
            dense_units = dense_units*n_dense_layers

        if len(dropout) == 1:
            dropout = dropout*(n_enc_conv_layers+n_dense_layers)

        if len(strides) == 1:
            strides = strides*n_enc_conv_layers

        self.input_shape=input_shape
        self.n_classes=n_classes
        self.n_enc_conv_layers=n_enc_conv_layers
        self.n_enc_conv_filters=n_enc_conv_filters
        self.enc_kernel_size=enc_kernel_size
        self.n_dense_layers=n_dense_layers
        self.dense_units=dense_units
        self.dropout=dropout
        self.strides=strides
        self.activation=activation
        self.kernel_initializer = kernel_initializer
        self.l1_reg=l1_reg
        self.lr=lr
        self.model = self.get_model()

    def get_model(self):
        I = Input(shape=self.input_shape, name='input')
        X = I
        # Add Conv layers
        for i in range(self.n_enc_conv_layers):
            X = Conv2D(self.n_enc_conv_filters[i], self.enc_kernel_size[i], strides=self.strides[i], padding='same',
                       data_format='channels_last', kernel_initializer=self.kernel_initializer,
                       kernel_regularizer=l1(self.l1_reg), name='conv_{}'.format(i))(X)
            X = Activation(self.activation)(X)
#             X = BatchNormalization()(X)
            X = Dropout(self.dropout[i])(X)
        
        X = Flatten()(X)
        # Add Dense layers
        for i in range(self.n_dense_layers):
            X = Dense(self.dense_units[i], kernel_initializer=self.kernel_initializer,
                      kernel_regularizer=l1(self.l1_reg), name='dense_{}'.format(i))(X)
            X = Activation(self.activation)(X)
#             X = BatchNormalization()(X)
            X = Dropout(self.dropout[i+self.n_enc_conv_layers])(X)
        O = Dense(self.n_classes, activation='softmax', kernel_initializer=self.kernel_initializer,
                  kernel_regularizer=l1(self.l1_reg), name='output')(X)
        
        model = Model(inputs=I, outputs=O)
        model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=self.lr), metrics=['accuracy'])
        return model
    
def train(train_files, val_files, label_map, epochs=100, batch_size=8, common_size=(100,100), num_color_channels=3, 
          new_model=True, save_model_name='classification_model_1.hdf5'):
    num_batches_per_epoch = len(train_files)//batch_size
    
    train_batch_generator = get_batches(train_files, label_map, batch_size, common_size, num_color_channels, augment=True)
    val_batch_generator = get_batches(val_files, label_map, batch_size, common_size, num_color_channels)

    checkpt = ModelCheckpoint(save_model_name, monitor='val_loss', verbose=1, save_best_only=True, mode='auto')
    
    if new_model: # create a new model
        #### CHANGE THIS SECTION TO CREATE NEW CONVOLUTIONAL ARCHITECTURE ###
        model = classifier([common_size[0], common_size[1], num_color_channels],
                           len(label_map),
                           n_enc_conv_layers=12,
                           n_enc_conv_filters=[16]*12,
                           enc_kernel_size=[(3,3)],
                           n_dense_layers=2,
                           dense_units=[8],
                           dropout=[0.25],
                           strides=([(1,1)]*2+[(2,2)])*4,
                           activation='relu',
                           kernel_initializer='glorot_uniform',
                           l1_reg=0.0,
                           lr=0.0001).model
    else: # continue to train a previous model
        print('Continuing training from a previous model')
        model = load_model('models/'+save_model_name)

    model.summary()
    model.fit_generator(train_batch_generator, steps_per_epoch=num_batches_per_epoch, epochs=epochs,
                        verbose=1, callbacks=[checkpt, TerminateOnNaN()], 
                        validation_data=val_batch_generator, validation_steps=len(val_files)//batch_size)
    return model

def predict(files, label_map, common_size=(100,100), num_color_channels=3, saved_model_name='classification_model_1.hdf5'):
    model = load_model(saved_model_name)
    num_batches_per_epoch = len(files)    
    predict_batch_generator = get_batches(files, {}, batch_size, common_size, num_color_channels)

    predicts = []
    p = model.predict_generator(predict_batch_generator, steps_per_epoch=num_batches_per_epoch)
    print(p)

# Data Preprocessing

In [6]:
# Get full paths to all classification data
# Data is assumed to reside under the directory "root_dir", and data for each class is assumed to reside in a separate subfolder
root_dir = 'Classification_Images\\'
files = get_image_files(root_dir)
print(len(files))
# Get the dimension range of the data for informational purposes
minh,minw,maxh,maxw = get_dimensions(files)
print('Over all images - minimum height: {}, minimum width: {}, maximum height: {}, maximum width:{}'.format(minh,minw,maxh,maxw))
# Assign numerical labels to categories - the number of categories is equal to the number of subfolders
label_map = make_labels(files)
print(label_map)
# Split the data into training and validation
train_files, val_files = make_train_val(files, label_map)
print(len(train_files))
print(len(val_files))

674
Over all images - minimum height: 56, minimum width: 48, maximum height: 256, maximum width:304
{'Prorocentrum_micans': 1, 'Ciliate': 0}
403
271


# Training Classifier

In [4]:
# Train a classifier
# Note: all images are resized to common_size.  Change as desired. 
# Images smaller than common_size will be enlarged using interpolation.  Images larger will be shrunk using decimation.
batch_size = 8
train_files = train_files[:len(train_files)//batch_size*batch_size]
val_files = val_files[:len(val_files)//batch_size*batch_size]
model = train(train_files, val_files, label_map, epochs=2000, batch_size=16, common_size=(200,200), num_color_channels=3, 
              new_model=True, save_model_name='classification_model_1.hdf5')

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           (None, 200, 200, 3)       0         
_________________________________________________________________
conv_0 (Conv2D)              (None, 200, 200, 16)      448       
_________________________________________________________________
activation_1 (Activation)    (None, 200, 200, 16)      0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 200, 200, 16)      0         
_________________________________________________________________
conv_1 (Conv2D)              (None, 200, 200, 16)      2320      
_________________________________________________________________
activation_2 (Activation)    (None, 200, 200, 16)      0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 200, 200, 16)      0         
__________


Epoch 00010: val_loss improved from 0.69232 to 0.69207, saving model to classification_model_1.hdf5
Epoch 11/2000

Epoch 00011: val_loss improved from 0.69207 to 0.69184, saving model to classification_model_1.hdf5
Epoch 12/2000

Epoch 00012: val_loss improved from 0.69184 to 0.69161, saving model to classification_model_1.hdf5
Epoch 13/2000

Epoch 00013: val_loss improved from 0.69161 to 0.69086, saving model to classification_model_1.hdf5
Epoch 14/2000

Epoch 00014: val_loss improved from 0.69086 to 0.69057, saving model to classification_model_1.hdf5
Epoch 15/2000

Epoch 00015: val_loss did not improve
Epoch 16/2000

Epoch 00016: val_loss improved from 0.69057 to 0.68979, saving model to classification_model_1.hdf5
Epoch 17/2000

Epoch 00017: val_loss improved from 0.68979 to 0.68644, saving model to classification_model_1.hdf5
Epoch 18/2000

Epoch 00018: val_loss improved from 0.68644 to 0.68535, saving model to classification_model_1.hdf5
Epoch 19/2000

Epoch 00019: val_loss did 


Epoch 00053: val_loss did not improve
Epoch 54/2000

Epoch 00054: val_loss did not improve
Epoch 55/2000

Epoch 00055: val_loss did not improve
Epoch 56/2000

Epoch 00056: val_loss did not improve
Epoch 57/2000

Epoch 00057: val_loss did not improve
Epoch 58/2000

Epoch 00058: val_loss did not improve
Epoch 59/2000

Epoch 00059: val_loss did not improve
Epoch 60/2000

Epoch 00060: val_loss did not improve
Epoch 61/2000

Epoch 00061: val_loss did not improve
Epoch 62/2000

Epoch 00062: val_loss did not improve
Epoch 63/2000

Epoch 00063: val_loss did not improve
Epoch 64/2000

Epoch 00064: val_loss did not improve
Epoch 65/2000

Epoch 00065: val_loss did not improve
Epoch 66/2000

Epoch 00066: val_loss did not improve
Epoch 67/2000

Epoch 00067: val_loss did not improve
Epoch 68/2000

Epoch 00068: val_loss did not improve
Epoch 69/2000

Epoch 00069: val_loss did not improve
Epoch 70/2000

Epoch 00070: val_loss did not improve
Epoch 71/2000

Epoch 00071: val_loss did not improve
Epoch 7


Epoch 00100: val_loss did not improve
Epoch 101/2000

Epoch 00101: val_loss did not improve
Epoch 102/2000

Epoch 00102: val_loss did not improve
Epoch 103/2000

Epoch 00103: val_loss did not improve
Epoch 104/2000

Epoch 00104: val_loss did not improve
Epoch 105/2000

Epoch 00105: val_loss did not improve
Epoch 106/2000

Epoch 00106: val_loss did not improve
Epoch 107/2000

Epoch 00107: val_loss did not improve
Epoch 108/2000

Epoch 00108: val_loss did not improve
Epoch 109/2000

Epoch 00109: val_loss did not improve
Epoch 110/2000

Epoch 00110: val_loss did not improve
Epoch 111/2000

Epoch 00111: val_loss did not improve
Epoch 112/2000

Epoch 00112: val_loss improved from 0.66708 to 0.66664, saving model to classification_model_1.hdf5
Epoch 113/2000

Epoch 00113: val_loss did not improve
Epoch 114/2000

Epoch 00114: val_loss did not improve
Epoch 115/2000

Epoch 00115: val_loss did not improve
Epoch 116/2000

Epoch 00116: val_loss did not improve
Epoch 117/2000

Epoch 00117: val_lo


Epoch 00147: val_loss did not improve
Epoch 148/2000

Epoch 00148: val_loss did not improve
Epoch 149/2000

Epoch 00149: val_loss did not improve
Epoch 150/2000

Epoch 00150: val_loss did not improve
Epoch 151/2000

Epoch 00151: val_loss did not improve
Epoch 152/2000

Epoch 00152: val_loss did not improve
Epoch 153/2000

Epoch 00153: val_loss did not improve
Epoch 154/2000

Epoch 00154: val_loss did not improve
Epoch 155/2000

Epoch 00155: val_loss did not improve
Epoch 156/2000

Epoch 00156: val_loss did not improve
Epoch 157/2000

Epoch 00157: val_loss did not improve
Epoch 158/2000

Epoch 00158: val_loss did not improve
Epoch 159/2000

Epoch 00159: val_loss did not improve
Epoch 160/2000

Epoch 00160: val_loss did not improve
Epoch 161/2000

Epoch 00161: val_loss did not improve
Epoch 162/2000

Epoch 00162: val_loss did not improve
Epoch 163/2000

Epoch 00163: val_loss did not improve
Epoch 164/2000

Epoch 00164: val_loss did not improve
Epoch 165/2000

Epoch 00165: val_loss did n


Epoch 00195: val_loss did not improve
Epoch 196/2000

Epoch 00196: val_loss did not improve
Epoch 197/2000

Epoch 00197: val_loss did not improve
Epoch 198/2000

Epoch 00198: val_loss did not improve
Epoch 199/2000

Epoch 00199: val_loss did not improve
Epoch 200/2000

Epoch 00200: val_loss improved from 0.66664 to 0.66594, saving model to classification_model_1.hdf5
Epoch 201/2000

Epoch 00201: val_loss did not improve
Epoch 202/2000

Epoch 00202: val_loss did not improve
Epoch 203/2000

Epoch 00203: val_loss did not improve
Epoch 204/2000

Epoch 00204: val_loss did not improve
Epoch 205/2000

Epoch 00205: val_loss did not improve
Epoch 206/2000

Epoch 00206: val_loss did not improve
Epoch 207/2000

Epoch 00207: val_loss did not improve
Epoch 208/2000

Epoch 00208: val_loss did not improve
Epoch 209/2000

Epoch 00209: val_loss did not improve
Epoch 210/2000

Epoch 00210: val_loss did not improve
Epoch 211/2000

Epoch 00211: val_loss did not improve
Epoch 212/2000

Epoch 00212: val_lo


Epoch 00242: val_loss did not improve
Epoch 243/2000

Epoch 00243: val_loss did not improve
Epoch 244/2000

Epoch 00244: val_loss did not improve
Epoch 245/2000

Epoch 00245: val_loss did not improve
Epoch 246/2000

Epoch 00246: val_loss did not improve
Epoch 247/2000

Epoch 00247: val_loss did not improve
Epoch 248/2000

Epoch 00248: val_loss did not improve
Epoch 249/2000

Epoch 00249: val_loss did not improve
Epoch 250/2000

Epoch 00250: val_loss did not improve
Epoch 251/2000

Epoch 00251: val_loss did not improve
Epoch 252/2000

Epoch 00252: val_loss did not improve
Epoch 253/2000

Epoch 00253: val_loss improved from 0.66594 to 0.66267, saving model to classification_model_1.hdf5
Epoch 254/2000

Epoch 00254: val_loss did not improve
Epoch 255/2000

Epoch 00255: val_loss did not improve
Epoch 256/2000

Epoch 00256: val_loss did not improve
Epoch 257/2000

Epoch 00257: val_loss did not improve
Epoch 258/2000

Epoch 00258: val_loss did not improve
Epoch 259/2000

Epoch 00259: val_lo


Epoch 00289: val_loss did not improve
Epoch 290/2000

Epoch 00290: val_loss did not improve
Epoch 291/2000

Epoch 00291: val_loss did not improve
Epoch 292/2000

Epoch 00292: val_loss did not improve
Epoch 293/2000

Epoch 00293: val_loss did not improve
Epoch 294/2000

Epoch 00294: val_loss did not improve
Epoch 295/2000

Epoch 00295: val_loss did not improve
Epoch 296/2000

Epoch 00296: val_loss did not improve
Epoch 297/2000

Epoch 00297: val_loss did not improve
Epoch 298/2000

Epoch 00298: val_loss did not improve
Epoch 299/2000

Epoch 00299: val_loss did not improve
Epoch 300/2000

Epoch 00300: val_loss did not improve
Epoch 301/2000

Epoch 00301: val_loss did not improve
Epoch 302/2000

Epoch 00302: val_loss did not improve
Epoch 303/2000

Epoch 00303: val_loss did not improve
Epoch 304/2000

Epoch 00304: val_loss did not improve
Epoch 305/2000

Epoch 00305: val_loss did not improve
Epoch 306/2000

Epoch 00306: val_loss did not improve
Epoch 307/2000

Epoch 00307: val_loss did n

error: C:\projects\opencv-python\opencv\modules\imgproc\src\resize.cpp:4044: error: (-215) ssize.width > 0 && ssize.height > 0 in function cv::resize


# Prediction on New Data