In [1]:
import os
import sys
import pandas as pd
import numpy as np
import PIL

seed = 16
np.random.seed(seed)

from keras.utils.np_utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


Purpose of this notebook is to build/test working model using a smaller subset of classes and data to minimize iteration time and to test CovNets of varying size before running on broader dataset.

In [2]:
#check using system GPU for processing

from tensorflow.python.client import device_lib
import tensorflow as tf
os.environ["CUDA_VISIBLE_DEVICES"]="0" #for training on gpu
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 10183346064960334723
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 1493781708
locality {
  bus_id: 1
}
incarnation: 45552984422354421
physical_device_desc: "device: 0, name: GeForce GTX 860M, pci bus id: 0000:01:00.0, compute capability: 5.0"
]


In [3]:
# copied over the train, validate and test sets for 5 randomly selected breeds

Given the subset, I copied over their respesctive train/validate/test image folders from the broader image data set.  I maintained the full size of each train, val and test set.  

In [4]:
os.chdir('C:\\Users\\Garrick\Documents\\Springboard\\Capstone Project 2\\datasets_subset1')

In [5]:
train_datagen = ImageDataGenerator(rotation_range=15, shear_range=0.1, channel_shift_range=20,
                                    width_shift_range=0.1,  height_shift_range=0.1, zoom_range=0.1, horizontal_flip=True,
                                    fill_mode='nearest', rescale=1./255)
validation_datagen = ImageDataGenerator(rescale=1./255)

test_datagen = ImageDataGenerator(rescale=1./255)

batch_size = 25

train_generator = train_datagen.flow_from_directory('subset_train', target_size=(224,224), color_mode='rgb',
            class_mode='categorical', shuffle=False, batch_size=batch_size)

validation_generator = validation_datagen.flow_from_directory('subset_val', target_size=(224,224), color_mode='rgb',
            class_mode='categorical', shuffle=False, batch_size=batch_size)


test_generator = test_datagen.flow_from_directory('subset_test', target_size=(224,224), color_mode='rgb',
            class_mode='categorical', shuffle=False, batch_size=batch_size)

# reminder to self... flow_from_directory infers the class labels

Found 400 images belonging to 5 classes.
Found 100 images belonging to 5 classes.
Found 389 images belonging to 5 classes.


In [6]:
# importing keras modules and setting up a few parameters, instantiating early stopping

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.constraints import maxnorm
from keras.optimizers import SGD
from keras.optimizers import Adam
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
import keras.utils
from keras import backend as K
K.set_image_dim_ordering('tf')

from keras.callbacks import EarlyStopping

early_stopping = EarlyStopping(monitor='val_loss', patience=2)


In [7]:
import tensorflow as tf
tf_config = tf.ConfigProto()
tf_config.gpu_options.per_process_gpu_memory_fraction = 0.99
# tf_config.gpu_options.allow_growth = True **this causes python to crash, error: las.cc:444] failed to create cublas handle: CUBLAS_STATUS_ALLOC_FAILED
sess = tf.Session(config=tf_config)

In [8]:
input_shape = (224,224, 3)
num_classes = 5

# will create a few different models.... initial base model 

base_model = Sequential()
base_model.add(Conv2D(64, (11, 11), strides=4, input_shape=input_shape, padding='valid', activation='relu', kernel_constraint=maxnorm(3)))
base_model.add(MaxPooling2D(pool_size=(2, 2)))

base_model.add(Conv2D(64, (4, 4), strides=2, activation='relu', padding='valid', kernel_constraint=maxnorm(3)))
base_model.add(MaxPooling2D(pool_size=(2, 2)))

base_model.add(Conv2D(64, (4, 4), strides=2, activation='relu', padding='valid', kernel_constraint=maxnorm(3)))
base_model.add(Flatten())

base_model.add(Dense(256, activation='relu', kernel_constraint=maxnorm(3)))
base_model.add(Dropout(0.2))
base_model.add(Dense(num_classes, activation='softmax'))
    
# Compile model
epochs = 10
lrate = 0.003
decay = lrate/epochs
sgd = SGD(lr=lrate, momentum=0.9, decay=decay, nesterov=False)
base_model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
print(base_model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 54, 54, 64)        23296     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 27, 27, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 12, 12, 64)        65600     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 6, 6, 64)          0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 2, 2, 64)          65600     
_________________________________________________________________
flatten_1 (Flatten)          (None, 256)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               65792     
__________

In [17]:
# train base_model

base_model.fit_generator(train_generator, validation_data=validation_generator,
                    steps_per_epoch=100, epochs=epochs, callbacks=[early_stopping])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2261368c7f0>

In [8]:
# same model, more epochs (10 -> 50) and fewere steps per epoch, prior model params saw train and validate accuracies double, expecting the model to be within 80% acc

base_model.fit_generator(train_generator, validation_data=validation_generator,
                    steps_per_epoch=50, epochs=25, callbacks=[early_stopping])

NameError: name 'base_model' is not defined

Looks like we are increasing in accuracy with each successive epoch, perhaps need to train for more epochs. Let's test a deeper network with the same amount of epochs and see if we can begin with a better first epoch accuracy of 15%.

In [10]:
# taking the base model and adding more hidden layers

deep_model = Sequential()

deep_model = Sequential()
deep_model.add(Conv2D(64, (11, 11), strides=4, input_shape=input_shape, padding='valid', activation='relu', kernel_constraint=maxnorm(3)))
deep_model.add(MaxPooling2D(pool_size=(2, 2)))

deep_model.add(Conv2D(32, (3, 3), strides=1, activation='relu', padding='valid', kernel_constraint=maxnorm(3)))
deep_model.add(MaxPooling2D(pool_size=(2, 2)))

deep_model.add(Conv2D(32, (3, 3), strides=1, activation='relu', padding='valid', kernel_constraint=maxnorm(3)))
deep_model.add(MaxPooling2D(pool_size=(2, 2)))

deep_model.add(Conv2D(32, (3, 3), strides=1, activation='relu', padding='valid', kernel_constraint=maxnorm(3)))
deep_model.add(Flatten())

deep_model.add(Dense(288, activation='relu', kernel_constraint=maxnorm(3)))
deep_model.add(Dropout(0.2))
deep_model.add(Dense(num_classes, activation='softmax'))
    
# Compile model
epochs = 10
lrate = 0.003
decay = lrate/epochs
sgd = SGD(lr=lrate, momentum=0.9, decay=decay, nesterov=False)
deep_model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
print(deep_model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            (None, 54, 54, 64)        23296     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 27, 27, 64)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 25, 25, 32)        18464     
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 12, 12, 32)        0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 10, 10, 32)        9248      
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 5, 5, 32)          0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 3, 3, 32)          9248      
__________

In [11]:
# train deeper model

deep_model.fit_generator(train_generator, validation_data=validation_generator,
                    steps_per_epoch=100, epochs=epochs, callbacks=[early_stopping])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2047dd37eb8>

In [12]:
# looks like the deeper model overfits the training data, but performs better on the validation data... let's train for more epochs

deep_model.fit_generator(train_generator, validation_data=validation_generator,
                    steps_per_epoch=100, epochs=50, callbacks=[early_stopping])

Epoch 1/50
Epoch 2/50
Epoch 3/50


<keras.callbacks.History at 0x2048c3c54e0>

In [15]:
# deeper model ran into early stopping on the validation set

# lets try base model with more epochs

base_model.fit_generator(train_generator, validation_data=validation_generator,
                    steps_per_epoch=100, epochs=25, callbacks=[early_stopping])

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25


<keras.callbacks.History at 0x2047dd37f98>

In [38]:
# deep model with Adam optimizer

# taking the base model and adding more hidden layers



deep_model_Adam = Sequential()

deep_model_Adam = Sequential()
deep_model_Adam.add(Conv2D(64, (11, 11), strides=4, input_shape=input_shape, padding='valid', activation='relu', kernel_constraint=maxnorm(3)))
deep_model_Adam.add(MaxPooling2D(pool_size=(2, 2)))

deep_model_Adam.add(Conv2D(32, (3, 3), strides=1, activation='relu', padding='valid', kernel_constraint=maxnorm(3)))
deep_model_Adam.add(MaxPooling2D(pool_size=(2, 2)))

deep_model_Adam.add(Conv2D(32, (3, 3), strides=1, activation='relu', padding='valid', kernel_constraint=maxnorm(3)))
deep_model_Adam.add(MaxPooling2D(pool_size=(2, 2)))

deep_model_Adam.add(Conv2D(32, (3, 3), strides=1, activation='relu', padding='valid', kernel_constraint=maxnorm(3)))
deep_model_Adam.add(Flatten())

deep_model_Adam.add(Dense(288, activation='relu', kernel_constraint=maxnorm(3)))
deep_model_Adam.add(Dropout(0.2))
deep_model_Adam.add(Dense(num_classes, activation='softmax'))
    
# Compile model
adam_op = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
deep_model_Adam.compile(loss='categorical_crossentropy', optimizer=adam_op, metrics=['accuracy'])
print(deep_model_Adam.summary())


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_29 (Conv2D)           (None, 54, 54, 64)        23296     
_________________________________________________________________
max_pooling2d_27 (MaxPooling (None, 27, 27, 64)        0         
_________________________________________________________________
conv2d_30 (Conv2D)           (None, 25, 25, 32)        18464     
_________________________________________________________________
max_pooling2d_28 (MaxPooling (None, 12, 12, 32)        0         
_________________________________________________________________
conv2d_31 (Conv2D)           (None, 10, 10, 32)        9248      
_________________________________________________________________
max_pooling2d_29 (MaxPooling (None, 5, 5, 32)          0         
_________________________________________________________________
conv2d_32 (Conv2D)           (None, 3, 3, 32)          9248      
__________

In [20]:
deep_model_Adam.fit_generator(train_generator, validation_data=validation_generator,
                    steps_per_epoch=100, epochs=25, callbacks=[early_stopping])

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25


<keras.callbacks.History at 0x12e0ce440b8>

In [22]:
# tweaked deep model w/ Adam optimizer.  Deeper network topology near the input (less convolution than prior models), more FC nodes

deep_model_Adam_2 = Sequential()

deep_model_Adam_2 = Sequential()
deep_model_Adam_2.add(Conv2D(64, (8, 8), strides=2, input_shape=input_shape, padding='valid', activation='relu', kernel_constraint=maxnorm(3)))
deep_model_Adam_2.add(MaxPooling2D(pool_size=(2, 2)))

deep_model_Adam_2.add(Conv2D(32, (3, 3), strides=1, activation='relu', padding='valid', kernel_constraint=maxnorm(3)))
deep_model_Adam_2.add(MaxPooling2D(pool_size=(2, 2)))

deep_model_Adam_2.add(Conv2D(32, (3, 3), strides=1, activation='relu', padding='valid', kernel_constraint=maxnorm(3)))
deep_model_Adam_2.add(MaxPooling2D(pool_size=(2, 2)))

deep_model_Adam_2.add(Conv2D(32, (3, 3), strides=1, activation='relu', padding='valid', kernel_constraint=maxnorm(3)))
deep_model_Adam_2.add(Flatten())

deep_model_Adam_2.add(Dense(256, activation='relu', kernel_constraint=maxnorm(3)))
deep_model_Adam_2.add(Dropout(0.2))
deep_model_Adam_2.add(Dense(num_classes, activation='softmax'))
    
# Compile model
adam_op = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
deep_model_Adam_2.compile(loss='categorical_crossentropy', optimizer=adam_op, metrics=['accuracy'])
print(deep_model_Adam_2.summary())


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_19 (Conv2D)           (None, 109, 109, 64)      12352     
_________________________________________________________________
max_pooling2d_18 (MaxPooling (None, 54, 54, 64)        0         
_________________________________________________________________
conv2d_20 (Conv2D)           (None, 52, 52, 32)        18464     
_________________________________________________________________
max_pooling2d_19 (MaxPooling (None, 26, 26, 32)        0         
_________________________________________________________________
conv2d_21 (Conv2D)           (None, 24, 24, 32)        9248      
_________________________________________________________________
max_pooling2d_20 (MaxPooling (None, 12, 12, 32)        0         
_________________________________________________________________
conv2d_22 (Conv2D)           (None, 10, 10, 32)        9248      
__________

In [18]:
deep_model_Adam_2.fit_generator(train_generator, validation_data=validation_generator,
                    steps_per_epoch=50, epochs=50, callbacks=[early_stopping])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50


<keras.callbacks.History at 0x12e0c77d710>

Deeper topology not necessarily better and is over-fitting.  

In [15]:
# tweaked deep model w/ RMSProp optimizer again with Deeper network topology near the input (less convolution than prior models), more FC nodes

deep_model_RMS = Sequential()

deep_model_RMS = Sequential()
deep_model_RMS.add(Conv2D(64, (8, 8), strides=2, input_shape=input_shape, padding='valid', activation='relu', kernel_constraint=maxnorm(3)))
deep_model_RMS.add(MaxPooling2D(pool_size=(2, 2)))

deep_model_RMS.add(Conv2D(32, (3, 3), strides=1, activation='relu', padding='valid', kernel_constraint=maxnorm(3)))
deep_model_RMS.add(MaxPooling2D(pool_size=(2, 2)))

deep_model_RMS.add(Conv2D(32, (3, 3), strides=1, activation='relu', padding='valid', kernel_constraint=maxnorm(3)))
deep_model_RMS.add(MaxPooling2D(pool_size=(2, 2)))

deep_model_RMS.add(Conv2D(32, (3, 3), strides=1, activation='relu', padding='valid', kernel_constraint=maxnorm(3)))
deep_model_RMS.add(Flatten())

deep_model_RMS.add(Dense(256, activation='relu', kernel_constraint=maxnorm(3)))
deep_model_RMS.add(Dropout(0.2))
deep_model_RMS.add(Dense(num_classes, activation='softmax'))
    
# Compile model
deep_model_RMS.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
print(deep_model_RMS.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_14 (Conv2D)           (None, 109, 109, 64)      12352     
_________________________________________________________________
max_pooling2d_12 (MaxPooling (None, 54, 54, 64)        0         
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 52, 52, 32)        18464     
_________________________________________________________________
max_pooling2d_13 (MaxPooling (None, 26, 26, 32)        0         
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 24, 24, 32)        9248      
_________________________________________________________________
max_pooling2d_14 (MaxPooling (None, 12, 12, 32)        0         
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 10, 10, 32)        9248      
__________

In [16]:
deep_model_RMS.fit_generator(train_generator, validation_data=validation_generator,
                    steps_per_epoch=100, epochs=50, callbacks=[early_stopping])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50


<keras.callbacks.History at 0x12e110a0860>

In [19]:
# so more layers doesn't work.... let us keep the standard 3 CONV layers and widen the toplogy

wide_model = Sequential()
wide_model.add(Conv2D(32, (3, 3), strides=1, input_shape=input_shape, padding='valid', activation='relu', kernel_constraint=maxnorm(3)))
wide_model.add(MaxPooling2D(pool_size=(2, 2)))

wide_model.add(Conv2D(32, (3, 3), strides=1, activation='relu', padding='valid', kernel_constraint=maxnorm(3)))
wide_model.add(MaxPooling2D(pool_size=(2, 2)))

wide_model.add(Conv2D(32, (3, 3), strides=2, activation='relu', padding='valid', kernel_constraint=maxnorm(3)))
wide_model.add(MaxPooling2D(pool_size=(2, 2)))
wide_model.add(Flatten())

wide_model.add(Dense(256, activation='relu', kernel_constraint=maxnorm(3)))
wide_model.add(Dropout(0.2))
wide_model.add(Dense(num_classes, activation='softmax'))
    
# Compile model
epochs = 10
wide_model.compile(loss='categorical_crossentropy', optimizer=adam_op, metrics=['accuracy'])
print(wide_model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_22 (Conv2D)           (None, 222, 222, 32)      896       
_________________________________________________________________
max_pooling2d_18 (MaxPooling (None, 111, 111, 32)      0         
_________________________________________________________________
conv2d_23 (Conv2D)           (None, 109, 109, 32)      9248      
_________________________________________________________________
max_pooling2d_19 (MaxPooling (None, 54, 54, 32)        0         
_________________________________________________________________
conv2d_24 (Conv2D)           (None, 26, 26, 32)        9248      
_________________________________________________________________
max_pooling2d_20 (MaxPooling (None, 13, 13, 32)        0         
_________________________________________________________________
flatten_7 (Flatten)          (None, 5408)              0         
__________

In [14]:
wide_model.fit_generator(train_generator, validation_data=validation_generator,
                    steps_per_epoch=50, epochs=50, callbacks=[early_stopping])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50


<keras.callbacks.History at 0x12df0df25f8>

In [1]:
# wider doesn't necessarily work... however, slowing the learning rate seems to having a positive impact. same model as above, decrease LR

wide_model_slow_learn = Sequential()
wide_model_slow_learn.add(Conv2D(32, (3, 3), strides=1, input_shape=input_shape, padding='valid', activation='relu', kernel_constraint=maxnorm(3)))
wide_model_slow_learn.add(MaxPooling2D(pool_size=(2, 2)))

wide_model_slow_learn.add(Conv2D(32, (3, 3), strides=1, activation='relu', padding='valid', kernel_constraint=maxnorm(3)))
wide_model_slow_learn.add(MaxPooling2D(pool_size=(2, 2)))

wide_model_slow_learn.add(Conv2D(32, (3, 3), strides=2, activation='relu', padding='valid', kernel_constraint=maxnorm(3)))
wide_model_slow_learn.add(MaxPooling2D(pool_size=(2, 2)))
wide_model_slow_learn.add(Flatten())

wide_model_slow_learn.add(Dense(256, activation='relu', kernel_constraint=maxnorm(3)))
wide_model_slow_learn.add(Dropout(0.2))
wide_model_slow_learn.add(Dense(num_classes, activation='softmax'))
    
# Compile model

adam_op = Adam(lr=0.00001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
wide_model_slow_learn.compile(loss='categorical_crossentropy', optimizer=adam_op, metrics=['accuracy'])
print(wide_model_slow_learn.summary())


NameError: name 'Sequential' is not defined

In [28]:
wide_model_slow_learn.fit_generator(train_generator, validation_data=validation_generator,
                    steps_per_epoch=50, epochs=50, callbacks=[early_stopping])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50


<keras.callbacks.History at 0x13e9d96c278>

In [29]:
# it appears a slower learning rate might be key in allowing prior models to train for more epochs... 
# let's try a few earlier models with a decreased learning rate

In [39]:
adam_op = Adam(lr=0.00001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
deep_model_Adam.compile(loss='categorical_crossentropy', optimizer=adam_op, metrics=['accuracy'])
print(deep_model_Adam.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_29 (Conv2D)           (None, 54, 54, 64)        23296     
_________________________________________________________________
max_pooling2d_27 (MaxPooling (None, 27, 27, 64)        0         
_________________________________________________________________
conv2d_30 (Conv2D)           (None, 25, 25, 32)        18464     
_________________________________________________________________
max_pooling2d_28 (MaxPooling (None, 12, 12, 32)        0         
_________________________________________________________________
conv2d_31 (Conv2D)           (None, 10, 10, 32)        9248      
_________________________________________________________________
max_pooling2d_29 (MaxPooling (None, 5, 5, 32)          0         
_________________________________________________________________
conv2d_32 (Conv2D)           (None, 3, 3, 32)          9248      
__________

In [40]:
deep_model_Adam.fit_generator(train_generator, validation_data=validation_generator,
                    steps_per_epoch=50, epochs=50, callbacks=[early_stopping])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x13eb46cbdd8>

In [35]:
# let's try tye base model w/ decreased learning rate and Adam optimizer (vs. SGD)

base_model.compile(loss='categorical_crossentropy', optimizer=adam_op, metrics=['accuracy'])
print(base_model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 54, 54, 64)        23296     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 27, 27, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 12, 12, 64)        65600     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 6, 6, 64)          0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 2, 2, 64)          65600     
_________________________________________________________________
flatten_1 (Flatten)          (None, 256)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               65792     
__________

In [36]:
base_model.fit_generator(train_generator, validation_data=validation_generator,
                    steps_per_epoch=50, epochs=50, callbacks=[early_stopping])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x13eb40f7470>

So far the wider model (with less stride, smaller Convolution filter) w/ more trainable parameters and the simple base model appear to perform the best on validation data.  

In [41]:
# let's test on these iterations of the base, deep and wide models

base_scores = base_model.evaluate_generator(test_generator, steps=25)
print("Accuracy: %.2f%%" % (base_scores[1]*100))

Accuracy: 43.16%


In [42]:
deep_model_Adam_scores = deep_model_Adam.evaluate_generator(test_generator, steps=25)
print("Accuracy: %.2f%%" % (deep_model_Adam_scores[1]*100))

Accuracy: 36.64%


In [43]:
wide_model_slow_learn_scores = wide_model_slow_learn.evaluate_generator(test_generator, steps=25)
print("Accuracy: %.2f%%" % (wide_model_slow_learn_scores[1]*100))

Accuracy: 39.58%


Next steps... pick top 2 or 3 models and test and note which performs best.  use top 2-3 on broader image data set (simple_CNN notebook)

In [44]:
# saving models and weights just in case... will need to retrain on broader image sets anyways
base_model.save('subset_base_model.h5')
deep_model_Adam.save('subset_deep_model_Adam.h5')
wide_model_slow_learn.save('subset_wide_model_slow_learn.h5')