# Binary classification of images with a convolutional DNN

In [None]:
# !pip install -U --no-cache-dir \
#   keras \
#   kaggle \
#   tensorflow-gpu==1.12.0

In [None]:
import plaidml.keras
plaidml.keras.install_backend()

import shutil
import os

# Working directories: one for the downloaded data, one for saved outputs.
data_dir = '/content/data'
output_dir = '/content/output'

# Create both up front (no error if they already exist) so later cells can
# rely on them being present.
for _required_dir in (data_dir, output_dir):
    os.makedirs(_required_dir, exist_ok=True)

In [None]:
!kaggle competitions download -c dogs-vs-cats -p /content/data

!unzip -n -q /content/data/train.zip -d /content/data
!unzip -n -q /content/data/test1.zip -d /content/data

original_data_count = len([name for name in os.listdir(os.path.join(data_dir, 'train'))])
print("Image count: %s" % original_data_count)

The data I have for this problem is a large collection of images containing either a cat or a dog. The sizes of these images vary.

In [None]:
from keras.preprocessing import image

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import warnings

warnings.filterwarnings("ignore")
%pylab inline

img = image.load_img(os.path.join(data_dir, 'train/cat.1.jpg'))

plt.imshow(img)
plt.colorbar()
plt.show()

I want to make this a small-data problem, so I'll separate the training data into three collections: a training set of 1000 images per class, a validation set of 500 images per class and a test set of 500 images per class. Subdirectories are added for the two classes as I'll be using Keras's ImageDataGenerator to read in the images.

In [None]:
# Build a small subset of the original training images, laid out as
# <split>/<class>/ so that flow_from_directory can read one class per folder.
original_data_dir = os.path.join(data_dir, 'train')

base_dir = os.path.join(data_dir, 'cats-dogs-small')

train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')
test_dir = os.path.join(base_dir, 'test')

train_cats_dir = os.path.join(train_dir, 'cats')
train_dogs_dir = os.path.join(train_dir, 'dogs')

validation_cats_dir = os.path.join(validation_dir, 'cats')
validation_dogs_dir = os.path.join(validation_dir, 'dogs')

test_cats_dir = os.path.join(test_dir, 'cats')
test_dogs_dir = os.path.join(test_dir, 'dogs')


def _copy_image_range(label, indices, dst_dir):
    """Copy ``<label>.<i>.jpg`` for each ``i`` in *indices* into *dst_dir*.

    The destination directory (and any missing parents) is created first.
    Files already present in *dst_dir* are skipped, so re-running the cell is
    cheap and also completes a split that was interrupted earlier.

    Raises:
        FileNotFoundError: if a source image is missing. This is deliberately
            not swallowed, so an incomplete download is noticed immediately.
    """
    os.makedirs(dst_dir, exist_ok=True)
    for i in indices:
        # Format only the file name; the directory part is never passed
        # through %-formatting.
        src = os.path.join(original_data_dir, '%s.%s.jpg' % (label, i))
        dst = os.path.join(dst_dir, '%s.jpg' % i)
        if not os.path.exists(dst):
            shutil.copyfile(src, dst)


# (source label, image indices, destination directory) for every split
for _label, _indices, _dst_dir in (
    ('cat', range(1000), train_cats_dir),
    ('dog', range(1000), train_dogs_dir),
    ('cat', range(1000, 1500), validation_cats_dir),
    ('dog', range(1000, 1500), validation_dogs_dir),
    ('cat', range(1500, 2000), test_cats_dir),
    ('dog', range(1500, 2000), test_dogs_dir),
):
    _copy_image_range(_label, _indices, _dst_dir)

# check image collections are sized correctly
print('Train cats: %s' % len(os.listdir(train_cats_dir)))
print('Train dogs: %s' % len(os.listdir(train_dogs_dir)))
print('Validate cats: %s' % len(os.listdir(validation_cats_dir)))
print('Validate dogs: %s' % len(os.listdir(validation_dogs_dir)))
print('Test cats: %s' % len(os.listdir(test_cats_dir)))
print('Test dogs: %s' % len(os.listdir(test_dogs_dir)))

Images in JPEG format aren't suitable inputs for the model; they need to be reworked into tensors. Decoding a JPEG into a bitmap and resizing it to 150x150 pixels gives me a tensor with shape (150, 150, 3).

In [None]:
from keras.preprocessing.image import ImageDataGenerator

# Both generators multiply every pixel value by 1/255.
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Training images go through train_datagen (the original wired this to
# test_datagen, leaving train_datagen unused). Validation and test images
# share test_datagen.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(150, 150),  # every image is resized to 150x150
    batch_size=20,
    class_mode='binary'      # one binary label per image
)

validation_generator = test_datagen.flow_from_directory(
    validation_dir,
    target_size=(150, 150),
    batch_size=20,
    class_mode='binary'
)

test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(150, 150),
    batch_size=20,
    class_mode='binary'
)

In [None]:
# Take a single batch from the training generator and turn its numeric
# targets into captions (1.0 -> 'dog', anything else -> 'cat').
images, batch_targets = next(train_generator)
labels = ['dog' if target == 1.0 else 'cat' for target in batch_targets]

# Draw the batch (20 images, per the generator's batch_size) on a 5x5 grid
# with axis decorations switched off.
plt.figure(figsize=(10,10))
for slot, (picture, caption) in enumerate(zip(images, labels), start=1):
    plt.subplot(5, 5, slot)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(picture, cmap=plt.cm.binary)
    plt.xlabel(caption)

In [None]:
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense
from keras.optimizers import RMSprop

# Baseline network: four conv/max-pool stages followed by a dense classifier
# that ends in a single sigmoid unit.
std_model = Sequential()

# First stage also declares the input shape.
std_model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)))
std_model.add(MaxPooling2D((2, 2)))

# Remaining stages differ only in their number of filters.
for n_filters in (64, 128, 128):
    std_model.add(Conv2D(n_filters, (3, 3), activation='relu'))
    std_model.add(MaxPooling2D((2, 2)))

std_model.add(Flatten())
std_model.add(Dense(512, activation='relu'))
std_model.add(Dense(1, activation='sigmoid'))

std_optimizer = RMSprop(lr=1e-4)
std_model.compile(optimizer=std_optimizer,
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

std_model.summary()

In [None]:
# Train the baseline model: 30 epochs of 100 training batches each,
# validating on 50 batches after every epoch.
std_history = std_model.fit_generator(
    generator=train_generator,
    steps_per_epoch=100,
    epochs=30,
    validation_data=validation_generator,
    validation_steps=50,
)

In [None]:
import matplotlib.pyplot as plt
from pandas import Series

def plot_loss(history, smooth=False):
    """Plot training and validation loss against the epoch number.

    Args:
        history: mapping with the per-epoch series under the keys ``'loss'``
            and ``'val_loss'``.
        smooth: when true, both curves are replaced by their 6-epoch rolling
            mean before plotting.
    """
    train_curve = Series(data=history['loss'])
    val_curve = Series(history['val_loss'])
    epoch_axis = range(1, len(train_curve) + 1)

    if smooth:
        train_curve, val_curve = (
            curve.rolling(window=6).mean() for curve in (train_curve, val_curve)
        )

    # dots for training, solid line for validation
    plt.plot(epoch_axis, train_curve, 'bo', label='Training loss')
    plt.plot(epoch_axis, val_curve, 'b', label='Validation loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

def plot_accuracy(history, smooth=False):
    """Plot training and validation accuracy against the epoch number.

    Args:
        history: mapping with the per-epoch accuracy series, stored either
            under ``'acc'`` / ``'val_acc'`` or under ``'accuracy'`` /
            ``'val_accuracy'``.
        smooth: when true, both curves are replaced by their 6-epoch rolling
            mean before plotting.

    Raises:
        KeyError: if neither pair of accuracy keys is present.
    """
    # Older Keras releases record metrics=['accuracy'] as 'acc'; newer ones
    # keep the name as given. Accept both so the helper works on either.
    train_key = 'acc' if 'acc' in history else 'accuracy'
    acc = Series(history[train_key])
    val_acc = Series(history['val_' + train_key])
    num_epochs = range(1, len(acc) + 1)

    if smooth:
        acc = acc.rolling(window=6).mean()
        val_acc = val_acc.rolling(window=6).mean()

    # dots for training, solid line for validation
    plt.plot(num_epochs, acc, 'ro', label='Training accuracy')
    plt.plot(num_epochs, val_acc, 'r', label='Validation accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.show()

In [None]:
# Loss curves first, then accuracy curves, for the baseline run.
for _plot in (plot_loss, plot_accuracy):
    _plot(std_history.history)

My model has the characteristics of overfitting. Accuracy on the training data increases linearly towards 100% while accuracy on the validation data stalls before the 10th epoch. Common methods for avoiding overfitting are dropout and weight decay, but in this case I will use data augmentation as it's well suited to image recognition problems. This technique creates new images by applying transformations to the input tensors while still keeping their recognisable characteristics. The ImageDataGenerator class has the utility to achieve this given a few extra parameters.

In [None]:
# Random transformations applied on top of the 1/255 rescaling.
augmentation_options = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
augmented_train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_options)

# Same directory, image size, batch size and label mode as the plain
# training generator; only the data generator differs.
augmented_train_generator = augmented_train_datagen.flow_from_directory(
    train_dir,
    target_size=(150, 150),
    batch_size=20,
    class_mode='binary',
)

In [None]:
from keras.preprocessing import image as Img

# Load one original image and give it a leading batch axis of size 1,
# which is the form passed to .flow() below.
img = Img.load_img(os.path.join(original_data_dir, 'cat.1.jpg'))
x = Img.img_to_array(img)
x = x.reshape((1,) + x.shape)

# .flow() keeps yielding batches, so stop once four augmented variants
# have been drawn on the 2x2 grid.
plt.figure(figsize=(10,10))
variants = augmented_train_datagen.flow(x, batch_size=1)
for shown, batch in enumerate(variants, start=1):
    plt.subplot(2, 2, shown)
    plt.imshow(Img.array_to_img(batch[0]))
    if shown == 4:
        break

plt.show()

In [None]:
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers import Dense
from keras.optimizers import RMSprop

# Same convolutional stack as the baseline model, with a Dropout(0.5) layer
# between Flatten and the dense classifier.
aug_model = Sequential()

# First stage also declares the input shape.
aug_model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)))
aug_model.add(MaxPooling2D((2, 2)))

# Remaining stages differ only in their number of filters.
for n_filters in (64, 128, 128):
    aug_model.add(Conv2D(n_filters, (3, 3), activation='relu'))
    aug_model.add(MaxPooling2D((2, 2)))

aug_model.add(Flatten())
aug_model.add(Dropout(0.5))
aug_model.add(Dense(512, activation='relu'))
aug_model.add(Dense(1, activation='sigmoid'))

aug_optimizer = RMSprop(lr=1e-4)
aug_model.compile(optimizer=aug_optimizer,
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

aug_model.summary()

In [None]:
# Train on the augmented generator: 10 epochs of 100 batches each,
# validating on 50 (non-augmented) batches after every epoch.
aug_history = aug_model.fit_generator(
    generator=augmented_train_generator,
    steps_per_epoch=100,
    epochs=10,
    validation_data=validation_generator,
    validation_steps=50,
)

In [None]:
hist = aug_history.history

# plot_loss / plot_accuracy expect the whole history dict and look up the
# series themselves. Passing the individual lists (as before) made them
# index a list with a string key and fail with a TypeError.
plot_loss(hist)
plot_accuracy(hist)

In [None]:
# evaluate_generator returns the loss followed by the compiled metric.
aug_scores = aug_model.evaluate_generator(test_generator)
loss, acc = aug_scores

print('With data augmentation and dropout the model achieves an accuracy of %f' % acc)

## Feature extraction of a pre-trained model

To achieve a higher accuracy I will leverage a pre-trained model with a convolutional base containing a generic representation of the visual world.

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.optimizers import RMSprop
from keras.applications import VGG16

# Where the feature-extraction model is saved after training.
fe_model_path = os.path.join(output_dir, 'cats-vs-dogs-feat-ext.h5')

# VGG16 with ImageNet weights and without its top (classifier) layers.
conv_base = VGG16(weights='imagenet', include_top=False, input_shape=(150, 150, 3))

# Stack a small dense classifier on top of the convolutional base.
fe_model = Sequential()
fe_model.add(conv_base)
fe_model.add(Flatten())
fe_model.add(Dense(256, activation='relu'))
fe_model.add(Dense(1, activation='sigmoid'))

# must set weight trainability before compilation
conv_base.trainable = False

# decreased learning rate should mean less divergence
fe_optimizer = RMSprop(lr=2e-5)
fe_model.compile(optimizer=fe_optimizer,
                 loss='binary_crossentropy',
                 metrics=['accuracy'])

fe_model.summary()

In [None]:
# Train the dense classifier on top of the frozen base: 100 epochs of 100
# augmented batches each, validating on 50 batches after every epoch.
fe_history = fe_model.fit_generator(
    generator=augmented_train_generator,
    steps_per_epoch=100,
    epochs=100,
    validation_data=validation_generator,
    validation_steps=50,
)

# Persist the trained model; the fine-tuning cell loads it from this path.
fe_model.save(fe_model_path)

In [None]:
# Loss curves first, then accuracy curves, for the feature-extraction run.
for _plot in (plot_loss, plot_accuracy):
    _plot(fe_history.history)

In [None]:
# Score the model on every batch of the test generator. The previous
# steps=10 covered only 10 batches of 20 images, i.e. 200 of the 1000 test
# images, which made this figure incomparable with the other evaluations.
(loss, acc) = fe_model.evaluate_generator(test_generator, steps=len(test_generator))
print('With feature extraction the model achieves an accuracy of %f' % acc)

## Fine-tuning

I can improve the model by unfreezing several layers at the top of the convolutional base (which contain high-level representations of features) and re-training them along with the fully connected layers, which have already learned a representation of the data.

Fine-tuning is only possible after the fully connected layer on top of the convolutional base is trained, otherwise the error signal propagating through the network will destroy the representations held in the unfrozen layers. It's beneficial to unfreeze the top layers of the model as they contain the high-level representations of the classes the pre-trained model was trained on. It would be ineffective to unfreeze the lower layers containing low-level representations (such as lines and curves) as these will be relevant to all image classification problems.

A very small learning rate prevents the useful representations in the unfrozen layers and the previously trained fully connected layer from being lost.

In [None]:
from keras.models import load_model

# Where the fine-tuned model is saved after training.
tuned_model_path = os.path.join(output_dir, 'cats-vs-dogs-tuned.h5')

# Start from the saved feature-extraction model and fetch its VGG16 base.
tuned_model = load_model(fe_model_path)
conv_base = tuned_model.get_layer(name='vgg16')

# Make the base trainable, then re-freeze every layer whose name does not
# contain 'block5', so that only the 'block5' layers of the base stay trainable.
conv_base.trainable = True
for layer in conv_base.layers:
    if 'block5' not in layer.name:
        layer.trainable = False

# Recompile after changing trainability, with a lower learning rate.
tuned_optimizer = RMSprop(lr=1e-5)
tuned_model.compile(optimizer=tuned_optimizer,
                    loss='binary_crossentropy',
                    metrics=['accuracy'])

tuned_model.summary()

In [None]:
# Fine-tune: 100 epochs of 100 augmented batches each, validating on
# 50 batches after every epoch.
tuned_history = tuned_model.fit_generator(
    generator=augmented_train_generator,
    steps_per_epoch=100,
    epochs=100,
    validation_data=validation_generator,
    validation_steps=50,
)

# Persist the fine-tuned model.
tuned_model.save(tuned_model_path)

In [None]:
# Smoothed loss curves first, then smoothed accuracy curves.
for _plot in (plot_loss, plot_accuracy):
    _plot(tuned_history.history, smooth=True)

In [None]:
# Score the fine-tuned model on 50 batches from the test generator.
tuned_scores = tuned_model.evaluate_generator(test_generator, steps=50)
loss, acc = tuned_scores
print('After fine-tuning the model achieves an accuracy of %f' % acc)