In [None]:
# Render matplotlib figures directly in the notebook output.
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# Default size applied to every figure created below.
plt.rcParams['figure.figsize'] = [9, 8]

import cv2
import numpy as np
import pandas as pd
# Keep pandas output compact: cap the display width and the number of
# columns / rows shown when a frame is rendered.
pd.set_option('display.width', 74)
pd.set_option('display.max_columns', 15)
pd.set_option('display.max_rows', 20)

In [None]:
import os
import datetime
import itertools

from GPUtil import getGPUs
from sklearn.metrics import confusion_matrix
import tensorflow as tf
from keras.applications.vgg16 import VGG16
from keras.utils import multi_gpu_model

# Count the GPUs reported by GPUtil; if detection fails for any reason,
# fall back to CPU-only operation.
try:
    NUM_GPU = len(getGPUs())
except Exception:
    # `Exception` (rather than a bare `except:`) lets KeyboardInterrupt and
    # SystemExit propagate instead of being silently swallowed.
    NUM_GPU = 0

# Root folder holding the train / valid / test image directories and the
# saved models.
DATA_DIR = "data/keras"
# 16 samples per detected GPU, or 16 in total when no GPU was found.
BATCH_SIZE = 16 * max(NUM_GPU, 1)
LEARNING_RATE = 0.0001

# Target size (rows, columns) that every image is resized to when loaded.
IMAGE_ROW_SIZE = 584
IMAGE_COLUMN_SIZE = 480

We're gonna need these callbacks and functions.

In [None]:
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

# One sub-directory per character class.  Only directories are kept (stray
# files such as `.DS_Store` would otherwise inflate NUM_CHARACTERS), and the
# names are sorted so their order is deterministic instead of following the
# arbitrary order returned by `os.listdir`.
_TRAIN_IMAGE_DIR = os.path.join(DATA_DIR, "train", "images")
CHAR_NAMES = sorted(
    name for name in os.listdir(_TRAIN_IMAGE_DIR)
    if os.path.isdir(os.path.join(_TRAIN_IMAGE_DIR, name)))
NUM_CHARACTERS = len(CHAR_NAMES)

# Stop training once the validation loss has not improved by at least 0.005
# for 3 consecutive epochs.
stop_on_val_loss = EarlyStopping(monitor='val_loss', min_delta=0.005,
                                 patience=3, verbose=0, mode='auto')
# Multiply the learning rate by 0.2 after 2 epochs without validation-loss
# improvement, never going below 1e-5.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                              patience=2, min_lr=0.00001)

In [None]:
def plot_confusion_matrix(matrix, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Draw a confusion matrix as an annotated heat map on the current figure.

    Parameters
    ----------
    matrix : numpy.ndarray
        Square array of counts; rows are true labels, columns are predictions.
    classes : sequence of str
        Tick labels, one per row / column of `matrix`.
    normalize : bool
        When True, every row is divided by its total so each cell shows the
        fraction of that true class.  Rows without any samples are left as
        zeros instead of turning into NaN.
    title : str
        Title of the plot.
    cmap : matplotlib colormap
        Colormap used for the heat map.
    """
    if normalize:
        row_totals = matrix.sum(axis=1)[:, np.newaxis]
        # A class with no true samples has a zero total; divide by 1 instead
        # so the row stays all-zero rather than producing NaN cells.
        safe_totals = np.where(row_totals == 0, 1, row_totals)
        matrix = matrix.astype('float') / safe_totals

    plt.imshow(matrix, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    # Cells darker than half of the maximum get white text for contrast.
    thresh = matrix.max() / 2.
    for i, j in itertools.product(range(matrix.shape[0]), range(matrix.shape[1])):
        plt.text(j, i, format(matrix[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if matrix[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout pass last so the axis labels are taken into account and
    # not clipped.
    plt.tight_layout()

In [None]:
def parallelize_model_maybe(model):
    """Return a multi-GPU replica of `model` when more than one GPU exists.

    `multi_gpu_model` only accepts `gpus >= 2` and raises a ValueError
    otherwise, so with zero or one GPU the model is returned unchanged.
    """
    if NUM_GPU > 1:
        model = multi_gpu_model(model, gpus=NUM_GPU)
    return model

In [None]:
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.imagenet_utils import preprocess_input

def make_generator(folder="train",
                   data_gen_args={"fill_mode": "constant",
                                  "cval": 0,
                                  "width_shift_range": 0.05,
                                  "height_shift_range": 0.05,
                                  "zoom_range": 0.1,
                                  "horizontal_flip": True,
                                  "preprocessing_function": preprocess_input},
                   data_flow_args={"seed": 1,
                                   "batch_size": BATCH_SIZE}):
    """Create a Keras directory iterator over ``DATA_DIR/<folder>/images``.

    `data_gen_args` is forwarded to `ImageDataGenerator` (the default applies
    small shifts, zoom, horizontal flips and `preprocess_input`), while
    `data_flow_args` is forwarded to `flow_from_directory`.  Images are read
    as RGB and resized to (IMAGE_ROW_SIZE, IMAGE_COLUMN_SIZE).

    Neither default dictionary is modified here; callers should pass their
    own dictionaries rather than mutating the defaults.
    """
    source_directory = os.path.join(DATA_DIR, folder, "images")
    augmenter = ImageDataGenerator(**data_gen_args)

    return augmenter.flow_from_directory(
        directory=source_directory,
        color_mode='rgb',
        target_size=(IMAGE_ROW_SIZE, IMAGE_COLUMN_SIZE),
        **data_flow_args)


def count_images(folder="train"):
    """Count the entries stored in the class folders of a data split.

    Looks at ``DATA_DIR/<folder>/images/<class>/`` for every class
    sub-directory and returns the total number of directory entries found.
    Entries directly inside ``images`` that are not directories (for example
    a stray ``.DS_Store``) are skipped instead of raising an error.
    """
    image_directory = os.path.join(DATA_DIR, folder, "images")
    data_size = 0

    for char_name in os.listdir(image_directory):
        char_directory = os.path.join(image_directory, char_name)
        # Only class folders contribute; listing a plain file would raise.
        if not os.path.isdir(char_directory):
            continue
        data_size += len(os.listdir(char_directory))

    return data_size


def steps_per_epoch(folder="train", batch_size=BATCH_SIZE):
    """Number of complete batches of `batch_size` available in `folder`.

    Any final partial batch is not counted.
    """
    total_images = count_images(folder)
    full_batches, _leftover = divmod(total_images, batch_size)
    return full_batches

# Extract Features from my Images

Just FYI: in the following we're only dealing with about 30% of all the images. That's to ensure we can fit all the cached features in memory. We generated these lists with `make keras KERAS_TRAIN_PCT=20 KERAS_TEST_PCT=5 KERAS_VALID_PCT=5`.

This all takes a very long time to run. We should not run this every time; rather, we should run it once to generate and save the VGG16 features.

In [None]:
# Instantiate the convolution-only VGG16 (no classifier head) under the CPU
# device scope, then replicate it across GPUs when that is possible.
with tf.device("/cpu:0"):
    vgg_model = VGG16(weights="imagenet", include_top=False)
vgg_model = parallelize_model_maybe(vgg_model)

To pull out all the images for feeding through VGG16 we use `class_mode=None` and `shuffle=False`.

In [None]:
image_count_train = count_images("train")
# Number of batches needed to see every image exactly once (ceiling
# division).  This stays an integer and avoids running a surplus batch
# through VGG16, unlike `count / BATCH_SIZE + 1`.
image_count_train_generator_cutoff = (
    (image_count_train + BATCH_SIZE - 1) // BATCH_SIZE)

# Image-only iterator (class_mode=None) in a fixed order, used as VGG16 input.
generator_train = make_generator(
    "train",
    data_gen_args={"preprocessing_function": preprocess_input},
    data_flow_args={"batch_size": BATCH_SIZE,
                    "class_mode": None,
                    "shuffle": False})
# Second iterator over the same files in the same order; only its labels
# are consumed.
generator_train_labels = make_generator(
    "train",
    data_gen_args={},
    data_flow_args={"batch_size": BATCH_SIZE,
                    "shuffle": False})

This step takes a while.  I wish I could figure out a faster way to pull out just the labels in generator order.

In [None]:
# Collect the label rows batch by batch.  The loop must stop explicitly once
# the cutoff number of batches has been consumed.
train_labels = []

for batch_index, batch in enumerate(generator_train_labels):
    if not batch_index < image_count_train_generator_cutoff:
        break
    batch_labels = batch[1]
    train_labels.extend(batch_labels)

# Trim anything beyond the real number of images before converting.
train_labels = train_labels[:image_count_train]
train_labels = np.array(train_labels)

Then this one takes super long, because we're running VGG16.  I've tried upping the batch size as much as I can, but it's still quite slow.  The resulting feature array is also quite huge.

In [None]:
# Run every training batch through VGG16 and keep only the rows that belong
# to real images.
train_features_all = vgg_model.predict_generator(
    generator_train,
    steps=image_count_train_generator_cutoff)
train_data = train_features_all[:image_count_train]

Do the same with our validation data.

In [None]:
image_count_valid = count_images("valid")
# Number of batches needed to see every image exactly once (ceiling
# division).  This stays an integer and avoids running a surplus batch
# through VGG16, unlike `count / BATCH_SIZE + 1`.
image_count_valid_generator_cutoff = (
    (image_count_valid + BATCH_SIZE - 1) // BATCH_SIZE)

# Image-only iterator (class_mode=None) in a fixed order, used as VGG16 input.
generator_valid = make_generator(
    "valid",
    data_gen_args={"preprocessing_function": preprocess_input},
    data_flow_args={"batch_size": BATCH_SIZE,
                    "class_mode": None,
                    "shuffle": False})
# Second iterator over the same files in the same order; only its labels
# are consumed.
generator_valid_labels = make_generator(
    "valid",
    data_gen_args={},
    data_flow_args={"batch_size": BATCH_SIZE,
                    "shuffle": False})

# Gather the label rows, stopping after the cutoff number of batches.
valid_labels = []
for i, (_, labels) in enumerate(generator_valid_labels):
    if i >= image_count_valid_generator_cutoff:
        break
    else:
        valid_labels.extend(labels)
valid_labels = np.array(valid_labels[:image_count_valid])

# VGG16 features for the validation images, trimmed to the image count.
valid_data = vgg_model.predict_generator(generator_valid,
                                         steps=image_count_valid_generator_cutoff)
valid_data = valid_data[:image_count_valid]

And with our test data.

In [None]:
image_count_test = count_images("test")
# Number of batches needed to see every image exactly once (ceiling
# division).  This stays an integer and avoids running a surplus batch
# through VGG16, unlike `count / BATCH_SIZE + 1`.
image_count_test_generator_cutoff = (
    (image_count_test + BATCH_SIZE - 1) // BATCH_SIZE)

# Image-only iterator (class_mode=None) in a fixed order, used as VGG16 input.
generator_test = make_generator(
    "test",
    data_gen_args={"preprocessing_function": preprocess_input},
    data_flow_args={"batch_size": BATCH_SIZE,
                    "class_mode": None,
                    "shuffle": False})
# Second iterator over the same files in the same order; only its labels
# are consumed.
generator_test_labels = make_generator(
    "test",
    data_gen_args={},
    data_flow_args={"batch_size": BATCH_SIZE,
                    "shuffle": False})

# Gather the label rows, stopping after the cutoff number of batches.
test_labels = []
for i, (_, labels) in enumerate(generator_test_labels):
    if i >= image_count_test_generator_cutoff:
        break
    else:
        test_labels.extend(labels)
test_labels = np.array(test_labels[:image_count_test])

# VGG16 features for the test images, trimmed to the image count.
test_data = vgg_model.predict_generator(generator_test,
                                        steps=image_count_test_generator_cutoff)
test_data = test_data[:image_count_test]

# Train a Small Neural Network

Randomize the data.

In [None]:
# One random ordering of row indices per split, drawn from NumPy's global
# random state (train first, then validation, then test).
train_randomizer = np.random.permutation(train_data.shape[0])

valid_randomizer = np.random.permutation(valid_data.shape[0])

test_randomizer = np.random.permutation(test_data.shape[0])

In [None]:
# Reorder features and labels with the same index permutation so that every
# feature row stays paired with its label.
train_data_r = np.take(train_data, train_randomizer, axis=0)
train_labels_r = np.take(train_labels, train_randomizer, axis=0)

valid_data_r = np.take(valid_data, valid_randomizer, axis=0)
valid_labels_r = np.take(valid_labels, valid_randomizer, axis=0)

test_data_r = np.take(test_data, test_randomizer, axis=0)
test_labels_r = np.take(test_labels, test_randomizer, axis=0)

In [None]:
from keras.models import Sequential
from keras.optimizers import Adam
from keras.layers import Dropout, Flatten, Dense

def build_model(input_shape, target_num=NUM_CHARACTERS,
               hidden_width=128, hidden_depth=1, dropout_rate=0.2):
    """Build an (uncompiled) dense classifier for pre-computed features.

    The network flattens an input of `input_shape`, applies `hidden_depth`
    blocks of Dense(`hidden_width`, relu) followed by Dropout(`dropout_rate`),
    and ends in a softmax layer with `target_num` outputs.  It is created
    under the CPU device scope.
    """
    with tf.device("/cpu:0"):
        stack = [Flatten(input_shape=input_shape)]
        for _ in range(hidden_depth):
            stack.append(Dense(hidden_width, activation='relu',
                               kernel_initializer='lecun_uniform'))
            stack.append(Dropout(dropout_rate))
        stack.append(Dense(target_num, activation='softmax'))

        # Sequential adds the layers one by one in list order.
        classifier = Sequential(stack)

    return classifier

def compile_model(model, learning_rate=LEARNING_RATE):
    """Wrap `model` for multi-GPU use when possible and compile it.

    The returned model is compiled with Adam at `learning_rate`, categorical
    cross-entropy loss and an accuracy metric.  It may be a different object
    from the `model` passed in.
    """
    trainable = parallelize_model_maybe(model)
    trainable.compile(
        loss='categorical_crossentropy',
        optimizer=Adam(lr=learning_rate),
        metrics=['accuracy'],
    )
    return trainable

And, finally, start training our model.

In [None]:
# Three hidden layers of 512 units on top of the cached VGG16 features.
# `template_model` is the plain network; `tiny_model` is what gets trained.
template_model = build_model(
    input_shape=train_data.shape[1:],
    hidden_depth=3,
    hidden_width=512,
    dropout_rate=0.2,
)
tiny_model = compile_model(model=template_model)

In [None]:
# Print the layer-by-layer summary of the small classifier.
template_model.summary()

In [None]:
# Train on the shuffled cached features, validating after each epoch.
# Training may stop early or lower the learning rate via the two callbacks.
training_history = tiny_model.fit(
    x=train_data_r,
    y=train_labels_r,
    batch_size=BATCH_SIZE,
    epochs=15,
    callbacks=[stop_on_val_loss, reduce_lr],
    validation_data=(valid_data_r, valid_labels_r))

In [None]:
# Save the template network under a timestamped file name inside DATA_DIR.
tiny_model_stamp = datetime.datetime.today().strftime('%Y-%m-%d-%H-%M')
tiny_model_path = os.path.join(DATA_DIR, tiny_model_stamp + "-tiny-model.h5")
template_model.save(tiny_model_path)

And now we can visualize our progress!

In [None]:
# Class index of the true label and of the most probable prediction for every
# validation sample (unshuffled arrays, so both share the same order).
true_classes = valid_labels.argmax(1)
pred_classes = tiny_model.predict(valid_data).argmax(1)
# Class names ordered by the index the generator assigned to them.
class_names = [c for c, i in sorted(generator_valid.class_indices.items(), key=lambda pair: pair[1])]

# Passing `labels` fixes the matrix to one row / column per known class, so
# it still lines up with `class_names` when a class never occurs in the
# validation labels or predictions.
c_matrix = confusion_matrix(true_classes, pred_classes,
                            labels=np.arange(len(class_names)))

In [None]:
# Show the row-normalized confusion matrix for the validation set.
plot_confusion_matrix(c_matrix, class_names, normalize=True)

# Train a Huge Neural Network

So it seems I just can't get good results trying to save features and train a small network. Maybe that bodes very poorly for my project. But let's try training a giant network on the expensive Amazon machines and just hope.

In [None]:
from keras.applications.vgg16 import VGG16
from keras.layers import Dense, Flatten, Dropout
from keras.models import Model
from keras.optimizers import Adam

def init_model(target_num=NUM_CHARACTERS, learning_rate=LEARNING_RATE,
               hidden_width=128, hidden_depth=1, dropout_rate=0.2):
    """Build and compile a frozen VGG16 with a trainable dense head.

    The ImageNet-pretrained VGG16 convolutional base takes images of shape
    (IMAGE_ROW_SIZE, IMAGE_COLUMN_SIZE, 3) and has all of its layers marked
    non-trainable.  On top sit `hidden_depth` blocks of
    Dense(`hidden_width`, relu) + Dropout(`dropout_rate`) and a softmax layer
    named "predictions" with `target_num` outputs.

    The model is created under the CPU device scope, replicated across GPUs
    when that is possible, and compiled with Adam at `learning_rate`,
    categorical cross-entropy loss and an accuracy metric.
    """
    with tf.device("/cpu:0"):
        # Only the new head is trained: freeze every layer of the base.
        backbone = VGG16(include_top=False, weights='imagenet',
                         input_shape=(IMAGE_ROW_SIZE, IMAGE_COLUMN_SIZE, 3))
        for frozen_layer in backbone.layers:
            frozen_layer.trainable = False

        head = Flatten()(backbone.output)
        for _ in range(hidden_depth):
            head = Dense(hidden_width, activation='relu',
                         kernel_initializer='lecun_uniform')(head)
            head = Dropout(dropout_rate)(head)
        class_scores = Dense(target_num, activation="softmax",
                             name="predictions")(head)

        full_network = Model(inputs=backbone.input, outputs=class_scores)

    trainable_network = parallelize_model_maybe(full_network)
    trainable_network.compile(loss="categorical_crossentropy",
                              optimizer=Adam(lr=learning_rate),
                              metrics=["accuracy"])

    return trainable_network

In [None]:
# Frozen VGG16 base plus two hidden layers of 256 units each.
big_model = init_model(hidden_width=256, hidden_depth=2, dropout_rate=0.2)

In [None]:
# Print the layer-by-layer summary of the full network.
big_model.summary()

In [None]:
# Train on augmented images, but validate on un-augmented ones: the default
# `make_generator` arguments apply random shifts, zoom and flips, which would
# add noise to the `val_loss` that drives early stopping.
training_history = big_model.fit_generator(
    generator=make_generator("train"),
    steps_per_epoch=steps_per_epoch("train"),
    epochs=5,
    validation_data=make_generator(
        "valid",
        data_gen_args={"preprocessing_function": preprocess_input}),
    validation_steps=steps_per_epoch("valid"),
    callbacks=[stop_on_val_loss])

In [None]:
# Save the trained network under a timestamped file name inside DATA_DIR.
big_model_stamp = datetime.datetime.today().strftime('%Y-%m-%d-%H-%M')
big_model_path = os.path.join(DATA_DIR, big_model_stamp + "-big-model.h5")
big_model.save(big_model_path)