In [122]:
import tensorflow as tf
import keras
import numpy as np

import os.path

## General Configs

In [152]:
TRAIN_IMAGE_DIR = os.path.abspath('../../data/png/train')
VALIDATE_IMAGE_DIR = os.path.abspath('../../data/png/validate')
TEST_IMAGE_DIR = os.path.abspath('../../data/png/test')

MODEL_PATH = os.path.abspath('../baseline.h5')

IMAGE_FILE_WILDCARD = '*/*.png'
DOC_TEMPLATES_DIR = os.path.abspath('../../doc_pic_generator/templates/')

BATCH_SIZE = 16
ORIGINAL_IMAGE_DIMENSION = (1684, 1190, 3)
MODEL_IMAGE_DIMENSION = (224, 224, 3) # as VGG input is (224, 224, 3)

categorical_classes = 4

# Model

For this classification task, we used a pre-trained model (VGG16) stacked with a fully connected layer of 128 units. The most of the VGG16 layers are freezed, except the last convulutional block (3 layers of Conv2D + 1 layer of maxpooling) is set to be trainable (to prevent overfitting).

In [76]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Cropping2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import optimizers
from keras.applications import VGG16

In [118]:
learning_rate = 1.0e-05
dropout_rate = .5

model = Sequential()

# freeze the weight in the VGG model
trainable_VGG = VGG16(include_top=False)

for layer in trainable_VGG.layers[:-4]:
    layer.trainable = False

for layer in trainable_VGG.layers[-4:]:
    layer.trainable = True

model.add(Lambda(lambda image: tf.image.resize_images(image, MODEL_IMAGE_DIMENSION[:-1]), input_shape=(None, None, 3), output_shape=MODEL_IMAGE_DIMENSION))
model.add(trainable_VGG)

# model.add(Conv2D(32, (5, 5), strides=(3, 3), input_shape=ORIGINAL_IMAGE_DIMENSION, padding='same'))
# model.add(Activation('relu'))
# model.add(MaxPooling2D(pool_size=(2, 2)))

# model.add(Conv2D(32, (3, 3), strides=(2, 2), padding='same'))
# model.add(Activation('relu'))
# model.add(MaxPooling2D(pool_size=(2, 2)))

# model.add(Conv2D(64, (3, 3), strides=(2, 2), padding='same'))
# model.add(Activation('relu'))
# model.add(MaxPooling2D(pool_size=(2, 2)))

# model.add(Conv2D(128, (3, 3), padding='same'))
# model.add(Activation('relu'))
# model.add(MaxPooling2D(pool_size=(2, 2)))

# model.add(Conv2D(128, (2, 2), padding='same'))
# model.add(Activation('relu'))
# model.add(MaxPooling2D(pool_size=(2, 2)))

# model.add(Conv2D(512, (1, 1), padding='same'))
# model.add(Activation('relu'))
# model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors

model.add(Dense(128, activation='relu'))
model.add(Dropout(dropout_rate))
model.add(Dense(categorical_classes, activation='softmax'))

optimizer = optimizers.Adam(lr=learning_rate, clipnorm=1.)

model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

In [119]:
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lambda_18 (Lambda)           (None, 224, 224, 3)       0         
_________________________________________________________________
vgg16 (Model)                multiple                  14714688  
_________________________________________________________________
flatten_20 (Flatten)         (None, 25088)             0         
_________________________________________________________________
dense_49 (Dense)             (None, 128)               3211392   
_________________________________________________________________
dropout_24 (Dropout)         (None, 128)               0         
_________________________________________________________________
dense_50 (Dense)             (None, 4)                 516       
Total params: 17,926,596
Trainable params: 10,291,332
Non-trainable params: 7,635,264
________________________________________________________

# Prepare Data

In [None]:
from keras.preprocessing.image import ImageDataGenerator

In [40]:
train_datagen = ImageDataGenerator(rescale=1./255, data_format='channels_last')
validate_datagen = ImageDataGenerator(rescale=1./255, data_format='channels_last')
test_datagen = ImageDataGenerator(rescale=1./255, data_format='channels_last')

In [102]:
train_generator = train_datagen.flow_from_directory(
        TRAIN_IMAGE_DIR,  # this is the target directory
#         target_size=MODEL_IMAGE_DIMENSION[:-1],
        target_size=ORIGINAL_IMAGE_DIMENSION[:-1],
        batch_size=BATCH_SIZE,
        class_mode='categorical')

Found 471 images belonging to 4 classes.


In [103]:
validation_generator = validate_datagen.flow_from_directory(
        VALIDATE_IMAGE_DIR,  # this is the target directory
#         target_size=MODEL_IMAGE_DIMENSION[:-1],
        target_size=ORIGINAL_IMAGE_DIMENSION[:-1],
        batch_size=BATCH_SIZE,
        class_mode='categorical'
)

Found 157 images belonging to 4 classes.


In [104]:
test_generator = test_datagen.flow_from_directory(
        VALIDATE_IMAGE_DIR,  # this is the target directory
#         target_size=MODEL_IMAGE_DIMENSION[:-1],
        target_size=ORIGINAL_IMAGE_DIMENSION[:-1],
        batch_size=BATCH_SIZE,
        class_mode='categorical'
)

Found 157 images belonging to 4 classes.


# Train

In [120]:
model.fit_generator(
        train_generator,
        steps_per_epoch=train_generator.samples // BATCH_SIZE,
#         steps_per_epoch=10,
        epochs=3,
        validation_data=validation_generator,
        validation_steps=validation_generator.samples // BATCH_SIZE)


Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x1fbc27908>

# Accuracy

Checking accuracy with test data where the model did not see in the training process.

In [121]:
# [loss, accuracy]
model.evaluate_generator(test_generator, test_generator.samples // BATCH_SIZE)

[0.00036661404010374099, 1.0]

Got 100% accuracy from the unseen data.. the model is doing good (because the documents are quite standardized)

# Save Model

In [126]:
model.save(MODEL_PATH)

In [154]:
class_indices = {val: key for key, val in train_generator.class_indices.items()}
json.dump(class_indices, open(CLASS_INDICES_PATH, 'w'))

# For Users

classify_doc_pic() is for end users to use the model to classify. It is available in baseline.py as well.

In [165]:
import json
import os.path
import numpy as np
import keras
from keras.preprocessing.image import img_to_array, load_img

MODEL_PATH = os.path.abspath('../baseline.h5')
CLASS_INDICES_PATH = os.path.abspath('../class_indices.json')


def classify_doc_pic(image_path, model=None, class_indices=None):
    """
    Args:
      image_path (str): file path of the image
      model (Keras Model)
      class_indices (dict): e.g. {"0": "doc_template_01", "1": "doc_template_02", "2": "doc_template_03", "3": "doc_template_04"}

    Returns:
      {
        'class_index': class_index, # e.g. 0
        'class_name': class_indices[class_index] # e.g. "doc_template_01"
      }

    """
    img = load_img(image_path)
    img_arr = np.expand_dims(img_to_array(img), 0)

    # load model if it is not load
    if model is None:
        model = keras.models.load_model(MODEL_PATH)
        print('model loaded from: {}'.format(MODEL_PATH))

    if class_indices is None:
        class_indices = json.load(open(CLASS_INDICES_PATH, 'r'))
        print('class indices loaded from: {}'.format(CLASS_INDICES_PATH))

    class_index = np.asscalar(model.predict_classes(img_arr))

    return {
        'class_index': class_index,
        'class_name': class_indices[class_index]
    }

## Example

In [167]:
classify_doc_pic(os.path.abspath('../../data/png/test/doc_template_04/doc_template_04.1507524410.1.html.png-clipped.png'), model, class_indices)



{'class_index': 3, 'class_name': 'doc_template_04'}