In [1]:
from __future__ import division, print_function
from keras import backend as K
from keras.callbacks import ModelCheckpoint
from keras.layers import Input, merge
from keras.layers.core import Activation, Dense, Dropout, Lambda
from keras.layers.convolutional import Convolution2D
from keras.layers.pooling import MaxPooling2D, GlobalAveragePooling2D
from keras.models import Sequential, Model
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import RMSprop
from keras.utils import np_utils
from sklearn.model_selection import train_test_split
from scipy.misc import imresize
import itertools
import matplotlib.pyplot as plt
import numpy as np
import os
%matplotlib inline

Using TensorFlow backend.


In [2]:
# Where the dataset lives on disk; the photos sit in a sub-folder of DATA_DIR.
DATA_DIR = "../data"
IMAGE_DIR = os.path.join(DATA_DIR, "holiday-photos")

# Target size every image is resized to before it is batched for the network.
RESIZE_WIDTH, RESIZE_HEIGHT = 300, 300

## Preprocess Data

File naming conventions are as follows:

* the first 4 digits of the filename identify the group
* the next 2 digits identify the image within that group.

For example: 100000.jpg and 100002.jpg are "similar", but 123700.jpg is "different".

In [3]:
def get_image_triples(image_dir):
    """Build labelled image pairs from a directory of grouped photos.

    Files are grouped by the first four characters of their name (the last
    four characters, e.g. ``.jpg``, are dropped first). Every unordered pair
    of images inside a group becomes a "similar" record with label 1. For each
    such pair one "different" record with label 0 is added, pairing the same
    reference image with a randomly chosen image from a randomly chosen other
    group.

    Args:
        image_dir: directory whose entries are the image files.

    Returns:
        A list of ``(ref_image, other_image, label)`` tuples holding file
        names (not paths). Positive and negative records alternate.

    Raises:
        ValueError: if a similar pair exists but there is no second group to
            draw a dissimilar image from.
    """
    # Bucket the file names by their 4-character group prefix.
    image_groups = {}
    for image_name in os.listdir(image_dir):
        base_name = image_name[0:-4]
        group_name = base_name[0:4]
        image_groups.setdefault(group_name, []).append(image_name)

    num_sim = 0
    image_triples = []
    group_list = sorted(image_groups)
    for i, g in enumerate(group_list):
        if num_sim % 100 == 0:
            print("Generated {:d} pos + {:d} neg = {:d} total image triples"
                  .format(num_sim, num_sim, 2*num_sim), end="\r")
        images_in_group = image_groups[g]
        # generate similar pairs
        sim_pairs_it = itertools.combinations(images_in_group, 2)
        # for each similar pair, generate a different pair
        for ref_image, sim_image in sim_pairs_it:
            if len(group_list) < 2:
                # Without this guard the rejection loop below never terminates.
                raise ValueError(
                    "need at least two image groups to sample a dissimilar image")
            image_triples.append((ref_image, sim_image, 1))
            num_sim += 1
            # Rejection-sample a group index other than the current one.
            while True:
                j = np.random.randint(low=0, high=len(group_list), size=1)[0]
                if j != i:
                    break
            dif_image_candidates = image_groups[group_list[j]]
            k = np.random.randint(low=0, high=len(dif_image_candidates), size=1)[0]
            dif_image = dif_image_candidates[k]
            image_triples.append((ref_image, dif_image, 0))

    print("Generated {:d} pos + {:d} neg = {:d} total image triples, COMPLETE"
          .format(num_sim, num_sim, 2*num_sim))
    return image_triples


# Build the labelled pairs, then keep 70% for training and hold out the rest.
# NOTE(review): no random_state is passed, so the split differs between runs.
triples = get_image_triples(IMAGE_DIR)
triples_train, triples_test = train_test_split(triples, train_size=0.7)
print(len(triples_train), len(triples_test))

Generated 0 pos + 0 neg = 0 total image triplesGenerated 300 pos + 300 neg = 600 total image triplesGenerated 1100 pos + 1100 neg = 2200 total image triplesGenerated 1800 pos + 1800 neg = 3600 total image triplesGenerated 2072 pos + 2072 neg = 4144 total image triples, COMPLETE
2900 1244


In [4]:
def cached_imread(image_path, image_cache):
    """Return the resized image at ``image_path``, memoised in ``image_cache``.

    On a cache miss the file is read with ``plt.imread``, resized with
    ``imresize`` to ``(RESIZE_WIDTH, RESIZE_HEIGHT)`` and stored in the cache
    under its path. On a hit the stored object is returned without touching
    the disk.

    Args:
        image_path: path of the image file; also the cache key.
        image_cache: dict mapping path -> resized image; mutated in place.

    Returns:
        The cached (resized) image for ``image_path``.
    """
    # `dict.has_key` only exists on Python 2; `in` works on both 2 and 3.
    if image_path not in image_cache:
        image = plt.imread(image_path)
        # NOTE(review): the size tuple is passed as (width, height); harmless
        # while both are equal, but confirm the intended axis order.
        image = imresize(image, (RESIZE_WIDTH, RESIZE_HEIGHT))
        image_cache[image_path] = image
    return image_cache[image_path]

def preprocess_images(image_names, seed, datagen, image_cache):
    """Load, augment and scale a batch of images.

    NumPy's global random generator is reseeded with ``seed`` first; the
    caller uses the same seed for the left and right batches so that both
    sides are meant to receive the same random transformations.

    Args:
        image_names: file names relative to ``IMAGE_DIR``.
        seed: value handed to ``np.random.seed`` before augmenting.
        datagen: object exposing ``random_transform(image)``.
        image_cache: dict shared with ``cached_imread``.

    Returns:
        Array of shape ``(len(image_names), RESIZE_WIDTH, RESIZE_HEIGHT, 3)``
        holding the transformed images divided by 255.
    """
    np.random.seed(seed)
    batch_shape = (len(image_names), RESIZE_WIDTH, RESIZE_HEIGHT, 3)
    X = np.zeros(batch_shape)
    for row, file_name in enumerate(image_names):
        image_path = os.path.join(IMAGE_DIR, file_name)
        resized = cached_imread(image_path, image_cache)
        augmented = datagen.random_transform(resized)
        X[row] = augmented / 255.0
    return X

def image_triple_generator(image_triples, batch_size):
    """Endlessly yield augmented ``([Xleft, Xright], Y)`` batches.

    Each pass over the data shuffles the records and emits
    ``len(image_triples) // batch_size`` full batches; records left over after
    the last full batch are skipped for that pass.

    Args:
        image_triples: sequence of ``(left_name, right_name, label)`` records
            with label 0 or 1.
        batch_size: number of records per batch.

    Yields:
        ``[Xleft, Xright], Y`` where the X arrays come from
        ``preprocess_images`` and ``Y`` is the one-hot encoding of the labels
        with exactly two columns.

    Raises:
        ValueError: if there are fewer records than ``batch_size`` (no batch
            could ever be produced).
    """
    datagen_args = dict(rotation_range=10,
                        width_shift_range=0.2,
                        height_shift_range=0.2,
                        shear_range=0.2,
                        zoom_range=0.2,
                        horizontal_flip=True)
    datagen_left = ImageDataGenerator(**datagen_args)
    datagen_right = ImageDataGenerator(**datagen_args)
    image_cache = {}

    while True:
        # loop once per epoch
        num_recs = len(image_triples)
        num_batches = num_recs // batch_size
        if num_batches == 0:
            # Otherwise the outer loop would spin forever without yielding.
            raise ValueError(
                "batch_size ({}) exceeds the number of records ({})"
                .format(batch_size, num_recs))
        indices = np.random.permutation(np.arange(num_recs))
        for bid in range(num_batches):
            # loop once per batch
            batch_indices = indices[bid * batch_size : (bid + 1) * batch_size]
            batch = [image_triples[i] for i in batch_indices]
            # make sure the two image data generators generate same transformations
            seed = np.random.randint(low=0, high=1000, size=1)[0]
            Xleft = preprocess_images([b[0] for b in batch], seed, datagen_left, image_cache)
            Xright = preprocess_images([b[1] for b in batch], seed, datagen_right, image_cache)
            # Pin the class count: inferring it from the batch gives a single
            # column whenever a batch happens to contain only label 0.
            # NOTE(review): the model in this notebook outputs one distance
            # column, while these labels are one-hot with two columns; confirm
            # which target format the loss is meant to receive.
            Y = np_utils.to_categorical(np.array([b[2] for b in batch]), 2)
            yield [Xleft, Xright], Y
            

# Training batches are drawn from the training split. Validation batches are
# drawn from the held-out split so that validation loss (and the checkpoint
# chosen from it) is not measured on the very pairs the model is fitted on.
image_datagen = image_triple_generator(triples_train, 32)
image_datagen_val = image_triple_generator(triples_test, 32)

In [5]:
# batch = image_datagen.next()
# batch

## Define Network

In [6]:
def create_base_network(input_shape):
    """Build the convolutional encoder shared by both siamese branches.

    The network stacks three blocks of 3x3 "same"-padded convolution ->
    ReLU -> max pooling -> dropout(0.2) with 64, 128 and 256 filters, then
    finishes with global average pooling.

    Args:
        input_shape: shape of one input image, passed to the first layer.

    Returns:
        The assembled ``Sequential`` model.
    """
    model = Sequential()
    for block_index, num_filters in enumerate((64, 128, 256)):
        if block_index == 0:
            # Only the first layer is told the input shape.
            conv = Convolution2D(num_filters, 3, 3, border_mode="same",
                                 input_shape=input_shape)
        else:
            conv = Convolution2D(num_filters, 3, 3, border_mode="same")
        model.add(conv)
        model.add(Activation("relu"))
        model.add(MaxPooling2D())
        model.add(Dropout(0.2))
    model.add(GlobalAveragePooling2D())
    return model

# One encoder instance is applied to both inputs, so the two branches of the
# siamese network share the same layers.
input_shape = (RESIZE_WIDTH, RESIZE_HEIGHT, 3)
vectorizer = create_base_network(input_shape)

input_left, input_right = Input(shape=input_shape), Input(shape=input_shape)
vector_left, vector_right = vectorizer(input_left), vectorizer(input_right)

In [7]:
def euclidean_distance(vectors):
    """Row-wise Euclidean distance between two batches of vectors.

    Args:
        vectors: a pair ``(x, y)`` of backend tensors of equal shape.

    Returns:
        Tensor with the distance of each row pair, reduced over axis 1 with
        the reduced axis kept (one column per row).
    """
    x, y = vectors
    sum_square = K.sum(K.square(x - y), axis=1, keepdims=True)
    # The gradient of sqrt is unbounded at 0, which yields NaNs when the two
    # vectors coincide; clamp to the backend's epsilon before the root.
    return K.sqrt(K.maximum(sum_square, K.epsilon()))

def euclidean_distance_output_shape(shapes):
    """Output shape of ``euclidean_distance`` for the given input shapes.

    Args:
        shapes: a pair of input shapes; only the first one's leading
            dimension is used.

    Returns:
        ``(batch_dim, 1)``, where ``batch_dim`` is the first entry of the
        first shape.
    """
    first_shape, _second_shape = shapes
    batch_dim = first_shape[0]
    return (batch_dim, 1)

# Wrap the distance function in a layer and apply it to the two embeddings.
distance = Lambda(
    function=euclidean_distance,
    output_shape=euclidean_distance_output_shape,
)([vector_left, vector_right])

In [8]:
# Two image inputs in, one distance per pair out.
model = Model(
    input=[input_left, input_right],
    output=distance,
)

In [None]:
def contrastive_loss(y, y_):
    """Contrastive loss for a distance-valued output.

    Similar pairs (``y == 1``) are penalised by their squared distance;
    dissimilar pairs (``y == 0``) are penalised only while their distance is
    below the margin, by the squared shortfall.

    Args:
        y: true labels, 1 for similar and 0 for dissimilar pairs.
        y_: predicted distances.

    Returns:
        Mean of ``y * d^2 + (1 - y) * max(margin - d, 0)^2``.
    """
    margin = 1
    # The similar-pair term must multiply the label by the squared distance;
    # subtracting it would reward large distances for every pair.
    similar_term = y * K.square(y_)
    dissimilar_term = (1 - y) * K.square(K.maximum(margin - y_, 0))
    return K.mean(similar_term + dissimilar_term)

# Train with RMSprop at its default settings against the contrastive loss.
rms = RMSprop()
model.compile(optimizer=rms, loss=contrastive_loss)

## Train Model

In [None]:
# Write the model to disk only when the monitored quantity improves.
checkpoint = ModelCheckpoint(
    filepath=os.path.join(DATA_DIR, "holidays-siamese-best.h5"),
    save_best_only=True)

NUM_EPOCHS = 10
num_train_samples = len(triples_train)
# Validation draws a number of samples equal to 20% of the training-set size.
num_val_samples = int(0.2 * num_train_samples)

history = model.fit_generator(
    image_datagen,
    samples_per_epoch=num_train_samples,
    nb_epoch=NUM_EPOCHS,
    validation_data=image_datagen_val,
    nb_val_samples=num_val_samples,
    callbacks=[checkpoint])

Epoch 1/10
