In [None]:
from __future__ import absolute_import
from __future__ import print_function
import numpy as np

import tensorflow as tf

import random

from keras import backend as K, callbacks
from keras.datasets import mnist
from keras.models import Model
from keras.layers import Input, Flatten, Dense, Dropout, Lambda

from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Switch matplotlib over to seaborn's default theme for every figure below.
sns.set()

In [None]:
# Notebook-wide configuration.
num_classes = 10          # number of label classes used to build the pairs
epochs = 30               # number of epochs passed to model.fit
embedding_samples = 2000  # number of test pairs sampled into embedding_data

# Seed the two RNGs this notebook draws from directly: `random` picks the
# negative class in create_pairs and `np.random` shuffles the indices used for
# embedding_data. Without this, every kernel restart builds different pairs.
# NOTE: weight initialisation and dropout use the Keras backend's own RNG and
# are not made deterministic by these two calls.
seed = 0
random.seed(seed)
np.random.seed(seed)


def euclidean_distance(vects):
    '''Euclidean distance between two batches of vectors.

    `vects` is a pair of backend tensors; squared differences are summed over
    axis 1 with `keepdims=True`, so the result keeps a size-1 second axis
    (presumably inputs are (batch, features) -- confirm against the caller).
    '''
    left, right = vects
    squared_norm = K.sum(K.square(left - right), axis=1, keepdims=True)
    # Floor the squared distance at epsilon so sqrt never sees an exact zero.
    floored = K.maximum(squared_norm, K.epsilon())
    return K.sqrt(floored)


def eucl_dist_output_shape(shapes):
    '''Output shape for the distance layer: first dim of the first input, then 1.

    `shapes` must unpack into exactly two shape tuples; only the first one
    is consulted.
    '''
    first_shape, _ = shapes
    leading_dim = first_shape[0]
    return (leading_dim, 1)


def contrastive_loss(y_true, y_pred):
    '''Contrastive loss (Hadsell, Chopra & LeCun, 2006).
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf

    `y_pred` is the predicted distance and `y_true` the pair label: where
    `y_true` is 1 the squared distance is penalised, where it is 0 the squared
    shortfall of the distance below the margin is penalised. Returns the mean
    over all elements.
    '''
    margin = 1
    similar_term = y_true * K.square(y_pred)
    # Hinge: pairs already further apart than the margin contribute nothing.
    shortfall = K.maximum(margin - y_pred, 0)
    dissimilar_term = (1 - y_true) * K.square(shortfall)
    return K.mean(similar_term + dissimilar_term)


def create_pairs(x, digit_indices, num_classes=None):
    '''Positive and negative pair creation.
    Alternates between positive and negative pairs.

    Arguments:
        x: indexable collection of samples (e.g. an array of images).
        digit_indices: sequence where entry `d` holds the indices into `x`
            of the samples belonging to class `d`.
        num_classes: number of classes to draw pairs from. Defaults to
            `len(digit_indices)`, so the function no longer depends on a
            module-level global.

    Returns:
        `(pairs, labels)` as numpy arrays. `pairs[k]` is a two-element pair
        of samples; `labels[k]` is 1 for a same-class pair and 0 for a
        different-class pair, strictly alternating 1, 0, 1, 0, ...

    Raises:
        ValueError: if fewer than two classes are available, since a negative
            pair needs a second class to draw from.
    '''
    if num_classes is None:
        num_classes = len(digit_indices)
    if num_classes < 2:
        raise ValueError(
            'create_pairs needs at least 2 classes, got %d' % num_classes)

    # Every class contributes the same number of pairs, bounded by the
    # smallest class; the -1 leaves room for the `i + 1` positive partner.
    n = min(len(digit_indices[d]) for d in range(num_classes)) - 1

    pairs = []
    labels = []
    for d in range(num_classes):
        for i in range(n):
            anchor = digit_indices[d][i]
            # Positive pair: the next sample of the same class.
            pairs.append([x[anchor], x[digit_indices[d][i + 1]]])
            # Negative pair: an offset in [1, num_classes) guarantees that
            # the partner class differs from `d`.
            inc = random.randrange(1, num_classes)
            dn = (d + inc) % num_classes
            pairs.append([x[anchor], x[digit_indices[dn][i]]])
            labels += [1, 0]
    return np.array(pairs), np.array(labels)


def create_base_network(input_shape):
    '''Build the shared feature-extraction network.

    The input is flattened and passed through three 128-unit ReLU dense
    layers ('fc1', 'fc2', 'fc3'), with 10% dropout after the first two.
    Returns a Model named 'L' so it can later be retrieved by name.
    '''
    inputs = Input(shape=input_shape)
    features = Flatten()(inputs)
    # fc1 and fc2 are each followed by dropout; fc3 is the raw embedding.
    for layer_name in ('fc1', 'fc2'):
        features = Dense(128, activation='relu', name=layer_name)(features)
        features = Dropout(0.1)(features)
    features = Dense(128, activation='relu', name='fc3')(features)
    return Model(inputs, features, name='L')


def compute_accuracy(y_true, y_pred, threshold=0.5):
    '''Compute classification accuracy with a fixed threshold on distances.

    Arguments:
        y_true: pair labels (1 = same class, 0 = different), any shape.
        y_pred: predicted distances, same number of elements as `y_true`.
        threshold: distances strictly below this count as "same class".

    Returns:
        Fraction of pairs whose thresholded prediction equals the label.

    Raises:
        ValueError: if `y_true` and `y_pred` differ in number of elements.
    '''
    # Flatten both sides: comparing a flat prediction vector against an
    # (N, 1) label column would broadcast to an (N, N) matrix and silently
    # report a wrong accuracy.
    labels = np.asarray(y_true).ravel()
    distances = np.asarray(y_pred).ravel()
    if labels.shape != distances.shape:
        raise ValueError(
            'y_true and y_pred must have the same number of elements, '
            'got %d and %d' % (labels.size, distances.size))
    pred = distances < threshold
    return np.mean(pred == labels)


def accuracy(y_true, y_pred):
    '''Backend-tensor accuracy metric with a fixed 0.5 distance threshold.

    A predicted distance below 0.5 is read as label 1, otherwise 0; the
    result is the mean agreement with `y_true`.
    '''
    # Cast the boolean mask to the label dtype so the two can be compared.
    predicted_label = K.cast(y_pred < 0.5, y_true.dtype)
    matches = K.equal(y_true, predicted_label)
    return K.mean(matches)


# Load MNIST (already split into train and test sets) and rescale the
# images by 1/255 as float32.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255
input_shape = x_train.shape[1:]

# Group sample indices by label, then build alternating positive/negative
# pairs -- first for the training set ...
digit_indices = [np.where(y_train == label)[0] for label in range(num_classes)]
tr_pairs, tr_y = create_pairs(x_train, digit_indices)

# ... then for the test set (digit_indices is rebound to the test indices).
digit_indices = [np.where(y_test == label)[0] for label in range(num_classes)]
te_pairs, te_y = create_pairs(x_test, digit_indices)


# Pick a random subset of at most `embedding_samples` test pairs and keep the
# two sides of each selected pair as separate arrays.
s = np.arange(te_pairs.shape[0])
np.random.shuffle(s)
s = s[:embedding_samples]

embedding_data = [te_pairs[s, 0], te_pairs[s, 1]]

In [None]:
# One base network instance, reused for both branches so they share weights.
L = create_base_network(input_shape)

# Two inputs, one per element of a pair.
xa = Input(shape=input_shape)
xb = Input(shape=input_shape)

# Embed each side with the same network.
ya = L(xa)
yb = L(xb)

# The model's output is the Euclidean distance between the two embeddings.
y = Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)([ya, yb])

model = Model([xa, xb], y)
model.compile(optimizer='adam', loss=contrastive_loss, metrics=[accuracy])

In [None]:
# Print the layer-by-layer summary of the siamese model.
model.summary()

In [None]:
# Train on the training pairs; the test pairs are passed as validation data
# so per-epoch loss/accuracy on them is reported during training.
train_inputs = [tr_pairs[:, 0], tr_pairs[:, 1]]
val_inputs = [te_pairs[:, 0], te_pairs[:, 1]]

# Trailing semicolon keeps the notebook from echoing the return value.
model.fit(train_inputs, tr_y,
          batch_size=128,
          epochs=epochs,
          validation_data=(val_inputs, te_y));

In [None]:
# Predicted distances for every training pair, scored by compute_accuracy.
y_pred = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]])
tr_acc = compute_accuracy(tr_y, y_pred)

# Same for the test pairs; y_pred is rebound to the test predictions.
y_pred = model.predict([te_pairs[:, 0], te_pairs[:, 1]])
te_acc = compute_accuracy(te_y, y_pred)

# Report both accuracies as percentages.
for message, acc in (('* Accuracy on training set: %0.2f%%', tr_acc),
                     ('* Accuracy on test set: %0.2f%%', te_acc)):
    print(message % (100 * acc))

In [None]:
# Retrieve the shared base network (the sub-model named 'L') from the siamese
# model and run the individual test images through it to get their embeddings.
inner_model = model.get_layer('L')
x_test_e = inner_model.predict(x_test)

In [None]:
from sklearn.decomposition import PCA

# Baseline view: PCA on the raw (flattened) pixels of the first 3000 test
# images, coloured by digit label.
# Three components are required here: the axes are 3-D, and with only two
# columns scatter() would draw every point flat on the z=0 plane. This also
# matches the later embedding plots, which use three components.
raw_pixels = x_test[:3000]
e = PCA(n_components=3).fit_transform(raw_pixels.reshape(len(raw_pixels), -1))

fig = plt.figure(figsize=(16, 9))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(*e.T, c=y_test[:3000], cmap='tab10')
ax.set_title('PCA of raw test pixels (first 3000 samples)')
ax.view_init(45, -15)

In [None]:
from sklearn.decomposition import PCA

# Reduce the embeddings of the first 3000 test images to three principal
# components and plot them in 3-D, coloured by digit label.
e = PCA(n_components=3).fit_transform(x_test_e[:3000])

fig = plt.figure(figsize=(16, 9))
ax = fig.add_subplot(1, 1, 1, projection='3d')
ax.scatter(e[:, 0], e[:, 1], e[:, 2], c=y_test[:3000], cmap='tab10')
ax.view_init(elev=45, azim=-15)

In [None]:
from sklearn.manifold import TSNE

# 3-D t-SNE of the embeddings of the first 3000 test images, coloured by
# digit label. t-SNE is stochastic, so random_state is pinned to make the
# figure reproducible across runs.
e = TSNE(n_components=3, random_state=0).fit_transform(x_test_e[:3000])

fig = plt.figure(figsize=(16, 9))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(*e.T, c=y_test[:3000], cmap='tab10')
ax.set_title('t-SNE of learned embeddings (first 3000 test samples)')
ax.view_init(45, -15)