In [66]:
import os
import re
import numbers
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt

from keras import backend as K
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Flatten, Lambda, Activation, MaxPooling2D, GlobalAveragePooling2D, Conv2D
from keras.callbacks import EarlyStopping

INPUT_DIR = '../input'

EMB_SIZE = 8
N_FOLDS = 5
SEED = 32

In [52]:
""" --------------------------------- Triplet loss implementation ----------------------------------- """


def _all_diffs(a, b):
    """ Returns a tensor of all combinations of a - b.
    Args:
        a (2D tensor): A batch of vectors shaped (B1, F).
        b (2D tensor): A batch of vectors shaped (B2, F).
    Returns:
        The matrix of all pairwise differences between all vectors in `a` and in
        `b`, will be of shape (B1, B2).
    Note:
        For convenience, if either `a` or `b` is a `Distribution` object, its
        mean is used.
    """
    return tf.expand_dims(a, axis=1) - tf.expand_dims(b, axis=0)


def _cdist(a, b, metric='euclidean'):
    """Similar to scipy.spatial's _cdist, but symbolic.
    The currently supported metrics can be listed as `_cdist.supported_metrics` and are:
        - 'euclidean', although with a fudge-factor epsilon.
        - 'sqeuclidean', the squared euclidean.
        - 'cityblock', the manhattan or L1 distance.
    Args:
        a (2D tensor): The left-hand side, shaped (B1, F).
        b (2D tensor): The right-hand side, shaped (B2, F).
        metric (string): Which distance metric to use, see notes.
    Returns:
        The matrix of all pairwise distances between all vectors in `a` and in
        `b`, will be of shape (B1, B2).
    Note:
        When a square root is taken (such as in the Euclidean case), a small
        epsilon is added because the gradient of the square-root at zero is
        undefined. Thus, it will never return exact zero in these cases.
    """
    with tf.name_scope("_cdist"):
        diffs = _all_diffs(a, b)
        if metric == 'sqeuclidean':
            return tf.reduce_sum(tf.square(diffs), axis=-1)
        elif metric == 'euclidean':
            return tf.sqrt(tf.reduce_sum(tf.square(diffs), axis=-1) + 1e-12)
        elif metric == 'cityblock':
            return tf.reduce_sum(tf.abs(diffs), axis=-1)
        else:
            raise NotImplementedError(
                'The following metric is not implemented by `_cdist` yet: {}'.format(metric))


_cdist.supported_metrics = [
    'euclidean',
    'sqeuclidean',
    'cityblock',
]


def _get_at_indices(tensor, indices):
    """ Like `tensor[np.arange(len(tensor)), indices]` in numpy. """
    counter = tf.range(tf.shape(indices, out_type=indices.dtype)[0])
    return tf.gather_nd(tensor, tf.stack((counter, indices), -1))


def batch_hard_loss(features, pids, metric='euclidean', margin=0.1):
    """Computes the batch-hard loss from arxiv.org/abs/1703.07737.
    Args:
        dists (2D tensor): A square all-to-all distance matrix as given by _cdist.
        pids (1D tensor): The identities of the entries in `batch`, shape (B,).
            This can be of any type that can be compared, thus also a string.
        margin: The value of the margin if a number, alternatively the string
            'soft' for using the soft-margin formulation, or `None` for not
            using a margin at all.
    Returns:
        A 1D tensor of shape (B,) containing the loss value for each sample.
        :param margin:
        :param features:
        :param pids:
        :param metric:
    """
    with tf.name_scope("batch_hard_loss"):

        dists = _cdist(features, features, metric=metric)

        pids = tf.argmax(pids, axis=1)

        exp_dims0 = tf.expand_dims(pids, axis=0)
        exp_dims1 = tf.expand_dims(pids, axis=1)

        same_identity_mask = tf.equal(exp_dims1, exp_dims0)

        negative_mask = tf.logical_not(same_identity_mask)
        positive_mask = tf.logical_xor(same_identity_mask,
                                       tf.eye(tf.shape(pids)[0], dtype=tf.bool))

        furthest_positive = tf.reduce_max(dists*tf.cast(positive_mask, tf.float32), axis=1)
        # closest_negative = tf.map_fn(lambda x: tf.reduce_min(tf.boolean_mask(x[0], x[1])),
        #                              (dists, negative_mask), tf.float32)
        # Another way of achieving the same, though more hacky:
        closest_negative = tf.reduce_min(dists + 1e5*tf.cast(same_identity_mask, tf.float32), axis=1)

        diff = furthest_positive - closest_negative
        if isinstance(margin, numbers.Real):
            diff = tf.maximum(diff + margin, 0.0)
        elif margin == 'soft':
            diff = tf.nn.softplus(diff)
        elif margin is None:
            pass
        else:
            raise NotImplementedError('The margin {} is not implemented in batch_hard_loss'.format(margin))

    return diff


def triplet_loss(labels, features):
    # https://github.com/tensorflow/tensorflow/issues/20253
    # from tensorflow.contrib.losses import metric_learning
    # return metric_learning.triplet_semihard_loss(K.argmax(labels, axis=1), embeddings, margin=0.2)
    return tf.reduce_mean(batch_hard_loss(features, labels, margin=0.2))

In [53]:
""" ------------------------------------ Data loading -------------------------------------- """

# load dataframes
df_train = pd.read_csv(os.path.join(INPUT_DIR, 'train.csv'))
df_test = pd.read_csv(os.path.join(INPUT_DIR, 'test.csv'))

print(df_train.head())

   label  pixel0  pixel1    ...     pixel781  pixel782  pixel783
0      1       0       0    ...            0         0         0
1      0       0       0    ...            0         0         0
2      1       0       0    ...            0         0         0
3      4       0       0    ...            0         0         0
4      0       0       0    ...            0         0         0

[5 rows x 785 columns]


In [54]:
""" --------------------------------- Data preprocessing ----------------------------------- """

# load pixel values, reshape them to 28x28 pixels and rescale from [0, 255] to [0, 1]
x_train = df_train.iloc[:,1:].values.astype('float32') / 255.
x_test = df_test.values.astype('float32') / 255.

# make images 28x28x1
xc_train = np.reshape(x_train, (len(x_train), 28, 28, 1))
xc_test = np.reshape(x_test, (len(x_test), 28, 28, 1))

# load labels
y_train = df_train.label.values

input_size = output_size = x_train.shape[1]
input_csize = output_size = xc_train.shape[1]

print(input_size, input_csize)

784 28


In [20]:
""" --------------------------------- Dense Autoencoder model ----------------------------------- """

input_img = Input(shape=(784,))
encoded = Dense(256, activation='relu')(input_img)
encoded = Dense(EMB_SIZE, activation='relu')(encoded)
decoded = Dense(256, activation='sigmoid')(encoded)
decoded = Dense(784, activation='sigmoid')(encoded)

# this model maps an input to its reconstruction
autoencoder = Model(input_img, decoded)

autoencoder.compile(optimizer='adagrad',
             loss='binary_crossentropy',
             metrics=['accuracy'])

In [6]:
""" --------------------------------- Dense Autoencoder model training ----------------------------------- """

callbacks=[
    EarlyStopping(monitor='val_loss'),
]

autoencoder.fit(x_train, x_train,
                 epochs=100,
                 batch_size=1025,
                 shuffle=True,
                 validation_data=(x_test, x_test),
                 callbacks=callbacks)

Instructions for updating:
Use tf.cast instead.
Train on 42000 samples, validate on 28000 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100


<keras.callbacks.History at 0x7fba7e72c0b8>

In [None]:
""" --------------------------------- Convolutional Autoencoder model ----------------------------------- """

input_img = Input(shape=(input_csize, input_csize, 1))

x = Conv2D(16, (3, 3), activation='relu', padding='same')(input_img)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
encoded = MaxPooling2D((2, 2), padding='same')(x)

# at this point the representation is (4, 4, 8) i.e. 128-dimensional

x = Conv2D(8, (3, 3), activation='relu', padding='same')(encoded)
x = UpSampling2D((2, 2))(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2D(16, (3, 3), activation='relu')(x)
x = UpSampling2D((2, 2))(x)
decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)

autoencoder = Model(input_img, decoded)
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')

In [None]:
""" --------------------------------- Convolutional Autoencoder model training ----------------------------------- """

callbacks=[
    EarlyStopping(monitor='val_loss'),
]

autoencoder.fit(xc_train, xc_train,
                epochs=50,
                batch_size=128,
                shuffle=True,
                validation_data=(xc_test, xc_test),
                callbacks=callbacks)

In [75]:
""" --------------------------------- Triplet model ----------------------------------- """

input_img = Input(shape=(input_csize, input_csize, 1))

x = Conv2D(16, (3, 3), activation='relu', padding='same')(input_img)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Flatten()(x)
x = Dense(256, activation='relu')(x)
embeddings = Lambda(lambda z: K.l2_normalize(z, axis=1))(x)

triplet_model = Model(input_img, embeddings)
triplet_model.compile(optimizer='adadelta', loss=triplet_loss)

In [76]:
""" --------------------------------- Triplet model training ----------------------------------- """

callbacks=[
    EarlyStopping(monitor='val_loss'),
]

triplet_model.fit(xc_train, y_train,
                epochs=50,
                batch_size=128,
                shuffle=True)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50

KeyboardInterrupt: 