In [1]:
import keras 
from keras.datasets import mnist 
from keras.layers import Dense, Input, concatenate,subtract, Lambda, Dropout
from keras.losses import binary_crossentropy
from keras.optimizers import SGD
from keras.models import Model
import numpy as np 
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn import linear_model

Using TensorFlow backend.


In [2]:
class Utils(object):
    @classmethod
    def create_pairs(cls, classes, count):
        import numpy as np
        import pandas as pd

        inx = np.random.randint(low=0, high=len(classes), size=(count, 2))
        df = pd.DataFrame(data=inx, columns=["i1", "i2"])
        df["c1"] = df["i1"].map(lambda x:classes[x])
        df["c2"] = df["i2"].map(lambda x:classes[x])
        df["pos"] = df["c1"] == df["c2"]
        return df.drop_duplicates()

    @classmethod
    def image_to_patches(cls, path, shape, patches):
        from sklearn.feature_extraction import image
        from skimage.io import imread
        img = imread(path)
        return image.extract_patches_2d(img, shape, patches)

    @classmethod
    def select_random_class_indecies(cls, wanted_classes, classes_indecies, count=1, replace=False):
        pass

    @classmethod
    def get_other_classes_random(cls, classes_list, unique_classes):
        import random

        unique = set(unique_classes)
        return [random.choice(tuple(unique - set([c]))) for c in classes_list]


    @classmethod
    def create_siamese_pairs_1(cls, original_class_list, pos_count, neg_count,
                               drop_dups=True, shuffle=True):
        import numpy as np

        if not isinstance(original_class_list, np.ndarray):
            original_class_list = np.array(original_class_list)

        left = np.random.choice(original_class_list, pos_count+neg_count)

        #unique_classes = np.unique(original_class_list)
        pos_classes = list(left[:pos_count])
        neg_classes = cls.get_other_classes_random(left[pos_count:], original_class_list)
        right = pos_classes + neg_classes

        left = cls.sample_indecies_for_classes(original_class_list, left)
        right = cls.sample_indecies_for_classes(original_class_list, right)

        tags = [1] * pos_count + [0] * neg_count
        data = np.array([left, right, tags]).transpose()

        if drop_dups:
            data = np.unique(data, axis=0)

        if shuffle:
            i = np.arange(len(data))
            np.random.shuffle(i)
            data = data[i]

        return data


    @classmethod
    def sample_indecies_for_classes(cls, original_classes_list, classes_to_sample):
        import numpy as np
        import random

        unique_classes = np.unique(original_classes_list)

        if not isinstance(original_classes_list, np.ndarray):
            original_classes_list = np.array(original_classes_list)

        c2i = {c: np.where(original_classes_list == c)[0].astype(int).tolist()
               for c in unique_classes}

        arr = np.zeros(shape=len(classes_to_sample))
        for i, c in enumerate(classes_to_sample):
            arr[i] = random.choice(c2i[c])

        return arr.astype(int).tolist()

In [3]:
(train_x, train_y), (test_x, test_y) = mnist.load_data()

In [4]:
train_x = (train_x / 255.0).reshape(-1, 28*28) 
test_x = (test_x / 255.0).reshape(-1, 28*28)

In [5]:
inp1 = Input(shape=(28*28,))
inp2 = Input(shape=(28*28,))

l1 = Dense(100, activation="relu")
a1 = l1(inp1)
a2 = l1(inp2)

l2 = Dense(50, activation="relu")
b1 = l2(a1)
b2 = l2(a2)

output = Lambda(lambda inputs: np.absolute(inputs[0]-inputs[1]), output_shape=(50,))([b1, b2])
output = Dropout(0.3)(output)

output1 = Dense(1)(output)

model = Model([inp1, inp2], [output1])
model.compile(loss=binary_crossentropy, optimizer=SGD(lr = 0.0001)) 

#this model is not learning, so don't need to compile it.
encoder = Model([inp1, inp2], [output])

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [6]:
uc = np.unique(train_y)
data = Utils.create_siamese_pairs_1(train_y, 300000, 300000)

In [7]:
test_size = int(len(data)*0.1)
ind = np.arange(len(data))
np.random.shuffle(ind)
data, test = data[ind[:-test_size]], data[ind[-test_size:]]
model.fit([train_x[data[:,0]], train_x[data[:,1]]], data[:,2], epochs=5, validation_split=0.1, shuffle=True)

Instructions for updating:
Use tf.cast instead.
Train on 485894 samples, validate on 53989 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f2ead2cc0f0>

In [8]:
#creating the training set for the classifiers
train_x_siamese = encoder.predict([train_x[data[:,0]], train_x[data[:,1]]])
train_x_euc = np.absolute(train_x[data[:,0]] - train_x[data[:,1]])

In [9]:
#creating the test set for the classifiers
test_x = [train_x[test[:,0]], train_x[test[:,1]]]
test_y = test[:,2]
test_x_siamese = encoder.predict([test_x[0], test_x[1]])
test_x_euc = np.absolute(test_x[0] - test_x[1])

In [10]:
#initializing the classifiers, generally we can use SVM, as it should be stronger, 
#but its so painfully slow on large datasets with a lot of dimensions
clf1 = linear_model.SGDClassifier()
clf2 = linear_model.SGDClassifier()

In [11]:
clf1.fit(train_x_siamese, data[:,2])
clf2.fit(train_x_euc, data[:,2])

SGDClassifier(alpha=0.0001, average=False, class_weight=None,
              early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
              l1_ratio=0.15, learning_rate='optimal', loss='hinge',
              max_iter=1000, n_iter_no_change=5, n_jobs=None, penalty='l2',
              power_t=0.5, random_state=None, shuffle=True, tol=0.001,
              validation_fraction=0.1, verbose=0, warm_start=False)

In [12]:
t1 = clf1.predict(test_x_siamese)
t2 = clf2.predict(test_x_euc)

In [13]:
print(accuracy_score(test_y, t1), accuracy_score(test_y, t2))

0.8015203547494415 0.7409395525622645


# we can see that the siamese network gives us better accuracy, with a faster supervised learning task, this means that the spatial representation is better then the original one in terms of classification.