In [1]:
import tensorflow as tf

In [2]:
import numpy as np

In [3]:
from sklearn.model_selection import StratifiedShuffleSplit

In [350]:
from sklearn.datasets import fetch_mldata

In [351]:
# Load the dataset registered under the mldata name 'mnist original'.
# NOTE(review): fetch_mldata was deprecated in scikit-learn 0.20 and removed in 0.22
# (fetch_openml is the documented replacement) — confirm the installed version.
data = fetch_mldata('mnist original')

In [395]:
# Feature matrix and label vector taken from the fetched dataset object.
x = data.data
_y = data.target

In [396]:
# Hold out 5,000 samples with the class proportions preserved.
# n_splits=1: only one split is ever used, so do not compute the default ten and
# throw nine away. random_state makes the partition reproducible across re-runs.
sss = StratifiedShuffleSplit(n_splits=1, test_size=5000, random_state=42)
train_index, test_index = next(sss.split(x, _y))
x_train, valx_test = x[train_index], x[test_index]
y_train, valy_test = _y[train_index], _y[test_index]

In [398]:
# Sanity check: sizes of the first split (remaining training part vs. 5,000 held-out samples).
x_train.shape, y_train.shape, valx_test.shape

((65000, 784), (65000,), (5000, 784))

In [399]:
# Carve a second hold-out set out of the remaining training data.
# Take a single split explicitly instead of looping over every split the
# splitter yields and silently keeping only the last one.
train_index, test_index = next(sss.split(x_train, y_train))
x_train1, x_test = x_train[train_index], x_train[test_index]
y_train1, y_test = y_train[train_index], y_train[test_index]

In [400]:
# Divide every split by 255 (presumably mapping 0-255 pixel intensities to [0, 1]).
# NOTE: this rebinds the same names, so running the cell twice scales twice.
x_train1, valx_test, x_test = (
    pixels / 255 for pixels in (x_train1, valx_test, x_test)
)

In [377]:
# Training hyperparameters and input dimensionality.
batch_size = 500          # image pairs per training step
n_epochs = 100            # passes of len(train) // batch_size steps each
learning_rate = 1e-3
n_features = 28 * 28      # 784 values per flattened image

In [378]:
def generate_batch(images, labels, batch_size):
    """Sample a shuffled batch of image pairs for same/different classification.

    Half of the pairs (rounded down or up at random when ``batch_size`` is odd)
    consist of two distinct samples sharing a label and get target 1; the rest
    consist of two samples with different labels and get target 0. Pairs are
    drawn by rejection sampling with ``np.random``.

    Args:
        images: Sequence/array of samples, indexable by integer position.
        labels: Labels aligned with ``images`` (``labels[i]`` belongs to
            ``images[i]``).
        batch_size: Number of pairs to return.

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape ``(batch_size, 2, ...)`` and
        ``y`` has shape ``(batch_size, 1)`` containing 1 (same label) or
        0 (different labels), both shuffled with the same permutation.

    Raises:
        ValueError: If there are fewer labels than images, or if the requested
            pairs cannot exist (no label occurs twice / only one distinct
            label) -- situations in which the sampling loops would otherwise
            never terminate.
    """
    n_same = batch_size // 2
    n_diff = batch_size - n_same
    # Odd batch size: flip a coin for which kind of pair gets the extra slot.
    if n_same != n_diff and np.random.rand() > 0.5:
        n_same, n_diff = n_diff, n_same

    n_items = len(images)
    if len(labels) < n_items:
        raise ValueError("generate_batch: fewer labels than images")

    # Validate up front: rejection sampling below loops forever if the
    # requested kind of pair does not exist in the data.
    _, class_counts = np.unique(np.asarray(labels)[:n_items], return_counts=True)
    if n_same > 0 and (class_counts.size == 0 or class_counts.max() < 2):
        raise ValueError(
            "generate_batch: need at least two samples sharing a label "
            "to build same-class pairs"
        )
    if n_diff > 0 and class_counts.size < 2:
        raise ValueError(
            "generate_batch: need at least two distinct labels "
            "to build different-class pairs"
        )

    pairs = []
    targets = []
    # Positive pairs: two different samples with the same label.
    while len(pairs) < n_same:
        first, second = np.random.randint(0, n_items, 2)
        if first != second and labels[first] == labels[second]:
            pairs.append(np.array([images[first], images[second]]))
            targets.append([1])
    # Negative pairs: fill the remainder with differently-labelled samples.
    while len(pairs) < batch_size:
        first, second = np.random.randint(0, n_items, 2)
        if labels[first] != labels[second]:
            pairs.append(np.array([images[first], images[second]]))
            targets.append([0])

    # Shuffle so positives and negatives are interleaved.
    order = np.random.permutation(batch_size)
    return np.array(pairs)[order], np.array(targets)[order]

In [412]:
from functools import partial
# Shared layer factory: every dense layer defaults to 100 ELU units with the
# variance-scaling initializer (contrib defaults; He-style per the TF docs).
# Call sites may override any of these via keyword arguments.
he_init = tf.contrib.layers.variance_scaling_initializer()
create_dense = partial(
    tf.layers.dense,
    units=100,
    activation=tf.nn.elu,
    kernel_initializer=he_init,
)

In [414]:
# Clear TensorFlow's default graph before the model is (re)built in the cells below.
tf.reset_default_graph()

In [415]:
with tf.name_scope("Inputs"):
    # Each example is a pair of flattened images stacked along axis 1.
    X = tf.placeholder(tf.float32, shape=[None, 2, n_features])
    # Split the pair into the input of tower A and the input of tower B.
    Xa, Xb = tf.unstack(X, axis=1)
    # One target value per pair.
    y = tf.placeholder(tf.float32, shape=[None, 1])
    # Decision threshold on the sigmoid output; 0.5 unless fed explicitly.
    threshold = tf.placeholder_with_default(
        tf.constant([0.5], dtype=tf.float32),
        shape=[1],
    )

In [416]:
with tf.name_scope("DNN-A"):
    # Tower A: five stacked default dense layers fed by the first image of each pair.
    _tower_a = [Xa]
    for _depth_a in range(5):
        _tower_a.append(create_dense(inputs=_tower_a[-1]))
    hidden1a, hidden2a, hidden3a, hidden4a, hidden5a = _tower_a[1:]

In [417]:
with tf.name_scope("DNN-B"):
    # Tower B: five stacked default dense layers fed by the second image of each pair.
    _tower_b = [Xb]
    for _depth_b in range(5):
        _tower_b.append(create_dense(inputs=_tower_b[-1]))
    hidden1b, hidden2b, hidden3b, hidden4b, hidden5b = _tower_b[1:]

In [418]:
# Join the top layers of both towers side by side (feature axis).
combined_output = tf.concat(values=[hidden5a, hidden5b], axis=1)
# Small 10-unit hidden layer on top of the joined features.
hiddenAB = create_dense(combined_output, units=10)
# Single linear output unit: the logit consumed by the loss and the sigmoid.
concat_layer = create_dense(hiddenAB, units=1, activation=None)

In [419]:
with tf.name_scope("loss"):
    # Per-example sigmoid cross-entropy on the raw logits, averaged over the batch.
    xentropy = tf.nn.sigmoid_cross_entropy_with_logits(
        logits=concat_layer,
        labels=y,
    )
    loss = tf.reduce_mean(xentropy)

In [420]:
with tf.name_scope("optimizer"):
    # Nesterov momentum (0.9) using the learning rate from the config cell.
    optimizer = tf.train.MomentumOptimizer(
        learning_rate=learning_rate,
        momentum=0.9,
        use_nesterov=True,
    )
    training_op = optimizer.minimize(loss)

In [421]:
with tf.name_scope("performance"):
    sig_output = tf.sigmoid(concat_layer)
    # Predict 1.0 whenever the sigmoid output is NOT below the threshold, else 0.0.
    _predicted = tf.cast(tf.logical_not(sig_output < threshold), tf.float32)
    correct = tf.equal(_predicted, y)
    # Fraction of pairs whose thresholded prediction matches the target.
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [423]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        # One epoch = as many freshly sampled pair batches as fit in the training set.
        for iteration in range(len(x_train1) // batch_size):
            x_batch, y_batch = generate_batch(x_train1, y_train1, batch_size)
            sess.run(training_op, feed_dict={X: x_batch, y: y_batch})

        # Report only every 5th epoch.
        if epoch % 5 != 0:
            continue

        # Accuracy on a random pair sample as large as the training set.
        x_batch, y_batch = generate_batch(x_train1, y_train1, len(x_train1))
        acc_train = sess.run(accuracy, feed_dict={X: x_batch, y: y_batch})

        # Accuracy on pairs drawn from the split carved out of the training data.
        x_batch, y_batch = generate_batch(x_test, y_test, len(x_test))
        acc_test = sess.run(accuracy, feed_dict={X: x_batch, y: y_batch})

        # The first hold-out set is scored only every 10th epoch; 'ND' otherwise.
        if epoch % 10 == 0:
            x_batch, y_batch = generate_batch(valx_test, valy_test, len(valx_test))
            acc_val = sess.run(accuracy, feed_dict={X: x_batch, y: y_batch})
        else:
            acc_val = 'ND'

        print('epoch', epoch, 'Training accuracy:', acc_train, 'Testing accuracy:', acc_test, 'Validation accuracy:',acc_val)

    # Persist the trained weights once all epochs are done.
    saver.save(sess, './PretrainingNw.ckpt')

epoch 0 Training accuracy: 0.5164667 Testing accuracy: 0.523 Validation accuracy: 0.5228
epoch 5 Training accuracy: 0.6864833 Testing accuracy: 0.6948 Validation accuracy: ND
epoch 10 Training accuracy: 0.7691 Testing accuracy: 0.7742 Validation accuracy: 0.763
epoch 15 Training accuracy: 0.80403334 Testing accuracy: 0.8028 Validation accuracy: ND
epoch 20 Training accuracy: 0.8354 Testing accuracy: 0.8488 Validation accuracy: 0.833
epoch 25 Training accuracy: 0.85941666 Testing accuracy: 0.8566 Validation accuracy: ND
epoch 30 Training accuracy: 0.86985 Testing accuracy: 0.871 Validation accuracy: 0.8692
epoch 35 Training accuracy: 0.8785 Testing accuracy: 0.8808 Validation accuracy: ND
epoch 40 Training accuracy: 0.88881665 Testing accuracy: 0.8888 Validation accuracy: 0.8848
epoch 45 Training accuracy: 0.89685 Testing accuracy: 0.8978 Validation accuracy: ND
epoch 50 Training accuracy: 0.9033667 Testing accuracy: 0.9052 Validation accuracy: 0.8968
epoch 55 Training accuracy: 0.90908