In [1]:
import tensorflow as tf

In [2]:
import numpy as np

In [3]:
from sklearn.model_selection import StratifiedShuffleSplit

In [4]:
from sklearn.datasets import fetch_mldata

In [5]:
# Fetch the MNIST dataset by its mldata.org name.
# NOTE(review): fetch_mldata was deprecated in scikit-learn 0.20 and removed in
# 0.22; on newer versions fetch_openml('mnist_784', version=1) is the
# replacement -- confirm the installed scikit-learn version before re-running.
data = fetch_mldata('mnist original')

In [6]:
# Separate the fetched bunch into the feature matrix (x) and the label vector (_y).
x, _y = data.data, data.target

In [7]:
# Hold out a stratified 5,000-sample subset (valx_test / valy_test); the rest
# stays in x_train / y_train.
# Only one split is ever used, so request exactly one (the default n_splits=10
# would compute ten and keep just the last) and seed it so a fresh
# "Restart & Run All" reproduces the same partition.
sss = StratifiedShuffleSplit(n_splits=1, test_size=5000, random_state=42)
train_index, test_index = next(sss.split(x, _y))
x_train, valx_test = x[train_index], x[test_index]
y_train, valy_test = _y[train_index], _y[test_index]

In [8]:
# Sanity check: sizes of the remaining training data and of the hold-out subset.
x_train.shape, y_train.shape, valx_test.shape

((65000, 784), (65000,), (5000, 784))

In [9]:
# Carve a second stratified subset (x_test / y_test) out of the remaining
# training data, reusing the splitter configured above; what is left becomes
# x_train1 / y_train1.
# A single split is all that is needed, so take the first one instead of
# iterating over every split the splitter can produce and keeping the last.
train_index, test_index = next(sss.split(x_train, y_train))
x_train1, x_test = x_train[train_index], x_train[test_index]
y_train1, y_test = y_train[train_index], y_train[test_index]

In [10]:
# Rescale the three image sets by 1/255 (assumes raw pixel values lie in
# [0, 255] -- TODO confirm against the dataset).
# Caution: the arrays are rebound to their scaled versions, so running this
# cell a second time divides by 255 again.
x_train1, valx_test, x_test = (
    pixels / 255 for pixels in (x_train1, valx_test, x_test)
)

In [11]:
# Hyperparameters shared by the cells below.
n_features = 784        # length of one flattened image vector
learning_rate = 0.001   # step size handed to the momentum optimizers
n_epochs = 100          # pre-training epochs
batch_size = 500        # image pairs per pre-training step

In [12]:
def generate_batch(images, labels, batch_size):
    """Sample a shuffled batch of image pairs for the same/different-label task.

    Half of the pairs (rounded down or up at random when ``batch_size`` is odd)
    are two distinct images that share a label (target 1); the remaining pairs
    are two images with different labels (target 0). Pairs are drawn by
    rejection sampling with ``np.random``, so seed NumPy for reproducibility.

    Args:
        images: indexable collection of images; ``images[i]`` is one sample.
        labels: labels aligned with ``images`` (``labels[i]`` belongs to
            ``images[i]``).
        batch_size: total number of pairs to return.

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape ``(batch_size, 2, ...)`` (the
        two images of each pair stacked on axis 1) and ``y`` has shape
        ``(batch_size, 1)`` holding 1 for same-label pairs and 0 otherwise.

    Raises:
        ValueError: if the batch needs a same-label pair but no label occurs
            twice, or needs a different-label pair but fewer than two distinct
            labels exist. Without this check the sampling loops never end.
    """
    n_same = batch_size // 2
    n_diff = batch_size - n_same
    # For odd batch sizes, let chance decide which kind gets the extra pair.
    if n_same != n_diff and np.random.rand() > 0.5:
        n_same, n_diff = n_diff, n_same

    # Fail fast when rejection sampling could never succeed.
    _, label_counts = np.unique(np.asarray(labels), return_counts=True)
    if n_same > 0 and not (label_counts > 1).any():
        raise ValueError(
            "generate_batch needs at least one label that occurs twice "
            "to build same-label pairs"
        )
    if n_diff > 0 and len(label_counts) < 2:
        raise ValueError(
            "generate_batch needs at least two distinct labels "
            "to build different-label pairs"
        )

    pairs = []
    targets = []
    # Positive pairs: two *different* images carrying the same label.
    while len(pairs) < n_same:
        first, second = np.random.randint(0, len(images), 2)
        if first != second and labels[first] == labels[second]:
            pairs.append(np.array([images[first], images[second]]))
            targets.append([1])
    # Negative pairs: fill the rest of the batch with differently labelled images.
    while len(pairs) < batch_size:
        first, second = np.random.randint(0, len(images), 2)
        if labels[first] != labels[second]:
            pairs.append(np.array([images[first], images[second]]))
            targets.append([0])

    # Shuffle so positives and negatives are interleaved.
    order = np.random.permutation(batch_size)
    return np.array(pairs)[order], np.array(targets)[order]

In [13]:
from functools import partial
# Weight initializer shared by every dense layer; variance_scaling_initializer
# is used with its default arguments (presumably He initialization, as the
# name suggests -- confirm against the tf.contrib documentation).
he_init = tf.contrib.layers.variance_scaling_initializer()
# Layer factory: a dense layer with 100 units and ELU activation unless the
# caller overrides `units` / `activation` by keyword.
create_dense = partial(tf.layers.dense, units = 100, kernel_initializer = he_init,activation = tf.nn.elu)

In [14]:
# Clear the default graph before the pre-training network is built in the cells below.
tf.reset_default_graph()

In [15]:
with tf.name_scope("Inputs"):
    # A batch of image pairs: axis 1 indexes the two images of each pair.
    X = tf.placeholder(shape = [None,2, n_features], dtype = tf.float32)
    # Split the pairs into the first-image and second-image batches.
    Xa, Xb = tf.unstack(X, axis = 1)
    # Pair target as a float column, fed to the sigmoid cross-entropy loss.
    y = tf.placeholder(shape = [None,1], dtype = tf.float32)
    # Decision threshold applied to the sigmoid output; defaults to 0.5 when not fed.
    threshold = tf.placeholder_with_default(input = tf.constant([0.5], dtype=tf.float32), shape = [1])

In [16]:
# Tower A: five stacked dense layers applied to the first image of each pair.
# Its variables live under the "DNN-A" scope, which the transfer-learning
# section later uses to select the weights to restore.
with tf.variable_scope("DNN-A"):
    hidden1a = create_dense(inputs = Xa)
    hidden2a = create_dense(inputs = hidden1a)
    hidden3a = create_dense(inputs = hidden2a)
    hidden4a = create_dense(inputs = hidden3a)
    hidden5a = create_dense(inputs = hidden4a)

In [17]:
# Tower B: same five-layer stack applied to the second image of each pair.
# It is built under its own "DNN-B" variable scope, i.e. with its own set of
# variables rather than reusing those of DNN-A.
with tf.variable_scope("DNN-B"):
    hidden1b = create_dense(inputs = Xb)
    hidden2b = create_dense(inputs = hidden1b)
    hidden3b = create_dense(inputs = hidden2b)
    hidden4b = create_dense(inputs = hidden3b)
    hidden5b = create_dense(inputs = hidden4b)

In [18]:
# Join the two towers' final activations side by side (feature axis).
combined_output = tf.concat([hidden5a, hidden5b], axis=1)
# Small 10-unit hidden layer on top of the joined features.
hiddenAB = create_dense(inputs = combined_output, units = 10)
# Despite its name, `concat_layer` is the single-unit output with no
# activation, i.e. the logit consumed by the loss and the sigmoid below.
concat_layer = create_dense(inputs = hiddenAB, units = 1, activation = None)

In [19]:
with tf.name_scope("loss"):
    # Per-pair binary cross-entropy computed directly from the logit.
    xentropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=y,logits=concat_layer)
    # Scalar training objective: mean over the batch.
    loss = tf.reduce_mean(xentropy)

In [20]:
with tf.name_scope("optimizer"):
    # Nesterov momentum (0.9) with the learning rate from the hyperparameter cell.
    optimizer = tf.train.MomentumOptimizer(momentum = 0.9,learning_rate=learning_rate, use_nesterov=True)
    # Op that performs one optimization step on `loss`.
    training_op = optimizer.minimize(loss)

In [21]:
with tf.name_scope("performance"):
    # Probability that the two images of a pair share a label.
    sig_output = tf.sigmoid(concat_layer)
    # Predict 1.0 whenever the probability is not below the threshold.
    below_threshold = tf.less(sig_output, threshold)
    predicted = tf.cast(tf.logical_not(below_threshold), dtype = tf.float32)
    correct = tf.equal(predicted, y)
    # Fraction of pairs whose prediction matches the target.
    accuracy = tf.reduce_mean(tf.cast(correct, dtype = tf.float32))

In [22]:
# Pre-train the pair classifier, report accuracy periodically, and checkpoint
# the variables once training finishes.
init = tf.global_variables_initializer()
saver = tf.train.Saver()
steps_per_epoch = len(x_train1) // batch_size

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        # One epoch = steps_per_epoch freshly sampled pair batches.
        for iteration in range(steps_per_epoch):
            x_batch, y_batch = generate_batch(x_train1, y_train1, batch_size)
            sess.run(training_op, feed_dict={X: x_batch, y: y_batch})

        if epoch % 5 != 0:
            continue

        # Every 5th epoch: score on newly sampled pairs, as many as each set has images.
        x_batch, y_batch = generate_batch(x_train1, y_train1, len(x_train1))
        acc_train = sess.run(accuracy, feed_dict={X: x_batch, y: y_batch})
        x_batch, y_batch = generate_batch(x_test, y_test, len(x_test))
        acc_test = sess.run(accuracy, feed_dict={X: x_batch, y: y_batch})

        # The hold-out set is only scored every 10th epoch; 'ND' marks the skipped reports.
        if epoch % 10 == 0:
            x_batch, y_batch = generate_batch(valx_test, valy_test, len(valx_test))
            acc_val = sess.run(accuracy, feed_dict={X: x_batch, y: y_batch})
        else:
            acc_val = 'ND'
        print('epoch', epoch, 'Training accuracy:', acc_train, 'Testing accuracy:', acc_test, 'Validation accuracy:', acc_val)

    saver.save(sess, './PretrainingNw.ckpt')

epoch 0 Training accuracy: 0.5165833 Testing accuracy: 0.5144 Validation accuracy: 0.5052
epoch 5 Training accuracy: 0.6698 Testing accuracy: 0.6622 Validation accuracy: ND
epoch 10 Training accuracy: 0.77468336 Testing accuracy: 0.7848 Validation accuracy: 0.7874
epoch 15 Training accuracy: 0.81556666 Testing accuracy: 0.8228 Validation accuracy: ND
epoch 20 Training accuracy: 0.8413 Testing accuracy: 0.837 Validation accuracy: 0.8378
epoch 25 Training accuracy: 0.8484333 Testing accuracy: 0.8446 Validation accuracy: ND
epoch 30 Training accuracy: 0.86293334 Testing accuracy: 0.8606 Validation accuracy: 0.8642
epoch 35 Training accuracy: 0.86755 Testing accuracy: 0.8846 Validation accuracy: ND
epoch 40 Training accuracy: 0.87586665 Testing accuracy: 0.8788 Validation accuracy: 0.8728
epoch 45 Training accuracy: 0.8802 Testing accuracy: 0.8802 Validation accuracy: ND
epoch 50 Training accuracy: 0.8879333 Testing accuracy: 0.8936 Validation accuracy: 0.89
epoch 55 Training accuracy: 0.8

# Transfer Learning

In [23]:
#using DNN-A
tf.reset_default_graph()
X = tf.placeholder(shape=[None, n_features], dtype = tf.float32)
y = tf.placeholder(shape=[None], dtype = tf.int32)

with tf.variable_scope("DNN-A"):
    hidden1 = create_dense(inputs = X)
    hidden2 = create_dense(inputs = hidden1)
    hidden3 = create_dense(inputs = hidden2)
    hidden4 = create_dense(inputs = hidden3)
    hidden5 = create_dense(inputs = hidden4)
    
with tf.name_scope("SoftLayer"):
    freezed = tf.stop_gradient(hidden5)
    logits = tf.layers.dense(inputs= freezed, units=10, kernel_initializer=he_init, activation = None)
    
with tf.name_scope("TL/loss"):
    TLxentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
    loss = tf.reduce_mean(TLxentropy)
    
with tf.name_scope("TL/training"):
    TLoptimizer = tf.train.MomentumOptimizer(momentum=0.9, learning_rate= learning_rate,use_nesterov=True)
    TLtraining_op = TLoptimizer.minimize(loss)
    
with tf.name_scope("TL/performance"):
    TLcorrect = tf.nn.in_top_k(k=1,predictions=logits, targets=y)
    TLaccuracy = tf.reduce_mean(tf.cast(TLcorrect,tf.float32))

In [36]:
# Settings for the transfer-learning run.
TLbatch_size, TLepochs = 50, 150
# NOTE(review): no cell shown here reads TLlearning_rate; the transfer-learning
# optimizer is constructed with `learning_rate` instead.
TLlearning_rate = 1e-3

In [33]:
def next_batch(batch_size=None, x_train=None, y_train=None):
    """Yield consecutive (x, y) mini-batches covering every sample once.

    The data is walked front to back in steps of ``batch_size``. The final
    batch is always yielded, even when it is shorter than ``batch_size``
    (the previous implementation silently dropped it, and yielded nothing at
    all when the data held exactly one batch).

    Args:
        batch_size: samples per batch; defaults to the notebook-level
            ``TLbatch_size``.
        x_train: sliceable features; defaults to the notebook-level ``x_test``.
        y_train: sliceable labels aligned with ``x_train``; defaults to the
            notebook-level ``y_test``.

    Yields:
        Tuples ``(x_train[start:stop], y_train[start:stop]``).

    Raises:
        ValueError: if ``batch_size`` is not positive (raised when iteration
            starts, because this is a generator).
    """
    # Defaults are looked up at call time rather than frozen at definition
    # time, so re-running the split or hyperparameter cells takes effect
    # without having to re-run this cell as well.
    if batch_size is None:
        batch_size = TLbatch_size
    if x_train is None:
        x_train = x_test
    if y_train is None:
        y_train = y_test
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    for start in range(0, len(x_train), batch_size):
        stop = start + batch_size
        yield x_train[start:stop], y_train[start:stop]
    

In [37]:
# Train the new output layer on top of the restored DNN-A tower.
# Only the variables under the "DNN-A" scope are restored from the
# pre-training checkpoint; they are looked up by name.
var_listt = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope = "DNN-A")
init = tf.global_variables_initializer()
restore = tf.train.Saver(var_list = {var.op.name : var for var in var_listt})
# Separate saver over all variables for the fine-tuned model.
TLsaver = tf.train.Saver()

with tf.Session() as sess:
    # Initialize everything first, then overwrite DNN-A with the checkpoint values.
    init.run()
    restore.restore(sess, './PretrainingNw.ckpt')
    for epoch in range(TLepochs):
        for x_batch, y_batch in next_batch():
            sess.run(TLtraining_op, feed_dict = {X:x_batch, y:y_batch})
        # Accuracy is only reported every 5th epoch, so only evaluate it then
        # instead of running both evaluations on every epoch.
        if epoch % 5 == 0:
            # "Training" accuracy is measured on x_test (the set next_batch
            # iterates over by default), "Testing" accuracy on the hold-out set.
            acc_train = TLaccuracy.eval(feed_dict = {X:x_test, y: y_test})
            acc_test = TLaccuracy.eval(feed_dict = {X: valx_test, y: valy_test})
            print('epoch', epoch, 'Training accuracy:', acc_train, 'Testing accuracy:', acc_test)
    final_path = TLsaver.save(sess, './PretrainingNwTL.ckpt')

INFO:tensorflow:Restoring parameters from ./PretrainingNw.ckpt
epoch 0 Training accuracy: 0.7894 Testing accuracy: 0.7904
epoch 5 Training accuracy: 0.8968 Testing accuracy: 0.892
epoch 10 Training accuracy: 0.9064 Testing accuracy: 0.8984
epoch 15 Training accuracy: 0.9114 Testing accuracy: 0.903
epoch 20 Training accuracy: 0.9148 Testing accuracy: 0.9056
epoch 25 Training accuracy: 0.9184 Testing accuracy: 0.9076
epoch 30 Training accuracy: 0.92 Testing accuracy: 0.909
epoch 35 Training accuracy: 0.9222 Testing accuracy: 0.909
epoch 40 Training accuracy: 0.9238 Testing accuracy: 0.9104
epoch 45 Training accuracy: 0.9248 Testing accuracy: 0.9112
epoch 50 Training accuracy: 0.9252 Testing accuracy: 0.9122
epoch 55 Training accuracy: 0.9254 Testing accuracy: 0.913
epoch 60 Training accuracy: 0.9274 Testing accuracy: 0.9126
epoch 65 Training accuracy: 0.9276 Testing accuracy: 0.9124
epoch 70 Training accuracy: 0.9282 Testing accuracy: 0.9138
epoch 75 Training accuracy: 0.9294 Testing acc