In [None]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import time
import os
import tflearn
from utils import *
# Expose only the CUDA device with index 1 to this process; this has to be set
# before TensorFlow creates a session.
os.environ["CUDA_VISIBLE_DEVICES"]='1'
# MNIST dataset read from the "MNIST_data" directory with one-hot labels.
mnist_data = input_data.read_data_sets("MNIST_data", one_hot=True)

In [None]:
# Graph inputs. `inputs` takes 28x28 single-channel images and `targets` takes
# 3-column label rows. `input_shortcut` lets a caller inject 2-D points in
# place of the learned 2-D features, and `train_flag` picks between the two
# (see the tf.cond at the end of this cell).
inputs = tf.placeholder(tf.float32, [None, 28, 28, 1], name='inputs')
targets = tf.placeholder(tf.float32, [None, 3], name='targets')
input_shortcut = tf.placeholder(tf.float32, [None, 2], name='input_shortcut')
train_flag = tf.placeholder(tf.bool, name='training')


# Feature extractor: a 3x3 convolution with 64 filters, then a stack of
# tflearn residual bottleneck blocks.
net = tflearn.conv_2d(inputs, 64, 3, activation='relu', bias=False)
# Residual blocks
net = tflearn.residual_bottleneck(net, 3, 16, 64)
net = tflearn.residual_bottleneck(net, 1, 32, 128, downsample=True)
net = tflearn.residual_bottleneck(net, 2, 32, 128)
net = tflearn.residual_bottleneck(net, 1, 64, 256, downsample=True)
net = tflearn.residual_bottleneck(net, 2, 64, 256)
net = tflearn.batch_normalization(net)
net = tflearn.activation(net, 'relu')
net = tflearn.global_avg_pool(net)

# Two fully connected layers reduce the pooled features to 200 and then to 2
# units; the 2-unit ReLU output is the "transformed" feature space used by the
# plotting cells further down.
net = tflearn.fully_connected(net, 200, activation='relu')
feature_transform = tflearn.fully_connected(net, 2, activation='relu')
# Classifier head. When `train_flag` is True it consumes the learned features,
# otherwise the externally fed `input_shortcut` points.
# NOTE(review): `dense_custom` is not defined in the visible cells (presumably
# it comes from `utils`); the arguments 2 and 3 look like its input/output
# widths -- confirm.
output = dense_custom(tf.cond(train_flag, lambda: feature_transform,
                              lambda: input_shortcut),
                      2, 3, activation=None)

# Accuracy: softmax over the logits, arg-max along axis 1 for both prediction
# and target, then the mean of the element-wise equality cast to float32.
pred_y = tf.nn.softmax(output)
pred_y_true = tf.argmax(pred_y, 1)
y_true = tf.argmax(targets, 1)
correct_prediction = tf.equal(pred_y_true, y_true)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Loss: mean softmax cross-entropy between the logits `output` and `targets`.
cost = tf.reduce_mean((tf.nn.softmax_cross_entropy_with_logits(logits=output, labels=targets)))

# Three alternative training ops for the same cost (Adam, momentum, plain
# gradient descent). No var_list is passed, so each one uses the optimizer's
# default variable set.
step_adam = tf.train.AdamOptimizer(1e-4).minimize(cost)
step_momentum = tf.train.MomentumOptimizer(0.001, 0.9).minimize(cost)
step_gd = tf.train.GradientDescentOptimizer(0.0001).minimize(cost)

# Split the trainable variables into two groups by name.
# NOTE(review): the 'Variable' substring presumably identifies the variables
# created by `dense_custom` with TensorFlow's default variable name, i.e. the
# last layer -- confirm against `dense_custom`.
all_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
# Both groups are materialised as lists. On Python 3 `filter(...)` returns a
# one-shot iterator, which is not a usable `var_list`: `last_layer_variables`
# is handed to two different optimizers below, so the second one would see an
# already exhausted iterator.
last_layer_variables = [v for v in all_variables if 'Variable' in v.name]
previous_layer_variables = [v for v in all_variables if 'Variable' not in v.name]

# Plain gradient descent restricted to everything except the last layer.
step_sgd_previous_layer = tf.train.GradientDescentOptimizer(0.0001).minimize(cost,
                        var_list = previous_layer_variables)

# Last-layer-only training ops: plain gradient descent and momentum.
optimizer_sgd_last_layer = tf.train.GradientDescentOptimizer(1e-4)
step_sgd_last_layer = optimizer_sgd_last_layer.minimize(cost,
                        var_list = last_layer_variables)
optimizer_m_last_layer = tf.train.MomentumOptimizer(1e-4, 0.9)
step_m_last_layer = optimizer_m_last_layer.minimize(cost,
                        var_list = last_layer_variables)


# optimizer = tf.train.MomentumOptimizer(0.0001, 0.9).minimize(cost)
# The saver is restricted to the variables in the TRAINABLE_VARIABLES collection.
saver = tf.train.Saver(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))

# Session options: no device-placement logging, soft placement allowed, and
# GPU memory allocated on demand rather than all up front.
config = tf.ConfigProto(log_device_placement=False,
                        allow_soft_placement=True)
config.gpu_options.allow_growth = True

# Open the session and initialise every global variable of the graph.
session = tf.Session(config=config)
session.run(tf.global_variables_initializer())

In [None]:
def get_3_class(imgs, labs):
    """Keep only the samples whose label decodes to class 0, 1 or 2.

    The class of each row is taken as the arg-max of `labs` along axis 1.
    The selected rows come back grouped by class (all 0s, then all 1s, then
    all 2s), and their labels are re-encoded with `to_categorical(..., 3)`.

    Args:
        imgs: array of samples, indexed along its first axis.
        labs: 2-D array of label scores, one row per sample.

    Returns:
        (images, label): the selected rows of `imgs` and the matching labels
        as returned by `to_categorical`.
    """
    class_ids = np.argmax(labs, 1)
    # Row positions gathered class by class, which fixes the output ordering.
    keep = np.concatenate([np.where(class_ids == k)[0] for k in (0, 1, 2)])
    images = imgs[keep]
    label = to_categorical(class_ids[keep], 3)
    return images, label

In [None]:
batch_size = 4096
def optmizer(optimizer_step, epochs):
    """Train on the MNIST digits 0/1/2 using the given training op.

    Each epoch draws `num_examples // batch_size` batches from the MNIST
    training set, keeps only the samples of classes 0-2 (`get_3_class`) and
    runs `optimizer_step` on them. After every epoch it prints the accuracy on
    the last batch, the mean batch cost and the elapsed time. A checkpoint is
    written every 100 epochs, starting with epoch 0.

    Args:
        optimizer_step: training op to run (e.g. `step_momentum`).
        epochs: number of passes over the training set.

    Raises:
        ValueError: if `batch_size` is larger than the training set, so that
            an epoch would contain no batch at all.
    """
    batches_per_epoch = mnist_data.train.num_examples // batch_size
    if batches_per_epoch == 0:
        # Without this guard the epoch summary below would hit an undefined
        # `dict_input` and an empty loss list.
        raise ValueError(
            "batch_size ({}) exceeds the number of training examples ({})".format(
                batch_size, mnist_data.train.num_examples))

    for i in range(epochs):
        epoch_loss = []
        start_epoch = time.time()
        for step in range(batches_per_epoch):
            batch = mnist_data.train.next_batch(batch_size)
            imgs = batch[0].reshape((-1, 28, 28, 1))
            labs = batch[1]
            imgs, labs = get_3_class(imgs, labs)

            # The shortcut input is ignored while train_flag is True, but it
            # still has to be fed; size it to the filtered batch rather than
            # to the unfiltered `batch_size`.
            dict_input = {inputs: imgs, targets: labs, train_flag: True,
                          input_shortcut: np.zeros([len(imgs), 2])}

            c, _ = session.run([cost, optimizer_step], feed_dict=dict_input)
            epoch_loss.append(c)

        # Accuracy is measured on the last batch of the epoch only.
        train_accuracy = session.run(accuracy, feed_dict=dict_input)
        print("Epoch: {}/{}".format(i+1, epochs), "| Training accuracy: ", train_accuracy,
              "| Cost: {}".format(np.mean(epoch_loss)), " | Time for epoch: {:.2f}s".format(time.time() - start_epoch))
        if i%100==0:
            saver.save(session,'../model/mnist_resnet_3_class_mom_{}.ckpt'.format(i))

In [None]:
# Train with the momentum optimizer for 10000 epochs.
optmizer(optimizer_step=step_momentum, epochs=10000)

In [None]:
def plot_temp_all_3(feature, label, feature_t, label_t, name=None):
    """Scatter two labelled 2-D point sets in one figure, one colour per class.

    Points of `feature` with label 0/1/2 are drawn in red/blue/cyan, points of
    `feature_t` with label 0/1/2 in green/black/yellow, in that drawing order.

    Args:
        feature: 2-D array; columns 0 and 1 are used as x and y.
        label: per-row class ids for `feature`, compared element-wise to 0, 1, 2.
        feature_t: second 2-D array of points, same layout as `feature`.
        label_t: per-row class ids for `feature_t`.
        name: if None the figure is shown, otherwise it is saved to this path.

    NOTE(review): `pylab` is not imported in the visible cells; it is
    presumably brought in by `from utils import *` -- confirm.
    """
    pylab.figure()
    # (points, class ids, marker style for class 0 / 1 / 2)
    layers = (
        (feature, label, ('r.', 'b.', 'c.')),
        (feature_t, label_t, ('g.', 'k.', 'y.')),
    )
    for points, class_ids, styles in layers:
        for class_id, style in enumerate(styles):
            subset = points[class_ids == class_id]
            pylab.plot(subset[:, 0], subset[:, 1], style)
    pylab.xticks(fontsize=17)
    pylab.yticks(fontsize=17)
    # Identity test is the idiomatic (and unambiguous) check against None.
    if name is None:
        pylab.show()
    else:
        pylab.savefig(name)


def random_points_3(start_x, end_x, start_y, end_y, size, random_state):
    """Draw `size` uniform random 2-D points together with random 3-class labels.

    The global NumPy random generator is seeded with `random_state`, so the
    result is reproducible and the global random state is changed as a side
    effect.

    Args:
        start_x, end_x: bounds passed to the uniform draw of the first coordinate.
        start_y, end_y: bounds passed to the uniform draw of the second coordinate.
        size: number of points to draw.
        random_state: seed handed to `np.random.seed`.

    Returns:
        (feature, label): the points stacked as rows (x, y), and one label per
        point drawn from {0, 1, 2}.
    """
    np.random.seed(random_state)
    # The order of the three draws determines the values; do not reorder.
    xs = np.random.uniform(start_x, end_x, size)
    ys = np.random.uniform(start_y, end_y, size)
    points = np.vstack((xs, ys)).T
    classes = np.random.choice(3, size)
    return points, classes

def get_svm(feature, label):
    """Fit a one-vs-rest linear SVM on the given samples and return it.

    Args:
        feature: training samples passed to `LinearSVC.fit`.
        label: class label of each sample.

    Returns:
        The fitted `LinearSVC` classifier.
    """
    from sklearn.svm import LinearSVC
    # `fit` returns the estimator itself, so the fitted model is returned directly.
    return LinearSVC(multi_class='ovr').fit(feature, label)

In [None]:
# def get_transform_feature():
# Project `feature` through the network into the 2-D `feature_transform` space.
# NOTE(review): `feature`, `label_hot` and `label` are not defined in the
# visible cells -- presumably they come from `utils` or from a cell that is not
# shown; confirm before running on a fresh kernel.
feature_t = np.array(session.run(
    feature_transform,
    feed_dict={inputs: feature,
               targets: label_hot,
               train_flag: True,
               input_shortcut: np.zeros([100, 2])}))
label_t = label


In [None]:
# Plot the transformed features with their labels.
# NOTE(review): `plot_blobs_3` is not defined in the visible cells --
# presumably it is provided by `from utils import *`; confirm.
plot_blobs_3(feature_t, label_t)

# Original space

In [None]:
# Sample 30000 random 2-D points in [-20, 100] x [-20, 100] with random labels.
feature_random, label_random = random_points_3(-20,
    100,-20,100, 30000,
    random_state=100)
label_random = to_categorical(label_random, 3)
# NOTE(review): `feature_random` has two columns, while the `inputs`
# placeholder is declared as [None, 28, 28, 1]; feeding it here looks
# incompatible with that shape. With train_flag=True the prediction is taken
# from the image branch, so 2-D points cannot stand in for images -- this cell
# appears to be carried over from a 2-D-input experiment; confirm the intent.
pre_label_nn= session.run(
    pred_y_true,
    feed_dict={inputs:feature_random, 
               targets:label_random,
               train_flag: True, 
               input_shortcut: feature_random})

# Linear SVM baseline fitted on `feature`/`label`, evaluated on the same points.
# NOTE(review): `feature` and `label` are not defined in the visible cells -- confirm.
clf = get_svm(feature, label)
pre_label_svm = clf.predict(feature_random)


In [None]:
# Show the random points coloured by prediction (network first, then SVM),
# overlaid with `feature` coloured by `label_t`.
for predicted_labels in (pre_label_nn, pre_label_svm):
    plot_temp_all_3(feature_random, predicted_labels, feature, label_t)

In [None]:
# Same two figures as the previous cell, written to disk instead of shown.
for predicted_labels, figure_path in (
        (pre_label_nn, '../result/adam_3_class/nn_original.png'),
        (pre_label_svm, '../result/adam_3_class/svm_original.png')):
    plot_temp_all_3(feature_random, predicted_labels, feature, label_t, figure_path)

# Transformed space

In [None]:
# Sample 30000 random 2-D points in [0, 100] x [0, 300] with random labels.
feature_random, label_random = random_points_3(0, 100, 0, 300, 30000,
                                               random_state=100)
label_random = to_categorical(label_random, 3)
# With train_flag=False the classifier head reads `input_shortcut`, so the
# random points go there. `inputs` is declared as [None, 28, 28, 1] and cannot
# take the 2-column point array; a single blank image is fed instead, and its
# result is discarded by the tf.cond. One image keeps that unused forward pass
# cheap.
pre_label_nn = session.run(
    pred_y_true,
    feed_dict={inputs: np.zeros([1, 28, 28, 1]),
               targets: label_random,
               train_flag: False,
               input_shortcut: feature_random})

# Linear SVM baseline fitted on the transformed features, evaluated on the
# same random points.
clf = get_svm(feature_t, label_t)
pre_label_svm = clf.predict(feature_random)

In [None]:
# Show the random points coloured by prediction (network first, then SVM),
# overlaid with the transformed features coloured by their labels.
for predicted_labels in (pre_label_nn, pre_label_svm):
    plot_temp_all_3(feature_random, predicted_labels, feature_t, label_t)

In [None]:
# Same two figures as the previous cell, written to disk instead of shown.
for predicted_labels, figure_path in (
        (pre_label_nn, '../result/adam_3_class/nn_transformed.png'),
        (pre_label_svm, '../result/adam_3_class/svm_transformed.png')):
    plot_temp_all_3(feature_random, predicted_labels, feature_t, label_t, figure_path)