In [None]:
#!nvidia-smi

In [None]:
#%env CUDA_DEVICE_ORDER=PCI_BUS_ID
#%env CUDA_VISIBLE_DEVICES=0

In [None]:
import os
import numpy as np
from scipy.misc import imread, imresize
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt
import random

In [None]:
# Load the MNIST dataset from ./data/ with one-hot encoded labels.
# See https://www.tensorflow.org/get_started/mnist/beginners for
# more information about the MNIST dataset.
mnist_data_dir = "./data/"
mnist = input_data.read_data_sets(mnist_data_dir, one_hot=True)

In [None]:
# Hyper-parameters and run-wide settings.
learning_rate = 0.001          # initial learning rate fed to the decay schedule
training_epochs = 70
batch_size = 64
n_class = 10
# Take the split sizes from the dataset that was actually loaded rather than
# hard-coding them, so the per-epoch batch counts agree with the learning-rate
# decay schedule (which is also derived from mnist.train.num_examples).
n_train = mnist.train.num_examples
n_test = mnist.test.num_examples
seed = 777
tf.set_random_seed(seed)       # graph-level seed
num_epochs_per_decay = 10      # learning rate is decayed every this many epochs
cur_dir = os.getcwd()          # base directory for checkpoint paths

In [None]:
# Graph inputs: flattened 784-value images, one-hot labels, and a boolean
# switch selecting training vs. inference behaviour (batch norm / dropout).
X = tf.placeholder(dtype=tf.float32, shape=[None, 784], name="X")
Y = tf.placeholder(dtype=tf.float32, shape=[None, n_class], name="Y")
is_train = tf.placeholder(dtype=tf.bool, name="is_train")

In [None]:
def preproc(x):
    """Subtract each example's own mean from it.

    Per-example mean subtraction, see
    http://ufldl.stanford.edu/wiki/index.php/Data_Preprocessing

    Args:
        x: 2-D tensor; the mean is taken over axis 1 for every row.

    Returns:
        Tensor of the same shape as ``x`` with the row-wise mean removed.
    """
    # Alternative kept for reference: rescale instead of centering.
    # x = x*2 - 1.0
    # `keepdims` is the current spelling; `keep_dims` is deprecated.
    mean = tf.reduce_mean(x, axis=1, keepdims=True)
    return x - mean

In [None]:
def conv_bn_activ_dropout(x, n_filters, kernel_size, strides, dropout_rate, training, seed,
                          padding='SAME', activ_fn=tf.nn.relu, name="conv_bn_act_dr"):
    """Convolution -> batch normalization -> activation -> optional dropout.

    Args:
        x: input tensor passed to ``tf.layers.conv2d``.
        n_filters: number of convolution filters.
        kernel_size: convolution kernel size.
        strides: convolution strides.
        dropout_rate: dropout rate; dropout is only added when it is > 0.0.
        training: flag forwarded to batch normalization and dropout.
        seed: seed for the Xavier kernel initializer and for dropout.
        padding: convolution padding mode.
        activ_fn: callable applied after batch normalization.
        name: currently unused (no variable scope is opened here).

    Returns:
        The output tensor of the last layer that was applied.
    """
    weight_init = tf.contrib.layers.xavier_initializer(seed=seed)
    # The convolution is created without a bias term.
    conv_out = tf.layers.conv2d(x, n_filters, kernel_size,
                                strides=strides,
                                padding=padding,
                                use_bias=False,
                                kernel_initializer=weight_init)
    normed = tf.layers.batch_normalization(conv_out, training=training)
    out = activ_fn(normed)
    if dropout_rate > 0.0:
        out = tf.layers.dropout(out, rate=dropout_rate, training=training, seed=seed)
    return out

In [None]:
def conv_bn_activ(x, n_filters, kernel_size, strides=1, training=is_train, seed=seed,
                  padding='SAME', activ_fn=tf.nn.relu, name="conv_bn_act"):
    """Convolution -> batch normalization -> activation, without dropout.

    Thin wrapper around ``conv_bn_activ_dropout`` that fixes the dropout rate
    to 0.0 and forwards every other argument unchanged. ``training`` and
    ``seed`` default to the notebook-level ``is_train`` placeholder and
    ``seed`` value as they were when this function was defined.
    """
    no_dropout = 0.0
    return conv_bn_activ_dropout(
        x, n_filters, kernel_size, strides, no_dropout, training, seed,
        padding=padding, activ_fn=activ_fn, name=name)

In [None]:
def stem(x, name="stem"):
    """Two-branch stem block; the branch outputs are concatenated on channels.

    Branch 1: 1x1 (32) -> 3x3 (48).
    Branch 2: 1x1 (32) -> 1x7 (32) -> 7x1 (32) -> 3x3 (48).

    Args:
        x: input feature map.
        name: variable scope wrapping the block.

    Returns:
        Concatenation of both branches along the last axis (48 + 48 = 96
        channels).
    """
    with tf.variable_scope(name):
        b1 = conv_bn_activ(x, 32, [1, 1])
        b1 = conv_bn_activ(b1, 48, [3, 3])
        b2 = conv_bn_activ(x, 32, [1, 1])
        # Chain from the 1x1 bottleneck (b2), not from the raw input x;
        # feeding x here left the 1x1 convolution above unused.
        b2 = conv_bn_activ(b2, 32, [1, 7])
        b2 = conv_bn_activ(b2, 32, [7, 1])
        b2 = conv_bn_activ(b2, 48, [3, 3])
        net = tf.concat([b1, b2], axis=-1)
        print(net)
        return net

In [None]:
def inception_A(x, name="inception_A"):
    """Inception-A block: four parallel branches concatenated on channels.

    Every branch ends with 24 filters, so the output has 24 x 4 = 96 channels.
    """
    with tf.variable_scope(name):
        # Branch 1: 3x3 average pool (stride 1) followed by a 1x1 conv.
        pool_branch = tf.layers.average_pooling2d(x, [3, 3], 1, padding='SAME')
        pool_branch = conv_bn_activ(pool_branch, 24, [1, 1])

        # Branch 2: plain 1x1 conv.
        pointwise_branch = conv_bn_activ(x, 24, [1, 1])

        # Branch 3: 1x1 bottleneck then a single 3x3 conv.
        single_3x3_branch = conv_bn_activ(x, 16, [1, 1])
        single_3x3_branch = conv_bn_activ(single_3x3_branch, 24, [3, 3])

        # Branch 4: 1x1 bottleneck then two stacked 3x3 convs.
        double_3x3_branch = conv_bn_activ(x, 16, [1, 1])
        double_3x3_branch = conv_bn_activ(double_3x3_branch, 24, [3, 3])
        double_3x3_branch = conv_bn_activ(double_3x3_branch, 24, [3, 3])

        branches = [pool_branch, pointwise_branch, single_3x3_branch, double_3x3_branch]
        net = tf.concat(branches, axis=-1)
        print(net)
        return net

In [None]:
def inception_B(x, name="inception_B"):
    """Inception-B block built from factorised 1x7 / 7x1 convolutions.

    Output channels: 32 + 96 + 64 + 64 = 256.
    """
    with tf.variable_scope(name):
        # Branch 1: 3x3 average pool (stride 1) followed by a 1x1 conv.
        pool_branch = tf.layers.average_pooling2d(x, [3, 3], 1, padding='SAME')
        pool_branch = conv_bn_activ(pool_branch, 32, [1, 1])

        # Branch 2: plain 1x1 conv.
        pointwise_branch = conv_bn_activ(x, 96, [1, 1])

        # Branch 3: 1x1 -> 1x7 -> 7x1.
        short_branch = conv_bn_activ(x, 48, [1, 1])
        short_branch = conv_bn_activ(short_branch, 56, [1, 7])
        short_branch = conv_bn_activ(short_branch, 64, [7, 1])

        # Branch 4: 1x1 -> 1x7 -> 7x1 -> 1x7 -> 7x1.
        long_branch = conv_bn_activ(x, 48, [1, 1])
        long_branch = conv_bn_activ(long_branch, 48, [1, 7])
        long_branch = conv_bn_activ(long_branch, 56, [7, 1])
        long_branch = conv_bn_activ(long_branch, 56, [1, 7])
        long_branch = conv_bn_activ(long_branch, 64, [7, 1])

        branches = [pool_branch, pointwise_branch, short_branch, long_branch]
        net = tf.concat(branches, axis=-1)
        print(net)
        return net

In [None]:
def inception_C(x, name="inception_C"):
    """Inception-C block whose last two branches each split into 1x3 and 3x1 heads.

    Six 64-channel tensors are concatenated: 64 * 6 = 384 output channels.
    """
    with tf.variable_scope(name):
        # Branch 1: 3x3 average pool (stride 1) followed by a 1x1 conv.
        pool_branch = tf.layers.average_pooling2d(x, [3, 3], 1, padding='SAME')
        pool_branch = conv_bn_activ(pool_branch, 64, [1, 1])

        # Branch 2: plain 1x1 conv.
        pointwise_branch = conv_bn_activ(x, 64, [1, 1])

        # Branch 3: shared 1x1 trunk, then parallel 1x3 and 3x1 heads.
        split_trunk = conv_bn_activ(x, 96, [1, 1])
        split_head_1x3 = conv_bn_activ(split_trunk, 64, [1, 3])
        split_head_3x1 = conv_bn_activ(split_trunk, 64, [3, 1])

        # Branch 4: 1x1 -> 1x3 -> 3x1 trunk, then parallel 3x1 and 1x3 heads.
        deep_trunk = conv_bn_activ(x, 96, [1, 1])
        deep_trunk = conv_bn_activ(deep_trunk, 112, [1, 3])
        deep_trunk = conv_bn_activ(deep_trunk, 128, [3, 1])
        deep_head_3x1 = conv_bn_activ(deep_trunk, 64, [3, 1])
        deep_head_1x3 = conv_bn_activ(deep_trunk, 64, [1, 3])

        branches = [pool_branch, pointwise_branch,
                    split_head_1x3, split_head_3x1,
                    deep_head_3x1, deep_head_1x3]
        net = tf.concat(branches, axis=-1)
        print(net)
        return net

In [None]:
def reduction_A(x, name="reduction_A"):
    """Reduction-A block: three stride-2 branches concatenated on channels.

    The max-pool branch keeps the input's channel count; per the original
    note the total is 96 + 64 + 96 = 256 channels.
    """
    with tf.variable_scope(name):
        # Branch 1: 3x3 max pool, stride 2.
        pool_branch = tf.layers.max_pooling2d(x, [3, 3], 2, padding='SAME')

        # Branch 2: single strided 3x3 conv.
        strided_branch = conv_bn_activ(x, 96, [3, 3], strides=2)

        # Branch 3: 1x1 -> 3x3 -> strided 3x3.
        deep_branch = conv_bn_activ(x, 48, [1, 1])
        deep_branch = conv_bn_activ(deep_branch, 56, [3, 3])
        deep_branch = conv_bn_activ(deep_branch, 64, [3, 3], strides=2)

        branches = [pool_branch, strided_branch, deep_branch]
        net = tf.concat(branches, axis=-1)
        print(net)
        return net

In [None]:
def reduction_B(x, name="reduction_B"):
    """Reduction-B block: three stride-2 branches concatenated on channels.

    The max-pool branch keeps the input's channel count; per the original
    note the total is 256 + 48 + 80 = 384 channels.
    """
    with tf.variable_scope(name):
        # Branch 1: 3x3 max pool, stride 2.
        pool_branch = tf.layers.max_pooling2d(x, [3, 3], 2, padding='SAME')

        # Branch 2: 1x1 -> strided 3x3.
        short_branch = conv_bn_activ(x, 48, [1, 1])
        short_branch = conv_bn_activ(short_branch, 48, [3, 3], strides=2)

        # Branch 3: 1x1 -> 1x7 -> 7x1 -> strided 3x3.
        long_branch = conv_bn_activ(x, 64, [1, 1])
        long_branch = conv_bn_activ(long_branch, 64, [1, 7])
        long_branch = conv_bn_activ(long_branch, 80, [7, 1])
        long_branch = conv_bn_activ(long_branch, 80, [3, 3], strides=2)

        branches = [pool_branch, short_branch, long_branch]
        net = tf.concat(branches, axis=-1)
        print(net)
        return net

In [None]:
def build_inception_slim(X_img):
    """Assemble the reduced Inception-style classifier and return its logits.

    Layout: stem -> 2 x inception_A -> reduction_A -> 3 x inception_B ->
    reduction_B -> 1 x inception_C -> 7x7 average pool -> dropout -> dense.

    Args:
        X_img: image tensor fed to the stem (this notebook passes a tensor
            reshaped to [-1, 28, 28, 1]).

    Returns:
        Logits tensor with ``n_class`` units per example.
    """
    net = X_img
    with tf.variable_scope("stem"):
        net = stem(net)
    with tf.variable_scope("inception-A"):
        for i in range(2):
            net = inception_A(net, name="inception_block_a{}".format(i))
    with tf.variable_scope("reduction-A"):
        net = reduction_A(net)
    with tf.variable_scope("inception-B"):
        for i in range(3):
            net = inception_B(net, name="inception_block_b{}".format(i))
    with tf.variable_scope("reduction-B"):
        net = reduction_B(net)
    with tf.variable_scope("inception-C"):
        for i in range(1):
            net = inception_C(net, name="inception_block_c{}".format(i))
    with tf.variable_scope("fc"):
        # A 7x7 pool with stride 7 collapses the map to one value per channel,
        # assuming the feature map is 7x7 here (28x28 input halved by each of
        # the two stride-2 reduction blocks).
        net = tf.layers.average_pooling2d(name="gap", inputs=net, pool_size=[7, 7],
                                          strides=7, padding='SAME')
        print(net)
        # 384 is the channel count produced by inception_C (64 * 6).
        net = tf.reshape(net, [-1, 384])
        print(net)
        net = tf.layers.dropout(net, rate=0.2, training=is_train, seed=seed)
        # Size the output layer from n_class so it always matches the Y
        # placeholder instead of relying on a hard-coded 10.
        logits = tf.layers.dense(net, n_class, name="logits",
                                 kernel_initializer=tf.contrib.layers.variance_scaling_initializer(seed=seed))
        print(logits)
    return logits

In [None]:
# Mean-center each flattened image, then reshape it to a 28x28 single-channel
# image batch for the convolutional layers.
X_pre = preproc(X)
image_shape = [-1, 28, 28, 1]
X_img = tf.reshape(X_pre, image_shape, name="X_img")

In [None]:
# Build the network graph on the reshaped image batch; each block prints its
# output tensor while the graph is being constructed.
logits = build_inception_slim(X_img)

In [None]:
# Mean softmax cross-entropy between the logits and the one-hot labels.
per_example_xent = tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=logits)
loss = tf.reduce_mean(per_example_xent, name="loss")

# Step-wise learning-rate schedule: multiply the rate by 0.1 once every
# `num_epochs_per_decay` epochs' worth of training steps.
n_batches_per_epoch = int(mnist.train.num_examples / batch_size)
print(n_batches_per_epoch)
decay_steps = int(n_batches_per_epoch * num_epochs_per_decay)
lr_decay_rate = 0.1
global_step = tf.Variable(0, trainable=False)
learningRate = tf.train.exponential_decay(
    learning_rate=learning_rate,
    global_step=global_step,
    decay_steps=decay_steps,
    decay_rate=lr_decay_rate,
    staircase=True,
)
# (An earlier variant of this schedule used a fixed decay_steps=5000.)

# Make the train op depend on the ops registered in UPDATE_OPS (where
# tf.layers.batch_normalization places its moving-statistics updates) so they
# run on every training step.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    # NOTE: despite its name, `optimizer` holds the training op returned by
    # minimize(), which also increments global_step.
    optimizer = tf.train.AdamOptimizer(learning_rate=learningRate).minimize(
        loss, global_step=global_step, name="optimizer")

In [None]:
# Inference outputs: predicted class index and class probabilities.
pred = tf.argmax(logits, axis=1, name="prediction")
prob = tf.nn.softmax(logits, name="softmax")

# Accuracy: fraction of examples whose predicted index equals the label index.
true_class = tf.argmax(Y, axis=1)
is_correct = tf.equal(pred, true_class)
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32), name="accuracy")

In [None]:
# Register the tensors needed to reuse the model under the 'train_var'
# collection, in this fixed order, then create the saver.
train_var = [X, Y, is_train, logits, pred, prob, accuracy]
for tensor in train_var:
    tf.add_to_collection('train_var', tensor)
saver = tf.train.Saver()

In [None]:
# Ensure the checkpoint directory exists, then export the graph definition
# together with the 'train_var' collection.
savedir = 'checkpoints'
if not os.path.exists(savedir):
    os.makedirs(savedir)
meta_graph_path = os.path.join(cur_dir, savedir, 'mnist_small_incv4.meta')
saver.export_meta_graph(meta_graph_path, collection_list=['train_var'])

In [None]:
# initialize
# Create the session with GPU memory growth enabled, then initialize all
# global variables.
gpu_options = tf.GPUOptions(allow_growth=True)
session_config = tf.ConfigProto(gpu_options=gpu_options)
sess = tf.Session(config=session_config)
sess.run(tf.global_variables_initializer())

In [None]:
# train my model
# Train the model; after every epoch evaluate on the test split and save a
# checkpoint whenever the test accuracy improves on the best seen so far.
print('Learning started. It takes sometime.')
max_test_acc = 0.

# Derive the batch count from the dataset that is actually loaded so that one
# "epoch" here matches the epoch length assumed by the learning-rate schedule.
total_batch = mnist.train.num_examples // batch_size
n_test_examples = mnist.test.num_examples
checkpoint_path = os.path.join(cur_dir, savedir, 'mnist_small_incv4.ckpt')

for epoch in range(training_epochs):
    avg_cost = 0.
    avg_train_acc = 0.
    avg_test_acc = 0.

    for i in range(total_batch):
        # The X and Y placeholders must be fed on every step; fetch a fresh
        # mini-batch from the training split.
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        feed_dict = {X: batch_xs, Y: batch_ys, is_train: True}
        acc, c, _ = sess.run([accuracy, loss, optimizer], feed_dict=feed_dict)
        avg_cost += c / total_batch
        avg_train_acc += acc / total_batch

    # Walk the whole test split in order (including the final partial batch)
    # and weight each batch by its size, so every epoch is scored on exactly
    # the same examples and the best-model comparison below is meaningful.
    for start in range(0, n_test_examples, batch_size):
        batch_xs = mnist.test.images[start:start + batch_size]
        batch_ys = mnist.test.labels[start:start + batch_size]
        feed_dict = {X: batch_xs, Y: batch_ys, is_train: False}
        acc = sess.run(accuracy, feed_dict=feed_dict)
        avg_test_acc += acc * len(batch_xs) / n_test_examples

    if avg_test_acc > max_test_acc:
        max_test_acc = avg_test_acc
        print('saving a graph and weights => accuracy : {}'.format(max_test_acc))
        saver.save(sess, checkpoint_path)

    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost),
          'train accuracy = ', '{:.5f}'.format(avg_train_acc),
          'test accuracy = ', '{:.5f}'.format(avg_test_acc))


print('Learning Finished!')

In [None]:
#saver.save(sess, os.path.join(cur_dir, 'checkpoints', 'mnist_save.ckpt'))

In [None]:
def evaluate(X_sample, y_sample, batch_size=100):
    """Compute accuracy over a whole sample set, one mini-batch at a time.

    Each batch's accuracy is weighted by the number of examples in that batch,
    so a shorter final batch is accounted for correctly.

    Args:
        X_sample: array of inputs; its first dimension is the example count.
        y_sample: labels, sliced with the same indices as ``X_sample``.
        batch_size: number of examples evaluated per ``sess.run`` call.

    Returns:
        Size-weighted sum of batch accuracies divided by the example count.
    """
    n_total = X_sample.shape[0]
    weighted_acc_sum = 0

    for start in range(0, n_total, batch_size):
        stop = start + batch_size
        images, labels = X_sample[start:stop], y_sample[start:stop]
        # is_train is fed as False for evaluation.
        batch_acc = sess.run(accuracy, feed_dict={X: images, Y: labels, is_train: False})
        weighted_acc_sum += batch_acc * images.shape[0]

    return weighted_acc_sum / n_total

# Final report: accuracy over the full train and test splits.
print("\nAccuracy Evaluates")
print("-------------------------------")
train_accuracy = evaluate(mnist.train.images, mnist.train.labels)
print('Train Accuracy:', '{:.5f}'.format(train_accuracy))
test_accuracy = evaluate(mnist.test.images, mnist.test.labels)
print('Test Accuracy:', '{:.5f}'.format(test_accuracy))