In [None]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt

# Report the active TensorFlow version. The cells below use TF 1.x graph-mode
# APIs (tf.placeholder, tf.Session, tf.contrib, tf.layers).
print(tf.__version__)

In [None]:
# --- Run configuration -------------------------------------------------------
seed = 777                # graph-level seed, also handed to initializers/dropout
n_class = 10              # number of output classes
batch_size = 16           # examples per training / evaluation step
training_epochs = 20      # passes over the training set
learning_rate = 0.001     # initial learning rate fed to the decay schedule
lr_decay_epoch_num = 10   # the learning rate is decayed once per this many epochs

# Fix the graph-level random seed before any op is created.
tf.set_random_seed(seed)

In [None]:
# Dataset selection: plain MNIST digits, whose class names are just "0".."9".
mnist = keras.datasets.mnist
class_names = [str(digit) for digit in range(10)]

# To run on Fashion-MNIST instead, swap in the two lines below.
#mnist = keras.datasets.fashion_mnist
#class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

In [None]:
# Load the selected dataset as (images, labels) pairs for the train and test splits.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

In [None]:
def _make_batched_dataset(images, labels):
    """Build the shuffled, batched, endlessly repeating input pipeline.

    Args:
        images: array of input images; sliced along its first axis.
        labels: array of labels aligned with `images`.

    Returns:
        A tf.data.Dataset yielding (image_batch, label_batch) pairs of
        `batch_size` examples each.
    """
    dataset = tf.data.Dataset.from_tensor_slices((images, labels))
    dataset = dataset.shuffle(buffer_size=100000)
    dataset = dataset.batch(batch_size)
    # repeat() lets consumers keep pulling batches after one full pass
    # (the prediction cell at the end of the notebook fetches one more batch
    # after the evaluation loop has finished).
    dataset = dataset.repeat()
    # prefetch goes last so that it buffers a ready-made *batch* while the
    # previous one is being consumed, instead of buffering single examples
    # ahead of the batching step.
    return dataset.prefetch(buffer_size=1)


# Train and test share the exact same pipeline definition.
train_dataset = _make_batched_dataset(train_images, train_labels)
test_dataset = _make_batched_dataset(test_images, test_labels)

In [None]:
# Iterator defined only by element structure (types and shapes), so the same
# iterator can be bound to either the training or the test dataset.
iterator = tf.data.Iterator.from_structure(
    output_types=train_dataset.output_types,
    output_shapes=train_dataset.output_shapes)

# Symbolic (images, labels) batch produced by the iterator.
images, labels = iterator.get_next()

In [None]:
# Initializer ops that bind the shared iterator to the training or the test
# dataset; running one of them selects which data `images`/`labels` yield.
train_init, test_init = map(iterator.make_initializer,
                            (train_dataset, test_dataset))

In [None]:
# Input preprocessing:
#   1. add an explicit single-channel axis to the 28x28 images,
#   2. cast to float32 and scale pixel values by 1/255,
#   3. upsample to the 299x299 resolution the network below is laid out for.
vec_images = tf.reshape(images, [-1, 28, 28, 1])
vec_images = tf.cast(vec_images, tf.float32) / 255.
vec_images = tf.image.resize_bicubic(vec_images, [299, 299])

# One-hot depth follows the configured class count instead of a literal 10,
# so the labels stay consistent with the width of the logits layer.
onehot_labels = tf.one_hot(labels, n_class)

In [None]:
# Boolean switch fed at run time; it is passed as `training=` to the batch-norm
# and dropout layers, so it must be supplied whenever the network is evaluated.
is_train = tf.placeholder(tf.bool)

In [None]:
def conv_bn_activ_dropout(x, n_filters, kernel_size, strides, dropout_rate, training, seed,
                          padding='SAME', activ_fn=tf.nn.relu, name="conv_bn_act_dr"):
    """Bias-free conv2d -> batch normalization -> activation -> optional dropout.

    Args:
        x: input tensor for the convolution.
        n_filters: number of convolution filters.
        kernel_size: convolution kernel size.
        strides: convolution strides.
        dropout_rate: dropout is appended only when this is greater than 0.
        training: forwarded as `training=` to batch norm and dropout.
        seed: forwarded to the Xavier kernel initializer and to dropout.
        padding: convolution padding mode.
        activ_fn: activation applied after batch normalization.
        name: currently unused (the variable scope that would consume it is
            disabled); kept so callers can keep passing it.

    Returns:
        The output tensor of the last layer applied.
    """
    kernel_init = tf.contrib.layers.xavier_initializer(seed=seed)
    conv = tf.layers.conv2d(x, n_filters, kernel_size, strides=strides,
                            padding=padding, use_bias=False,
                            kernel_initializer=kernel_init)
    activated = activ_fn(tf.layers.batch_normalization(conv, training=training))

    # Guard clause: without a positive rate no dropout layer is created at all.
    if not (dropout_rate > 0.0):
        return activated
    return tf.layers.dropout(activated, rate=dropout_rate, training=training, seed=seed)

In [None]:
def conv_bn_activ(x, n_filters, kernel_size, strides=1, training=is_train, seed=seed,
                  padding='SAME', activ_fn=tf.nn.relu, name="conv_bn_act"):
    """Conv -> batch norm -> activation, i.e. `conv_bn_activ_dropout` with dropout off.

    The defaults for `training` and `seed` are the notebook-level `is_train`
    placeholder and `seed` value as they exist when this cell is executed.
    NOTE(review): every layer built through this helper with default arguments
    therefore shares one initializer seed -- confirm that is intended.

    Returns:
        The activated output tensor.
    """
    return conv_bn_activ_dropout(
        x, n_filters, kernel_size, strides,
        dropout_rate=0.0, training=training, seed=seed,
        padding=padding, activ_fn=activ_fn, name=name)

In [None]:
def stem(x, name="stem"):
    """Stem of the network: three stages that shrink the input and widen channels.

    The size annotations assume a 299x299 input (this notebook feeds a
    single-channel 299x299 image). Each stage prints its output tensor.

    Args:
        x: input image batch.
        name: variable scope wrapping the whole stem.

    Returns:
        Concatenated output of the third stage (384 channels).
    """
    with tf.variable_scope(name):
        with tf.variable_scope("stem1"):
            trunk = conv_bn_activ(x, 32, [3, 3], strides=2, padding='VALID')  # 149x149x32
            trunk = conv_bn_activ(trunk, 32, [3, 3], padding='VALID')         # 147x147x32
            trunk = conv_bn_activ(trunk, 64, [3, 3])                          # 147x147x64
            pooled = tf.layers.max_pooling2d(trunk, [3, 3], 2, padding='VALID')       # 73x73x64
            strided = conv_bn_activ(trunk, 96, [3, 3], strides=2, padding='VALID')    # 73x73x96
            net = tf.concat([pooled, strided], axis=-1)                       # 73x73x160
            print(net)

        with tf.variable_scope("stem2"):
            # Short branch: 1x1 bottleneck followed by an unpadded 3x3.
            short_branch = net
            for filters, kernel, pad in ((64, [1, 1], 'SAME'),
                                         (96, [3, 3], 'VALID')):
                short_branch = conv_bn_activ(short_branch, filters, kernel, padding=pad)

            # Long branch: same idea with a factorized 7x7 (1x7 then 7x1) in between.
            long_branch = net
            for filters, kernel, pad in ((64, [1, 1], 'SAME'),
                                         (64, [1, 7], 'SAME'),
                                         (64, [7, 1], 'SAME'),
                                         (96, [3, 3], 'VALID')):
                long_branch = conv_bn_activ(long_branch, filters, kernel, padding=pad)

            net = tf.concat([short_branch, long_branch], axis=-1)             # 71x71x192
            print(net)

        with tf.variable_scope("stem3"):
            strided = conv_bn_activ(net, 192, [3, 3], strides=2, padding='VALID')
            pooled = tf.layers.max_pooling2d(net, [3, 3], 2, padding='VALID')
            net = tf.concat([strided, pooled], axis=-1)                       # 35x35x384
    print(net)
    return net

In [None]:
def inception_A(x, name="inception_A"):
    """Inception-A block: four parallel branches of 96 channels each (384 total).

    Args:
        x: input feature map.
        name: variable scope for the block.

    Returns:
        Channel-wise concatenation of the four branch outputs (also printed).
    """
    with tf.variable_scope(name):
        # Branch 1: 3x3 average pool (stride 1) -> 1x1 projection.
        pooled = tf.layers.average_pooling2d(x, [3, 3], 1, padding='SAME')
        pool_proj = conv_bn_activ(pooled, 96, [1, 1])

        # Branch 2: plain 1x1 projection.
        direct = conv_bn_activ(x, 96, [1, 1])

        # Branch 3: 1x1 bottleneck -> 3x3.
        single_3x3 = x
        for filters, kernel in ((64, [1, 1]), (96, [3, 3])):
            single_3x3 = conv_bn_activ(single_3x3, filters, kernel)

        # Branch 4: 1x1 bottleneck -> two stacked 3x3.
        double_3x3 = x
        for filters, kernel in ((64, [1, 1]), (96, [3, 3]), (96, [3, 3])):
            double_3x3 = conv_bn_activ(double_3x3, filters, kernel)

        net = tf.concat([pool_proj, direct, single_3x3, double_3x3], axis=-1)
        print(net)
        return net

In [None]:
def inception_B(x, name="inception_B"):
    """Inception-B block: branches of 128 + 384 + 256 + 256 = 1024 channels.

    Args:
        x: input feature map.
        name: variable scope for the block.

    Returns:
        Channel-wise concatenation of the four branch outputs (also printed).
    """
    with tf.variable_scope(name):
        # Branch 1: 3x3 average pool (stride 1) -> 1x1 projection.
        pooled = tf.layers.average_pooling2d(x, [3, 3], 1, padding='SAME')
        pool_proj = conv_bn_activ(pooled, 128, [1, 1])

        # Branch 2: plain 1x1 projection.
        direct = conv_bn_activ(x, 384, [1, 1])

        # Branch 3: 1x1 bottleneck -> one factorized 7x7 (1x7 then 7x1).
        single_7x7 = x
        for filters, kernel in ((192, [1, 1]), (224, [1, 7]), (256, [7, 1])):
            single_7x7 = conv_bn_activ(single_7x7, filters, kernel)

        # Branch 4: 1x1 bottleneck -> two factorized 7x7 in a row.
        double_7x7 = x
        for filters, kernel in ((192, [1, 1]), (192, [1, 7]), (224, [7, 1]),
                                (224, [1, 7]), (256, [7, 1])):
            double_7x7 = conv_bn_activ(double_7x7, filters, kernel)

        net = tf.concat([pool_proj, direct, single_7x7, double_7x7], axis=-1)
        print(net)
        return net

In [None]:
def inception_C(x, name="inception_C"):
    """Inception-C block: six 256-channel outputs concatenated (1536 total).

    Args:
        x: input feature map.
        name: variable scope for the block.

    Returns:
        Channel-wise concatenation of the six outputs (also printed).
    """
    with tf.variable_scope(name):
        # Branch 1: 3x3 average pool (stride 1) -> 1x1 projection.
        pooled = tf.layers.average_pooling2d(x, [3, 3], 1, padding='SAME')
        pool_proj = conv_bn_activ(pooled, 256, [1, 1])

        # Branch 2: plain 1x1 projection.
        direct = conv_bn_activ(x, 256, [1, 1])

        # Branch 3: shared 1x1, then split into parallel 1x3 and 3x1 heads.
        split_base = conv_bn_activ(x, 384, [1, 1])
        split_1x3 = conv_bn_activ(split_base, 256, [1, 3])
        split_3x1 = conv_bn_activ(split_base, 256, [3, 1])

        # Branch 4: 1x1 -> 1x3 -> 3x1 trunk, then split into 3x1 and 1x3 heads.
        deep_base = x
        for filters, kernel in ((384, [1, 1]), (448, [1, 3]), (512, [3, 1])):
            deep_base = conv_bn_activ(deep_base, filters, kernel)
        deep_3x1 = conv_bn_activ(deep_base, 256, [3, 1])
        deep_1x3 = conv_bn_activ(deep_base, 256, [1, 3])

        net = tf.concat([pool_proj, direct, split_1x3, split_3x1, deep_3x1, deep_1x3],
                        axis=-1)
        print(net)
        return net

In [None]:
def reduction_A(x, name="reduction_A"):
    """Reduction-A block: three stride-2 branches that downsample the feature map.

    The two convolutional branches contribute 384 and 256 channels; the
    max-pool branch passes the input channels through.

    Args:
        x: input feature map.
        name: variable scope for the block.

    Returns:
        Channel-wise concatenation of the three branch outputs (also printed).
    """
    with tf.variable_scope(name):
        # Branch 1: unpadded 3x3 max pool, stride 2.
        pooled = tf.layers.max_pooling2d(x, [3, 3], 2, padding='VALID')

        # Branch 2: single unpadded 3x3 convolution, stride 2.
        strided = conv_bn_activ(x, 384, [3, 3], strides=2, padding='VALID')

        # Branch 3: 1x1 -> 3x3 -> unpadded stride-2 3x3.
        deep = conv_bn_activ(x, 192, [1, 1])
        deep = conv_bn_activ(deep, 224, [3, 3])
        deep = conv_bn_activ(deep, 256, [3, 3], strides=2, padding='VALID')

        net = tf.concat([pooled, strided, deep], axis=-1)  # 17x17x1024
        print(net)
        return net

In [None]:
def reduction_B(x, name="reduction_B"):
    """Reduction-B block: three stride-2 branches that downsample the feature map.

    Number of channels: input channels (max-pool branch) + 192 + 320.

    Args:
        x: input feature map.
        name: variable scope for the block.

    Returns:
        Channel-wise concatenation of the three branch outputs (also printed).
    """
    with tf.variable_scope(name):
        # Branch 1: unpadded 3x3 max pool, stride 2.
        pooled = tf.layers.max_pooling2d(x, [3, 3], 2, padding='VALID')

        # Branch 2: 1x1 bottleneck -> unpadded stride-2 3x3.
        short_branch = conv_bn_activ(x, 192, [1, 1])
        short_branch = conv_bn_activ(short_branch, 192, [3, 3], strides=2, padding='VALID')

        # Branch 3: 1x1 -> factorized 7x7 (1x7, 7x1) -> unpadded stride-2 3x3.
        long_branch = x
        for filters, kernel in ((256, [1, 1]), (256, [1, 7]), (320, [7, 1])):
            long_branch = conv_bn_activ(long_branch, filters, kernel)
        long_branch = conv_bn_activ(long_branch, 320, [3, 3], strides=2, padding='VALID')

        net = tf.concat([pooled, short_branch, long_branch], axis=-1)  # 8x8x1536
        print(net)
        return net

In [None]:
def build_inception_v4(X_img):
    """Assemble the full network and return its class logits.

    Layout: stem -> 4x Inception-A -> Reduction-A -> 7x Inception-B ->
    Reduction-B -> 3x Inception-C -> 8x8 average pool -> dropout -> dense.
    The head reads the notebook-level `is_train`, `seed` and `n_class`.
    Intermediate tensors of the head are printed as they are built.

    Args:
        X_img: preprocessed image batch.

    Returns:
        Logits tensor with `n_class` outputs per example.
    """
    def _stack(features, block_fn, repeats, prefix):
        # Chain `repeats` copies of a block, naming them prefix0, prefix1, ...
        for idx in range(repeats):
            features = block_fn(features, name="{}{}".format(prefix, idx))
        return features

    with tf.variable_scope("stem"):
        net = stem(X_img)
    with tf.variable_scope("inception-A"):
        net = _stack(net, inception_A, 4, "inception_block_a")
    with tf.variable_scope("reduction-A"):
        net = reduction_A(net)
    with tf.variable_scope("inception-B"):
        net = _stack(net, inception_B, 7, "inception_block_b")
    with tf.variable_scope("reduction-B"):
        net = reduction_B(net)
    with tf.variable_scope("inception-C"):
        net = _stack(net, inception_C, 3, "inception_block_c")

    with tf.variable_scope("fc"):
        # Average-pool with an 8x8 window, then flatten to 1536 features.
        net = tf.layers.average_pooling2d(inputs=net, pool_size=[8, 8], strides=8,
                                          padding='SAME', name="gap")
        print(net)
        net = tf.reshape(net, [-1, 1536])
        print(net)
        net = tf.layers.dropout(net, rate=0.2, training=is_train, seed=seed)
        logits_init = tf.contrib.layers.variance_scaling_initializer(seed=seed)
        logits = tf.layers.dense(net, n_class, name="logits",
                                 kernel_initializer=logits_init)
        print(logits)
    return logits

In [None]:
# Build the network on the preprocessed image batch; `hypothesis` holds the
# unnormalized class logits returned by the final dense layer.
hypothesis = build_inception_v4(vec_images)

In [None]:
# Loss: mean softmax cross-entropy between the logits and the one-hot labels.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
    logits=hypothesis, labels=onehot_labels))

# Step counter incremented by the optimizer; drives the learning-rate schedule.
global_step = tf.Variable(0, trainable=False)

# Staircase decay: multiply the learning rate by 0.1 every `lr_decay_epoch_num`
# epochs. The step count per epoch is computed the same way the training loop
# counts its batches (whole batches only), so the decay boundary lands exactly
# on an epoch boundary even when the dataset size is not a multiple of
# `batch_size`.
steps_per_epoch = train_images.shape[0] // batch_size
lr_decay = tf.train.exponential_decay(learning_rate=learning_rate,
                                      global_step=global_step,
                                      decay_steps=steps_per_epoch * lr_decay_epoch_num,
                                      decay_rate=0.1,
                                      staircase=True)

# Ops collected under UPDATE_OPS are made a dependency of the train op so they
# run on every training step.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    optimizer = tf.train.AdamOptimizer(learning_rate=lr_decay).minimize(
        cost, global_step=global_step)

In [None]:
# Batch accuracy: fraction of examples whose arg-max logit equals the arg-max
# of the one-hot label.
predicted_class = tf.argmax(hypothesis, 1)
true_class = tf.argmax(onehot_labels, 1)
correct_prediction = tf.equal(predicted_class, true_class)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [None]:
# Open a session with the GPU `allow_growth` option enabled, then initialize
# every global variable in the graph.
gpu_options = tf.GPUOptions(allow_growth=True)
session_config = tf.ConfigProto(gpu_options=gpu_options)
sess = tf.Session(config=session_config)
sess.run(tf.global_variables_initializer())

In [None]:
# train my model
print('Learning started. It takes sometime.')

# Whole batches per pass over each split. These never change between epochs,
# so they are computed once up front.
total_batch = int(train_images.shape[0] / batch_size)
total_batch_test = int(test_images.shape[0] / batch_size)

# Best per-epoch test accuracy observed so far (updated after every epoch).
max_test_acc = 0.
for epoch in range(training_epochs):
    avg_cost = 0.
    avg_train_acc = 0.
    avg_test_acc = 0.

    # Training pass: bind the iterator to the training data and take one
    # optimizer step per batch, accumulating running means.
    sess.run(train_init)
    for i in range(total_batch):
        acc, c, _ = sess.run([accuracy, cost, optimizer], feed_dict={is_train: True})
        avg_cost += c / total_batch
        avg_train_acc += acc / total_batch

    # Evaluation pass: rebind the iterator to the test data; `is_train` is
    # False, so batch norm and dropout run in inference mode.
    sess.run(test_init)
    for i in range(total_batch_test):
        acc = sess.run(accuracy, feed_dict={is_train: False})
        avg_test_acc += acc / total_batch_test

    max_test_acc = max(max_test_acc, avg_test_acc)

    print('Epoch:', '{}'.format(epoch + 1), 'cost =', '{:.8f}'.format(avg_cost),
          'train accuracy = ', '{:.4f}'.format(avg_train_acc),
          'test accuracy = ', '{:.4f}'.format(avg_test_acc))


print('Learning Finished!')

In [None]:
def plot_image(i, predictions_array, true_label, img):
    """Draw sample `i` on the current axes with a colour-coded prediction caption.

    The caption reads "<predicted> <confidence>% (<true>)" and is blue when the
    prediction matches the true label, red otherwise.

    Args:
        i: index of the sample to draw.
        predictions_array: per-sample class scores; row `i` is used.
        true_label: per-sample true class indices; entry `i` is used.
        img: per-sample images; entry `i` is shown.
    """
    scores, label, picture = predictions_array[i], true_label[i], img[i]

    # Bare image: no grid, no ticks.
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    plt.imshow(picture, cmap=plt.cm.binary)

    predicted_label = np.argmax(scores)
    color = 'blue' if predicted_label == label else 'red'
    caption = "{} {:2.0f}% ({})".format(class_names[predicted_label],
                                        100*np.max(scores),
                                        class_names[label])
    plt.xlabel(caption, color=color)

def plot_value_array(i, predictions_array, true_label):
    """Draw the class-score bar chart for sample `i` on the current axes.

    Bars are grey; the predicted class is coloured red and then the true class
    blue, so a correct prediction shows a single blue bar.

    Args:
        i: index of the sample to draw.
        predictions_array: per-sample class scores; row `i` is plotted.
        true_label: per-sample true class indices; entry `i` is used.
    """
    predictions_array, true_label = predictions_array[i], true_label[i]
    # One bar per class score, rather than a hard-coded 10, so the chart
    # follows the width of the prediction vector.
    positions = range(len(predictions_array))

    plt.grid(False)
    plt.xticks(positions, class_names, rotation=90)
    plt.yticks([])
    thisplot = plt.bar(positions, predictions_array, color="#777777")
    plt.ylim([0, 1])
    predicted_label = np.argmax(predictions_array)

    # Order matters: blue is applied last so it wins when both indices coincide.
    thisplot[predicted_label].set_color('red')
    thisplot[true_label].set_color('blue')

In [None]:
# Class probabilities from the logits.
prob = tf.nn.softmax(hypothesis)

# The iterator yields from whichever dataset it was last initialized with.
# Uncomment the next line to force the test set.
#sess.run(test_init)

# Pull ONE batch and keep both the raw images/labels (for display) and the
# preprocessed tensors, then feed those tensors back in so the predictions
# belong to exactly the images being shown.
imgs, lbs, x, y = sess.run([images, labels, vec_images, onehot_labels])
predictions = sess.run(prob, feed_dict={vec_images:x, onehot_labels:y, is_train:False})

num_rows = 5
num_cols = 3
# Never index past the fetched batch: with a batch smaller than the grid,
# only the available samples are drawn.
num_images = min(num_rows*num_cols, len(imgs))

# Each sample takes two adjacent cells: the image, then its score bar chart.
plt.figure(figsize=(3*2*num_cols, 3*num_rows))
for i in range(num_images):
    plt.subplot(num_rows, 2*num_cols, 2*i+1)
    plot_image(i, predictions, lbs, imgs)
    plt.subplot(num_rows, 2*num_cols, 2*i+2)
    plot_value_array(i, predictions, lbs)