## CNN with BN in TensorFlow

* DCGAN 의 CNN 모델을 만들어서 MNIST classification 을 수행해보자. 
* TF 에서 BN 을 적용하는 걸 연습하는 용도. 
* MNIST 로 정확도를 테스트하기 어렵다면 다른 데이터셋도 구해서 적용해보자.

Discriminator of DCGAN:

![Discriminator of DCGAN](http://bamos.github.io/data/2016-08-09/discrim-architecture.png)

In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data

In [2]:
def weight_init(shape):
    return tf.truncated_normal(shape, stddev=0.1)

def bias_init(shape):
    return tf.constant(0.1, shape=shape)

In [3]:
# 참조한 image completion 코드에서는 다른 식으로 구현하는데, 그게 더 빠른가?
# 특이하게 구현함. https://github.com/bamos/dcgan-completion.tensorflow/blob/master/ops.py
def lrelu(x, leak=0.2):
    return tf.maximum(x, x*leak)

In [27]:
def batch_norm(summed_input, n_out, is_training):
    return tf.layers.batch_normalization(summed_input, training=is_training)
#     return tf.contrib.layers.batch_norm(summed_input, center=True, scale=True, is_training=is_training)

In [None]:
def batch_norm(x, n_out, phase_train, scope='bn'):
    """
    Batch normalization on convolutional maps.
    Args:
        x:           Tensor, 4D BHWD input maps
        n_out:       integer, depth of input maps
        phase_train: boolean tf.Varialbe, true indicates training phase
        scope:       string, variable scope
    Return:
        normed:      batch-normalized maps
    """
    with tf.variable_scope(scope):
        beta = tf.Variable(tf.constant(0.0, shape=[n_out]),
                                     name='beta', trainable=True)
        gamma = tf.Variable(tf.constant(1.0, shape=[n_out]),
                                      name='gamma', trainable=True)
        batch_mean, batch_var = tf.nn.moments(x, [0,1,2], name='moments')
        ema = tf.train.ExponentialMovingAverage(decay=0.5)

        def mean_var_with_update():
            ema_apply_op = ema.apply([batch_mean, batch_var])
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        mean, var = tf.cond(phase_train,
                            mean_var_with_update,
                            lambda: (ema.average(batch_mean), ema.average(batch_var)))
        normed = tf.nn.batch_normalization(x, mean, var, beta, gamma, 1e-3)
    return normed

In [28]:
# 일단 MNIST datset 은 28x28x1 이므로, 
X = tf.placeholder(tf.float32, shape=[None, 784])
Y = tf.placeholder(tf.float32, shape=[None, 10])
isTraining = tf.placeholder(tf.bool)

In [29]:
# reshape for CNN
X_img = tf.reshape(X, [-1, 28, 28, 1])

# first conv layer: 
W1 = tf.Variable(weight_init([5, 5, 1, 64]))

a1 = tf.nn.conv2d(X_img, W1, strides=[1, 2, 2, 1], padding='SAME')
bn1 = batch_norm(a1, 64, isTraining)
h1 = lrelu(bn1)

In [30]:
W2 = tf.Variable(weight_init([5, 5, 64, 128]))

a2 = tf.nn.conv2d(h1, W2, strides=[1, 2, 2, 1], padding='SAME')
bn2 = batch_norm(a2, 128, isTraining)
h2 = lrelu(bn2)

In [31]:
W3 = tf.Variable(weight_init([5, 5, 128, 256]))

a3 = tf.nn.conv2d(h2, W3, strides=[1, 2, 2, 1], padding='SAME')
bn3 = batch_norm(a3, 256, isTraining)
h3 = lrelu(bn3)

In [32]:
# FC layer. 원래 더 해야 하지만 이건 MNIST 니까 여기까지만 하자.
# 원래는 sigmoid 로 0/1 만 판별하는데, MNIST 는 10개니까 softmax 로 해야함

W4 = tf.Variable(weight_init([4096, 10]))
b4 = tf.Variable(bias_init([10]))
h3_flat = tf.reshape(h3, [-1, 4096])

# last layer activation = logit
logits = tf.matmul(h3_flat, W4) + b4
y_prob = tf.nn.softmax(logits)

In [33]:
loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y)
solver = tf.train.AdamOptimizer().minimize(loss)

In [34]:
pred = tf.argmax(logits, axis=1)
correction = tf.equal(pred, tf.argmax(Y, axis=1))
accuracy = tf.reduce_mean(tf.cast(correction, "float"))

In [35]:
mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [36]:
def train():
    # moving average 때문에 얘를 해줘야 된다는데 잘 모르겠음
#     update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
#     with tf.control_dependencies(update_ops):
#         solver = tf.train.AdamOptimizer().minimize(loss)
    
    batch_size = 100
    total_batch = mnist.train.num_examples / batch_size
    history = []

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    for epoch in range(20):
        loss_sum = 0
        for i in range(total_batch / 20):
            batch = mnist.train.next_batch(batch_size)

            loss_cur, _ = sess.run([loss, solver], feed_dict={X: batch[0], Y: batch[1], isTraining: True})
            loss_sum += np.average(loss_cur)

        # accuracy & loss calc
        train_loss = loss_sum / total_batch
        test_loss = np.average(sess.run(loss, feed_dict={X: mnist.test.images, Y: mnist.test.labels, isTraining: False}))

        # calculate accuracy for both train and test
        train_acc = sess.run(accuracy, {X: mnist.train.images[:10000], Y: mnist.train.labels[:10000], isTraining: False})
        test_acc = sess.run(accuracy, {X: mnist.test.images, Y: mnist.test.labels, isTraining: False})
        print("[{:3}] train: {:.5f} / test: {:.5f} | [acc] train: {:.4f} / test: {:.4f}"
              .format(epoch+1, train_loss, test_loss, train_acc, test_acc))
        history.append([train_acc, test_acc])
    
    return history

In [37]:
history_bn = train()

[  1] train: 0.07143 / test: 2.04948 | [acc] train: 0.7314 / test: 0.7382
[  2] train: 0.03412 / test: 1.76274 | [acc] train: 0.7806 / test: 0.7875
[  3] train: 0.02681 / test: 2.02391 | [acc] train: 0.7755 / test: 0.7755
[  4] train: 0.02151 / test: 1.14994 | [acc] train: 0.8283 / test: 0.8347
[  5] train: 0.01732 / test: 0.75705 | [acc] train: 0.8806 / test: 0.8918
[  6] train: 0.01300 / test: 1.60495 | [acc] train: 0.7862 / test: 0.7912
[  7] train: 0.01150 / test: 0.65120 | [acc] train: 0.8867 / test: 0.8966
[  8] train: 0.01122 / test: 0.65268 | [acc] train: 0.8903 / test: 0.8984
[  9] train: 0.01274 / test: 0.69672 | [acc] train: 0.8739 / test: 0.8912
[ 10] train: 0.01236 / test: 0.37112 | [acc] train: 0.9342 / test: 0.9377
[ 11] train: 0.00931 / test: 0.47703 | [acc] train: 0.9220 / test: 0.9301
[ 12] train: 0.00936 / test: 0.39207 | [acc] train: 0.9332 / test: 0.9372
[ 13] train: 0.01030 / test: 0.72610 | [acc] train: 0.8951 / test: 0.8956
[ 14] train: 0.00975 / test: 0.42332 |

## Plotting

In [None]:
%matplotlib inline

import matplotlib
import matplotlib.pyplot as plt

In [None]:
history = np.array(history)
history_bn = np.array(history_bn)

In [None]:
plt.plot(history[:, 1], label="without bn")
plt.plot(history_bn[:, 1], label="bn")
plt.legend(loc=4)