In [1]:
# A demo for Graph visualization with TensorBoard
# https://www.tensorflow.org/get_started/summaries_and_tensorboard

In [2]:
import os
import time
import numpy as np
import tensorflow as tf

In [3]:
# Step 1: load the MNIST data (labels are requested one-hot encoded)
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)

  from ._conv import register_converters as _register_converters


Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [4]:
def variable_summaries(var):
    """Attach TensorBoard summaries describing the tensor ``var``.

    Inside a ``summaries`` name scope this records four scalars (mean,
    standard deviation, maximum, minimum) and one histogram of ``var``.
    Nothing is returned; the summary ops are only added to the graph.
    """
    with tf.name_scope('summaries'):
        avg = tf.reduce_mean(var)
        tf.summary.scalar('mean', avg)
        with tf.name_scope('stddev'):
            squared_dev = tf.square(var - avg)
            spread = tf.sqrt(tf.reduce_mean(squared_dev))
        tf.summary.scalar('stddev', spread)
        # Extremes are emitted in the same order as before: max, then min.
        for tag, reducer in (('max', tf.reduce_max), ('min', tf.reduce_min)):
            tf.summary.scalar(tag, reducer(var))
        tf.summary.histogram('histogram', var)

In [5]:
def weight_variable(shape):
    """Return a ``tf.Variable`` of the given shape.

    Initial values are drawn from a truncated normal with stddev 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Return a ``tf.Variable`` of the given shape, filled with 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

In [6]:
def nn_layer(x, l_in, l_out, l_name,
             act_fn, BN_flag=True):
    """Build one fully connected layer and attach TensorBoard summaries.

    Args:
        x: input tensor multiplied by the weight matrix.
        l_in: number of rows of the weight matrix (input width).
        l_out: number of columns of the weight matrix (output width).
        l_name: name scope that wraps every op created for this layer.
        act_fn: callable applied to the (optionally batch-normalized)
            pre-activation tensor.
        BN_flag: when true, ``x W + b`` is batch-normalized before the
            activation is applied.

    Returns:
        The tensor produced by ``act_fn``.
    """
    with tf.name_scope(l_name):
        with tf.name_scope('weights'):
            weights = weight_variable(shape=[l_in, l_out])
            variable_summaries(weights)
        with tf.name_scope('biases'):
            biases = bias_variable(shape=[l_out])
            variable_summaries(biases)
        with tf.name_scope('Wx_plus_b'):
            linear_out = tf.matmul(x, weights) + biases
            tf.summary.histogram('x_h', linear_out)

        # Input handed to the activation: the raw linear output, unless
        # batch normalization replaces it below.
        act_input = linear_out
        if BN_flag:
            with tf.name_scope('BatchNorm'):
                # Moments are reduced over every axis except the last one;
                # the rank is taken from the layer input `x`.
                reduce_axes = list(range(len(x.get_shape()) - 1))
                batch_mean, batch_var = tf.nn.moments(linear_out, reduce_axes)
                with tf.name_scope('gamma'):
                    gamma = tf.Variable(
                        tf.constant(0.1, shape=batch_mean.get_shape()))
                    variable_summaries(gamma)
                with tf.name_scope('beta'):
                    beta = tf.Variable(
                        tf.constant(0.1, shape=batch_mean.get_shape()))
                    variable_summaries(beta)
                act_input = tf.nn.batch_normalization(
                    x=linear_out,
                    mean=batch_mean,
                    variance=batch_var,
                    offset=beta,
                    scale=gamma,
                    variance_epsilon=1e-5,
                    name='BN')
                tf.summary.histogram('y', act_input)

        with tf.name_scope('activation'):
            activated = act_fn(act_input)
            tf.summary.histogram('activation', activated)

    return activated

In [7]:
def dropout_layer(layer, keep_prob):
    """Apply ``tf.nn.dropout`` to ``layer`` inside a ``dropout`` name scope.

    Args:
        layer: tensor to apply dropout to.
        keep_prob: keep probability forwarded to ``tf.nn.dropout``.

    Returns:
        The tensor returned by ``tf.nn.dropout``.
    """
    with tf.name_scope('dropout'):
        return tf.nn.dropout(layer, keep_prob)

In [8]:
# Graph inputs for the fully connected classifier built in the cells below
# Placeholders
x = tf.placeholder(dtype=tf.float32, shape=[None, 784])
y_ = tf.placeholder(dtype=tf.float32, shape=[None, 10])  # one-hot 10-dimensional vector
with tf.name_scope('dropout'):
    # Keep probability is fed at run time and logged as a scalar summary.
    keep_prob = tf.placeholder(dtype=tf.float32)
    tf.summary.scalar('dropout_keep_probability', keep_prob)

In [9]:
# Layers
# Refer to tf.nn.moments
# FC1: 784 inputs -> l_FC1 ReLU units, followed by dropout
l_FC1 = 512
# l_out uses l_FC1 (not a literal) so the width stays in sync with the
# l_in that the FC2 cell derives from l_FC1.
y_FC1 = nn_layer(x=x, l_in=784, l_out=l_FC1,
                 l_name='FC1',
                 act_fn=tf.nn.relu, BN_flag=False)
y_FC1_drop = dropout_layer(y_FC1, keep_prob)

In [10]:
# FC2: l_FC1 inputs -> l_FC2 ReLU units, followed by dropout
l_FC2 = 256
y_FC2 = nn_layer(
    x=y_FC1_drop,
    l_in=l_FC1,
    l_out=l_FC2,
    l_name='FC2',
    act_fn=tf.nn.relu,
    BN_flag=False,
)
y_FC2_drop = dropout_layer(y_FC2, keep_prob)

In [11]:
# FC3: l_FC2 inputs -> l_FC3 ReLU units, followed by dropout
l_FC3 = 128
y_FC3 = nn_layer(
    x=y_FC2_drop,
    l_in=l_FC2,
    l_out=l_FC3,
    l_name='FC3',
    act_fn=tf.nn.relu,
    BN_flag=False,
)
y_FC3_drop = dropout_layer(y_FC3, keep_prob)

In [12]:
# softmax: output layer, l_FC3 inputs -> l_s class scores
# NOTE(review): act_fn is tanh, so the values that the loss cell passes as
# `logits` are bounded to [-1, 1] -- confirm this is intended rather than an
# unbounded (identity) output.
l_s = 10
y = nn_layer(
    x=y_FC3_drop,
    l_in=l_FC3,
    l_out=l_s,
    l_name='softmax',
    act_fn=tf.nn.tanh,
    BN_flag=False,
)

In [13]:
# Loss: softmax cross-entropy between the labels y_ and the network output y,
# averaged with reduce_mean and logged as the scalar summary 'loss'
with tf.name_scope('loss'):
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            labels=y_,
            logits=y,
        )
    )
tf.summary.scalar('loss', cross_entropy)

<tf.Tensor 'loss_1:0' shape=() dtype=string>

In [14]:
# Learning-rate schedule and training op
with tf.name_scope('train'):
    with tf.name_scope('learning_rate'):
        # All four schedule inputs are placeholders fed at run time.
        init_lr = tf.placeholder(tf.float32, name='LR')
        global_step = tf.placeholder(tf.float32, name="global_step")
        decay_step = tf.placeholder(tf.float32, name="decay_step")
        decay_rate = tf.placeholder(tf.float32, name="decay_rate")
        learning_rate = tf.train.exponential_decay(
            learning_rate=init_lr,
            global_step=global_step,
            decay_steps=decay_step,
            decay_rate=decay_rate,
            staircase=False,
        )
    # Plain gradient descent on the cross-entropy loss.
    train_step = tf.train.GradientDescentOptimizer(
        learning_rate
    ).minimize(cross_entropy)

In [15]:
# Accuracy: fraction of rows whose arg-max of y matches the arg-max of y_
with tf.name_scope('accuracy'):
    with tf.name_scope('correct_prediction'):
        correct_prediction = tf.equal(
            tf.argmax(y, axis=1),
            tf.argmax(y_, axis=1),
        )
    with tf.name_scope('accuracy'):
        # Cast the boolean matches to float so their mean is the hit rate.
        accuracy = tf.reduce_mean(
            tf.cast(correct_prediction, tf.float32)
        )
tf.summary.scalar('accuracy', accuracy)

<tf.Tensor 'accuracy_1:0' shape=() dtype=string>

In [16]:
# Open an interactive session used to run the graph
sess = tf.InteractiveSession()

In [17]:
# Merge all the summaries and write to logdir
logdir = './log'
# Create the log directory on first run.
if not os.path.exists(logdir):
    os.mkdir(logdir)
# Single op that evaluates every summary registered so far.
merged = tf.summary.merge_all()
# Only the train writer is handed the session graph.
train_writer = tf.summary.FileWriter(logdir + '/train',
                                      sess.graph)
test_writer = tf.summary.FileWriter(logdir + '/test')
# Initialize all global variables before any training step runs.
sess.run(tf.global_variables_initializer())

In [18]:
def feed_dict(train, batchsize=100, drop=0.5, lr_dict=None):
    """Make a TensorFlow feed_dict: maps data onto Tensor placeholders.

    Args:
        train: when truthy, feed the next training mini-batch; otherwise
            feed the full test images and labels.
        batchsize: number of examples requested from
            ``mnist.train.next_batch`` (training only).
        drop: value fed to ``keep_prob`` for training batches; the test
            branch always feeds 1.0.
        lr_dict: optional mapping of additional placeholder feeds that is
            merged into the training feed. Ignored for the test branch.

    Returns:
        A dict mapping placeholders to the values to feed.
    """
    if train:
        xs, ys = mnist.train.next_batch(batchsize)
        f_dict = {x: xs, y_: ys, keep_prob: drop}
        # lr_dict defaults to None; dict.update(None) would raise TypeError.
        if lr_dict is not None:
            f_dict.update(lr_dict)
    else:
        xs, ys = mnist.test.images, mnist.test.labels
        f_dict = {x: xs, y_: ys, keep_prob: 1.0}

    return f_dict

In [19]:
# Training the model by repeatedly running train_step
# (`time` is already imported in the notebook's import cell.)
epochs = 1000      # number of loop iterations (one mini-batch per iteration)
batchsize = 100    # mini-batch size, also fed as the decay_step of the schedule

lr_init = 0.5      # initial learning rate fed to the decay schedule
d_rate = 0.9       # decay rate fed to the decay schedule

for i in range(epochs):
    if i % 100 == 0:
        # Every 100th iteration: evaluate on the test set and log the
        # summaries; train_step is not run on these iterations.
        timestamp = time.strftime('%Y-%m-%d: %H:%M:%S', time.localtime(time.time()))
        summary, acc = sess.run(
            [merged, accuracy], feed_dict=feed_dict(False))
        test_writer.add_summary(summary, i)
        print('[%s]: accuracy at step %s: %s' % (timestamp, i, acc))
    else:
        # The original dict listed `decay_step` twice (first `i`, then
        # `batchsize`); only the last value ever took effect, so the dead
        # first entry is dropped.
        lr_dict = {init_lr: lr_init, global_step: i,
                   decay_step: batchsize,
                   decay_rate: d_rate}
        summary, _ = sess.run(
            [merged, train_step],
            feed_dict=feed_dict(True, batchsize=batchsize, lr_dict=lr_dict))
        train_writer.add_summary(summary, i)

# Push buffered events to disk so TensorBoard sees the final steps. flush()
# (rather than close()) keeps the writers usable if the cell is re-run.
train_writer.flush()
test_writer.flush()

[2018-01-24: 18:24:49]: accuracy at step 0: 0.110999994
[2018-01-24: 18:24:51]: accuracy at step 100: 0.63479996
[2018-01-24: 18:24:53]: accuracy at step 200: 0.794
[2018-01-24: 18:24:55]: accuracy at step 300: 0.8346001
[2018-01-24: 18:24:57]: accuracy at step 400: 0.89100015
[2018-01-24: 18:24:59]: accuracy at step 500: 0.90560013
[2018-01-24: 18:25:02]: accuracy at step 600: 0.9139001
[2018-01-24: 18:25:04]: accuracy at step 700: 0.92040014
[2018-01-24: 18:25:06]: accuracy at step 800: 0.9240001
[2018-01-24: 18:25:08]: accuracy at step 900: 0.93120015


In [20]:
# Display the global variables created while building the graph
tf.global_variables()

[<tf.Variable 'FC1/weights/Variable:0' shape=(784, 512) dtype=float32_ref>,
 <tf.Variable 'FC1/biases/Variable:0' shape=(512,) dtype=float32_ref>,
 <tf.Variable 'FC2/weights/Variable:0' shape=(512, 256) dtype=float32_ref>,
 <tf.Variable 'FC2/biases/Variable:0' shape=(256,) dtype=float32_ref>,
 <tf.Variable 'FC3/weights/Variable:0' shape=(256, 128) dtype=float32_ref>,
 <tf.Variable 'FC3/biases/Variable:0' shape=(128,) dtype=float32_ref>,
 <tf.Variable 'softmax/weights/Variable:0' shape=(128, 10) dtype=float32_ref>,
 <tf.Variable 'softmax/biases/Variable:0' shape=(10,) dtype=float32_ref>]