# DeepLearning 02. Convolutional Neural Networks (CNN)

* **Implementation 3a**: CNN with Tensorflow (basic)
    * *Source*: Tensorflow tutorials (https://www.tensorflow.org/tutorials/mnist/pros/)
    * *Contribution*: 
        * Code structure consistent with previous sections
        * Hopefully clearer pipeline

* **Implementation 3b**: CNN with Tensorflow (with tuning options)

In [17]:
import time, random
import numpy as np
import tensorflow as tf
from tensorflow import placeholder, Variable, constant
from tensorflow.examples.tutorials.mnist import input_data

## I. Implementation 3a

In [2]:
# LOAD DATA

# Directory that holds the MNIST archives (machine-specific absolute path).
path = "/Users/jacobsw/Desktop/UNIV/SPRING_2017/LIN389C_RSCH_COMPLING/CODE/MNIST/"

# Read the data sets from that directory, with labels one-hot encoded.
mnist = input_data.read_data_sets(train_dir=path, one_hot=True)

Extracting /Users/jacobsw/Desktop/UNIV/SPRING_2017/LIN389C_RSCH_COMPLING/CODE/MNIST/train-images-idx3-ubyte.gz
Extracting /Users/jacobsw/Desktop/UNIV/SPRING_2017/LIN389C_RSCH_COMPLING/CODE/MNIST/train-labels-idx1-ubyte.gz
Extracting /Users/jacobsw/Desktop/UNIV/SPRING_2017/LIN389C_RSCH_COMPLING/CODE/MNIST/t10k-images-idx3-ubyte.gz
Extracting /Users/jacobsw/Desktop/UNIV/SPRING_2017/LIN389C_RSCH_COMPLING/CODE/MNIST/t10k-labels-idx1-ubyte.gz


In [3]:
# HELPER FUNCS

def initialize_variable(shape, vtype):
    """
    Create a trainable parameter tensor for a weight or a bias.
    
    Arguments:
    shape: Shape of the parameter, e.g. [height, width, input/channel, output/feature_map]
           for a convolution filter, or [n] for a bias vector.
    vtype: 'weight' selects truncated-normal initial values (stddev 0.1);
           any other value (callers pass 'bias') selects a constant 0.1 fill.
    
    Returns a tensorflow Variable (tensor) of desired shape.
    """
    if vtype == 'weight':
        initial = tf.truncated_normal(shape, stddev=.1)
    else:
        initial = tf.constant(.1, shape=shape)
    # Both kinds are wrapped in a Variable: a bare tf.constant is not a
    # parameter, so the optimizer would never update a bias built from it.
    return tf.Variable(initial)
    
def conv2d(x, W):
    """
    Convolve an input tensor with a bank of filters (stride 1, 'SAME' padding).
    
    Arguments:
    x: Input tensor to convolve.
    W: Filter weights applied to x.
    
    Returns the tensor produced by tf.nn.conv2d.
    """
    # One stride entry per input dimension; all ones moves the filter a single
    # step at a time along every dimension.
    unit_strides = [1] * 4
    feature_maps = tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
    return feature_maps

def max_pool_2x2(x):
    """
    Max-pool an input tensor with a 2 x 2 window moved in steps of 2 ('SAME' padding).
    
    Arguments:
    x: Input tensor (in this file, the output of a convolution + ReLU).
    
    Returns the tensor produced by tf.nn.max_pool.
    """
    # The same four-entry layout is used for the window size and for the step:
    # 1 along the first and last dimensions, 2 along the two middle ones.
    window = [1, 2, 2, 1]
    step = list(window)
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

In [4]:
# SET CONFIGS

num_epochs = 30                        # number of passes over the training set
num_train = mnist.train.num_examples   # size of the training set
num_batchs = num_train // 100          # whole groups of 100 examples per pass (remainder dropped)

In [7]:
# BUILD GRAPH

# set sizes for input and output
x = tf.placeholder(tf.float32, shape=[None, 784])
y = tf.placeholder(tf.float32, shape=[None, 10])
x_image = tf.reshape(x, [-1, 28, 28, 1]) # flat 784 vector -> 28 x 28 image with 1 channel.

# config conv-layer 1
W_conv1 = initialize_variable([5,5,1,32], vtype='weight') # 5 x 5 filter, channel=1 (grayscale), 32 feature maps.
b_conv1 = initialize_variable([32], vtype='bias')
h_conv1 = tf.nn.relu(tf.add(conv2d(x_image, W_conv1), b_conv1))
h_pool1 = max_pool_2x2(h_conv1)
layer1 = h_pool1 # image: 28 x 28 -> 14 x 14 by maxpooling; feature maps: 1 -> 32 by convolution.

# config conv-layer 2
W_conv2 = initialize_variable([5,5,32,64], vtype='weight')
b_conv2 = initialize_variable([64], vtype='bias')
h_conv2 = tf.nn.relu(tf.add(conv2d(layer1, W_conv2), b_conv2))
h_pool2 = max_pool_2x2(h_conv2)
layer2 = h_pool2 # image: 14 x 14 -> 7 x 7 by maxpooling; feature maps: 32 -> 64 by convolution.
layer2_mat = tf.reshape(layer2, [-1, 7*7*64]) # flatten, but ignore num_in_batch with -1.

# config fully-connected layer 1
W_dense1 = initialize_variable([7*7*64, 1024], vtype='weight')
b_dense1 = initialize_variable([1024], vtype='bias')
h_dense1 = tf.nn.relu(tf.add(tf.matmul(layer2_mat, W_dense1), b_dense1))
layer3 = h_dense1 # flattened convoluted image: 7*7*64=3136 -> 1024.
keep_prob = tf.placeholder(tf.float32) # fed per run: .5 while training, 1. while evaluating.
layer3_drop = tf.nn.dropout(layer3, keep_prob) # dropout at a prob.

# config fully-connected layer 2
W_dense2 = initialize_variable([1024, 10], vtype='weight')
b_dense2 = initialize_variable([10], vtype='bias')
logits = tf.add(tf.matmul(layer3_drop, W_dense2), b_dense2) # 1024 -> 10 unnormalized class scores.
y_pred = tf.nn.softmax(logits) # class probabilities for 10-class prediction.

# set cost (crossent)
# Computed from the logits instead of tf.log(y_pred): once softmax underflows to 0
# for some class, the explicit log yields -inf and 0 * -inf turns the cost into NaN.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))

# set optimization option
train = tf.train.AdamOptimizer(1e-4).minimize(cost) # faster and performs better than basic gradient descent.

# set port for computing accuracy
correct_pred = tf.equal(tf.argmax(y_pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [13]:
%%time

# TRAINING

# Evaluation feeds: keep_prob of 1. switches dropout off.
valid_feed = {x: mnist.validation.images, y: mnist.validation.labels, keep_prob: 1.}
test_feed = {x: mnist.test.images, y: mnist.test.labels, keep_prob: 1.}

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in xrange(num_epochs):
        tic = time.time()
        # One epoch: num_batchs optimizer steps on batches of 100, dropout keep_prob .5.
        for _ in xrange(num_batchs):
            images, labels = mnist.train.next_batch(100)
            sess.run(train, feed_dict={x: images, y: labels, keep_prob: .5})
        valid_acc = sess.run(accuracy, feed_dict=valid_feed)
        # The reported time covers the training steps and the validation run.
        elapsed = time.time() - tic
        print("Epoch %s validation accuracy: %s (%s secs)" % (epoch, valid_acc, elapsed))
    # After the last epoch, report accuracy on the test set.
    test_acc = sess.run(accuracy, feed_dict=test_feed)
    print("Final accuracy: %s" % test_acc)

Epoch 0 validation accuracy: 0.957 (123.180384874 secs)
Epoch 1 validation accuracy: 0.9736 (124.004452944 secs)
Epoch 2 validation accuracy: 0.9796 (127.318325996 secs)
Epoch 3 validation accuracy: 0.9824 (124.715348959 secs)
Epoch 4 validation accuracy: 0.9854 (124.891174078 secs)
Epoch 5 validation accuracy: 0.9856 (124.213739872 secs)
Epoch 6 validation accuracy: 0.9866 (129.192487955 secs)
Epoch 7 validation accuracy: 0.9884 (123.489541054 secs)
Epoch 8 validation accuracy: 0.989 (124.504203081 secs)
Epoch 9 validation accuracy: 0.9892 (122.247591019 secs)
Epoch 10 validation accuracy: 0.99 (128.489157915 secs)
Epoch 11 validation accuracy: 0.9896 (133.802914143 secs)
Epoch 12 validation accuracy: 0.9914 (115.682154894 secs)
Epoch 13 validation accuracy: 0.9914 (126.977251053 secs)
Epoch 14 validation accuracy: 0.9906 (122.074306965 secs)
Epoch 15 validation accuracy: 0.9912 (128.885483027 secs)
Epoch 16 validation accuracy: 0.992 (123.629317045 secs)
Epoch 17 validation accuracy:

## II. Implementation 3b

* TODO: Finding the right hyperparams might take quite a bit of time.

In [54]:
# HELPER FUNCS

def initialize_variable(shape, vtype):
    """
    Initialize weights (Xavier/Glorot uniform, Glorot & Bengio (2010)) or biases (constant 0.1).
    
    Arguments:
    shape: Shape of weight/bias matrix. A weight shape needs at least two dimensions:
           the last two are read as [input, output]; any leading ones (filter height
           and width for a convolution) are treated as the receptive field.
    vtype: 'bias' selects the constant fill; any other value (callers pass 'weight')
           selects the Xavier-initialized weights.
    
    Returns a tensorflow Variable (tensor) of desired shape.
    Raises ValueError if a weight shape has fewer than two dimensions.
    """
    if vtype == 'bias':
        # Wrapped in a Variable so the bias is a parameter the optimizer can update.
        return tf.Variable(tf.constant(.1, shape=shape))
    if len(shape) < 2:
        raise ValueError("weight shape needs at least 2 dimensions, got %r" % (shape,))
    # Works for dense [in, out] and conv [height, width, in, out] shapes alike;
    # np.prod of an empty slice is 1, so dense layers get a receptive field of 1.
    receptive_field = int(np.prod(shape[:-2]))
    fan_in = shape[-2] * receptive_field
    fan_out = shape[-1] * receptive_field
    # Plain Glorot-uniform limit. NOTE(review): the former 4x multiplier is the
    # variant recommended for sigmoid units; the layers in this file use ReLU,
    # so it is dropped here -- confirm this is the intended scale.
    limit = float(np.sqrt(6.0 / (fan_in + fan_out)))
    return tf.Variable(tf.random_uniform(shape=shape, minval=-limit, maxval=limit))
    
def conv2d(x, W):
    """
    Apply a stride-1, 'SAME'-padded 2D convolution of W over x.
    
    Arguments:
    x: Input tensor.
    W: Filter weights shared across all positions of x.
    
    Returns the output of tf.nn.conv2d (the feature maps).
    """
    conv_options = {
        'strides': [1, 1, 1, 1],  # one entry per input dimension, each a step of 1
        'padding': 'SAME',
    }
    return tf.nn.conv2d(x, W, **conv_options)

def max_pool_2x2(x):
    """
    Apply 'SAME'-padded max-pooling with a 2 x 2 window and a step of 2.
    
    Arguments:
    x: Input tensor (in this file, the output of a convolution + ReLU).
    
    Returns the output of tf.nn.max_pool.
    """
    pool_options = {
        'ksize': [1, 2, 2, 1],    # window: 2 along the two middle dimensions, 1 elsewhere
        'strides': [1, 2, 2, 1],  # step: laid out the same way as ksize
        'padding': 'SAME',
    }
    return tf.nn.max_pool(x, **pool_options)

In [55]:
# SET CONFIGS

# -- schedule
num_epochs = 30                        # maximum number of passes over the training set
num_train = mnist.train.num_examples   # size of the training set
num_batchs = num_train // 100          # whole groups of 100 examples per pass (remainder dropped)

# -- regularization
lmd = 5.0                              # multiplier on the L2 regularizer in the cost

# -- early stopping
early_stop_steps = 10                  # stop once this many epochs counted as "no improvement"
valid_tol = 0.0                        # threshold on the validation-accuracy difference

# -- learning rate
lr = 1e-4                              # rate handed to the optimizer
lr_stop = 7                            # stop once the rate has been decayed this many times
lr_decay = 0.5                         # factor applied to the rate at each decay

# -- other
mu = 0.5                               # not referenced in the visible code; presumably a momentum term -- TODO confirm

In [56]:
# BUILD GRAPH

# set sizes for input and output
x = tf.placeholder(tf.float32, shape=[None, 784])
y = tf.placeholder(tf.float32, shape=[None, 10])
x_image = tf.reshape(x, [-1, 28, 28, 1]) # flat 784 vector -> 28 x 28 image with 1 channel.

# config conv-layer 1
W_conv1 = initialize_variable([5,5,1,32], vtype='weight') # 5 x 5 filter, channel=1 (grayscale), 32 feature maps.
b_conv1 = initialize_variable([32], vtype='bias')
h_conv1 = tf.nn.relu(tf.add(conv2d(x_image, W_conv1), b_conv1))
h_pool1 = max_pool_2x2(h_conv1)
layer1 = h_pool1 # image: 28 x 28 -> 14 x 14 by maxpooling; feature maps: 1 -> 32 by convolution.

# config conv-layer 2
W_conv2 = initialize_variable([5,5,32,64], vtype='weight')
b_conv2 = initialize_variable([64], vtype='bias')
h_conv2 = tf.nn.relu(tf.add(conv2d(layer1, W_conv2), b_conv2))
h_pool2 = max_pool_2x2(h_conv2)
layer2 = h_pool2 # image: 14 x 14 -> 7 x 7 by maxpooling; feature maps: 32 -> 64 by convolution.
layer2_mat = tf.reshape(layer2, [-1, 7*7*64]) # flatten, but ignore num_in_batch with -1.

# config fully-connected layer 1
W_dense1 = initialize_variable([7*7*64, 1024], vtype='weight')
b_dense1 = initialize_variable([1024], vtype='bias')
h_dense1 = tf.nn.relu(tf.add(tf.matmul(layer2_mat, W_dense1), b_dense1))
layer3 = h_dense1 # flattened convoluted image: 7*7*64=3136 -> 1024.
keep_prob = tf.placeholder(tf.float32) # fed per run: .5 while training, 1. while evaluating.
layer3_drop = tf.nn.dropout(layer3, keep_prob) # dropout at a prob.

# config fully-connected layer 2
W_dense2 = initialize_variable([1024, 10], vtype='weight')
b_dense2 = initialize_variable([10], vtype='bias')
logits = tf.add(tf.matmul(layer3_drop, W_dense2), b_dense2) # 1024 -> 10 unnormalized class scores.
y_pred = tf.nn.softmax(logits) # class probabilities for 10-class prediction.

# set cost (crossent + L2 penalty on all weight matrices)
# The cross-entropy is computed from the logits instead of tf.log(y_pred): once softmax
# underflows to 0 for some class, the explicit log yields -inf and 0 * -inf gives NaN.
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))
# Each tf.nn.l2_loss term is already a scalar, so the sum needs no further reduction.
regularizer = tf.nn.l2_loss(W_conv1) + tf.nn.l2_loss(W_conv2) + \
              tf.nn.l2_loss(W_dense1) + tf.nn.l2_loss(W_dense2)
cost = loss + lmd * regularizer

# set optimization option
# NOTE: `lr` is read here as a plain Python number, so this op uses the value `lr`
# holds at the moment this cell runs.
train = tf.train.AdamOptimizer(lr).minimize(cost) # faster and performs better than basic gradient descent.

# set port for computing accuracy
correct_pred = tf.equal(tf.argmax(y_pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [58]:
%%time

# TRAINING

# Learning-rate decay needs a rate that can change between steps. The op `train`
# was built from the Python number `lr`, so changing `lr` afterwards never reaches
# it; this cell therefore builds its own optimizer op driven by a placeholder.
# NOTE: every run of this cell adds another optimizer (and its slot variables)
# to the default graph.
lr_feed = tf.placeholder(tf.float32, shape=[])
train_decayed = tf.train.AdamOptimizer(lr_feed).minimize(cost)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    cur_valid_acc = 0.0   # best validation accuracy seen so far
    cur_lr = lr           # working copy, so the configured `lr` is not overwritten
    no_improve_steps = 0  # cumulative count of non-improving epochs (never reset)
    lr_decay_steps = 0    # cumulative count of learning-rate decays
    stop_at = num_epochs
    for i in xrange(num_epochs):
        start_time = time.time()
        for _ in xrange(num_batchs):
            batch_xs, batch_ys = mnist.train.next_batch(100)
            sess.run(train_decayed, feed_dict={x: batch_xs, y: batch_ys,
                                               keep_prob: .5, lr_feed: cur_lr})
        valid_acc = sess.run(accuracy, feed_dict={x: mnist.validation.images,
                                                  y: mnist.validation.labels,
                                                  keep_prob: 1.})
        end_time = time.time()
        # Report before the stop check so the epoch that triggers the stop is printed too.
        print("Epoch %s validation accuracy: %s (%s secs)" % (i, valid_acc, end_time - start_time))
        # Compare against the best accuracy so far (the former `valid_acc - valid_acc`
        # was always 0, so the test never looked at earlier epochs).
        if valid_acc - cur_valid_acc < valid_tol:
            no_improve_steps += 1
            cur_lr = cur_lr * lr_decay
            lr_decay_steps += 1
        cur_valid_acc = max(cur_valid_acc, valid_acc)
        if no_improve_steps == early_stop_steps or lr_decay_steps == lr_stop:
            stop_at = i
            break
    # The trailing number is the epoch index at which training stopped
    # (num_epochs if it ran to completion).
    test_acc = sess.run(accuracy, feed_dict={x: mnist.test.images,
                                             y: mnist.test.labels,
                                             keep_prob: 1.})
    print("Final accuracy: %s (%s)" % (test_acc, stop_at))