In [1]:
%matplotlib inline
# standard library
import itertools

# pandas
import pandas as pd

# numpy, matplotlib, seaborn
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# tensorflow
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# sklearn
import sklearn.datasets
from sklearn.preprocessing import OneHotEncoder

# this styling is purely my preference
# less chartjunk
# NOTE: sns.set() re-applies the default plotting context (font_scale=1), so it
# must run *before* set_context(), otherwise the context settings are undone.
# The matplotlib rc key for line width is 'lines.linewidth' (plural).
sns.set(style='ticks', palette='Set2')
sns.set_context('notebook', font_scale=1.5, rc={'lines.linewidth': 2.5})

In [2]:
# load the MNIST data sets from the local data directory;
# one_hot=True asks for the labels as one-hot vectors
mnist = input_data.read_data_sets('../MNIST_data/', one_hot=True)

Extracting ../MNIST_data/train-images-idx3-ubyte.gz
Extracting ../MNIST_data/train-labels-idx1-ubyte.gz
Extracting ../MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ../MNIST_data/t10k-labels-idx1-ubyte.gz


In [3]:
# get the length of one image vector and the number of labels
# (both are read off the first training example)
img_size = mnist.train.images[0].shape[0]
no_of_classes = mnist.train.labels[0].shape[0]

### Creating the layers
These are the building blocks for our network. The same can be achieved with `tf.layers`, but they are written by hand here as a proof-of-concept implementation.

In [4]:
def weight_variable(shape):
    """Create a `tf.Variable` of the given shape for use as layer weights.

    The initial values are drawn from a truncated normal distribution with
    a standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))


def bias_variable(shape):
    """Create a `tf.Variable` of the given shape for use as layer biases.

    Every element starts at the constant value 0.1.
    """
    return tf.Variable(tf.constant(0.1, shape=shape))


def nn_fully_connected_layer(input_tens, output_num, layer_name):
    """Build the affine part of a fully connected layer.

    Args:
        input_tens: input tensor; the size of its last dimension is taken
            as the number of input features.
        output_num: number of output units of the layer.
        layer_name: name scope under which the layer's ops are grouped.

    Returns:
        The pre-activations ``matmul(input_tens, weights) + biases``. No
        non-linearity is applied here; the caller adds the activation.
    """
    n_inputs = input_tens.get_shape().as_list()[-1]

    # nested name scopes keep the parts of the layer apart,
    # which makes the graph easier to read in TensorBoard
    with tf.name_scope(layer_name):
        with tf.name_scope('weights'):
            w = weight_variable([n_inputs, output_num])
        with tf.name_scope('biases'):
            b = bias_variable([output_num])
        with tf.name_scope('Wx_plus_b'):
            return tf.matmul(input_tens, w) + b


def nn_2dconv_layer(input_tens, filter_shape, filter_strides=[1, 1, 1, 1], fill='SAME', layer_name='hidden'):
    """Build a 2D convolution layer with biases and return its pre-activations.

    Args:
        input_tens: input tensor handed to `tf.nn.conv2d`.
        filter_shape: shape of the convolution kernel variable; its last
            entry is the number of output channels and therefore also the
            number of biases.
        filter_strides: strides passed to `tf.nn.conv2d`. Defaults to a
            stride of 1 in every dimension.
        fill: padding scheme passed to `tf.nn.conv2d` (default 'SAME').
        layer_name: name scope under which the layer's ops are grouped.

    Returns:
        The convolution output with the biases added. No non-linearity is
        applied here.
    """
    with tf.name_scope(layer_name):
        with tf.name_scope('weights'):
            weights = weight_variable(filter_shape)
        with tf.name_scope('biases'):
            # the last dimension of the filter is the number of output
            # channels, which is how many biases we need
            biases = bias_variable([filter_shape[-1]])
        with tf.name_scope('convolution'):
            # honour the caller's strides (previously hard-coded to 1);
            # copy them so the shared default list is never handed out
            conv = tf.nn.conv2d(input_tens, weights,
                                strides=list(filter_strides), padding=fill)
        with tf.name_scope('conv_plus_b'):
            conv_biased = tf.add(conv, biases)

    # return the convolution plus biases
    return conv_biased


def nn_max_pool_layer(x, x_dim, y_dim, fill='SAME', layer_name='hidden'):
    """Given an input tensor, create a max-pooling (downscaling) layer.

    The pooling window and the strides are both ``[1, x_dim, y_dim, 1]``,
    so consecutive windows do not overlap. `fill` is the padding scheme and
    `layer_name` becomes the name of the pooling op.
    """
    pool_shape = [1, x_dim, y_dim, 1]
    return tf.nn.max_pool(x,
                          ksize=pool_shape,
                          strides=list(pool_shape),
                          padding=fill,
                          name=layer_name)


In [5]:
# demo placeholder for trying out the layers: any batch size, 50 features per row
x_input = tf.placeholder(tf.float32, shape=[None, 50])

In [6]:
# fully connected: the helper returns pre-activations only,
# so the ReLU is applied separately here
preact = nn_fully_connected_layer(x_input, 100, layer_name='hidden1_preactiv')
activ = tf.nn.relu(preact, name='hidden1_activ')
print(activ)

Tensor("hidden1_activ:0", shape=(?, 100), dtype=float32)


In [7]:
# convolution
# The 50-feature demo placeholder cannot be viewed as whole 28x28 images
# (the reshape only passes because the batch size is unknown when the graph
# is built), so this demo uses its own placeholder of flattened 28x28 images.
conv_demo_input = tf.placeholder(tf.float32, shape=[None, 28 * 28])
x_reshaped = tf.reshape(conv_demo_input, [-1, 28, 28, 1])
conv_preact = nn_2dconv_layer(x_reshaped, [5, 5, 1, 32], layer_name='conv_layer_preactiv')
conv_act = tf.nn.relu(conv_preact, name='conv_layer_activ')
conv_act

<tf.Tensor 'conv_layer_activ:0' shape=(?, 28, 28, 32) dtype=float32>

In [8]:
# downsampling
# The 50-feature demo placeholder cannot be viewed as whole 28x28 images
# (the reshape only passes because the batch size is unknown when the graph
# is built), so this demo uses its own placeholder of flattened 28x28 images.
pool_demo_input = tf.placeholder(tf.float32, shape=[None, 28 * 28])
x_reshaped = tf.reshape(pool_demo_input, [-1, 28, 28, 1])
downsamp = nn_max_pool_layer(x_reshaped, 2, 2, layer_name='downsamp')  # 2x2 downsampling
downsamp

<tf.Tensor 'downsamp:0' shape=(?, 14, 14, 1) dtype=float32>

### Building the model
Now that we have the building blocks, we can build a convolutional model with two convolution layers (each followed by max pooling), a fully connected layer and dropout.

In [9]:
def get_conv_model(x_input, keep_prob):
    """Assemble the convolutional network and return its readout tensor.

    Pipeline: reshape to 28x28x1 images -> 5x5 convolution (32 feature
    maps) + ReLU -> 2x2 max pooling -> 5x5 convolution (64 feature maps)
    + ReLU -> 2x2 max pooling -> fully connected layer (1024 units) + ReLU
    -> dropout -> fully connected readout (10 units).

    Args:
        x_input: input tensor; it is reshaped to ``[-1, 28, 28, 1]``.
        keep_prob: passed to `tf.nn.dropout` as its second argument.

    Returns:
        The pre-activations of the final 10-unit layer. No softmax is
        applied here.
    """
    # view every input row as a single-channel (greyscale) 28x28 image
    images = tf.reshape(x_input, [-1, 28, 28, 1])

    # stage 1: convolution (1 -> 32 feats), ReLU, 2x2 pooling
    stage1 = tf.nn.relu(
        nn_2dconv_layer(images, [5, 5, 1, 32], layer_name='conv1_preactiv'),
        name='conv1_activ')
    stage1_pooled = nn_max_pool_layer(stage1, 2, 2, layer_name='pool1')

    # stage 2: convolution (32 -> 64 feats), ReLU, 2x2 pooling
    stage2 = tf.nn.relu(
        nn_2dconv_layer(stage1_pooled, [5, 5, 32, 64], layer_name='conv2_preactiv'),
        name='conv2_activ')
    stage2_pooled = nn_max_pool_layer(stage2, 2, 2, layer_name='pool2')

    # two 2x2 poolings take 28x28 down to 7x7, with 64 feature maps each
    flat_size = 7 * 7 * 64
    flattened = tf.reshape(stage2_pooled, shape=[-1, flat_size])

    # fully connected layer (maps to 1024 feats in total)
    dense = tf.nn.relu(
        nn_fully_connected_layer(flattened, 1024, layer_name='fully_connected_preactiv'),
        name='fully_connected_activ')

    # dropout right before the prediction layer
    dense_dropped = tf.nn.dropout(dense, keep_prob)

    # final layer
    return nn_fully_connected_layer(dense_dropped, 10, layer_name='readout')

In [14]:
# building the model
# NOTE: this rebinds x_input (previously the 50-feature demo placeholder)
# to a placeholder sized for real images
x_input = tf.placeholder(tf.float32, shape=[None, img_size])
y_expected = tf.placeholder(tf.float32, shape=[None, no_of_classes])

# neural net; the dropout keep probability is fed at run time so that it
# can differ between training and evaluation
dropout_keep_prob = tf.placeholder(tf.float32)
y_conv = get_conv_model(x_input, dropout_keep_prob)

# define the loss and the optimizer
# y_conv is passed as `logits`: the model returns raw readout values
cross_entropy_loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_expected, logits=y_conv))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy_loss)
# a prediction is correct when the arg-max of the output matches the
# arg-max of the expected label vector
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_expected, 1))
# accuracy = mean of the correctness flags cast to float
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# NOTE: the same can be done with tf.metrics

In [20]:
%%time
# interactive session: it is not closed in this cell, so later cells can
# keep using the trained graph without running everything again
# NOTE: log_device_placement=True makes the session log device placement
sess = tf.InteractiveSession(config=tf.ConfigProto(log_device_placement=True))

# training: 20000 steps on mini-batches of 50 examples
sess.run(tf.global_variables_initializer())
for i in range(20000):
    batch = mnist.train.next_batch(50)
    if i % 100 == 0:
        # report accuracy every 100 steps with keep probability 1.0 fed to
        # the dropout layer; this is measured on the current batch *before*
        # the training step on that batch
        train_accuracy = accuracy.eval(feed_dict={x_input: batch[0], 
                                                  y_expected: batch[1], 
                                                  dropout_keep_prob: 1.0})
        print('step %d, training accuracy %g' % (i, train_accuracy))
    # training step with a dropout keep probability of 0.5
    train_step.run(feed_dict={x_input: batch[0], y_expected: batch[1], dropout_keep_prob: 0.5})

  

step 0, training accuracy 0.04
step 100, training accuracy 0.82
step 200, training accuracy 0.9
step 300, training accuracy 0.92
step 400, training accuracy 0.92
step 500, training accuracy 0.92
step 600, training accuracy 0.9
step 700, training accuracy 0.96
step 800, training accuracy 0.94
step 900, training accuracy 0.96
step 1000, training accuracy 0.92
step 1100, training accuracy 0.92
step 1200, training accuracy 0.94
step 1300, training accuracy 0.96
step 1400, training accuracy 0.94
step 1500, training accuracy 1
step 1600, training accuracy 1
step 1700, training accuracy 0.94
step 1800, training accuracy 0.98
step 1900, training accuracy 0.94
step 2000, training accuracy 0.94
step 2100, training accuracy 0.96
step 2200, training accuracy 0.96
step 2300, training accuracy 1
step 2400, training accuracy 0.98
step 2500, training accuracy 0.94
step 2600, training accuracy 0.96
step 2700, training accuracy 0.98
step 2800, training accuracy 0.98
step 2900, training accuracy 0.98
ste

In [26]:
# NOTE: evaluated on the first 1000 test examples only, with keep
# probability 1.0 fed to the dropout layer
print('test accuracy %g' % accuracy.eval(feed_dict={
    x_input: mnist.test.images[:1000], y_expected: mnist.test.labels[:1000], dropout_keep_prob: 1.0}))

test accuracy 0.989
