*Accompanying code examples of the book "Introduction to Artificial Neural Networks and Deep Learning: A Practical Guide with Applications in Python" by [Sebastian Raschka](https://sebastianraschka.com). All code examples are released under the [MIT license](https://github.com/rasbt/deep-learning-book/blob/master/LICENSE). If you find this content useful, please consider supporting the work by buying a [copy of the book](https://leanpub.com/ann-and-deeplearning).*
  
Other code examples and content are available on [GitHub](https://github.com/rasbt/deep-learning-book). The PDF and ebook versions of the book are available through [Leanpub](https://leanpub.com/ann-and-deeplearning).

In [1]:
%load_ext watermark
%watermark -a 'Sebastian Raschka' -v -p tensorflow

Sebastian Raschka 

CPython 3.6.0
IPython 6.0.0

tensorflow 1.1.0


# Model Zoo -- Convolutional Neural Network

### Low-level Implementation

In [2]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data


##########################
### DATASET
##########################

mnist = input_data.read_data_sets("./", one_hot=True)


##########################
### SETTINGS
##########################

# Hyperparameters
learning_rate = 0.1
dropout_keep_proba = 0.5
training_epochs = 1
batch_size = 32

print_interval = 500

# Architecture
input_size = 784
image_width, image_height = 28, 28
conv1_kernelsize = [5, 5]
conv1_filtersize = 8
pool1_poolsize = [2, 2]
pool1_stridesize = [1, 1]
conv2_kernelsize = [5, 5]
conv2_filtersize = 16
pool2_poolsize = [2, 2]
pool2_stridesize = [2, 2]
fc_layer_size = 64
n_classes = 10

# Feature dimensions
width_conv1_out = (image_width - conv1_kernelsize[0]) + 1
width_pool1_out = (width_conv1_out - pool1_poolsize[0]) // pool1_stridesize[0] + 1
width_conv2_out = (width_pool1_out - conv2_kernelsize[0]) + 1
width_pool2_out = (width_conv2_out - pool2_poolsize[0]) // pool2_stridesize[1] + 1


##########################
### WRAPPER FUNCTIONS
##########################

def conv_layer(input, input_channels, output_channels, kernel_size, strides):

    weights_shape = kernel_size + [input_channels, output_channels]
    weights = tf.Variable(tf.truncated_normal(shape=weights_shape,
                                              mean=0.0,
                                              stddev=0.1,
                                              dtype=tf.float32))
    biases = tf.Variable(tf.zeros(shape=[output_channels]))
    conv = tf.nn.conv2d(input=input,
                        filter=weights,
                        strides=strides,
                        padding='VALID')

    return tf.nn.relu(conv + biases)


def fc_layer(input, output_nodes,
             activation=None, seed=None):
    
    weights = tf.Variable(tf.truncated_normal(shape=[input.get_shape().as_list()[-1],
                                                     output_nodes],
                                              mean=0.0,
                                              stddev=0.1,
                                              dtype=tf.float32,
                                              seed=seed))
    biases = tf.Variable(tf.zeros(shape=[output_nodes]))
    act = tf.matmul(input, weights) + biases
    
    if activation is not None:
        act = activation(act)
        
    return act

##########################
### GRAPH DEFINITION
##########################

g = tf.Graph()
with g.as_default():

    # Input data
    tf_x = tf.placeholder(tf.float32, [None, input_size, 1])
    tf_y = tf.placeholder(tf.float32, [None, n_classes])
    
    keep_proba = tf.placeholder(tf.float32, shape=None)

    # Convolutional Neural Network:
    # 2 convolutional layers with maxpool and ReLU activation
    input_layer = tf.reshape(tf_x, shape=[-1, image_width, image_height, 1])
    

    conv_layer_1 = conv_layer(input=input_layer,
                              input_channels=1,
                              output_channels=conv1_filtersize,
                              kernel_size=conv1_kernelsize,
                              strides=[1, 1, 1, 1])
    pool_layer_1 = tf.nn.max_pool(conv_layer_1,
                                  ksize=[1, pool1_poolsize[0], pool1_poolsize[1], 1], 
                                  strides=[1, pool1_stridesize[0], pool1_stridesize[1], 1],
                                  padding='VALID')

    conv_layer_2 = conv_layer(input=pool_layer_1,
                              input_channels=conv1_filtersize,
                              output_channels=conv2_filtersize,
                              kernel_size=conv2_kernelsize,
                              strides=[1, 1, 1, 1])
    pool_layer_2 = tf.nn.max_pool(conv_layer_2,
                                  ksize=[1, pool2_poolsize[0], pool2_poolsize[1], 1], 
                                  strides=[1, pool2_stridesize[0], pool2_stridesize[1], 1],
                                  padding='VALID')
    
    
    flat = tf.reshape(pool_layer_2, [-1, width_pool2_out * width_pool2_out * conv2_filtersize])
    
    fc = fc_layer(flat, fc_layer_size, activation=tf.nn.relu)
    fc = tf.nn.dropout(fc, keep_prob=keep_proba)
    out_layer = fc_layer(fc, n_classes, activation=None)

    # Loss and optimizer
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=out_layer, labels=tf_y))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)

    # Prediction
    correct_prediction = tf.equal(tf.argmax(tf_y, 1), tf.argmax(out_layer, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    
##########################
### TRAINING & EVALUATION
##########################

with tf.Session(graph=g) as sess:
    sess.run(tf.global_variables_initializer())

    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = mnist.train.num_examples // batch_size

        for i in range(total_batch):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            batch_x = batch_x[:, :, None] # add "missing" color channel
            
            _, c = sess.run([optimizer, cost], feed_dict={tf_x: batch_x,
                                                          tf_y: batch_y,
                                                          keep_proba: dropout_keep_proba})
            avg_cost += c
            if not i % print_interval:
                print("Minibatch: %03d | Cost: %.3f" % (i + 1, c))
        
        train_acc = sess.run(accuracy, feed_dict={tf_x: mnist.train.images[:, :, None],
                                                  tf_y: mnist.train.labels,
                                                  keep_proba: 1.0})
        valid_acc = sess.run(accuracy, feed_dict={tf_x: mnist.validation.images[:, :, None],
                                                  tf_y: mnist.validation.labels,
                                                  keep_proba: 1.0})  
        
        print("Epoch: %03d | AvgCost: %.3f" % (epoch + 1, avg_cost / (i + 1)), end="")
        print(" | Train/Valid ACC: %.3f/%.3f" % (train_acc, valid_acc))
        
    test_acc = sess.run(accuracy, feed_dict={tf_x: mnist.test.images[:, :, None],
                                             tf_y: mnist.test.labels,
                                             keep_proba: 1.0})
    print('Test ACC: %.3f' % test_acc)

Extracting ./train-images-idx3-ubyte.gz
Extracting ./train-labels-idx1-ubyte.gz
Extracting ./t10k-images-idx3-ubyte.gz
Extracting ./t10k-labels-idx1-ubyte.gz
Minibatch: 001 | Cost: 2.492
Minibatch: 501 | Cost: 0.368
Minibatch: 1001 | Cost: 0.315
Minibatch: 1501 | Cost: 0.311
Epoch: 001 | AvgCost: 0.373 | Train/Valid ACC: 0.974/0.974
Test ACC: 0.976


### tensorflow.layers Abstraction

In [3]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data


##########################
### DATASET
##########################

mnist = input_data.read_data_sets("./", one_hot=True)


##########################
### SETTINGS
##########################

# Hyperparameters
learning_rate = 0.1
dropout_rate = 0.5
training_epochs = 1
batch_size = 32

print_interval = 500

# Architecture
input_size = 784
image_width, image_height = 28, 28
conv1_kernelsize = [5, 5]
conv1_filtersize = 8
pool1_poolsize = [2, 2]
pool1_stridesize = [1, 1]
conv2_kernelsize = [5, 5]
conv2_filtersize = 16
pool2_poolsize = [2, 2]
pool2_stridesize = [1, 1]
fc_layer_size = 64
n_classes = 10

# Feature dimensions
width_conv1_out = (image_width - conv1_kernelsize[0]) + 1
width_pool1_out = (width_conv1_out - pool1_poolsize[0]) // pool1_stridesize[0] + 1
width_conv2_out = (width_pool1_out - conv2_kernelsize[0]) + 1
width_pool2_out = (width_conv2_out - pool2_poolsize[0]) // pool2_stridesize[1] + 1


##########################
### GRAPH DEFINITION
##########################

g = tf.Graph()
with g.as_default():

    # Input data
    tf_x = tf.placeholder(tf.float32, [None, input_size, 1])
    tf_y = tf.placeholder(tf.float32, [None, n_classes])
    
    dropout_proba = tf.placeholder(tf.float32, shape=None)

    # Convolutional Neural Network:
    # 2 convolutional layers with maxpool and ReLU activation
    input_layer = tf.reshape(tf_x, shape=[-1, image_width, image_height, 1])
    
    conv_layer_1 = tf.layers.conv2d(input_layer, 
                                    kernel_size=[conv1_kernelsize[0], conv1_kernelsize[1]], 
                                    filters=conv1_filtersize, 
                                    activation=tf.nn.relu)
    pool_layer_1 = tf.layers.max_pooling2d(conv_layer_1, 
                                           pool_size=[pool1_poolsize[0], pool1_poolsize[1]], 
                                           strides=[pool1_stridesize[0], pool1_stridesize[1]])
    
    conv_layer_2 = tf.layers.conv2d(pool_layer_1,  
                                    kernel_size=[conv2_kernelsize[0], conv2_kernelsize[1]],
                                    filters=conv2_filtersize,
                                    activation=tf.nn.relu)
    pool_layer_2 = tf.layers.max_pooling2d(conv_layer_2, 
                                           pool_size=[pool2_poolsize[0], pool2_poolsize[1]], 
                                           strides=[pool2_stridesize[0], pool2_stridesize[1]])
    
    flat = tf.reshape(pool_layer_2, [-1, width_pool2_out * width_pool2_out * conv2_filtersize])
    fc_layer = tf.layers.dense(flat, fc_layer_size, activation=tf.nn.relu)
    fc_layer = tf.layers.dropout(fc_layer, rate=dropout_proba)
    
    out_layer = tf.layers.dense(fc_layer, n_classes, activation=None)

    # Loss and optimizer
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=out_layer, labels=tf_y))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)

    # Prediction
    correct_prediction = tf.equal(tf.argmax(tf_y, 1), tf.argmax(out_layer, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    
##########################
### TRAINING & EVALUATION
##########################

with tf.Session(graph=g) as sess:
    sess.run(tf.global_variables_initializer())

    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = mnist.train.num_examples // batch_size

        for i in range(total_batch):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            batch_x = batch_x[:, :, None] # add "missing" color channel
            
            _, c = sess.run([optimizer, cost], feed_dict={tf_x: batch_x,
                                                          tf_y: batch_y,
                                                          dropout_proba: dropout_rate})
            avg_cost += c
            if not i % print_interval:
                print("Minibatch: %04d | Cost: %.3f" % (i + 1, c))
        
        train_acc = sess.run(accuracy, feed_dict={tf_x: mnist.train.images[:, :, None],
                                                  tf_y: mnist.train.labels,
                                                  dropout_proba: 0.0})
        valid_acc = sess.run(accuracy, feed_dict={tf_x: mnist.validation.images[:, :, None],
                                                  tf_y: mnist.validation.labels,
                                                  dropout_proba: 0.0})  
        
        print("Epoch: %03d | AvgCost: %.3f" % (epoch + 1, avg_cost / (i + 1)), end="")
        print(" | Train/Valid ACC: %.3f/%.3f" % (train_acc, valid_acc))
        
    test_acc = sess.run(accuracy, feed_dict={tf_x: mnist.test.images[:, :, None],
                                             tf_y: mnist.test.labels,
                                             dropout_proba: 0.0})
    print('Test ACC: %.3f' % test_acc)

Extracting ./train-images-idx3-ubyte.gz
Extracting ./train-labels-idx1-ubyte.gz
Extracting ./t10k-images-idx3-ubyte.gz
Extracting ./t10k-labels-idx1-ubyte.gz
Minibatch: 0001 | Cost: 2.331
Minibatch: 0501 | Cost: 0.095
Minibatch: 1001 | Cost: 0.108
Minibatch: 1501 | Cost: 0.053
Epoch: 001 | AvgCost: 0.173 | Train/Valid ACC: 0.986/0.984
Test ACC: 0.984
