# Convolutional Neural Networks on MNIST

MNIST:
A training set of 60,000 examples and a test set of 10,000 examples. Each example is a 28x28-pixel grayscale image of a handwritten digit (0-9).

The CPU time reported for each framework is only an estimate: it depends heavily on how the model is evaluated as training moves through the epochs.


In [1]:
# Current kernel: display the path of the Python interpreter running this notebook
import sys
sys.executable

'/Users/micheleenharris/anaconda36/bin/python'

## Keras ConvNet Example

Adapted from:  https://github.com/keras-team/keras/blob/master/examples/mnist_cnn.py

In [2]:
%%time
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
from keras.optimizers import SGD

# For training
batch_size = 128
num_classes = 10
num_epochs = 10
# Only referenced by the commented-out steps_per_epoch argument to model.fit below.
num_samples = 60000

# input image dimensions
img_rows, img_cols = 28, 28

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Add an explicit single-channel axis, placed where the active backend expects it
# (before the spatial axes for 'channels_first', after them otherwise).
if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

# Convert to float32 and divide by 255
# (presumably maps 8-bit pixel values into the 0-1 range; confirm the loader's dtype).
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Network: two (3x3 convolution + ReLU -> 2x2 max-pool) stages with 32 and 64 filters,
# then flatten and a softmax layer with one output per class.
# The Dropout and hidden Dense layers are left commented out.
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3),
                 strides=(1, 1),
                 padding='valid',
                 activation='relu',
                 input_shape=input_shape))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, kernel_size=(3, 3),
                 strides=(1, 1),
                 padding='valid',
                 activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
# model.add(Dropout(0.25))
model.add(Flatten())
# model.add(Dense(128, activation='relu'))
# model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# SGD configured with learning-rate decay and Nesterov momentum.
sgd = SGD(lr=0.05, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', 
              optimizer=sgd,
              metrics=['accuracy'])

# The test split is passed as validation_data, so the per-epoch validation
# figures are computed on the same data as the final evaluate() call.
model.fit(x_train, y_train,
          batch_size=batch_size,
#           steps_per_epoch=int(num_samples/batch_size),
          epochs=num_epochs,
          verbose=1,
          validation_data=(x_test, y_test))
# score is indexed as score[1] for the accuracy in the next cell.
score = model.evaluate(x_test, y_test, verbose=0)


Using TensorFlow backend.


x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples
Train on 60000 samples, validate on 10000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
CPU times: user 17min 13s, sys: 1min 35s, total: 18min 48s
Wall time: 3min 48s


In [9]:
# score comes from model.evaluate in the previous cell; index 1 is reported as the accuracy.
keras_score = score[1]
print('Test accuracy: {:.2%}'.format(keras_score))

Test accuracy: 99.14%


## PyTorch ConvNet Example

Adapted from:  https://github.com/rasbt/deep-learning-book/blob/master/code/model_zoo/pytorch_ipynb/convnet.ipynb by Sebastian Raschka

In [4]:
%%time
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader

##########################
### SETTINGS
##########################

# Device: use the first CUDA GPU when one is available, otherwise the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Hyperparameters
random_seed = 1
learning_rate = 0.05
num_samples = 60000
num_epochs = 10
# batch_size has to be assigned before num_steps is derived from it; otherwise this
# cell only runs when an earlier cell happened to leave a batch_size in the kernel.
batch_size = 128
# Number of full batches per pass over the training set (used for progress logging)
num_steps = num_samples // batch_size

# Architecture
num_classes = 10


##########################
### MNIST DATASET
##########################

# Note transforms.ToTensor() scales input images
# to 0-1 range
# Only the training split is created with download=True; the test split is read
# from the same root directory.
train_dataset = datasets.MNIST(root='data', 
                               train=True, 
                               transform=transforms.ToTensor(),
                               download=True)

test_dataset = datasets.MNIST(root='data', 
                              train=False, 
                              transform=transforms.ToTensor())


# Training batches are reshuffled; test batches keep the dataset order.
train_loader = DataLoader(dataset=train_dataset, 
                          batch_size=batch_size, 
                          shuffle=True)

test_loader = DataLoader(dataset=test_dataset, 
                         batch_size=batch_size, 
                         shuffle=False)

# Checking the dataset: print the shapes of the first training batch only, then stop
for images, labels in train_loader:  
    print('Image batch dimensions:', images.shape)
    print('Image label dimensions:', labels.shape)
    break

##########################
### MODEL
##########################

    
class ConvNetPyTorch(nn.Module):
    """Two-stage convolutional classifier for single-channel 28x28 images.

    Adapted from:
    https://github.com/rasbt/deep-learning-book/blob/master/code/model_zoo/pytorch_ipynb/convnet.ipynb

    Each stage is Conv2d(3x3, stride 1, padding 1) -> ReLU -> MaxPool2d(2x2, stride 2).
    The padded convolutions keep the spatial size and each pooling step halves it,
    so a 28x28 input reaches the linear layer as 7*7*64 features.

    Args:
        num_classes: number of output classes (size of the final linear layer).
    """
    def __init__(self, num_classes=10):
        super(ConvNetPyTorch, self).__init__()
        self.layer1 = nn.Sequential(
            # 28x28x1 => 28x28x32
            nn.Conv2d(in_channels=1,
                      out_channels=32,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1), # (1(28-1) - 28 + 3) / 2 = 1
            nn.ReLU(),
            # 28x28x32 => 14x14x32
            nn.MaxPool2d(kernel_size=(2, 2),
                         stride=(2, 2),
                         padding=0)) # (2(14-1) - 28 + 2) = 0
        self.layer2 = nn.Sequential(
            # 14x14x32 => 14x14x64
            nn.Conv2d(in_channels=32,
                      out_channels=64,
                      kernel_size=(3, 3),
                      stride=(1, 1),
                      padding=1), # (1(14-1) - 14 + 3) / 2 = 1
            nn.ReLU(),
            # 14x14x64 => 7x7x64
            nn.MaxPool2d(kernel_size=(2, 2),
                         stride=(2, 2),
                         padding=0)) # (2(7-1) - 14 + 2) = 0
        self.linear_1 = nn.Linear(7*7*64, num_classes)

    def forward(self, x):
        """Run a batch through the network.

        Args:
            x: image batch shaped (batch, 1, 28, 28).

        Returns:
            (logits, probas): the raw class scores and their softmax over the
            class dimension, each shaped (batch, num_classes).

        Raises:
            RuntimeError: if the flattened feature size does not match the
                linear layer (e.g. images that are not 28x28).
        """
        out = self.layer1(x)
        out = self.layer2(out)
        # Flatten per sample while keeping the batch dimension fixed. A second
        # view(-1, 7*7*64) here would quietly regroup a wrong-sized input into a
        # different number of rows instead of failing in the linear layer.
        out = out.reshape(out.size(0), -1)
        logits = self.linear_1(out)
        probas = F.softmax(logits, dim=1)
        return logits, probas

# Seed the torch RNG before the model is constructed, then move the model to the chosen device.
torch.manual_seed(random_seed)
model = ConvNetPyTorch(num_classes=num_classes)
model = model.to(device)
    

##########################
### COST AND OPTIMIZER
##########################

# The training loop passes the model's raw logits (not the softmax output) to cost_fn.
cost_fn = torch.nn.CrossEntropyLoss()  
# Plain SGD: only the learning rate is set here (no momentum or decay arguments).
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

def compute_accuracy(model, data_loader):
    """Return the fraction of examples in ``data_loader`` that ``model`` classifies correctly.

    Args:
        model: an ``nn.Module`` whose forward pass returns ``(logits, probas)``.
        data_loader: iterable of ``(features, targets)`` batches.

    Returns:
        A zero-dimensional float tensor holding a fraction between 0 and 1
        (not a percentage). It is 0.0 when ``data_loader`` yields no batches.
    """
    # Follow the model's own device so the helper does not depend on a
    # notebook-level ``device`` variable; parameter-free models run on the CPU.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    correct_pred = torch.zeros((), dtype=torch.long, device=target_device)
    num_examples = 0
    # Evaluation needs no gradients; skipping autograd avoids building a graph per batch.
    with torch.no_grad():
        for features, targets in data_loader:
            features = features.to(target_device)
            targets = targets.to(target_device)
            _, probas = model(features)
            _, predicted_labels = torch.max(probas, 1)
            num_examples += targets.size(0)
            correct_pred += (predicted_labels == targets).sum()

    if num_examples == 0:
        # Empty loader: report 0.0 rather than dividing by zero.
        return correct_pred.float()
    return correct_pred.float()/num_examples
    

# Train for num_epochs passes over train_loader, then report training accuracy per epoch.
for epoch in range(num_epochs):
    model = model.train()
    for batch_idx, (features, targets) in enumerate(train_loader):

        features = features.to(device)
        targets = targets.to(device)

        ### FORWARD AND BACK PROP
        # The loss is computed on the raw logits; probas is not needed here.
        logits, probas = model(features)
        cost = cost_fn(logits, targets)
        # Clear gradients left over from the previous step before backpropagating.
        optimizer.zero_grad()

        cost.backward()

        ### UPDATE MODEL PARAMETERS
        optimizer.step()

        ### LOGGING
        # Log whenever batch_idx is a multiple of num_steps (including batch 0).
        if not batch_idx % num_steps:
            print ('Epoch: %03d/%03d | Batch %03d/%03d | Cost: %.4f' 
                   %(epoch+1, num_epochs, batch_idx, 
                     num_steps, cost.item()))

    model = model.eval()
    # compute_accuracy returns a fraction, so scale it by 100 to match the
    # '%' sign in the message (otherwise 0.9556 is printed as "0.96%").
    print('Epoch: %03d/%03d training accuracy: %.2f%%' % (
          epoch+1, num_epochs, 
          100 * compute_accuracy(model, train_loader)))
    


Image batch dimensions: torch.Size([128, 1, 28, 28])
Image label dimensions: torch.Size([128])
Epoch: 001/010 | Batch 000/468 | Cost: 2.3102
Epoch: 001/010 | Batch 468/468 | Cost: 0.1679
Epoch: 001/010 training accuracy: 95.56%
Epoch: 002/010 | Batch 000/468 | Cost: 0.1277
Epoch: 002/010 | Batch 468/468 | Cost: 0.0510
Epoch: 002/010 training accuracy: 97.10%
Epoch: 003/010 | Batch 000/468 | Cost: 0.0759
Epoch: 003/010 | Batch 468/468 | Cost: 0.1045
Epoch: 003/010 training accuracy: 97.74%
Epoch: 004/010 | Batch 000/468 | Cost: 0.1485
Epoch: 004/010 | Batch 468/468 | Cost: 0.0432
Epoch: 004/010 training accuracy: 98.03%
Epoch: 005/010 | Batch 000/468 | Cost: 0.0380
Epoch: 005/010 | Batch 468/468 | Cost: 0.0330
Epoch: 005/010 training accuracy: 98.03%
Epoch: 006/010 | Batch 000/468 | Cost: 0.0979
Epoch: 006/010 | Batch 468/468 | Cost: 0.0725
Epoch: 006/010 training accuracy: 98.24%
Epoch: 007/010 | Batch 000/468 | Cost: 0.0755
Epoch: 007/010 | Batch 468/468 | Cost: 0.0556
Epoch: 007/010 

In [10]:
# compute_accuracy returns the fraction of correct predictions; '{:.2%}' renders it as a percentage.
pytorch_score = (compute_accuracy(model, test_loader))
print('Test accuracy: {:.2%}'.format(pytorch_score))

Test accuracy: 98.28%


## TensorFlow ConvNet Example

In [11]:
%%time
""" Convolutional Neural Network.
Build and train a convolutional neural network with TensorFlow.
This example is using the MNIST database of handwritten digits
(http://yann.lecun.com/exdb/mnist/)
This example is using TensorFlow layers API, see 'convolutional_network_raw' 
example for a raw implementation with variables.
Author: Aymeric Damien
Project: https://github.com/aymericdamien/TensorFlow-Examples/
"""
from __future__ import division, print_function, absolute_import
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import os

# Only log messages at ERROR level.
tf.logging.set_verbosity(tf.logging.ERROR)

# Import MNIST data
# DATA_DIR is "tmp/data" under the filesystem root (the cell output shows /tmp/data).
DATA_DIR = os.path.join(os.sep + "tmp", "data")
# one_hot=False: labels are used as class indices (see the sparse cross-entropy in model_fn).
mnist = input_data.read_data_sets(DATA_DIR, one_hot=False)

# Training Parameters
# NOTE(review): num_samples is hard-coded. If read_data_sets reserves part of the
# 60,000 images for validation, mnist.train holds fewer examples than this and
# num_steps covers more than one pass over it -- confirm.
num_samples = 60000
batch_size = 128
# True division (from __future__) followed by int() truncation: 468 steps per "epoch".
num_steps = int(num_samples/batch_size)
learning_rate = 0.05
num_epochs = 10

# Network Parameters
num_input = 784 # MNIST data input (img shape: 28*28)
num_classes = 10 # MNIST total classes (0-9 digits)

# Create the neural network
def convNetTensorFlow(x_dict, n_classes, reuse, is_training):
    """Build the ConvNet graph and return the raw (pre-softmax) class scores.

    Args:
        x_dict: Estimator-style feature dict; the images are read from key 'images'.
        n_classes: number of units in the output layer.
        reuse: forwarded to tf.variable_scope('ConvNet') to share variables between builds.
        is_training: accepted for the Estimator template; not read by this function.

    Returns:
        The logits tensor produced by the final dense layer (no activation).
    """
    with tf.variable_scope('ConvNet', reuse=reuse):
        # Flat 784-pixel rows -> 4-D batch [Batch Size, Height, Width, Channel]
        net = tf.reshape(x_dict['images'], shape=[-1, 28, 28, 1])

        # Two stages of (convolution + ReLU) followed by 2x2 max pooling with stride 2:
        # 32 filters with kernel size 5, then 64 filters with kernel size 3.
        for filters, kernel_size in ((32, 5), (64, 3)):
            net = tf.layers.conv2d(net, filters, kernel_size, activation=tf.nn.relu)
            net = tf.layers.max_pooling2d(net, 2, 2)

        # Collapse the feature maps to one vector per example for the dense layer
        flat = tf.contrib.layers.flatten(net)

        # Output layer, class prediction
        logits = tf.layers.dense(flat, n_classes, activation=None)

    return logits

# Define the model function (following TF Estimator Template)
def model_fn(features, labels, mode):
    """Estimator model function: build the network and return an EstimatorSpec.

    Args:
        features: feature dict handed to convNetTensorFlow (it reads key 'images').
        labels: class-index labels; cast to int32 for the sparse cross-entropy loss.
        mode: a tf.estimator.ModeKeys value; PREDICT returns early with predictions only.

    Returns:
        A tf.estimator.EstimatorSpec. In PREDICT mode it carries only the predicted
        classes; otherwise it also carries the loss, the train op and an accuracy metric.
    """
    # Build the neural network
    # The network is built twice inside the same variable scope: once creating the
    # variables (reuse=False) and once sharing them (reuse=True). convNetTensorFlow
    # does not read is_training, so the two builds differ only in the reuse flag.
    logits_train = convNetTensorFlow(features, num_classes, reuse=False,
                            is_training=True)
    logits_test = convNetTensorFlow(features, num_classes, reuse=True,
                           is_training=False)

    # Predictions
    pred_classes = tf.argmax(logits_test, axis=1)
    # pred_probas is computed but not used further in this function.
    pred_probas = tf.nn.softmax(logits_test)

    # If prediction mode, early return
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions=pred_classes)

    # Define loss and optimizer
    # Mean sparse softmax cross-entropy over the batch, minimised with plain gradient descent.
    loss_op = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits_train, labels=tf.cast(labels, dtype=tf.int32)))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss_op,
                                  global_step=tf.train.get_global_step())

    # Evaluate the accuracy of the model
    acc_op = tf.metrics.accuracy(labels=labels, predictions=pred_classes)
    
    # TF Estimators requires to return a EstimatorSpec, that specify
    # the different ops for training, evaluating, ...
    estim_specs = tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=pred_classes,
        loss=loss_op,
        train_op=train_op,
        eval_metric_ops={'accuracy': acc_op})

    return estim_specs

# Build the Estimator
model = tf.estimator.Estimator(model_fn)

# One "epoch" here is num_steps training steps followed by an evaluation on the training data.
for epoch_idx in range(num_epochs):
    print('Epoch = {}/{} | '.format(epoch_idx+1, num_epochs), end='')
    # Define the input function for training
    # num_epochs=None (presumably: repeat the data indefinitely), so the amount of
    # training per iteration is set by steps=num_steps below.
    input_fn = tf.estimator.inputs.numpy_input_fn(
        x={'images': mnist.train.images}, y=mnist.train.labels,
        batch_size=batch_size, num_epochs=None, shuffle=True)
    # Train the Model
    model.train(input_fn, steps=num_steps)

    # Evaluate the Model on training data
    # A single pass (num_epochs=1) over the training images.
    input_fn = tf.estimator.inputs.numpy_input_fn(
        x={'images': mnist.train.images}, y=mnist.train.labels,
        batch_size=batch_size, num_epochs=1, shuffle=True)
    e = model.evaluate(input_fn)
    print('Loss = {} | Accuracy = {}'.format(e['loss'], e['accuracy']))

# Evaluate the Model on test data
# Define the input function for evaluating
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'images': mnist.test.images}, y=mnist.test.labels,
    batch_size=batch_size, shuffle=False)
# Use the Estimator 'evaluate' method
# e is reused by the next cell to print the test accuracy.
e = model.evaluate(input_fn)

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
Epoch = 1/10 | Loss = 0.1832001656293869 | Accuracy = 0.9459999799728394
Epoch = 2/10 | Loss = 0.10717695951461792 | Accuracy = 0.9688000082969666
Epoch = 3/10 | Loss = 0.08500224351882935 | Accuracy = 0.9753999710083008
Epoch = 4/10 | Loss = 0.07122006267309189 | Accuracy = 0.9789999723434448
Epoch = 5/10 | Loss = 0.06446825712919235 | Accuracy = 0.9802908897399902
Epoch = 6/10 | Loss = 0.0528169721364975 | Accuracy = 0.9838545322418213
Epoch = 7/10 | Loss = 0.06288313865661621 | Accuracy = 0.9801999926567078
Epoch = 8/10 | Loss = 0.04890061169862747 | Accuracy = 0.9851999878883362
Epoch = 9/10 | Loss = 0.044429562985897064 | Accuracy = 0.9868909120559692
Epoch = 10/10 | Loss = 0.041722942143678665 | Accuracy = 0.9875817894935608
CPU times: user 23min 8s, sys: 1min 28s, total: 24min 37s
Wall time: 

In [12]:
# e holds the metrics from the final model.evaluate call on the test data in the previous cell.
print('Test accuracy: {:.2%}'.format(e['accuracy']))

Test accuracy: 98.62%


## CNTK ConvNet Example

Adapted from:
https://cntk.ai/pythondocs/CNTK_103A_MNIST_DataLoader.html and https://cntk.ai/pythondocs/CNTK_103D_MNIST_ConvolutionalNeuralNetwork.html

NOT RUN, ONLY FOR REFERENCE - there is _no distribution_ that natively works on macOS.

In [None]:
from __future__ import print_function # Use a function definition from future version (say 3.x from 2.7 interpreter)
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import os
import sys
import time

import cntk as C
import cntk.tests.test_utils
cntk.tests.test_utils.set_device_from_pytest_env() # (only needed for our build system)
C.cntk_py.set_fixed_random_seed(1) # fix a random seed for CNTK components

%matplotlib inline

# Same directory as in the TF example above (this is the location
# the MNIST .gz files were downloaded to)
DATA_DIR = os.path.join(os.sep + "tmp", "data")

# Import MNIST data using TensorFlow downloader (if files exist, does nothing)
from tensorflow.examples.tutorials.mnist import input_data
# one_hot=False: labels are loaded as class indices rather than one-hot vectors.
mnist = input_data.read_data_sets(DATA_DIR, one_hot=False)

# Save the data files into a format compatible with CNTK text reader
def savetxt(filename, ndarray):
    """Write ``ndarray`` to ``filename`` in the text layout read by ``create_reader``.

    Each row of ``ndarray`` holds the feature values followed by the class label
    (0-9) in the last column. Every row becomes one line of the form
    ``|labels <one-hot, 10 values> |features <values>``.

    Args:
        filename: path of the text file to create; missing parent directories are created.
        ndarray: 2-D numpy array, one example per row, label in the last column.

    The file is only written when it does not exist yet; an existing file is left
    untouched and a message is printed instead.
    """
    out_dir = os.path.dirname(filename)

    # dirname is '' for a bare file name, and os.makedirs('') raises, so only
    # create a directory when the path actually names one.
    if out_dir and not os.path.exists(out_dir):
        os.makedirs(out_dir)

    if os.path.isfile(filename):
        print("File already exists", filename)
        return

    print("Saving", filename )
    # One pre-rendered one-hot string per class, e.g. class 3 -> "0 0 0 1 0 0 0 0 0 0"
    one_hot = list(map(' '.join, np.eye(10, dtype=np.uint).astype(str)))
    with open(filename, 'w') as f:
        for row in ndarray:
            row_str = row.astype(str)
            # int(): rows of a float array carry the label as a float, which
            # cannot be used directly as a list index.
            label_str = one_hot[int(row[-1])]
            feature_str = ' '.join(row_str[:-1])
            f.write('|labels {} |features {}\n'.format(label_str, feature_str))

# savetxt expects one row per image: the pixel values followed by the digit label
# in the last column. Build those arrays from the loaded MNIST splits.
def _with_label_column(split):
    """Return a uint8 array of a split's flattened images with the labels appended as the last column."""
    # NOTE(review): assumes split.images holds floats scaled to the 0-1 range
    # (the TensorFlow loader's default). Rescaling restores 0-255 pixel values,
    # which the CNTK model later divides by 255 -- confirm against the loader.
    pixels = np.rint(split.images * 255).astype(np.uint8)
    labels = np.asarray(split.labels, dtype=np.uint8).reshape(-1, 1)
    return np.hstack((pixels, labels))

train = _with_label_column(mnist.train)
test = _with_label_column(mnist.test)

print ('Writing train text file...')
savetxt(os.path.join(DATA_DIR, "Train-28x28_cntk_text.txt"), train)

print ('Writing test text file...')
savetxt(os.path.join(DATA_DIR, "Test-28x28_cntk_text.txt"), test)

print('Done')
        
# Define the data dimensions
input_dim_model = (1, 28, 28)    # images are 28 x 28 with 1 channel of color (gray)
input_dim = 28*28                # used by readers to treat input data as a vector
num_output_classes = 10

# Train parameters
learning_rate = 0.05
train_minibatch_size = 128
num_samples_per_sweep = 60000
# Number of passes over the training data. It must be defined before the
# minibatch count below is computed; 10 matches the default of train_test
# and the epoch count used for the other frameworks.
num_sweeps_to_train_with = 10
# Whole number of training minibatches (floor division; the training loop
# truncates this value with int() anyway).
num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) // train_minibatch_size

# Test parameters
test_minibatch_size = 128
num_samples = 10000
num_minibatches_to_test = num_samples // test_minibatch_size

# Read a CTF formatted text (as mentioned above) using the CTF deserializer from a file
def create_reader(path, is_training, input_dim, num_label_classes):
    """Create a CNTK minibatch source over a CTF-formatted text file.

    Args:
        path: location of the text file.
        is_training: when true the source is randomized and repeats indefinitely;
            otherwise it is read in order for a single sweep.
        input_dim: shape of the dense 'features' stream.
        num_label_classes: shape of the dense 'labels' stream.

    Returns:
        A C.io.MinibatchSource exposing the 'labels' and 'features' streams.
    """
    label_stream = C.io.StreamDef(field='labels', shape=num_label_classes, is_sparse=False)
    feature_stream = C.io.StreamDef(field='features', shape=input_dim, is_sparse=False)
    deserializer = C.io.CTFDeserializer(
        path, C.io.StreamDefs(labels=label_stream, features=feature_stream))

    if is_training:
        sweeps = C.io.INFINITELY_REPEAT
    else:
        sweeps = 1

    return C.io.MinibatchSource(deserializer, randomize=is_training, max_sweeps=sweeps)

def create_criterion_function(model, labels):
    """Return the training criterion for ``model`` as a ``(loss, error metric)`` pair.

    The loss is cross-entropy with softmax and the metric is the
    classification error, both computed against ``labels``.
    """
    criterion = (
        C.cross_entropy_with_softmax(model, labels),
        C.classification_error(model, labels),
    )
    return criterion

# Utility: smooth a sequence using a trailing window of width w.
# (np.cumsum() would allow a more efficient implementation.)
def moving_average(a, w=5):
    """Return ``a`` with every entry from index ``w`` onward replaced by a windowed mean.

    The first ``w`` entries are passed through unchanged. For ``idx >= w`` the
    entry is the mean of the ``w`` values that precede position ``idx`` (the
    value at ``idx`` itself is not part of the window). Sequences shorter than
    ``w`` are returned as ``a[:]``.
    """
    if len(a) < w:
        return a[:]    # hand back a[:] rather than the caller's own object

    smoothed = []
    for idx, val in enumerate(a):
        if idx < w:
            smoothed.append(val)
        else:
            smoothed.append(sum(a[(idx-w):idx])/w)
    return smoothed


# Defines a utility that prints the training progress
def print_training_progress(trainer, mb, frequency, verbose=1):
    """Report the trainer's latest minibatch statistics every ``frequency`` minibatches.

    Args:
        trainer: object exposing ``previous_minibatch_loss_average`` and
            ``previous_minibatch_evaluation_average``.
        mb: index of the current minibatch.
        frequency: statistics are read (and printed) only when ``mb`` is a multiple of this.
        verbose: when truthy, print a progress line on reporting minibatches.

    Returns:
        ``(mb, training_loss, eval_error)``; the last two are the string "NA"
        on minibatches that are not reported.
    """
    if mb % frequency != 0:
        return mb, "NA", "NA"

    training_loss = trainer.previous_minibatch_loss_average
    eval_error = trainer.previous_minibatch_evaluation_average
    if verbose:
        message = "Minibatch: {0}, Loss: {1:.4f}, Error: {2:.2f}%".format(
            mb, training_loss, eval_error*100)
        print (message)

    return mb, training_loss, eval_error

def convNetCNTK(features, num_output_classes):
    """Build the CNTK ConvNet and apply it to ``features``.

    The network is two stages of 3x3 convolution (32, then 64 filters, padded)
    followed by 2x2 max pooling with stride 2, then a 64-unit dense layer and a
    final dense layer without activation. Glorot-uniform initialisation and
    ReLU are set as the layer defaults.

    Args:
        features: input the model is applied to.
        num_output_classes: number of units in the final dense layer.

    Returns:
        The result of applying the model to ``features`` (the final layer has no activation).
    """
    with C.layers.default_options(init=C.glorot_uniform(), activation=C.relu):
        model = C.layers.Sequential([
            C.layers.For(range(2), lambda i: [
                C.layers.Convolution((3,3), [32,64][i], pad=True),
                C.layers.MaxPooling((2,2), strides=(2,2))
                ]),
            C.layers.Dense(64),
            # Size the output layer from the function's own argument; the
            # previously used name out_dims is not defined anywhere.
            C.layers.Dense(num_output_classes, activation=None)
        ])

    return model(features)

def train_test(train_reader, test_reader, model_func, num_sweeps_to_train_with=10):

    # Instantiate the model function; x is the input (feature) variable
    # We will scale the input image pixels within 0-1 range by dividing all input value by 255.
    model = convNetCNTK(x/255, num_output_classes)

    # Instantiate the loss and error function
    loss, label_error = create_criterion_function(model, y)

    # Instantiate the trainer object to drive the model training
    lr_schedule = C.learning_parameter_schedule(learning_rate)
    learner = C.sgd(z.parameters, lr_schedule)
    trainer = C.Trainer(z, (loss, label_error), [learner])

    # Map the data streams to the input and labels.
    input_map={
        y  : train_reader.streams.labels,
        x  : train_reader.streams.features
    }

    # Uncomment below for more detailed logging
    training_progress_output_freq = 500

    # Start a timer
    start = time.time()

    for i in range(0, int(num_minibatches_to_train)):
        # Read a mini batch from the training data file
        data=train_reader.next_minibatch(train_minibatch_size, input_map=input_map)
        trainer.train_minibatch(data)
        print_training_progress(trainer, i, training_progress_output_freq, verbose=1)

    # Print training time
    print("Training took {:.1f} sec".format(time.time() - start))

    # Test the model
    test_input_map = {
        y  : test_reader.streams.labels,
        x  : test_reader.streams.features
    }

    test_result = 0.0

    for i in range(num_minibatches_to_test):

        # We are loading test data in batches specified by test_minibatch_size
        # Each data point in the minibatch is a MNIST digit image of 784 dimensions
        # with one pixel per dimension that we will encode / decode with the
        # trained model.
        data = test_reader.next_minibatch(test_minibatch_size, input_map=test_input_map)
        eval_error = trainer.test_minibatch(data)
        test_result = test_result + eval_error

    # Average of evaluation errors of all test minibatches
    print("Average test error: {0:.2f}%".format(test_result*100 / num_minibatches_to_test))