# Learning MNIST

In this exercise you will design a classifier for the very simple but very popular [MNIST dataset](http://yann.lecun.com/exdb/mnist/), a classic dataset in computer vision and one of the first real-world problems solved by neural networks.

## Load Dataset in TFRecords Format

The dataset is available on [Gdrive](https://drive.google.com/drive/folders/0B7DvVTpUhOA9S0Ryek11Sk1oNTA?usp=sharing). You should download the files and put them in the `DATA_DIR` directory.

In [None]:
%matplotlib inline

import os
from datetime import datetime

import matplotlib.pyplot as plt

import tensorflow as tf
slim = tf.contrib.slim


Below are some utilities to load the dataset. Make sure you have placed the dataset in the `DATA_DIR` directory.

In [None]:
# MNIST dataset utilities

# Directory that holds the dataset files; it is passed to `get_split` as
# `dataset_dir`, where both the TFRecord files and the labels file are looked up.
DATA_DIR = 'mnist/'

# File name template for the TFRecord files; `get_split` substitutes the split
# name for '%s'.
_FILE_PATTERN = 'mnist_%s.tfrecord'

# Valid split names mapped to the number of examples in each split.
_SPLITS_TO_SIZES = {'train': 60000, 'test': 10000}

# Number of target classes (labels are integers between 0 and 9).
_NUM_CLASSES = 10

# Human-readable descriptions of the items a decoded example provides.
_ITEMS_TO_DESCRIPTIONS = {
    'image': 'A [28 x 28 x 1] grayscale image.',
    'label': 'A single integer between 0 and 9',
}

# Default name of the labels file read by `read_label_file`.
LABELS_FILENAME = 'labels.txt'


def read_label_file(dataset_dir, filename=LABELS_FILENAME):
    """Reads the labels file and returns a mapping from ID to class name.

    Every non-empty line of the file must have the form
    ``<integer id>:<class name>``. Only the first ':' separates the two parts,
    so a class name may itself contain colons.

    Args:
        dataset_dir: The directory in which the labels file is found.
        filename: The filename where the class names are written.

    Returns:
        A map from a label (integer) to class name.

    Raises:
        ValueError: if a non-empty line contains no ':' or the text before the
          first ':' is not an integer.
    """
    labels_filename = os.path.join(dataset_dir, filename)
    with tf.gfile.Open(labels_filename, 'r') as f:
        contents = f.read()

    # A text-mode read yields a byte string on Python 2 but an already-decoded
    # `str` on Python 3, and `str` has no `.decode()` there. Decode only when
    # bytes were actually returned so the function works on both.
    if isinstance(contents, bytes):
        contents = contents.decode()

    labels_to_class_names = {}
    for line in contents.split('\n'):
        if not line:
            # Skip blank lines (e.g. the one produced by a trailing newline).
            continue
        index = line.index(':')
        labels_to_class_names[int(line[:index])] = line[index + 1:]
    return labels_to_class_names


def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
    """Builds the slim `Dataset` description for one split of MNIST.

    Args:
        split_name: Name of the split to read; must be a key of
          `_SPLITS_TO_SIZES`.
        dataset_dir: Directory holding the TFRecord files and the labels file.
        file_pattern: Optional file name template containing a '%s' that is
          replaced by `split_name`. Falls back to `_FILE_PATTERN` when empty
          or None.
        reader: Optional TensorFlow reader type. Falls back to
          `tf.TFRecordReader` when None.

    Returns:
        A `Dataset` namedtuple.

    Raises:
        ValueError: if `split_name` is not a valid train/test split.
    """
    if split_name not in _SPLITS_TO_SIZES:
        raise ValueError('split name %s was not recognized.' % split_name)

    # Resolve the optional arguments to their defaults.
    pattern_template = file_pattern or _FILE_PATTERN
    data_sources = os.path.join(dataset_dir, pattern_template % split_name)
    record_reader = tf.TFRecordReader if reader is None else reader

    decoding = slim.tfexample_decoder

    # How the serialized tf.Example fields are parsed.
    encoded_feature = tf.FixedLenFeature((), tf.string, default_value='')
    format_feature = tf.FixedLenFeature((), tf.string, default_value='raw')
    label_feature = tf.FixedLenFeature(
        [1], tf.int64, default_value=tf.zeros([1], dtype=tf.int64))
    keys_to_features = {
        'image/encoded': encoded_feature,
        'image/format': format_feature,
        'image/class/label': label_feature,
    }

    # How parsed features are turned into the items the provider exposes.
    image_handler = decoding.Image(shape=[28, 28, 1], channels=1)
    label_handler = decoding.Tensor('image/class/label', shape=[])
    items_to_handlers = {'image': image_handler, 'label': label_handler}

    example_decoder = decoding.TFExampleDecoder(
        keys_to_features, items_to_handlers)

    class_names = read_label_file(dataset_dir)

    return slim.dataset.Dataset(
        data_sources=data_sources,
        reader=record_reader,
        decoder=example_decoder,
        num_samples=_SPLITS_TO_SIZES[split_name],
        num_classes=_NUM_CLASSES,
        items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
        labels_to_names=class_names)


def load_batch(dataset, batch_size=32):
    """Builds tensors that yield one batch of images and labels.

    Args:
        dataset: The slim `Dataset` to read from (e.g. from `get_split`).
        batch_size: Number of examples per batch.

    Returns:
        A pair `(images, labels)` of batched tensors. The images are cast to
        float and multiplied by 1/255.
    """
    provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset, common_queue_capacity=32, common_queue_min=8)
    raw_image, raw_label = provider.get(['image', 'label'])

    # Cast to float, then rescale by 1/255.
    scaled_image = (1. / 255) * tf.to_float(raw_image)

    # Group single examples into batches.
    images, labels = tf.train.batch(
        [scaled_image, raw_label],
        batch_size=batch_size,
        num_threads=1,
        capacity=2 * batch_size
    )
    return images, labels

It is always a good idea to view some examples to build up an intuition about your dataset. The block below creates a `DatasetDataProvider` object that can read data examples from the TFRecord files. It then plots some of the images in the dataset along with their labels.

In [None]:
# display a few examples
# Build a provider over the training split; shuffle=False turns off shuffling
# of the examples it reads.
dataset = get_split("train", DATA_DIR)
provider = slim.dataset_data_provider.DatasetDataProvider(
    dataset, shuffle=False)
image, label = provider.get(["image", "label"])
with tf.Session() as sess:
    # The provider reads through input queues; QueueRunners is expected to keep
    # them running for the duration of this block.
    with slim.queues.QueueRunners(sess):
        for _ in range(5):
            # Each run() call fetches one decoded (image, label) example.
            np_image, np_label = sess.run([image, label])
            height, width, _  = np_image.shape
            # Map the integer label to its class name from the labels file.
            # NOTE(review): `class_name` is assigned but not used in this cell.
            class_name = name = dataset.labels_to_names[np_label]
            
            plt.figure()
            # squeeze() drops the size-1 channel axis so imshow gets a 2-D array.
            plt.imshow(np_image.squeeze(), cmap=plt.cm.gray, interpolation='None')
            plt.title('%s, %d x %d' % (name, height, width))
            plt.axis('off')
            plt.show()

### Exercise 1 - design an MLP for MNIST

Build an MLP. It is up to you what the structure of the model will be, but keep in mind that this problem is much higher dimensional than previous problems we have worked on. This is your first chance to design a model on real data! See if you can get 90% accuracy or better.

Here are some of the things you will need to decide about your model:
- number of layers
- activation function
- number of dimensions in each layer
- batch size
- number of epochs
- learning rate

Suggestions:
- We will treat each image as one large vector. To do this, the first thing you need to do is "flatten" the images before feeding them into more layers. Check out `slim.flatten` to do this.
- The training logs are saved in `train_mnist/`, so you can use Tensorboard to help you visualize training.
- Feel free to compare results with your neighbors to find out what works well.
- You may be able to improve your test performance by regularizing your model. Check out `slim.dropout`.
- You may want to try a more sophisticated optimizer. Tensorflow's `AdamOptimizer` is a good choice that takes care of tuning the learning rate for you.


If you want to talk over design decisions, feel free to ask!

In [None]:
# Define your model
def my_model(images, num_classes):
    """Build a basic neural network

    This is an exercise stub: replace the body with your own model.

    Args:
        images: The batched image tensor returned by `load_batch` (float
          values, already multiplied by 1/255). Presumably shaped
          [batch_size, 28, 28, 1], following the image shape declared in
          `get_split` -- confirm before relying on it.
        num_classes: Number of output classes; the training and evaluation
          cells pass `dataset.num_classes`.

    Returns:
        The logits tensor. The training cell feeds it to
        `slim.losses.softmax_cross_entropy` together with one-hot labels of
        depth `num_classes`, and the evaluation cell takes
        `tf.argmax(logits, 1)`, so it should hold one row of `num_classes`
        unnormalized scores per image.
    """
    
    # TODO - design a model for MNIST
    
    return ...


## Train model

This block will train the model that you define above.

In [None]:
# Every run logs to its own time-stamped sub-directory of train_mnist/.
train_dir = "train_mnist/%s" % datetime.now().strftime("%H-%M-%S")

# Training hyperparameters.
batch_size = 100
learning_rate = 0.5
num_epochs = 2

with tf.Graph().as_default():
    tf.logging.set_verbosity(tf.logging.DEBUG)
    
    dataset = get_split('train', DATA_DIR)
    images, labels = load_batch(dataset, batch_size=batch_size)
    
    # create the model
    logits = my_model(images, dataset.num_classes)
    
    # specify the loss function
    # softmax_cross_entropy registers the loss with slim; get_total_loss then
    # collects every registered loss into a single tensor.
    one_hot_labels = slim.one_hot_encoding(labels, dataset.num_classes)
    slim.losses.softmax_cross_entropy(logits, one_hot_labels)
    total_loss = slim.losses.get_total_loss()
    
    # create some summaries to visualize the training process
    tf.scalar_summary('losses/Total Loss', total_loss)
    
    # specify the optimizer and create the train op
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
#     optimizer = tf.train.AdamOptimizer(1e-4)

    train_op = slim.learning.create_train_op(total_loss, optimizer)
    
    # run the training
    slim.learning.train(
        train_op,
        logdir=train_dir,
        log_every_n_steps=50,
        # Floor division keeps the step count an integer even when `/` means
        # true division (Python 3, or `from __future__ import division`).
        number_of_steps=dataset.num_samples * num_epochs // batch_size,
        save_summaries_secs=10
    )

## Evaluate performance

Run the block below to evaluate your latest model on the test dataset.

In [None]:
# Evaluate the most recent checkpoint in `train_dir` on the test split.
with tf.Graph().as_default():
    tf.logging.set_verbosity(tf.logging.DEBUG)
    
    # Load the whole test split as one batch so a single evaluation step
    # (num_evals=1 below) covers dataset.num_samples examples.
    dataset = get_split('test', DATA_DIR)
    images, labels = load_batch(dataset, batch_size=dataset.num_samples)
    
    # build model and retrieve predictions
    logits = my_model(images, dataset.num_classes)
    predictions = tf.argmax(logits, 1)
    
    # accuracy metric
    acc_value_op, acc_update_op = slim.metrics.streaming_accuracy(predictions, labels)
    
    # model checkpoint
    checkpoint_path = tf.train.latest_checkpoint(train_dir)
    # A single pre-formatted argument prints the same text whether `print` is
    # the Python 2 statement or the Python 3 function.
    print("Model checkpoint: %s" % (checkpoint_path,))
    
    accuracy = slim.evaluation.evaluate_once(
        master='',
        checkpoint_path=checkpoint_path,
        logdir=train_dir,
        num_evals=1,
        eval_op=acc_update_op,
        final_op=acc_value_op
    )
    
    print("Accuracy: %s" % (accuracy,))
    