# TensorFlow Walkthrough

Boilerplate

In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# Import the training data (MNIST)
from tensorflow.examples.tutorials.mnist import input_data

# Let the tensorflowing begin

In [None]:
import tensorflow as tf

Download and extract the MNIST data set if it is not already present in the target directory.
Retrieve the labels as one-hot-encoded vectors.

In [None]:
# Load MNIST from /tmp/mnist, with each label as a one-hot vector
mnist = input_data.read_data_sets(train_dir="/tmp/mnist", one_hot=True)

Create a new `Graph` and register it as the *default graph* 

In [None]:
# A dedicated Graph object that will hold every op of this model
graph = tf.Graph()

# While this context is active, `graph` is the default graph, so the ops
# created in the indented cells below are added to it
with graph.as_default():
    ...

Create placeholders for examples and labels

In [None]:
    # Input examples: one row per example, 784 features per row.
    # The leading None leaves the batch size open until feed time.
    examples = tf.placeholder(tf.float32, shape=[None, 784])
    # Labels: one row per example, 10 entries per row
    labels = tf.placeholder(tf.float32, shape=[None, 10])

Create weight and bias variables

In [None]:
    # Draw the initial weights from a truncated normal distribution
    # (stddev 0.1) so they start small, random and non-identical
    # (symmetry breaking). Shape is [784 input features, 10 outputs].
    initial_weights = tf.truncated_normal(shape=[784, 10], stddev=0.1)
    weights = tf.Variable(initial_weights)
    # Slightly positive initial bias (0.1 for each of the 10 outputs)
    # to avoid dead neurons
    initial_bias = tf.constant(0.1, shape=[10])
    bias = tf.Variable(initial_bias)

1. Apply the affine transformation to the input features to produce *scores*
2. Run the softmax function over the scores to create a probability distribution

In [None]:
    # Affine transformation: unnormalized scores (logits) per output
    scores = tf.matmul(examples, weights)
    logits = tf.add(scores, bias)
    # Softmax turns each row of logits into a probability distribution
    estimates = tf.nn.softmax(logits)

Now that we have our estimates, we want to compute some cost metric that tells us how accurate our model is. 
For this, we use the cross entropy between the softmax probabilities and the label "distribution".

In [None]:
    # Compute the per-example cross-entropy between the softmax of the
    # logits and the labels. The fused op works on the raw logits, which
    # avoids the log(0) = -inf / NaN results that the hand-written
    # -sum(labels * log(softmax(logits))) produces when a probability
    # underflows to zero.
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        labels=labels, logits=logits)
    # And finally the loss: the mean cross-entropy over the batch
    loss = tf.reduce_mean(cross_entropy)

Use gradient descent to minimize the loss.
Other optimizers are also available, such as the `MomentumOptimizer`.

In [None]:
    # Create a gradient-descent optimizer that minimizes the loss.
    # We choose a learning rate of 0.5
    learning_rate = 0.5
    # Note: `optimizer` holds the op returned by minimize(), i.e. the
    # training step to run, not the optimizer object itself.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

Compute the accuracy to let us know how well our model is doing (it is more descriptive than the loss alone)

In [None]:
    # Index of the highest-probability class in each row of estimates
    predicted_classes = tf.argmax(estimates, 1)
    # Index of the largest entry in each label row
    true_classes = tf.argmax(labels, 1)
    # Row-wise boolean: did the prediction match the label?
    correct_predictions = tf.equal(predicted_classes, true_classes)
    # Mean of the 0/1 matches = correct predictions / all predictions
    matches = tf.cast(correct_predictions, tf.float32)
    accuracy = tf.reduce_mean(matches)

Now we can actually run and train our algorithm!

In [None]:
# Enter a session environment
# Pass our graph as the graph to be managed
with tf.Session(graph=graph) as session:
    # Variables must be initialized before any op that reads them is run
    tf.initialize_all_variables().run()
    for step in range(1001):
        # Grab the next batch of 100 examples and their labels
        example_batch, label_batch = mnist.train.next_batch(100)
        # Replace the placeholder tensors with the actual batch data
        feed_dict = {examples: example_batch, labels: label_batch}
        # Report the loss and accuracy every 100 steps
        if step % 100 == 0:
            # Run the training op together with the metrics; the training
            # op's result is discarded (it carries no useful value)
            _, loss_value, accuracy_value = session.run(
                [optimizer, loss, accuracy],
                feed_dict=feed_dict
            )
            # Both metrics are measured on the current training batch
            print("Loss at time {0}: {1}".format(step, loss_value))
            print("Accuracy at time {0}: {1}".format(step, accuracy_value))
        else:
            # Run the training op directly!
            optimizer.run(feed_dict)