# TensorFlow Mechanics 101

- This tutorial is meant as a companion to the code [here](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/tutorials/mnist/)
- The goal of this tutorial is to show how to use TensorFlow to train and evaluate a simple feed-forward neural network for handwritten digit classification using the (classic) MNIST data set. 

- [`mnist.py`](https://www.tensorflow.org/code/tensorflow/examples/tutorials/mnist/mnist.py), the code for making a fully-connected MNIST model
- [`fully_connected_feed.py`](https://www.tensorflow.org/code/tensorflow/examples/tutorials/mnist/fully_connected_feed.py), the main code to train the built MNIST model against the downloaded dataset using a feed dictionary.

In [86]:
import math
import time
import os.path
import tensorflow as tf
import pandas as pd
import random
#from tensorflow.examples.tutorials.mnist import input_data, mnist

In [87]:
def read_praxis_data():
    train_data = pd.read_csv("test_data_revised/training_macro.csv",
                            dtype = {'apppointment_id': str})
    train_labels = train_data['H1']
    train_labels = train_labels.apply(lambda x: x - 1)
    train_features = train_data[[a for a in train_data.columns.tolist() if
                                 a not in ['appointment_id', 'H1']]]
    
    test_data = pd.read_csv("test_data_revised/testing_macro.csv",
                            dtype = {'apppointment_id': str})
    test_labels = test_data['H1']
    test_labels = test_labels.apply(lambda x: x - 1)
    test_features = test_data[[a for a in train_data.columns.tolist() if
                                 a not in ['appointment_id', 'H1']]]
    
    return train_features,train_labels, test_features, test_labels

In [105]:
# Get data and define some parameters
log_dir_path = "/tmp/tensorflow/mnist/logs/fully_connected_feed"
data_path = "/tmp/tensorflow/mnist/input_data"
learning_rate = 0.01
max_steps = 2000
hidden1 = 9
hidden2 = 9
hidden3=9
batch_size = 100
NUM_CLASSES = 6
IMAGE_SIZE = 28
#IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE
IMAGE_PIXELS = 9
# Get the sets of images and labels for training, validation, and
# test on MNIST.
#data_sets = input_data.read_data_sets(data_path)

In [100]:
train_features,train_labels, test_features, test_labels = read_praxis_data()
print(train_features.shape)
print(type(train_features))
print(test_labels.shape)
print(train_features[:2])


(4000, 9)
<class 'pandas.core.frame.DataFrame'>
(2750,)
       NSQG      NSQU      NSQM   LOGDTU    LOGDTA  WORDLN_2  NWF_MEDIAN  \
0 -0.087124 -0.087204 -0.163144  1.94591  4.319391  4.505703  -64.661807   
1 -0.078680 -0.079493 -0.153937  1.94591  4.504560  4.928910  -58.183479   

    COLPREP       SVF  
0  0.596079  3.622808  
1  0.657845  4.446178  


In [109]:
# From mnist.py
def inference(images, hidden1_units, hidden2_units, hidden3_units):
    """Build the MNIST model up to where it may be used for inference.
    Args:
        images: Images placeholder, from inputs().
        hidden1_units: Size of the first hidden layer.
        hidden2_units: Size of the second hidden layer.
    Returns:
        softmax_linear: Output tensor with the computed logits.
    """
    # Hidden 1
    with tf.name_scope('hidden1'):
        weights = tf.Variable(
              tf.truncated_normal([IMAGE_PIXELS, hidden1_units],
                                  stddev=1.0 / math.sqrt(float(IMAGE_PIXELS))),
              name='weights')
        biases = tf.Variable(tf.zeros([hidden1_units]),
                             name='biases')
        hidden1 = tf.nn.relu(tf.matmul(images, weights) + biases)
    # Hidden 2
    with tf.name_scope('hidden2'):
        weights = tf.Variable(
            tf.truncated_normal([hidden1_units, hidden2_units],
                                stddev=1.0 / math.sqrt(float(hidden1_units))),
            name='weights')
        biases = tf.Variable(tf.zeros([hidden2_units]),
                             name='biases')
        hidden2 = tf.nn.relu(tf.matmul(hidden1, weights) + biases)
    
    # Hidden 3
    with tf.name_scope('hidden3'):
        weights = tf.Variable(
            tf.truncated_normal([hidden2_units, hidden3_units],
                                stddev=1.0 / math.sqrt(float(hidden2_units))),
            name='weights')
        biases = tf.Variable(tf.zeros([hidden3_units]),
                             name='biases')
        hidden3 = tf.nn.relu(tf.matmul(hidden2, weights) + biases)
    # Linear
    with tf.name_scope('softmax_linear'):
        weights = tf.Variable(
            tf.truncated_normal([hidden3_units, NUM_CLASSES],
                                stddev=1.0 / math.sqrt(float(hidden3_units))),
            name='weights')
        biases = tf.Variable(tf.zeros([NUM_CLASSES]),
                             name='biases')
        logits = tf.matmul(hidden2, weights) + biases
    return logits


def loss(logits, labels):
    """Calculates the loss from the logits and the labels.
    Args:
        logits: Logits tensor, float - [batch_size, NUM_CLASSES].
        labels: Labels tensor, int32 - [batch_size].
    Returns:
        loss: Loss tensor of type float.
    """
    labels = tf.to_int64(labels)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits, name='xentropy')
    return tf.reduce_mean(cross_entropy, name='xentropy_mean')

In [110]:
# From fully_connected_feed.py
def fill_feed_dict(features, labels, images_pl, labels_pl):
    """Fills the feed_dict for training the given step.
    A feed_dict takes the form of:
    feed_dict = {
        <placeholder>: <tensor of values to be passed for placeholder>,
        ....
    }
    Args:
        data_set: The set of images and labels, from input_data.read_data_sets()
        images_pl: The images placeholder, from placeholder_inputs().
        labels_pl: The labels placeholder, from placeholder_inputs().
    Returns:
        feed_dict: The feed dictionary mapping from placeholders to values.
    """
    # Create the feed_dict for the placeholders filled with the next
    # `batch size` examples.
    #images_feed, labels_feed = data_set.next_batch(batch_size)
    sample = random.sample(range(2500), k=100)
    images_feed = features.ix[sample]
    labels_feed = labels.ix[sample]
    
    
    feed_dict = {
        images_pl: images_feed,
        labels_pl: labels_feed,
    }
    return feed_dict


def do_eval(sess,
            eval_correct,
            images_placeholder,
            labels_placeholder,
            features, labels):
    print(features.shape)
    print(labels.shape)
    """Runs one evaluation against the full epoch of data.
    Args:
        sess: The session in which the model has been trained.
        eval_correct: The Tensor that returns the number of correct predictions.
        images_placeholder: The images placeholder.
        labels_placeholder: The labels placeholder.
        data_set: The set of images and labels to evaluate, from
          input_data.read_data_sets().
    """
    # And run one epoch of eval.
    true_count = 0  # Counts the number of correct predictions.
    steps_per_epoch = features.shape[0] // batch_size
    num_examples = steps_per_epoch * batch_size
    for step in range(steps_per_epoch):
        feed_dict = fill_feed_dict(features, labels,
                                   images_placeholder,
                                   labels_placeholder)
        logit_output, true_cnt = sess.run([logits, eval_correct], feed_dict=feed_dict)
        true_count += true_cnt
        #print(logit_output)
    precision = float(true_count) / num_examples
    print('  Num examples: %d  Num correct: %d  Precision @ 1: %0.04f' %
          (num_examples, true_count, precision))

In [111]:
# Tell TensorFlow that the model will be built into the default Graph.
with tf.Graph().as_default():

    # Generate placeholders for the images and labels.
    images_placeholder = tf.placeholder(tf.float32, shape=(batch_size,
                                                           IMAGE_PIXELS))
    labels_placeholder = tf.placeholder(tf.int32, shape=(batch_size))

    # Build a Graph that computes predictions from the inference model.
    logits = inference(images_placeholder,
                       hidden1,
                       hidden2, hidden3)

    # Add to the Graph the Ops for loss calculation.
    loss = mnist.loss(logits, labels_placeholder)

    # Add to the Graph the Ops that calculate and apply gradients.
    train_op = mnist.training(loss, learning_rate)

    # Add the Op to compare the logits to the labels during evaluation.
    eval_correct = mnist.evaluation(logits, labels_placeholder)

    # Build the summary Tensor based on the TF collection of Summaries.
    summary = tf.summary.merge_all()

    # Add the variable initializer Op.
    init = tf.global_variables_initializer()

    # Create a saver for writing training checkpoints.
    saver = tf.train.Saver()

    # Create a session for running Ops on the Graph.
    sess = tf.Session()

    # Instantiate a SummaryWriter to output summaries and the Graph.
    summary_writer = tf.summary.FileWriter(log_dir_path, sess.graph)

    # And then after everything is built:

    # Run the Op to initialize the variables.
    sess.run(init)

    # Start the training loop.
    for step in range(max_steps):
        start_time = time.time()

        # Fill a feed dictionary with the actual set of images and labels
        # for this particular training step.
        feed_dict = fill_feed_dict(train_features, train_labels,
                                   images_placeholder,
                                   labels_placeholder)

        # Run one step of the model.  The return values are the activations
        # from the `train_op` (which is discarded) and the `loss` Op.  To
        # inspect the values of your Ops or variables, you may include them
        # in the list passed to sess.run() and the value tensors will be
        # returned in the tuple from the call.
        _, loss_value = sess.run([train_op, loss],
                                 feed_dict=feed_dict)

        duration = time.time() - start_time

        # Write the summaries and print an overview fairly often.
        if step % 100 == 0:
            # Print status to stdout.
            print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
            # Update the events file.
            summary_str = sess.run(summary, feed_dict=feed_dict)
            summary_writer.add_summary(summary_str, step)
            summary_writer.flush()
        
        # Save a checkpoint and evaluate the model periodically.
        if (step + 1) % 1000 == 0 or (step + 1) == max_steps:
            checkpoint_file = os.path.join(log_dir_path, 'model.ckpt')
            saver.save(sess, checkpoint_file, global_step=step)
            # Evaluate against the training set.
            print('Test Data Eval:')
            do_eval(sess,
                    eval_correct,
                    images_placeholder,
                    labels_placeholder,
                    test_features, test_labels)
            # Evaluate against the validation set.
            '''
            print('Validation Data Eval:')
            do_eval(sess,
                    eval_correct,
                    images_placeholder,
                    labels_placeholder,
                    data_sets.validation)
            
            # Evaluate against the test set.
            print('Test Data Eval:')
            do_eval(sess,
                    eval_correct,
                    images_placeholder,
                    labels_placeholder,
                    data_sets.test)
        '''

Step 0: loss = 4.86 (0.008 sec)
Step 100: loss = 1.17 (0.001 sec)
Step 200: loss = 1.09 (0.001 sec)
Step 300: loss = 1.19 (0.001 sec)
Step 400: loss = 1.18 (0.002 sec)
Step 500: loss = 1.15 (0.001 sec)
Step 600: loss = 1.24 (0.002 sec)
Step 700: loss = 1.11 (0.001 sec)
Step 800: loss = 1.15 (0.003 sec)
Step 900: loss = 1.19 (0.001 sec)
Test Data Eval:
(2750, 9)
(2750,)
  Num examples: 2700  Num correct: 1388  Precision @ 1: 0.5141
Step 1000: loss = 1.07 (0.002 sec)
Step 1100: loss = 1.16 (0.001 sec)
Step 1200: loss = 1.08 (0.001 sec)
Step 1300: loss = 1.09 (0.001 sec)
Step 1400: loss = 1.10 (0.002 sec)
Step 1500: loss = 1.13 (0.001 sec)
Step 1600: loss = 1.02 (0.001 sec)
Step 1700: loss = 1.13 (0.002 sec)
Step 1800: loss = 1.15 (0.002 sec)
Step 1900: loss = 1.11 (0.001 sec)
Test Data Eval:
(2750, 9)
(2750,)
  Num examples: 2700  Num correct: 1361  Precision @ 1: 0.5041
