In [1]:
from tensorflow.examples.tutorials.mnist import input_data
# Load the MNIST digits from the local "MNIST_data/" directory.
# The returned object exposes three splits: the training set (mnist.train),
# the test set (mnist.test) and the validation set (mnist.validation).
# one_hot=True asks for the labels as one-hot vectors.
mnist = input_data.read_data_sets(
    "MNIST_data/",
    one_hot=True,
)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [2]:
### NOTES REGARDING DATA SET
## Training Set (55,000 dp)
# mnist.train.images -> the "x"s.
# - each image is 28x28 pixels.
# - we will flatten this array to make a 784-d vector space.
# - note: flattening data throws away info.
#   - for future, don't do this! learn better methods!
# - mnist.train.images is a tensor with a shape of [55000, 784].
#   - first-d is index into the list of images.
#   - second-d is index for each pixel.
#     - each entry in the tensor is a pixel intensity between 0-1
# mnist.train.labels -> the "y"s.
# - each image has a corresponding label, a # between 0 and 9.
# - we want the "one-hot vector"
#   - one-hot-vector: a vector which is 0 in most dimensions, and
#      1 in a single dimension.
#   - 3 would be [0,0,0,1,0,0,0,0,0,0]
# - thus, mnist.train.labels is a [55000, 10] array of floats.
## Test Set (10,000 dp)
## Validation Set (5,000 dp)

In [3]:
### SOFTMAX REGRESSIONS
## Objective: look at an image -> get the probability of it being each digit
## Usage:
# if you want to assign ***probabilities*** to an object being
#   one of several different things, softmax is the thing to do.
## Steps:
# 1. add up the evidence of our input being in certain classes.
#   - to do this, we do a weighted sum of the pixel intensities.
#   - the weight is negative if that pixel having a high intensity
#     is evidence against the image being in that class,
#     and positive if it is evidence in favor.
#   - we also add some bias! 
#     - we want to be able to say that some things are more likely
#       independent of the input.
#   > evidence_i = \sum_j W_{i,j}x_j + b_i
#     where W_i is the weights and b_i is the bias for class i,
#       and j is an index for summing over the pixels in our input image x.
# 2.  convert evidence into probabilities.
#   - We convert the evidence tallied into our predicted probabilities y
#     using the "softmax" function
#     > y = softmax(evidence), where softmax(z) = normalize(exp(z))
#     > softmax(z)_i = \frac{\exp(z_i)}{\sum_j \exp(z_j)}
#   - Softmax is serving as an "activation" function, shaping the output
#     of our linear function into the form we want.
#   - Softmax = exponentiating its inputs and then normalizing them.
#     - Softmax normalizes so that they add up to one, forming a valid
#       probability distribution.
#   - Vectorized procedure:
#     > y = softmax(Wx + b)

In [4]:
import tensorflow as tf
# An InteractiveSession installs itself as the default session on
# construction; later cells rely on that when they call `<op>.run()`
# without passing `sess` explicitly.
sess = tf.InteractiveSession()

In [5]:
# Input images. `x` is a placeholder: it has no value of its own and is
# supplied through feed_dict whenever we ask tf to run a computation.
# Each row is one flattened image (784 pixel intensities); the leading None
# means that dimension -- the number of images -- can be of any length.
x = tf.placeholder(dtype=tf.float32, shape=[None, 784], name="x")

# Model parameters. A tf.Variable is a modifiable tensor that lives in tf's
# graph of interacting ops; it can be read and even modified by the
# computation. Variables are created from an initial value -- here, zeros.
#   - W: [784, 10] weights (784 pixels of the image vector x 10 digit classes).
#   - b: [10] biases, one for each of the 10 digits.
W = tf.Variable(initial_value=tf.zeros(shape=[784, 10]), name="W")
b = tf.Variable(initial_value=tf.zeros(shape=[10]), name="b")

In [6]:
# Linear part of the model: y = xW + b.
# NOTE: y holds the *unnormalized* class scores (logits), not probabilities;
# no softmax is applied in this cell. The softmax of y = softmax(Wx + b) is
# applied inside the loss (tf.nn.softmax_cross_entropy_with_logits) where
# cross_entropy is defined.
# x and W are flipped relative to the formula because x is a 2-D tensor with
# one image per row ([None, 784]): multiplying it by W ([784, 10]) yields one
# row of 10 scores per image, and b is then added to every row.
# tf.matmul => matrix multiplication.
y = tf.matmul(x, W) + b

In [7]:
# TRAINING: CROSS-ENTROPY LOSS
# Training means minimizing a loss function; a common and very nice choice
# for classification is "cross-entropy":
#   H_{y'}(p) = -\sum_i y'_i log(p_i)
# where p is the predicted probability distribution and y' is the true
# distribution (the one-hot vector with the digit label).
# y_ is the placeholder that receives y'.
y_ = tf.placeholder(dtype=tf.float32, shape=[None, 10], name="y_")

# y contains raw scores (logits), so the softmax and the cross-entropy are
# computed together by softmax_cross_entropy_with_logits; reduce_mean then
# averages the per-image losses over the batch.
# Written out by hand, the same quantity would be
#   tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.nn.softmax(y)),
#                                 reduction_indices=[1]))
# NOTE(review): the hand-written form is usually described as numerically
# unstable, which is why the fused op is used here -- confirm against the
# TensorFlow docs before relying on this.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_)
)

# Each run of train_step performs one gradient-descent update of the
# variables with a learning rate of 0.5.
train_step = tf.train.GradientDescentOptimizer(learning_rate=0.5).minimize(
    cross_entropy
)

In [8]:
# TODO: learn about: cross entropy, tf.matmul

In [9]:

# Initialize the variables we created (W and b) before training starts.
sess.run(tf.global_variables_initializer())

# Training: 1000 update steps.
for step in range(1000):
    # next_batch is a method of the MNIST dataset object; it hands back a
    # small batch (here 100 images with their labels) of random training
    # data. Training on small random batches like this is stochastic
    # training -- in this case, stochastic gradient descent.
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

In [10]:
# Evaluating the model: how well did it do on the test set?
# tf.argmax returns the index of the largest entry along an axis; axis=1
# reduces across the 10 class scores of each image, so argmax(y) is the
# predicted digit and argmax(y_) is the true digit. tf.equal compares them
# element-wise, giving one boolean per image.
correct_prediction = tf.equal(tf.argmax(y, axis=1), tf.argmax(y_, axis=1))

# tf.cast converts the booleans to floats (the type specified), and
# tf.reduce_mean averages them: the result is the fraction of images that
# were classified correctly.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype=tf.float32))

test_accuracy = sess.run(
    accuracy,
    feed_dict={x: mnist.test.images, y_: mnist.test.labels},
)
print(test_accuracy)

0.9151


In [11]:
# Write the session's graph to a TensorBoard event file in the current
# directory. To visualize it, start TensorBoard on that directory
# (`tensorboard --logdir=.`) and go to localhost:6006.
file_writer = tf.summary.FileWriter('.', sess.graph)
# FileWriter updates the event file asynchronously, so without an explicit
# flush the graph may not be on disk yet when TensorBoard is started.
# flush() is used rather than close() so the writer stays usable if more
# events are added later; call file_writer.close() once it is no longer
# needed.
file_writer.flush()