In [13]:
# MNIST Logistic Regression: model for MNIST handwritten digit dataset classification
# Shomik Jain, USC CAIS++

# Classification Problem: predict which of k discrete categories some input belongs to (supervised learning)

In [14]:
# 1. Import Dataset & Data Pre-Processing

In [15]:
import pickle # used to save and restore python objects
import gzip
import numpy as np

# Load the dataset
# NOTE: pickle can execute arbitrary code while loading -- only open archives from a trusted source.
# The with-block guarantees the gzip handle is closed even if unpickling raises.
with gzip.open('mnist.pkl.gz', 'rb') as f:
    # the archive unpickles to three partitions; each is indexed below as [0] = inputs, [1] = labels
    train_set, valid_set, test_set = pickle.load(f, encoding = 'latin1')

# dataset information
print("Inputs:")
print(train_set[0])
print("Inputs shape is " + str(train_set[0].shape))
print("Input type is " + str(type(train_set[0])))
print("Labels:")
print(train_set[1])
print("Labels shape is" + str(train_set[1].shape))
print("Labels type is " + str(type(train_set[1])))

Inputs:
[[ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]]
Inputs shape is (50000, 784)
Input type is <class 'numpy.ndarray'>
Labels:
[5 0 4 ..., 8 4 8]
Labels shape is(50000,)
Labels type is <class 'numpy.ndarray'>


In [16]:
# Convert output from numerical (digit labels 0-9) to categorical (one-hot vector encoding)
import numpy as np

# convert class vector (integers from 0 to nb_classes - 1) to binary class matrix
# i.e. -- [1 0 1] => [[0 1] [1 0] [0 1]] (useful for multiple classes)
def to_categorical(y, nb_classes=None):
    """One-hot encode a vector of integer class labels.

    Args:
        y: array-like of integer class ids; any shape is accepted and is
            flattened to 1-D before encoding.
        nb_classes: number of columns in the result. When omitted (or falsy)
            it is inferred as ``max(y) + 1``, or 0 when ``y`` is empty.

    Returns:
        Float ndarray of shape ``(number of labels, nb_classes)`` where row
        ``i`` is all zeros except for a 1.0 in column ``y[i]``.

    Raises:
        IndexError: if a label is >= nb_classes.
    """
    # Flatten so column vectors of shape (n, 1) and scalars index one row per label
    # instead of broadcasting against the row index array.
    y = np.asarray(y, dtype='int32').ravel()
    if not nb_classes:
        # np.max raises on an empty array, so guard the inference.
        nb_classes = int(y.max()) + 1 if y.size else 0
    Y = np.zeros((y.size, nb_classes))
    # Paired fancy indexing: sets exactly one entry (i, y[i]) per row.
    Y[np.arange(y.size), y] = 1.
    return Y

In [17]:
# Train/test partitions: the model is fit on the training partition and judged on the
# test partition, which it never sees during training.
# Element [0] of each partition holds the inputs; element [1] holds the integer labels,
# which are one-hot encoded into 10 columns.
train_x, test_x = train_set[0], test_set[0]
train_y, test_y = (to_categorical(partition[1], 10) for partition in (train_set, test_set))

# show the first five encoded test labels
print(test_y[:5])

[[ 0.  0.  0.  0.  0.  0.  0.  1.  0.  0.]
 [ 0.  0.  1.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  1.  0.  0.  0.  0.  0.]]


In [18]:
# 2. Create Logistic Regression Model using TensorFlow
import tensorflow as tf

In [19]:
# Build the model: every call below adds nodes to TensorFlow's default computational graph

# Start from an empty graph so re-running this cell in Jupyter does not collide with
# variables left over from a previous run
tf.reset_default_graph()

# x: input placeholder -- any number of rows, 784 features each
x = tf.placeholder(dtype=tf.float32, shape=[None, 784])

# W: weight matrix (784 inputs -> 10 outputs), created with the Xavier initializer
W = tf.get_variable(name="W", shape=[784, 10],
                    initializer=tf.contrib.layers.xavier_initializer())

# b: bias vector, one entry per output, starting at zero
b = tf.Variable(initial_value=tf.zeros(shape=[10]))

# y: hypothesis function -- predicted probability of each possible output given the inputs
# softmax is like sigmoid, but normalizes each row so the probabilities add up to 1
y = tf.nn.softmax(tf.add(tf.matmul(x, W), b))

# y_: placeholder for the actual labels (10 columns per row)
y_ = tf.placeholder(dtype=tf.float32, shape=[None, 10])

In [20]:
# 3. Training the Model 
# Find best possible hypothesis function <=> find weights that minimize cost function

In [21]:
# Logistic Regression Cost Function: Cross-Entropy Loss
# cost increases as predicted probability diverges from actual label
# Computed from the raw logits (the pre-softmax scores x.W + b) with TensorFlow's fused
# op rather than tf.log(tf.nn.softmax(...)): if a predicted probability underflows to 0,
# tf.log returns -inf and 0 * -inf = NaN, which would poison the loss and every gradient.
# The op returns one cross-entropy value per example; reduce_mean averages over the batch.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=tf.matmul(x, W) + b))

In [22]:
# Optimization -- Mini-Batch (Stochastic) Gradient Descent
# at each step, use a random subset (mini-batch) of the training set
# instead of full-batch gradient descent, which looks at every example at every step 
def generate_batch(batch_size, inputs=None, labels=None):
    """Draw a random mini-batch of (inputs, labels) rows, sampled with replacement.

    Args:
        batch_size: number of rows to draw.
        inputs: array of examples to sample from; defaults to the notebook's
            global ``train_x``.
        labels: array aligned row-for-row with ``inputs``; defaults to the
            notebook's global ``train_y``.

    Returns:
        Tuple ``(batch_inputs, batch_labels)`` selected with the same random
        row indices, so each input stays paired with its label.
    """
    if inputs is None:
        inputs = train_x
    if labels is None:
        labels = train_y
    # np.random.randint's upper bound is exclusive, so len(inputs) covers every row
    # (a hard-coded 49999 would never pick the last of 50000 examples).
    indexes = np.random.randint(len(inputs), size=(batch_size,))
    return inputs[indexes], labels[indexes]

In [23]:
# Training setup
# The optimizer derives the gradients of the loss from the graph automatically
optimizer = tf.train.GradientDescentOptimizer(learning_rate=.01)  # .01 is the learning rate
train_step = optimizer.minimize(loss)

# create the session object; InteractiveSession installs itself as the default session
sess = tf.InteractiveSession()

# initialize every variable in the graph
sess.run(tf.global_variables_initializer())

In [24]:
# Run gradient descent: 50000 update steps, each on a fresh random batch of 500 examples
for i in range(50000):
    # draw the training inputs and their labels for this step
    batch_xs, batch_ys = generate_batch(500)

    # map each placeholder to the data it should receive: x <- inputs, y_ <- labels
    feed = {x: batch_xs, y_: batch_ys}

    # evaluating train_step runs the graph once and applies one weight update
    sess.run(train_step, feed_dict=feed)

In [25]:
# 4. Evaluate the Model 

In [26]:
# Accuracy: the fraction of rows whose most probable predicted class matches the labelled class
predicted_class = tf.argmax(y, 1)   # index of the largest predicted probability per row
actual_class = tf.argmax(y_, 1)     # index of the largest entry in each label row
correct_prediction = tf.equal(predicted_class, actual_class)

# cast the booleans to 0.0 / 1.0 so that their mean is the proportion of correct predictions
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [27]:
# Evaluate on the held-out test partition: feed the test inputs and their one-hot labels
test_feed = {x: test_x, y_: test_y}
print(accuracy.eval(feed_dict=test_feed, session=sess))

0.9208
