In [1]:
""" Starter code for logistic regression model to solve OCR task 
with MNIST in TensorFlow
MNIST dataset: yann.lecun.com/exdb/mnist/
Author: Chip Huyen
Prepared for the class CS 20SI: "TensorFlow for Deep Learning Research"
cs20si.stanford.edu
"""
import os
# Set before the tensorflow import below. Presumably this lowers the verbosity
# of TensorFlow's native logging -- TODO confirm the exact meaning of level '2'.
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'

import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import time

# Step 1: Read in data
# Load MNIST through the tutorial helper, with ./data/mnist as the data folder.
# one_hot=True asks for one-hot label vectors, which is the layout expected by
# the (None, 10) label placeholder that the model cell feeds them into.
mnist = input_data.read_data_sets(
    train_dir='./data/mnist',
    one_hot=True,
)

Extracting ./data/mnist\train-images-idx3-ubyte.gz
Extracting ./data/mnist\train-labels-idx1-ubyte.gz
Extracting ./data/mnist\t10k-images-idx3-ubyte.gz
Extracting ./data/mnist\t10k-labels-idx1-ubyte.gz


In [2]:
# Define parameters for the model
learning_rate = 0.01

# Start from an empty default graph so that re-running this cell rebuilds the
# model instead of adding a second copy of every op to the existing graph.
tf.reset_default_graph()

# Step 2: create placeholders for features and labels
# each image in the MNIST data is of shape 28*28 = 784,
# therefore a batch of images is fed as a (batch, 784) float tensor.
# there are 10 classes for each image, corresponding to digits 0 - 9,
# therefore a batch of one-hot labels is fed as a (batch, 10) tensor.
# The labels are declared float32, the same dtype as the logits, because
# tf.nn.softmax_cross_entropy_with_logits is documented to take labels and
# logits of the same floating-point dtype.
X = tf.placeholder(dtype=tf.float32, shape=(None, 784), name='input_X')
Y = tf.placeholder(dtype=tf.float32, shape=(None, 10), name='output_Y')

# Step 3: create weights and bias
# shape of w depends on the dimension of X and Y so that Y = X * w + b
# shape of b depends on Y
# The weights start from small Gaussian noise and the bias from zeros.
# An all-zero start is an alternative initialization:
#   w = tf.Variable(tf.zeros([784, 10]), name='weight')
# The recorded runs in this notebook show nearly identical results for both.
w = tf.Variable(tf.random_normal(shape=[784, 10], stddev=0.01), name='weights')
b = tf.Variable(tf.zeros([1, 10]), name='bias')

# Step 4: build model
# the model returns the logits.
# these logits are passed through softmax inside the loss op below
# to get the probability distribution over the possible labels of the image,
# so DO NOT apply softmax here.
logits = tf.matmul(X, w) + b

# Step 5: define loss function
# cross entropy between the real labels and the softmax of the logits,
# one value per example ...
xentropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y, name='loss')
# ... averaged over all the examples in the batch
loss = tf.reduce_mean(xentropy)

# Evaluation op: number of examples in the fed batch whose highest-scoring
# class equals the class marked in the one-hot label.
Y_predicted = tf.argmax(logits, axis=1)
Y_true = tf.argmax(Y, axis=1)
correct_preds = tf.reduce_sum(tf.cast(tf.equal(Y_predicted, Y_true), tf.float32))

# Step 6: define training op
# using gradient descent to minimize loss
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(loss)

In [3]:
# Training hyper-parameters read by the training/evaluation cell:
#   batch_size - number of examples fed per step
#   n_epochs   - number of passes of the outer training loop
batch_size, n_epochs = 96, 10

In [4]:
# Train the model for n_epochs, then measure accuracy on the test split.
# Uses the graph ops (X, Y, optimizer, loss, correct_preds) and the
# batch_size / n_epochs settings defined in the cells above.
with tf.Session() as sess:
    start_time = time.time()
    sess.run(tf.global_variables_initializer())

    # ---- Training ----
    # Only whole batches are counted per epoch; a remainder smaller than
    # batch_size is not part of n_batches.
    n_batches = mnist.train.num_examples // batch_size
    for i in range(n_epochs):  # train the model n_epochs times
        total_loss = 0

        for _ in range(n_batches):
            X_batch, Y_batch = mnist.train.next_batch(batch_size)
            # One gradient-descent step; also fetch the loss of this batch.
            _, loss_batch = sess.run([optimizer, loss], feed_dict={X: X_batch, Y: Y_batch})
            total_loss += loss_batch

        print('Average loss epoch {0}: {1}'.format(i, total_loss/n_batches))

    print('Total time: {0} seconds'.format(time.time() - start_time))

    # The average loss should be around 0.35 after 25 epochs.
    print('Optimization Finished!')

    # ---- Evaluation ----
    # Walk the test arrays in order so that every example is scored exactly
    # once, including the final batch that is shorter than batch_size.
    # Counting only whole batches while dividing by the full test-set size
    # would under-report the accuracy.
    test_images = mnist.test.images
    test_labels = mnist.test.labels
    n_test = mnist.test.num_examples
    total_correct_preds = 0

    for start in range(0, n_test, batch_size):
        stop = start + batch_size
        correct_preds_batch = sess.run(
            correct_preds,
            feed_dict={X: test_images[start:stop], Y: test_labels[start:stop]},
        )
        total_correct_preds += correct_preds_batch

    print('Accuracy {0}'.format(total_correct_preds/n_test))

Average loss epoch 0: 1.1685101960922455
Average loss epoch 1: 0.6554624157456251
Average loss epoch 2: 0.5448721821283127
Average loss epoch 3: 0.4935684443249569
Average loss epoch 4: 0.4603578448816613
Average loss epoch 5: 0.4384937681830846
Average loss epoch 6: 0.4208638999689292
Average loss epoch 7: 0.40927609517336727
Average loss epoch 8: 0.3974391644026016
Average loss epoch 9: 0.3905230011087614
Total time: 13.946876049041748 seconds
Optimization Finished!
Accuracy 0.9027


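For comparison, the same 10-epoch run with the weights initialized to zeros instead of random normal values gives nearly the same loss curve and test accuracy:

```python
w = tf.Variable(tf.zeros([784,10]), name = 'weight')
```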
```
Average loss epoch 0: 1.1667942075462607
Average loss epoch 1: 0.6569620260408708
Average loss epoch 2: 0.5480158749472844
Average loss epoch 3: 0.4924163765736393
Average loss epoch 4: 0.4585406290744985
Average loss epoch 5: 0.43732188983292847
Average loss epoch 6: 0.4233226356962761
Average loss epoch 7: 0.4066769373114709
Average loss epoch 8: 0.3994443095647372
Average loss epoch 9: 0.38840923929860544
Total time: 12.172384023666382 seconds
Optimization Finished!
Accuracy 0.9013
```