In [2]:
import numpy as np
import tensorflow as tf
from __future__ import print_function
from six.moves import cPickle as pickle
from six.moves import range
import os

In [3]:
pickle_file = 'notMNIST.pickle'
with open(pickle_file,'rb') as f:
    save = pickle.load(f)
    train_dataset = save['train_dataset']
    train_labels = save['train_labels']
    valid_dataset = save['valid_dataset']
    valid_labels = save['valid_labels']
    test_dataset = save['test_dataset']
    test_labels = save['test_labels']
    del save
    print('training shape',train_dataset.shape,train_labels.shape)
    print('valid shape',valid_dataset.shape,valid_labels.shape)
    print('test shape',test_dataset.shape,test_labels.shape)

training shape (200000, 28, 28) (200000,)
valid shape (10000, 28, 28) (10000,)
test shape (10000, 28, 28) (10000,)


In [4]:
img_dim = 28
num_label = 10
def reformat(dataset,label):
    dataset = dataset.reshape((-1,img_dim*img_dim)).astype(np.float32)
    label = (np.arange(num_label) == label[:,None]).astype(np.float32)
    return dataset,label
train_dataset,train_labels = reformat(train_dataset,train_labels)
valid_dataset,valid_labels = reformat(valid_dataset,valid_labels)
test_dataset,test_labels = reformat(test_dataset,test_labels)
print('training shape',train_dataset.shape,train_labels.shape)
print('valid shape',valid_dataset.shape,valid_labels.shape)
print('test shape',test_dataset.shape,test_labels.shape)

training shape (200000, 784) (200000, 10)
valid shape (10000, 784) (10000, 10)
test shape (10000, 784) (10000, 10)


In [4]:
train_subset = 10000
graph = tf.Graph()
with graph.as_default():
    tf_train = tf.constant(train_dataset[:train_subset,:])
    tf_train_label = tf.constant(train_labels[:train_subset])
    tf_test = tf.constant(test_dataset)
    tf_valid = tf.constant(valid_dataset)
    
    #defining layers
    weights = tf.Variable(tf.truncated_normal(shape=[img_dim*img_dim,num_label]))
    biases = tf.Variable(tf.zeros(shape=[10]))
    
    logits = tf.matmul(tf_train,weights)+biases
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels=tf_train_label))
    
    #optimizer
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
    
    #predictions
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(tf.matmul(tf_valid,weights)+biases)
    test_prediction = tf.nn.softmax(tf.matmul(tf_test,weights)+biases)

In [31]:
num_step = 800

def accuracy(predictions,labels):
    return 100*np.sum(np.argmax(predictions,1)==np.argmax(labels,1))/predictions.shape[0]

with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    
    for step in range(num_step):
        _,l,predictions = session.run([optimizer,loss,train_prediction])
        
        if (step%100==0):
            print('training accuracy %f'% accuracy(predictions,train_labels[:train_subset]))
            print('loss at step %d :%f'%(step,l))
            print('validation_accuracy %f'% accuracy(valid_prediction.eval(),valid_labels))
    print('test accuracy %f'% accuracy(test_prediction.eval(),test_labels))

training accuracy 9.240000
loss at step 0 :18.537615
validation_accuracy 10.760000
training accuracy 72.380000
loss at step 100 :2.258971
validation_accuracy 70.840000
training accuracy 75.550000
loss at step 200 :1.834490
validation_accuracy 72.950000
training accuracy 76.930000
loss at step 300 :1.601650
validation_accuracy 74.080000
training accuracy 77.940000
loss at step 400 :1.441922
validation_accuracy 74.430000
training accuracy 78.580000
loss at step 500 :1.321468
validation_accuracy 74.910000
training accuracy 79.000000
loss at step 600 :1.225886
validation_accuracy 75.130000
training accuracy 79.440000
loss at step 700 :1.147394
validation_accuracy 75.360000
test accuracy 82.914975


In [5]:
batch_size = 128
num_nodes = 1024

graph = tf.Graph()

with graph.as_default():

  # Input data. For the training data, we use a placeholder that will be fed
  # at run time with a training minibatch.
  tf_train_dataset = tf.placeholder(tf.float32,
                                    shape=(batch_size, img_dim* img_dim))
  tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_label))
  tf_valid_dataset = tf.constant(valid_dataset)
  tf_test_dataset = tf.constant(test_dataset)

  # Variables.

  weights1 = tf.Variable(
    tf.truncated_normal([img_dim * img_dim,1024]))
  biases1 = tf.Variable(tf.zeros([1024]))
  weights2 = tf.Variable(
    tf.truncated_normal([num_nodes,num_label]))
  biases2 = tf.Variable(tf.zeros([num_label]))    
  beta = tf.Variable(tf.ones([1]))
  # Training computation.
  def logits_compute_train(tf_dataset):
      hidden_layer = tf.matmul(tf_dataset,weights1)+biases1
      after_relu = tf.nn.relu(hidden_layer)
      tf.nn.dropout(x=after_relu,keep_prob=0.5)
      print(after_relu.get_shape())  
      logits = tf.matmul(after_relu,weights2) + biases2
      return logits
    
  def logits_compute(tf_dataset):
      hidden_layer = tf.matmul(tf_dataset,weights1)+biases1
      tf.nn.dropout(x=hidden_layer,keep_prob=0.5)
      after_relu = tf.nn.relu(hidden_layer)
      print(after_relu.get_shape())  
      logits = tf.matmul(after_relu,weights2) + biases2
      return logits

  logits = logits_compute_train(tf_train_dataset)  

  loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits))

  loss += 0.0005*tf.nn.l2_loss(tf_train_dataset)     

  # Optimizer.
  optimizer = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

  # Predictions for the training, validation, and test data.
  train_prediction = tf.nn.softmax(logits)
  valid_prediction = tf.nn.softmax(logits_compute(tf_valid_dataset))
  test_prediction = tf.nn.softmax(logits_compute(tf_test_dataset))

(128, 1024)
(10000, 1024)
(10000, 1024)


In [6]:
num_step = 5001

def accuracy(predictions,labels):
    return 100*np.sum(np.argmax(predictions,1)==np.argmax(labels,1))/predictions.shape[0]
with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    for step in range(num_step):
        offset = (step*batch_size)%(train_labels.shape[0]-batch_size)
        batch_data = train_dataset[offset:(offset+batch_size),:]
        batch_labels = train_labels[offset:(offset+batch_size),:]
        feed_dict = {tf_train_dataset : batch_data,tf_train_labels: batch_labels}

        _,l,predictions = session.run([optimizer,loss,train_prediction],feed_dict=feed_dict)

        if (step % 1000 == 0):
          print("Minibatch loss at step %d: %f" % (step, l))
          print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
          print("Validation accuracy: %.1f%%" % accuracy(
            valid_prediction.eval(), valid_labels))
    print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))

Minibatch loss at step 0: 318.176971
Minibatch accuracy: 9.4%
Validation accuracy: 23.6%
Minibatch loss at step 1000: 17.791245
Minibatch accuracy: 82.8%
Validation accuracy: 81.0%
Minibatch loss at step 2000: 19.342945
Minibatch accuracy: 85.9%
Validation accuracy: 80.9%
Minibatch loss at step 3000: 12.553429
Minibatch accuracy: 83.6%
Validation accuracy: 80.5%
Minibatch loss at step 4000: 11.684322
Minibatch accuracy: 82.0%
Validation accuracy: 80.9%
Minibatch loss at step 5000: 9.091236
Minibatch accuracy: 85.2%
Validation accuracy: 83.7%
Test accuracy: 90.9%
