In [1]:
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range

In [2]:
pickle_file = 'notMNIST.pickle'

with open(pickle_file,'rb') as f:
    save =  pickle.load(f)
    train_dataset = save['train_dataset']
    valid_dataset = save['valid_dataset']
    test_dataset = save['test_dataset']
    train_labels = save['train_labels']
    test_labels = save['test_labels']
    valid_labels = save['valid_labels']
    del save
    print 'Train Dataset: ',train_dataset.shape,train_labels.shape
    print 'Valid Dataset: ',valid_dataset.shape,valid_labels.shape
    print 'Test Dataset: ',test_dataset.shape,train_labels.shape

Train Dataset:  (200000, 28, 28) (200000,)
Valid Dataset:  (10000, 28, 28) (10000,)
Test Dataset:  (18724, 28, 28) (200000,)


In [3]:
test_labels, train_dataset

(array([4, 9, 3, ..., 9, 9, 6], dtype=int32),
 array([[[-0.5       , -0.5       , -0.5       , ..., -0.5       ,
          -0.5       , -0.5       ],
         [-0.5       , -0.5       , -0.5       , ..., -0.5       ,
          -0.5       , -0.5       ],
         [-0.5       , -0.5       , -0.5       , ..., -0.5       ,
          -0.5       , -0.5       ],
         ..., 
         [-0.34705883,  0.5       ,  0.36666667, ..., -0.5       ,
          -0.5       , -0.5       ],
         [-0.08039216,  0.5       ,  0.11176471, ..., -0.5       ,
          -0.5       , -0.5       ],
         [-0.06078431,  0.44901961, -0.3392157 , ..., -0.5       ,
          -0.5       , -0.5       ]],
 
        [[-0.5       , -0.49215686, -0.5       , ..., -0.29215688,
          -0.26862746, -0.33529413],
         [-0.31568629, -0.00588235,  0.13137256, ...,  0.48431373,
           0.42156863,  0.13921569],
         [ 0.33137256,  0.5       ,  0.49607843, ..., -0.31568629,
          -0.42156863, -0.5       ],


In [4]:
image_size = 28
num_labels = 10
num_channels = 1

def reformat(dataset,labels):
    dataset = dataset.reshape((-1,image_size,image_size,num_channels)).astype(np.float32)
    labels = (np.arange(num_labels)==labels[:,None]).astype(np.float32)
    return dataset,labels
train_dataset,train_labels = reformat(train_dataset,train_labels)
valid_dataset,valid_labels = reformat(valid_dataset,valid_labels)
test_dataset, test_labels = reformat(test_dataset,test_labels)

print 'Train Dataset: ',train_dataset.shape,train_labels.shape
print 'Valid Dataset: ',valid_dataset.shape,valid_labels.shape
print 'Test Dataset: ',test_dataset.shape,train_labels.shape

Train Dataset:  (200000, 28, 28, 1) (200000, 10)
Valid Dataset:  (10000, 28, 28, 1) (10000, 10)
Test Dataset:  (18724, 28, 28, 1) (200000, 10)


In [5]:
def accuracy(predictions,labels):
    return (100.0*np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1)))/predictions.shape[0]

# Simple Convolution

In [8]:
batch_size = 16
patch_size = 5
depth = 16
num_hidden = 64

graph = tf.Graph()

with graph.as_default():
    tf_train_dataset = tf.placeholder(tf.float32,shape=(batch_size,image_size,image_size,num_channels))
    tf_train_labels = tf.placeholder(tf.float32,shape=(batch_size,num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    
    layer_w1 = tf.Variable(tf.truncated_normal([patch_size,patch_size,num_channels,depth],stddev=0.1))
    layer_b1 = tf.Variable(tf.zeros([depth]))
    
    layer_w2 = tf.Variable(tf.truncated_normal([patch_size,patch_size,depth,depth],stddev=0.1))
    layer_b2 = tf.Variable(tf.constant(1.0,shape=[depth]))
    
    layer_w3 = tf.Variable(tf.truncated_normal([image_size // 4 * image_size // 4 * depth, num_hidden], stddev=0.1))
    layer_b3 = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
    
    layer_w4 = tf.Variable(tf.truncated_normal([num_hidden,num_labels],stddev=0.1))
    layer_b4 = tf.Variable(tf.constant(1.0,shape=[num_labels]))
    
    def model(data):
        conv = tf.nn.conv2d(data,layer_w1,[1,2,2,1],padding='SAME')
        hidden = tf.nn.relu(conv+layer_b1)
        conv = tf.nn.conv2d(hidden,layer_w2,[1,2,2,1],padding='SAME')
        hidden = tf.nn.relu(conv+layer_b2)
        shape = hidden.get_shape().as_list()
        reshape = tf.reshape(hidden,[shape[0],shape[1]*shape[2]*shape[3]])
        hidden = tf.matmul(reshape,layer_w3)+layer_b3
        hidden = tf.nn.relu(hidden)
        return tf.matmul(hidden,layer_w4)+layer_b4
    
    logits = model(tf_train_dataset)
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels,logits=logits))
    
    optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)
    
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(model(valid_dataset))
    test_prediction = tf.nn.softmax(model(test_dataset))
    

In [11]:
num_steps = 1001

with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    print 'Initialized\n'
    
    for step in range(num_steps):
        offset = (step*batch_size)%(train_labels.shape[0]-batch_size)
        batch_data = train_dataset[offset:(offset+batch_size),:,:,:]
        batch_labels = train_labels[offset:(offset+batch_size),:]
        feed_dict = {tf_train_dataset:batch_data,tf_train_labels:batch_labels}
        
        _,l,predictions = session.run([optimizer,loss,train_prediction],feed_dict=feed_dict)
        
        if step%50==0:
            print str(step)+':'
            print 'Batch Loss: '+str(l)
            print 'Batch Accuracy: '+str(accuracy(predictions,batch_labels))+'%'
            print 'Validation Accuracy: '+str(accuracy(valid_prediction.eval(),valid_labels))+'%\n'
    print 'Test Accuracy: '+str(accuracy(test_prediction.eval(),test_labels))+'%'
            


Initialized

0:
Batch Loss: 2.78846
Batch Accuracy: 18.75%
Validation Accuracy: 10.18%

50:
Batch Loss: 1.13397
Batch Accuracy: 56.25%
Validation Accuracy: 64.52%

100:
Batch Loss: 0.861956
Batch Accuracy: 75.0%
Validation Accuracy: 74.69%

150:
Batch Loss: 1.18869
Batch Accuracy: 81.25%
Validation Accuracy: 76.17%

200:
Batch Loss: 1.00891
Batch Accuracy: 68.75%
Validation Accuracy: 76.48%

250:
Batch Loss: 0.38222
Batch Accuracy: 93.75%
Validation Accuracy: 79.55%

300:
Batch Loss: 0.373316
Batch Accuracy: 93.75%
Validation Accuracy: 79.92%

350:
Batch Loss: 0.511557
Batch Accuracy: 87.5%
Validation Accuracy: 80.72%

400:
Batch Loss: 0.133208
Batch Accuracy: 93.75%
Validation Accuracy: 80.32%

450:
Batch Loss: 0.672095
Batch Accuracy: 81.25%
Validation Accuracy: 81.49%

500:
Batch Loss: 0.749888
Batch Accuracy: 68.75%
Validation Accuracy: 80.96%

550:
Batch Loss: 0.249885
Batch Accuracy: 93.75%
Validation Accuracy: 81.49%

600:
Batch Loss: 0.597276
Batch Accuracy: 87.5%
Validation Ac

# Pooling Used

In [17]:
batch_size = 16
patch_size = 5
depth = 16
num_hidden = 64

graph = tf.Graph()

with graph.as_default():
    tf_train_dataset = tf.placeholder(tf.float32,shape=(batch_size,image_size,image_size,num_channels))
    tf_train_labels = tf.placeholder(tf.float32,shape=(batch_size,num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    
    layer_w1 = tf.Variable(tf.truncated_normal([patch_size,patch_size,num_channels,depth],stddev=0.1))
    layer_b1 = tf.Variable(tf.zeros([depth]))
    
    layer_w2 = tf.Variable(tf.truncated_normal([patch_size,patch_size,depth,depth],stddev=0.1))
    layer_b2 = tf.Variable(tf.constant(1.0,shape=[depth]))
    
    layer_w3 = tf.Variable(tf.truncated_normal([image_size // 4 * image_size // 4 * depth, num_hidden], stddev=0.1))
    layer_b3 = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
    
    layer_w4 = tf.Variable(tf.truncated_normal([num_hidden,num_labels],stddev=0.1))
    layer_b4 = tf.Variable(tf.constant(1.0,shape=[num_labels]))
    
    def model(data):
        conv = tf.nn.relu(tf.nn.conv2d(data,layer_w1,[1,1,1,1],padding='SAME'))
        pool = tf.nn.max_pool(conv,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')
        conv = tf.nn.relu(tf.nn.conv2d(pool,layer_w2,[1,1,1,1],padding='SAME'))
        pool = tf.nn.max_pool(conv,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')
        shape = pool.get_shape().as_list()
        reshape = tf.reshape(pool,[shape[0],shape[1]*shape[2]*shape[3]])
        pool = tf.matmul(reshape,layer_w3)+layer_b3
        pool = tf.nn.relu(pool)
        return tf.matmul(pool,layer_w4)+layer_b4
    
    logits = model(tf_train_dataset)
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels,logits=logits))
    
    optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)
    
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(model(valid_dataset))
    test_prediction = tf.nn.softmax(model(test_dataset))

In [18]:
num_steps = 1001

with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    print 'Initialized\n'
    
    for step in range(num_steps):
        offset = (step*batch_size)%(train_labels.shape[0]-batch_size)
        batch_data = train_dataset[offset:(offset+batch_size),:,:,:]
        batch_labels = train_labels[offset:(offset+batch_size),:]
        feed_dict = {tf_train_dataset:batch_data,tf_train_labels:batch_labels}
        
        _,l,predictions = session.run([optimizer,loss,train_prediction],feed_dict=feed_dict)
        
        if step%50==0:
            print str(step)+':'
            print 'Batch Loss: '+str(l)
            print 'Batch Accuracy: '+str(accuracy(predictions,batch_labels))+'%'
            print 'Validation Accuracy: '+str(accuracy(valid_prediction.eval(),valid_labels))+'%\n'
    print 'Test Accuracy: '+str(accuracy(test_prediction.eval(),test_labels))+'%'
            


Initialized

0:
Batch Loss: 2.91537
Batch Accuracy: 6.25%
Validation Accuracy: 12.9%

50:
Batch Loss: 1.2105
Batch Accuracy: 56.25%
Validation Accuracy: 64.65%

100:
Batch Loss: 1.13393
Batch Accuracy: 75.0%
Validation Accuracy: 75.96%

150:
Batch Loss: 1.1764
Batch Accuracy: 75.0%
Validation Accuracy: 77.49%

200:
Batch Loss: 0.887419
Batch Accuracy: 75.0%
Validation Accuracy: 76.19%

250:
Batch Loss: 0.314069
Batch Accuracy: 87.5%
Validation Accuracy: 80.73%

300:
Batch Loss: 0.318696
Batch Accuracy: 93.75%
Validation Accuracy: 80.77%

350:
Batch Loss: 0.324049
Batch Accuracy: 93.75%
Validation Accuracy: 82.32%

400:
Batch Loss: 0.0780629
Batch Accuracy: 100.0%
Validation Accuracy: 82.31%

450:
Batch Loss: 0.649593
Batch Accuracy: 87.5%
Validation Accuracy: 83.06%

500:
Batch Loss: 0.497171
Batch Accuracy: 81.25%
Validation Accuracy: 82.76%

550:
Batch Loss: 0.259662
Batch Accuracy: 93.75%
Validation Accuracy: 82.43%

600:
Batch Loss: 0.497851
Batch Accuracy: 87.5%
Validation Accurac

# Dropout Used

In [25]:
batch_size = 16
patch_size = 5
depth = 16
num_hidden = 64
num_steps = 30001

graph = tf.Graph()

with graph.as_default():
    tf_train_dataset = tf.placeholder(tf.float32,shape=(batch_size,image_size,image_size,num_channels))
    tf_train_labels = tf.placeholder(tf.float32,shape=(batch_size,num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    global_step = tf.Variable(0)
    
    layer_w1 = tf.Variable(tf.truncated_normal([patch_size,patch_size,num_channels,depth],stddev=0.1))
    layer_b1 = tf.Variable(tf.zeros([depth]))
    
    layer_w2 = tf.Variable(tf.truncated_normal([patch_size,patch_size,depth,depth],stddev=0.1))
    layer_b2 = tf.Variable(tf.constant(1.0,shape=[depth]))
    
    layer_w3 = tf.Variable(tf.truncated_normal([image_size // 4 * image_size // 4 * depth, num_hidden], stddev=0.1))
    layer_b3 = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
    
    layer_w4 = tf.Variable(tf.truncated_normal([num_hidden,num_labels],stddev=0.1))
    layer_b4 = tf.Variable(tf.constant(1.0,shape=[num_labels]))
    
    def model(data,keep_prob):
        conv = tf.nn.relu(tf.nn.conv2d(data,layer_w1,[1,1,1,1],padding='SAME'))
        pool = tf.nn.max_pool(conv,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')
        conv = tf.nn.relu(tf.nn.conv2d(pool,layer_w2,[1,1,1,1],padding='SAME'))
        pool = tf.nn.max_pool(conv,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')
        shape = pool.get_shape().as_list()
        reshape = tf.reshape(pool,[shape[0],shape[1]*shape[2]*shape[3]])
        pool = tf.matmul(reshape,layer_w3)+layer_b3
        pool = tf.nn.relu(pool)
        pool = tf.nn.dropout(pool,keep_prob)
        
        return tf.nn.softmax(tf.matmul(pool,layer_w4)+layer_b4)
    
    
    def model2(data):
        conv = tf.nn.relu(tf.nn.conv2d(data,layer_w1,[1,1,1,1],padding='SAME'))
        pool = tf.nn.max_pool(conv,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')
        conv = tf.nn.relu(tf.nn.conv2d(pool,layer_w2,[1,1,1,1],padding='SAME'))
        pool = tf.nn.max_pool(conv,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')
        shape = pool.get_shape().as_list()
        reshape = tf.reshape(pool,[shape[0],shape[1]*shape[2]*shape[3]])
        pool = tf.matmul(reshape,layer_w3)+layer_b3
        pool = tf.nn.relu(pool)
        
        return tf.nn.softmax(tf.matmul(pool,layer_w4)+layer_b4)
    
    
    
    model = model(tf_train_dataset, 0.5)
    cross_entropy = tf.reduce_mean(-tf.reduce_sum(tf_train_labels* tf.log(model), reduction_indices=[1]))
    
    loss = cross_entropy + 0.001 * (tf.nn.l2_loss(layer_w3) + tf.nn.l2_loss(layer_b3) + tf.nn.l2_loss(layer_w4) + tf.nn.l2_loss(layer_b4))
    
    learning_rate = tf.train.exponential_decay(1e-1, global_step, num_steps, 0.7, staircase=True)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
    
    train_prediction = model
    valid_prediction = model2(tf_valid_dataset)
    test_prediction = model2(tf_test_dataset)

In [26]:
with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    print 'Initialized\n'
    
    for step in range(num_steps):
        offset = (step*batch_size)%(train_labels.shape[0]-batch_size)
        batch_data = train_dataset[offset:(offset+batch_size),:,:,:]
        batch_labels = train_labels[offset:(offset+batch_size),:]
        feed_dict = {tf_train_dataset:batch_data,tf_train_labels:batch_labels}
        
        _,l,predictions = session.run([optimizer,loss,train_prediction],feed_dict=feed_dict)
        
        if step%300==0:
            print str(step)+':'
            print 'Batch Loss: '+str(l)
            print 'Batch Accuracy: '+str(accuracy(predictions,batch_labels))+'%'
            print 'Validation Accuracy: '+str(accuracy(valid_prediction.eval(),valid_labels))+'%\n'
    print 'Test Accuracy: '+str(accuracy(test_prediction.eval(),test_labels))+'%'
            


Initialized

0:
Batch Loss: 3.21239
Batch Accuracy: 0.0%
Validation Accuracy: 13.75%

300:
Batch Loss: 0.81148
Batch Accuracy: 75.0%
Validation Accuracy: 79.84%

600:
Batch Loss: 1.04903
Batch Accuracy: 75.0%
Validation Accuracy: 82.68%

900:
Batch Loss: 0.659055
Batch Accuracy: 81.25%
Validation Accuracy: 84.19%

1200:
Batch Loss: 1.10591
Batch Accuracy: 87.5%
Validation Accuracy: 84.12%

1500:
Batch Loss: 0.662814
Batch Accuracy: 93.75%
Validation Accuracy: 85.59%

1800:
Batch Loss: 0.786785
Batch Accuracy: 68.75%
Validation Accuracy: 85.98%

2100:
Batch Loss: 0.785577
Batch Accuracy: 75.0%
Validation Accuracy: 85.64%

2400:
Batch Loss: 0.573866
Batch Accuracy: 81.25%
Validation Accuracy: 84.9%

2700:
Batch Loss: 0.907325
Batch Accuracy: 68.75%
Validation Accuracy: 86.2%

3000:
Batch Loss: 0.694408
Batch Accuracy: 81.25%
Validation Accuracy: 84.68%

3300:
Batch Loss: 0.618389
Batch Accuracy: 81.25%
Validation Accuracy: 86.25%

3600:
Batch Loss: 0.989295
Batch Accuracy: 81.25%
Validat