In [1]:
import tensorflow as tf

In [2]:
from tensorflow.examples.tutorials.mnist import input_data

In [3]:
mnist = input_data.read_data_sets('MNIST_data/',one_hot=True)

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


In [4]:
type(mnist)

tensorflow.contrib.learn.python.learn.datasets.base.Datasets

In [15]:
def init_weights(shape):
    '''
    param shape : [filter_height,filter_width,input_channels,output_channels(feature maps)]
    '''
    print("Shape to initialize weights(filters) is {}".format(shape))
    init_random_weights = tf.truncated_normal(shape,mean=0.0,stddev=0.1)
    return tf.Variable(init_random_weights)

In [16]:
def init_bias(shape):
    '''
    param shape : list/tuple of 1 dimension holding a value equal to the number of output feature maps required.
    '''
    init_bias = tf.constant(0.1,shape=shape)
    return tf.Variable(init_bias)

In [17]:
def conv2d(x,W):
    '''
    param x : [batch(number of examples),image_height,image_width,input_channels]
    param W : [filter_height,filter_width,channels input,channels out]
    
    returns: A tensor of same dimension and type as 'x'
    '''
   
    return tf.nn.conv2d(x,W,strides=[1,1,1,1],padding='SAME')

In [18]:
def max_pooling_2by2(x):
    '''
    param x : [batch(number of examples),height,width,channels]
    
    returns: A 4-D tensor(same type as x) following the data format 'NHWC'
    '''
    
    # parameters: x is the input tensor in the data format 'NHWC'
    # kszie: window size to be used for pooling,list or a tuple with the size of the window for each dimension
    # of the input tensor.e.g if ksize=[1,2,2,1], it means window size is 2 along the height and width of the image.
    
    return tf.nn.max_pool(x,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')

In [19]:
def convolution_layer(input_x,shape):
    '''
    param input_x : 4-D tensor [batch,image_height,image_width,channels]
    param shape : 4-D tensor [filter_height,filter_width,input_channels(before),output_channels]
    
    Here the output_channels are essentially the number of feature maps we want to generate from the current image.
    If initially  the image is RGB the input feature maps will be 3, one for each color.The output channels or feature maps
    is the number of features we want to detect in the input image.
    
    returns: Tensor with shape as the input provided to the relu operation,which in turn is the output from conv2d method.
    Thus the output will be a 4-D tensor of type [batch,image_h,image_w,channels]
    '''
    print("input_x shape is {}".format(input_x.shape))
    W = init_weights(shape)
    b = init_bias([shape[3]]) # one bias for every feature map,say if output maps=32,we'll need 32 biases.
    return tf.nn.relu(conv2d(input_x,W) + b)

In [20]:
def normal_full_layer(input_layer,size):
    '''
    param input_layer:number of neurons or trainable parameters after flattening the last convolution layer.
    param size:number of neurons you want in the fully connected layer.
    '''
    print("input_layer shape is {}".format(input_layer.shape))
    input_size = int(input_layer.get_shape()[1])
    W = init_weights([input_size,size])
    b = init_bias([size])
    return tf.matmul(input_layer,W) + b

In [21]:
x = tf.placeholder(tf.float32,shape=[None,784]) # None is size of the batch , 784-pixels
y_true = tf.placeholder(tf.float32,shape=[None,10]) # activations of each digit(10 digits in total) along the columns

In [22]:
#LAYERS
x_image = tf.reshape(x,[-1,28,28,1]) 
# reshapes the flattened image(784 X 1) back to original dimensions, 28 X 28. 1 is # of channels implying a greyscale image.

In [23]:
# output is of same type as input , a 4-D tensor
# the filter height and width are 5 X 5. 
# The third number is the number of input channels which is 1 in case of a greyscale image.
# 32- number of filters/feature detectors the image has to be passed through.
# This convolution is going to compute 32 features for each 5 by 5 patch.
convo_1 = convolution_layer(x_image,shape=[5,5,1,32])  
print("convo_1 shape is {}".format(convo_1.shape))


convo_1_max_pooling = max_pooling_2by2(convo_1)
print("convo_1_max_pooling shape is {}".format(convo_1_max_pooling.shape))

input_x shape is (?, 28, 28, 1)
Shape to initialize weights(filters) is [5, 5, 1, 32]
convo_1 shape is (?, 28, 28, 32)
convo_1_max_pooling shape is (?, 14, 14, 32)


In [24]:
# Again 5 by 5 filters. But the number of channels input is 32 ,i.e channels in previous layer.
# For next layer we want 64 feature detectors.
convo_2 = convolution_layer(convo_1_max_pooling,shape=[5,5,32,64])
print("convo_2 shape is {}".format(convo_2.shape))

convo_2_max_pooling = max_pooling_2by2(convo_2)
print("convo_2_max_pooling shape is {}".format(convo_2_max_pooling.shape))

input_x shape is (?, 14, 14, 32)
Shape to initialize weights(filters) is [5, 5, 32, 64]
convo_2 shape is (?, 14, 14, 64)
convo_2_max_pooling shape is (?, 7, 7, 64)


In [25]:
convo_2_flat = tf.reshape(convo_2_max_pooling,[-1,7*7*64]) #2 pooling layers, so (28/2)/2 = 7
print("convo_2_flat shape is {}".format(convo_2_flat.shape))
full_layer_one = tf.nn.relu(normal_full_layer(convo_2_flat,1024))
print("full layer one shape is {}".format(full_layer_one.shape))

convo_2_flat shape is (?, 3136)
input_layer shape is (?, 3136)
Shape to initialize weights(filters) is [3136, 1024]
full layer one shape is (?, 1024)


In [18]:
#dropout to prevent overfitting
hold_prob = tf.placeholder(tf.float32)
full_one_dropout = tf.nn.dropout(full_layer_one,keep_prob=hold_prob)

In [19]:
y_predictions = normal_full_layer(full_one_dropout,10) #10 - labels

input_layer shape is (?, 1024)
[1024, 10]


In [20]:
#loss function
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_true,logits=y_predictions))

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.



In [21]:
#optimizer
optimizer = tf.train.AdamOptimizer(learning_rate=0.0001)
train = optimizer.minimize(cross_entropy)

In [22]:
init = tf.global_variables_initializer()

In [75]:
steps = 1000
with tf.Session() as sess:
    sess.run(init)
    for i in range(steps):
        batch_x,batch_y = mnist.train.next_batch(100)
        sess.run(train,feed_dict={x:batch_x,y_true:batch_y,hold_prob:0.5})
        
        if i%100 == 0:
            print("on step {}".format(i))
            print("accuracy is: ")
            pred = tf.equal(tf.argmax(y_predictions,1),tf.argmax(y_true,1))
            accuracy = tf.reduce_mean(tf.cast(pred,tf.float32))
            print(sess.run(accuracy,feed_dict={x:mnist.test.images,y_true:mnist.test.labels,hold_prob:1.0}))
            print()

on step 0
accuracy is: 
0.1169

on step 100
accuracy is: 
0.8736

on step 200
accuracy is: 
0.9201

on step 300
accuracy is: 
0.9343

on step 400
accuracy is: 
0.9489

on step 500
accuracy is: 
0.9509

on step 600
accuracy is: 
0.9561

on step 700
accuracy is: 
0.9596

on step 800
accuracy is: 
0.9648

on step 900
accuracy is: 
0.9679

