# MNIST with CNN

In [1]:
%reset -f
import tensorflow as tf

In [2]:
import pandas as pd
import numpy as np
class mnist_kaggle:
  """Labelled digit data loaded from a CSV file, with a sequential mini-batch iterator.

  The CSV must contain a ``label`` column; every other column is used as a
  feature. On construction the rows are shuffled, the features are divided
  by 255, the labels are one-hot encoded and the data is split into a
  training and a test part.
  """

  def __init__(self, csv_path="dataset/train.csv", test_size=0.25, random_state=0):
    """Load, shuffle, scale, one-hot encode and split the data.

    Args:
      csv_path: Path of the CSV file to read.
      test_size: Forwarded to ``train_test_split``.
      random_state: Seed forwarded to ``train_test_split``. The initial row
        shuffle below is not seeded, so the split still differs between runs.
    """
    # Local import: scikit-learn is only needed while building the split.
    from sklearn.model_selection import train_test_split

    dataset = pd.read_csv(csv_path)
    dataset = dataset.sample(frac=1).reset_index(drop=True)
    y = dataset["label"].copy()
    X = dataset.drop(['label'], axis = 1)
    y = pd.get_dummies(y, columns=["label"], prefix="label" )

    X=np.array(X)/255
    y=np.array(y)

    # X_train / X_test: feature arrays; y_train / y_test: one-hot label arrays.
    self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
        X, y, test_size = test_size, random_state = random_state)
    # Number of batches handed out so far; selects the next slice in next_batch.
    self.curr_iteration=0

  def next_batch(self,batch_size):
    """Return the next ``(batch_X, batch_y)`` slice of the training split.

    Batches are consecutive, non-overlapping slices of ``batch_size`` rows.
    After the last full batch the iterator wraps around to the first one, so
    trailing rows that do not fill a whole batch are never returned. If
    ``batch_size`` is larger than the training split, every call returns all
    available rows.

    Args:
      batch_size: Positive number of rows per batch.

    Returns:
      Tuple ``(batch_X, batch_y)`` of array slices.

    Raises:
      ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
      raise ValueError("batch_size must be a positive integer, got {!r}".format(batch_size))

    n_rows = self.X_train.shape[0]
    # Keep at least one segment: an oversized batch_size would otherwise give
    # zero segments and a modulo-by-zero below.
    max_segments = max(1, n_rows // batch_size)
    curr_segment = self.curr_iteration % max_segments
    start = curr_segment * batch_size
    stop = start + batch_size
    batch_X = self.X_train[start:stop]
    batch_y = self.y_train[start:stop]
    self.curr_iteration += 1
    return (batch_X, batch_y)


In [3]:
# Build the data object; the constructor reads the CSV, shuffles it and
# creates the train/test split used by the cells below.
mnist_data=mnist_kaggle()

### Helper Functions

Function to help initialize random weights for fully connected or convolutional layers; we leave the shape as a parameter for this.

In [4]:
def init_weights(shape):
    """Return a `tf.Variable` of the given shape whose initial values are
    drawn from a truncated normal distribution with standard deviation 0.1."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

Same as init_weights, but for the biases

In [5]:
def init_bias(shape):
    """Return a `tf.Variable` of the given shape with every entry set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

Create a 2D convolution using builtin conv2d from TF. From those docs:

Computes a 2-D convolution given 4-D `input` and `filter` tensors.

Given an input tensor of shape `[batch, in_height, in_width, in_channels]`
and a filter / kernel tensor of shape
`[filter_height, filter_width, in_channels, out_channels]`, this op
performs the following:

1. Flattens the filter to a 2-D matrix with shape
   `[filter_height * filter_width * in_channels, output_channels]`.
2. Extracts image patches from the input tensor to form a *virtual*
   tensor of shape `[batch, out_height, out_width,
   filter_height * filter_width * in_channels]`.
3. For each patch, right-multiplies the filter matrix and the image patch
   vector.


In [6]:
def conv2d(x, W):
    """Convolve input `x` with filter `W` using a stride of 1 in every
    dimension and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

Create a max pooling layer, again using built in TF functions:

Performs the max pooling on the input.

    Args:
      value: A 4-D `Tensor` with shape `[batch, height, width, channels]` and
        type `tf.float32`.
      ksize: A list of ints that has length >= 4.  The size of the window for
        each dimension of the input tensor.
      strides: A list of ints that has length >= 4.  The stride of the sliding
        window for each dimension of the input tensor.
      padding: A string, either `'VALID'` or `'SAME'`. 

In [7]:
def max_pool_2by2(x):
    """Max-pool `x` with a 2x2 window moved in steps of 2 over the two middle
    dimensions, using 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

Using the conv2d function, we'll return an actual convolutional layer here that uses a ReLU activation.

In [8]:
def convolutional_layer(input_x, shape):
    """Build a convolution + bias + ReLU layer.

    `shape` is the filter shape passed to `init_weights`; its fourth entry
    (`shape[3]`) is used as the length of the bias vector.
    """
    kernel = init_weights(shape)
    out_channels = shape[3]
    bias = init_bias([out_channels])
    pre_activation = conv2d(input_x, kernel) + bias
    return tf.nn.relu(pre_activation)

This is a normal fully connected layer

In [9]:
def normal_full_layer(input_layer, size):
    """Build a fully connected layer without activation.

    The input width is read from dimension 1 of `input_layer`'s static shape;
    the result is `input_layer @ weights + biases` with `size` output units.
    """
    n_inputs = int(input_layer.get_shape()[1])
    weights = init_weights([n_inputs, size])
    biases = init_bias([size])
    return tf.matmul(input_layer, weights) + biases

### Placeholders

In [10]:
# Input features: any number of rows, 784 float values per row.
x = tf.placeholder(dtype=tf.float32, shape=[None, 784])

In [11]:
# Target labels: any number of rows, 10 float values per row (one-hot).
y_true = tf.placeholder(dtype=tf.float32, shape=[None, 10])

### Layers

In [12]:
# Turn each 784-value row into a 28x28 image with one channel;
# -1 lets TensorFlow infer the batch dimension.
x_image = tf.reshape(x, shape=[-1, 28, 28, 1])

In [13]:
# First convolutional block, then 2x2 max pooling.
# Filter shape is [6, 6, 1, 32]: a 6x6 filter (the video used 5x5; the size
# is free to tune), 1 input channel from the single-channel image, and 32
# output filters. The 32 can be changed, but the next layer must then take
# the same number of input channels.
convo_1 = convolutional_layer(input_x=x_image, shape=[6, 6, 1, 32])
convo_1_pooling = max_pool_2by2(x=convo_1)

In [14]:
# Second convolutional block, then 2x2 max pooling.
# Again a 6x6 filter (5x5 in the video); 32 input channels to match the
# output of the first block, and 64 output filters (also free to tune).
convo_2 = convolutional_layer(input_x=convo_1_pooling, shape=[6, 6, 32, 64])
convo_2_pooling = max_pool_2by2(x=convo_2)

In [15]:
# Flatten the pooled feature maps for the dense layer.
# The image is 7x7 here because two 2x2 poolings were applied: (28/2)/2 = 7.
# The factor 64 is the number of output channels of the second convolution.
convo_2_flat = tf.reshape(convo_2_pooling, shape=[-1, 7 * 7 * 64])
dense_one_linear = normal_full_layer(convo_2_flat, 1024)
full_layer_one = tf.nn.relu(dense_one_linear)

In [16]:
# NOTE THE PLACEHOLDER HERE!
# hold_prob is the probability of KEEPING a unit. It is fed at run time so
# that training can use dropout while evaluation feeds 1.0 to disable it.
hold_prob = tf.placeholder(tf.float32)
# tf.nn.dropout's `keep_prob` argument is deprecated; pass the equivalent
# drop rate (rate = 1 - keep_prob) instead.
full_one_dropout = tf.nn.dropout(full_layer_one, rate=1 - hold_prob)

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [17]:
# Output layer: 10 raw scores per row (no activation applied here).
y_pred = normal_full_layer(input_layer=full_one_dropout, size=10)

### Loss Function

In [18]:
# Mean softmax cross-entropy between the one-hot labels and the network's logits.
# The `_v2` op replaces the deprecated `softmax_cross_entropy_with_logits`.
# It additionally lets gradients flow into `labels`, which has no effect here
# because `y_true` is a placeholder with no variables behind it.
per_example_loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_true, logits=y_pred)
cross_entropy = tf.reduce_mean(per_example_loss)

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



### Optimizer

In [19]:
# Adam optimizer with a learning rate of 1e-4; `train` is the op that
# performs one update step minimizing the cross-entropy loss.
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)
train = optimizer.minimize(loss=cross_entropy)

### Initialize Variables

In [20]:
# Op that initializes all global variables; it is run once when the session starts.
init = tf.global_variables_initializer()

In [21]:
# Sanity check of the batch shape. Note that this call advances the batch
# iterator, so the training loop starts from the following batch.
sample_batch = mnist_data.next_batch(50)
batch_x_kaggle, batch_y_kaggle = sample_batch
print(batch_x_kaggle.shape)

(50, 784)


### Session

In [22]:
steps = 5000

# Build the accuracy ops once, before the loop. Creating them inside the
# loop adds a fresh set of nodes to the graph at every evaluation.
matches = tf.equal(tf.argmax(y_pred,1),tf.argmax(y_true,1))
acc = tf.reduce_mean(tf.cast(matches,tf.float32))

# The session is intentionally left open: later cells reuse `sess` to
# compute the submission predictions.
sess = tf.Session()

sess.run(init)

for i in range(steps):

    batch_x , batch_y = mnist_data.next_batch(50)

    # One optimization step with dropout active (keep probability 0.5).
    sess.run(train,feed_dict={x:batch_x,y_true:batch_y,hold_prob:0.5})

    # PRINT OUT A MESSAGE EVERY 100 STEPS
    if i%100 == 0:

        print('Currently on step {}'.format(i))
        print('Accuracy is:')
        # Evaluate on the held-out split with dropout disabled (hold_prob = 1.0).
        print(sess.run(acc,feed_dict={x:mnist_data.X_test,y_true:mnist_data.y_test,hold_prob:1.0}))
        print('\n')

Currently on step 0
Accuracy is:
0.08914286


Currently on step 100
Accuracy is:
0.8358095


Currently on step 200
Accuracy is:
0.9028571


Currently on step 300
Accuracy is:
0.9202857


Currently on step 400
Accuracy is:
0.9306667


Currently on step 500
Accuracy is:
0.94095236


Currently on step 600
Accuracy is:
0.94009525


Currently on step 700
Accuracy is:
0.95133334


Currently on step 800
Accuracy is:
0.9546667


Currently on step 900
Accuracy is:
0.9581905


Currently on step 1000
Accuracy is:
0.95704764


Currently on step 1100
Accuracy is:
0.9627619


Currently on step 1200
Accuracy is:
0.9612381


Currently on step 1300
Accuracy is:
0.96533334


Currently on step 1400
Accuracy is:
0.96704763


Currently on step 1500
Accuracy is:
0.96704763


Currently on step 1600
Accuracy is:
0.9685714


Currently on step 1700
Accuracy is:
0.972


Currently on step 1800
Accuracy is:
0.97209525


Currently on step 1900
Accuracy is:
0.9714286


Currently on step 2000
Accuracy is:
0.9735238



## Great Job!

In [130]:
# Load the rows to predict for the submission file.
dataset_submit = pd.read_csv("dataset/test.csv")

In [131]:
# Convert the submission frame to an array and divide by 255, the same
# scaling that was applied to the training features.
X_submit = np.array(dataset_submit.copy()) / 255
print(X_submit[0])

[0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         

In [167]:
# Run the network on every submission row with dropout disabled (hold_prob = 1.0).
submit_feed = {x: X_submit, hold_prob: 1.0}
prediction = sess.run(y_pred, feed_dict=submit_feed)

In [173]:
# Predicted digit per row = index of the largest score in that row.
# Vectorized replacement for the per-row Python loop; the result is the same
# int32 column vector of shape (n_rows, 1).
prediction_submit = prediction.argmax(axis=1).astype(np.int32).reshape(-1, 1)

In [174]:
# Display the predicted labels (NumPy abbreviates long arrays with "...").
prediction_submit

array([[2],
       [0],
       [9],
       ...,
       [3],
       [9],
       [2]], dtype=int32)

In [180]:
import csv

# Write the submission file: a header row, then one row per prediction with
# a 1-based ImageId and the predicted label.
with open('submit.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["ImageId", "Label"])

    for image_id, label_row in enumerate(prediction_submit, start=1):
        writer.writerow([image_id, label_row[0]])