# CNN using Tensorflow

In [1]:
import numpy as np
import tensorflow as tf

- CONV2D -> BATCHNORM -> RELU -> MAXPOOL -> CONV2D -> BATCHNORM -> RELU -> MAXPOOL -> FLATTEN -> FULLYCONNECTED

    - Conv2D: stride 1, padding is "SAME"
    - Batch normalization over the channel axis
    - ReLU
    - Max pool: A 2 by 2 filter size and a 2 by 2 stride, padding is "SAME"
    - Conv2D: stride 1, padding is "SAME"
    - Batch normalization over the channel axis
    - ReLU
    - Max pool: A 2 by 2 filter size and a 2 by 2 stride, padding is "SAME"
    - Flatten the previous output.
    - FULLYCONNECTED (FC) layer: A fully connected layer without a non-linear activation function.
    
    Do not call the softmax here. This will result in 10 neurons in the output layer (one per digit class), which then get passed later to a softmax. In TensorFlow, the softmax and cost function are lumped together into a single function, which you'll call in a different function when computing the cost. 

In [2]:
def create_placeholders(n_h, n_w, n_c, n_y):
    '''
    Create the graph inputs: an image batch, its labels and a training-mode flag.

    Arguments:
    n_h -- image height
    n_w -- image width
    n_c -- number of image channels
    n_y -- number of classes

    Returns:
    X -- float32 placeholder of shape (None, n_h, n_w, n_c) for the input images
    y -- float32 placeholder of shape (None, n_y) for the labels
    is_training -- scalar placeholder for batch normalization; False unless fed
    '''
    # The leading None leaves the batch dimension unspecified.
    image_shape = (None, n_h, n_w, n_c)
    label_shape = (None, n_y)

    X = tf.placeholder(tf.float32, shape = image_shape)
    y = tf.placeholder(tf.float32, shape = label_shape)
    is_training = tf.placeholder_with_default(False, shape = (), name = 'is_training')
    return X, y, is_training

In [3]:
def initialize_parameters():
    '''
    Create the two convolution kernels with Xavier initialization.

    Returns:
    parameters -- dict mapping 'W1' and 'W2' to their weight variables
    '''
    # Kernel shapes as passed to tf.get_variable; W1's 8 output channels
    # match W2's 8 input channels.
    kernel_shapes = (
        ('W1', [4, 4, 1, 8]),
        ('W2', [2, 2, 8, 16]),
    )
    parameters = {}
    for name, shape in kernel_shapes:
        parameters[name] = tf.get_variable(
            name,
            shape = shape,
            initializer = tf.contrib.layers.xavier_initializer(),
        )
    return parameters

In [4]:
def forward_propagation(X, parameters, is_training):
    '''
    Build the forward pass: two conv stages, then flatten and a linear output layer.

    Arguments:
    X -- input dataset placeholder
    parameters -- python dict holding the conv kernels under 'W1' and 'W2'
    is_training -- boolean value passed to batch normalization as `training`

    Returns:
    Z3 -- output of the last linear unit (10 units, no activation)
    '''
    def conv_stage(inputs, kernel):
        # conv (stride 1, SAME) -> batch norm on axis 3 -> ReLU -> 2x2 max pool (stride 2, SAME)
        conv = tf.nn.conv2d(inputs, kernel, strides = [1,1,1,1], padding = 'SAME')
        normed = tf.layers.batch_normalization(conv, axis = 3, training=is_training)
        activated = tf.nn.relu(normed)
        return tf.nn.max_pool(activated, ksize = [1,2,2,1], strides = [1,2,2,1], padding = 'SAME')

    # Look up both kernels before any op is created, as the stages are built in order.
    kernels = [parameters['W1'], parameters['W2']]

    features = X
    for kernel in kernels:
        features = conv_stage(features, kernel)

    flattened = tf.contrib.layers.flatten(features)
    return tf.contrib.layers.fully_connected(flattened, 10, activation_fn = None)

In [5]:
def compute_cost(Z3, y):
    '''
    Mean softmax cross-entropy between the logits and the target labels.

    Arguments:
    Z3 -- output of the last linear unit (unnormalized logits)
    y -- target labels, same shape as Z3

    Returns:
    cost -- scalar tensor, the softmax cross entropy averaged over the batch
    '''
    # tf.nn.softmax_cross_entropy_with_logits is deprecated in favor of the _v2
    # op. _v2 lets gradients flow into `labels`, so the labels are wrapped in
    # stop_gradient to keep backprop confined to the logits as before.
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits_v2(
        labels = tf.stop_gradient(y),
        logits = Z3,
    )
    cost = tf.reduce_mean(per_example_loss)
    return cost

In [6]:
def random_mini_batches(X, Y, mini_batch_size = 64, seed = 0):
    '''
    Shuffle (X, Y) together along the first axis and cut them into mini batches.

    Arguments:
    X -- input dataset, examples along the first axis
    Y -- target labels, one row per example in X
    mini_batch_size -- size of mini batches (positive integer)
    seed -- seed value for random functions

    Returns:
    mini_batches -- list of (mini_batch_X, mini_batch_Y) tuples; the last one is
                    smaller when the number of examples is not a multiple of
                    mini_batch_size

    Raises:
    ValueError -- if X and Y have a different number of examples, or if
                  mini_batch_size is smaller than 1
    '''
    m = X.shape[0]
    if Y.shape[0] != m:
        # Without this check a longer Y would be silently truncated by the shuffle.
        raise ValueError(
            'X and Y must have the same number of examples, got %d and %d' % (m, Y.shape[0])
        )
    if mini_batch_size < 1:
        raise ValueError('mini_batch_size must be at least 1, got %r' % (mini_batch_size,))

    # Seeds NumPy's global generator, so a given seed always yields the same shuffle.
    np.random.seed(seed)

    # Step 1: Shuffle (X, Y) with one shared permutation so rows stay paired.
    permutation = np.random.permutation(m)
    shuffled_X = X[permutation]
    shuffled_Y = Y[permutation]

    # Step 2: Partition. Slicing past the end is clipped, so the final
    # (possibly shorter) batch needs no special case.
    mini_batches = []
    for start in range(0, m, mini_batch_size):
        stop = start + mini_batch_size
        mini_batches.append((shuffled_X[start:stop], shuffled_Y[start:stop]))

    return mini_batches

In [7]:
# Start from an empty default graph before the model is built further down.
tf.reset_default_graph()

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [9]:
# Load the labelled training data: a 'label' column followed by pixel0..pixel783.
train = pd.read_csv('digit/train.csv')
train.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [10]:
# Separate the target column from the pixel columns and divide the pixels by 255.
y = train['label']
X = train.drop(columns = 'label') / 255

In [11]:
# Hold out 20% of the rows for validation; random_state pins the split.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size = 0.2, random_state = 0)

# One-hot encode both splits against the same class list. Encoding each split
# on its own would give the two label matrices different widths (and shifted
# columns) whenever a class is absent from one of the splits.
classes = sorted(y.unique())
y_hat = pd.get_dummies(pd.Categorical(y_train, categories = classes))
y_hat_val = pd.get_dummies(pd.Categorical(y_val, categories = classes))

In [12]:
# Convert every split (features, integer labels, one-hot labels) to a NumPy array.
X_train, X_val, y_train, y_val, y_hat, y_hat_val = (
    np.array(split)
    for split in (X_train, X_val, y_train, y_val, y_hat, y_hat_val)
)

In [13]:
# Turn each flat pixel row into a 28x28 image with a single channel.
X_train, X_val = (images.reshape(-1,28,28,1) for images in (X_train, X_val))

In [14]:
# Read the dataset dimensions from the training arrays.
m_train, n_h, n_w, n_c = X_train.shape
m_train, n_y = y_hat.shape

# The validation arrays supply m_val; n_h, n_w, n_c and n_y are re-assigned from them.
m_val, n_h, n_w, n_c = X_val.shape
m_val, n_y = y_hat_val.shape

# Graph inputs and the two convolution kernels.
px, py, is_training = create_placeholders(n_h, n_w, n_c, n_y)
parameters = initialize_parameters()

# Logits first, then the UPDATE_OPS collection: it is read only after
# forward_propagation has added the batch-normalization layers to the graph.
Z3 = forward_propagation(px, parameters, is_training)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
cost = compute_cost(Z3, py)
# Make the Adam training op depend on update_ops so they run with every training step.
with tf.control_dependencies(update_ops):
    optimizer = tf.train.AdamOptimizer(learning_rate = 0.001).minimize(cost)

# Per-example correctness: predicted class index vs. index of the largest label entry.
predictions = tf.argmax(Z3,1)
labels = tf.argmax(py,1)
results = tf.equal(predictions, labels)

# Created after minimize() -- NOTE(review): presumably so the optimizer's own variables are initialized too; confirm.
init = tf.global_variables_initializer()
# Passed to random_mini_batches and incremented once per epoch by the training cell.
seed = 0

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Use keras.layers.BatchNormalization instead.  In particular, `tf.control_dependencies(tf.GraphKeys.UPDATE_OPS)` should not be used (consult the `tf.keras.layers.batch_normalization` documentation).
Instructions for updating:
Please use `layer.__call__` method instead.
Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



In [21]:
# Train with mini-batch Adam, reporting cost and accuracy after every epoch.
# The session stays open after this cell; it is used for test-set prediction
# and closed further down.
sess = tf.Session()
sess.run(init)
mini_batch_size = 64
epochs = 100
for epoch in range(epochs):
    # A new seed each epoch gives a different shuffle each epoch.
    mini_batches = random_mini_batches(X_train, y_hat, mini_batch_size, seed)
    seed += 1

    # Average over the batches that are actually run. Dividing by
    # int(m_train/mini_batch_size) ignored the final partial batch, which
    # overstated the epoch cost and was zero when m_train < mini_batch_size.
    num_minibatches = len(mini_batches)
    mini_batch_cost = 0
    for mini_batch in mini_batches:
        mini_batch_X, mini_batch_y = mini_batch
        # is_training=True is forwarded to the batch-normalization layers.
        _, c = sess.run(fetches = [optimizer, cost], feed_dict = {px : mini_batch_X, py : mini_batch_y, is_training : True})
        mini_batch_cost += c/num_minibatches

    # Accuracy on both splits; is_training is not fed, so it takes its default of False.
    predictions_, labels_, results_ = sess.run([predictions, labels, results], feed_dict = {px : X_train, py : y_hat})
    train_score = results_.sum()/m_train
    predictions_, labels_, results_ = sess.run([predictions, labels, results], feed_dict = {px : X_val, py : y_hat_val})
    val_score = results_.sum()/m_val
    print(f"{epoch+1}) Cost => {mini_batch_cost} | Training Accuracy => {train_score*100} | Validation Accuracy {val_score*100}")
    # Early stop once the average training cost drops below 0.05.
    if mini_batch_cost < 0.05:
        print('Training Done!!')
        break

1) Cost => 0.3703730647549743 | Training Accuracy => 96.36607142857143 | Validation Accuracy 95.89285714285715
2) Cost => 0.09547936830669637 | Training Accuracy => 97.95535714285715 | Validation Accuracy 97.4047619047619
3) Cost => 0.06813527378475387 | Training Accuracy => 98.49404761904762 | Validation Accuracy 97.83333333333334
4) Cost => 0.05563591164403722 | Training Accuracy => 98.54166666666667 | Validation Accuracy 97.82142857142857
5) Cost => 0.04860728753553254 | Training Accuracy => 98.58630952380952 | Validation Accuracy 98.02380952380952
Training Done!!


In [22]:
# Load the unlabelled test images.
test = pd.read_csv('digit/test.csv')
# No 'id' column needs dropping: the columns shown below are pixel0..pixel783 only.
test.head()

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [23]:
# Apply the same preprocessing as the training images: divide by 255, convert
# to a NumPy array and reshape to 28x28 single-channel images.
test = np.array(test/255).reshape(-1,28,28,1)

In [24]:
# Sanity check: the result should be (number of test images, 28, 28, 1).
test.shape

(28000, 28, 28, 1)

In [25]:
# Predict a class index for every test image. The fetch is passed as a
# one-element list, so final_pred is a one-element list and the prediction
# array is final_pred[0]. is_training is not fed, so it takes its default of False.
final_pred = sess.run([predictions], feed_dict = {px : test})
# Release the session; sess cannot be used for further runs after this.
sess.close()

# Kaggle Submission

In [74]:
# Assemble the submission table: 1-based ImageId next to the predicted Label.
submission = pd.DataFrame({
    'ImageId' : np.arange(final_pred[0].shape[0]) + 1,
    'Label' : final_pred[0],
})
submission

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,0
4,5,3
...,...,...
27995,27996,9
27996,27997,7
27997,27998,3
27998,27999,9


In [76]:
# Write the submission file; index = False keeps the DataFrame index out of the CSV.
submission.to_csv('digit/Submission.csv', index = False)