In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import os

  from ._conv import register_converters as _register_converters


## Import Dataset

In [2]:
# the MNIST CSV files have no header row, so columns are numbered 0..784
# (column 0 is used as the digit label further down, the rest are pixel values)
csv_paths = ('../datasets/mnist_train.csv', '../datasets/mnist_test.csv')
train_set, test_set = (pd.read_csv(csv_path, header=None) for csv_path in csv_paths)

In [3]:
# preview the first rows of the raw training frame (label column followed by pixel columns)
train_set.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,775,776,777,778,779,780,781,782,783,784
0,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
# sanity check: read_csv returned a pandas DataFrame
type(train_set)

pandas.core.frame.DataFrame

## Get labels

In [5]:
# column 0 of each frame holds the digit label; copy it out into its own NumPy array
train_labels, test_labels = (np.array(frame[0]) for frame in (train_set, test_set))

In [6]:
# sanity check: the labels are now a plain NumPy array rather than a pandas Series
type(train_labels)

numpy.ndarray

In [7]:
# view the labels as a column vector (one label per row); this is the shape that is
# compared against np.arange(10) in the one-hot encoding step below
train_labels[:, None]

array([[5],
       [0],
       [4],
       ...,
       [5],
       [6],
       [8]])

In [8]:
# one-hot encode the labels: comparing each label (as a column) against the row 0..9
# broadcasts to an (N, 10) boolean matrix with a single True per row, then cast to float32
digit_classes = np.arange(10)
train_labels = (train_labels[:, None] == digit_classes).astype(np.float32)
test_labels = (test_labels[:, None] == digit_classes).astype(np.float32)
type(train_labels)

numpy.ndarray

In [9]:
# column 0 is the label, so removing it leaves only the pixel columns
train_data, test_data = (frame.drop(columns=[0]) for frame in (train_set, test_set))
type(train_data)

pandas.core.frame.DataFrame

In [10]:
# convert the pixel frames to float32 NumPy arrays, the dtype used by the placeholders
train_data = np.array(train_data, dtype=np.float32)
test_data = np.array(test_data, dtype=np.float32)
type(train_data)

numpy.ndarray

In [11]:
# un-flatten every row into a 28x28 single-channel image: (N, 784) -> (N, 28, 28, 1)
image_shape = (28, 28, 1)
train_data = train_data.reshape((len(train_data),) + image_shape)
test_data = test_data.reshape((len(test_data),) + image_shape)
train_data.shape, train_labels.shape

((60000, 28, 28, 1), (60000, 10))

In [12]:
# hold out the last 500 training examples as a validation set and remove them from the
# training arrays, so we can monitor how training is going
val_size = 500
data_split = len(train_data) - val_size
label_split = len(train_labels) - val_size

# take the validation tail first, then shrink the training arrays to what is left
val_data, val_labels = train_data[data_split:], train_labels[label_split:]
train_data, train_labels = train_data[:data_split], train_labels[:label_split]

## Set Batch Data

In [13]:
# returns accuracy of model
def accuracy(target, predictions):
    """Return the percentage (0-100) of rows whose predicted class matches the target.

    Both arguments are 2-D arrays with one row per example; the class of a row is
    the index of its largest entry (argmax along axis 1).
    """
    true_classes = np.argmax(target, 1)
    predicted_classes = np.argmax(predictions, 1)
    n_correct = np.sum(true_classes == predicted_classes)
    return 100.0 * n_correct / target.shape[0]

In [14]:
# need to batch the test data because running low on memory
class test_batchs:
    """Hands out consecutive slices of a 4-D array, one batch per call.

    `data` is indexed as data[start:stop, :, :, :], so it must have four axes
    with examples along the first one.
    """

    def __init__(self, data):
        self.data = data
        # position of the first example of the next batch
        self.batch_index = 0

    def next_batch(self, batch_size):
        """Return the next `batch_size` examples and advance the cursor.

        If fewer than `batch_size` examples remain, a warning is printed and the
        (shorter, possibly empty) remainder is returned.
        """
        start = self.batch_index
        stop = start + batch_size
        if stop > self.data.shape[0]:
            print("Batch sized is messed up.")
        batch = self.data[start:stop, :, :, :]

        # move the cursor forward; without this every call would return the first batch again
        self.batch_index = min(stop, self.data.shape[0])

        return batch

In [15]:
# set the test batch size; the tf_test_X placeholder is built with this fixed batch
# dimension, and the evaluation loop runs int(len(test_data) / test_batch_size) batches
test_batch_size = 100

In [16]:
# training batch size; also the fixed batch dimension of the tf_X / tf_y placeholders
train_batch_size = 50

# number of feature maps output by each tower inside the first and second Inception module
map1 = 32
map2 = 64

# number of hidden nodes in the first fully-connected layer (num_fc1) and number of
# outputs of the second one (num_fc2, matching the 10-wide one-hot labels)
num_fc1 = 700 # 1028 (presumably an alternative value tried earlier)
num_fc2 = 10

# number of feature maps output by each 1x1 convolution that precedes a large convolution
reduce1x1 = 16

# value handed to tf.nn.dropout for the hidden layer during training
# NOTE(review): it is passed as the second positional argument, which in TF 1.x is
# keep_prob (probability of KEEPING a unit), not the drop rate; the two only coincide
# at 0.5 - confirm before changing this value
dropout = 0.5

## Define Model

In [17]:
# checkpoint location: <current working directory>/models/mnist_inception_model.ckpt
# os.path.join keeps the path separator portable, and the directory is created up front
# so that saving a checkpoint does not fail on a missing ./models folder
file_path = os.path.join(os.getcwd(), 'models', 'mnist_inception_model.ckpt')
os.makedirs(os.path.dirname(file_path), exist_ok=True)
file_path

'/Users/muhamuttaqien/Desktop/Artificial Intelligence/AI-playground/deep-learning/visual-learning/inception-googlenet-implementation/models/mnist_inception_model.ckpt'

Time for the bulk of the work, which will require TensorFlow.

1. Once the graph is created, define placeholders that hold the training data, training labels, validation data, and test data
2. Then create some helper functions which assist in defining tensors, 2D convolutions, and max pooling
3. Next, use the helper functions and hyperparameters to create variables in both Inception modules
4. Then, create another function that takes data as input, passes it through the Inception modules and fully connected layers, and outputs the logits
5. Finally, define the loss to be cross-entropy, use Adam to optimize, and create ops for converting data to predictions, initializing variables, and saving all variables in the model

<div>
    <img src="./naive-inception-module.png" />
    <center><caption>Figure 1. Naive Module</caption></center>
</div>

In [None]:
# build the whole model in its own graph; later cells open a session on this graph explicitly
graph = tf.Graph()
with graph.as_default():
    # train data and labels; the batch dimension is fixed to train_batch_size
    tf_X = tf.placeholder(tf.float32, shape=(train_batch_size, 28, 28, 1))
    tf_y = tf.placeholder(tf.float32, shape=(train_batch_size, 10))
    
    # validation data; sized to hold the entire validation set in a single feed
    tf_val_X = tf.placeholder(tf.float32, shape=(len(val_data), 28, 28, 1))
    
    # test data; fed in chunks of exactly test_batch_size examples
    tf_test_X = tf.placeholder(tf.float32, shape=(test_batch_size, 28, 28, 1))
    
    def create_weight(size, name):
        """Create a variable of shape `size` initialised from a truncated normal (stddev 0.1)."""
        initial_value = tf.truncated_normal(size, stddev=0.1)
        return tf.Variable(initial_value, name=name)

    def create_bias(size, name):
        """Create a variable of shape `size` with every entry initialised to 0.1."""
        initial_value = tf.constant(0.1, shape=size)
        return tf.Variable(initial_value, name=name)

    def conv2d_s1(x, W):
        """Convolve `x` with kernel `W` using stride 1 and 'SAME' padding."""
        unit_strides = [1, 1, 1, 1]
        return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

    def max_pool_3x3_s1(x):
        """Max-pool `x` over a 3x3 window using stride 1 and 'SAME' padding."""
        window = [1, 3, 3, 1]
        unit_strides = [1, 1, 1, 1]
        return tf.nn.max_pool(x, ksize=window, strides=unit_strides, padding='SAME')
    
    # Defining Inception Module-1
    # The module input is the raw image, so every kernel applied directly to it (or to
    # its max-pooled version) has 1 input channel.
    #
    # 1x1 tower: follows input
    W_conv1_1x1_1 = create_weight([1,1,1,map1], 'W_conv1_1x1_1')
    b_conv1_1x1_1 = create_bias([map1], 'b_conv1_1x1_1')
    
    # 1x1 reduction in front of the 3x3 convolution: follows input
    W_conv1_1x1_2 = create_weight([1,1,1,reduce1x1], 'W_conv1_1x1_2')
    b_conv1_1x1_2 = create_bias([reduce1x1], 'b_conv1_1x1_2')
    
    # 1x1 reduction in front of the 5x5 convolution: follows input
    W_conv1_1x1_3 = create_weight([1,1,1,reduce1x1], 'W_conv1_1x1_3')
    b_conv1_1x1_3 = create_bias([reduce1x1], 'b_conv1_1x1_3')
    
    # 3x3 tower: follows 1x1_2
    W_conv1_3x3 = create_weight([3, 3, reduce1x1, map1], 'W_conv1_3x3')
    b_conv1_3x3 = create_bias([map1], 'b_conv1_3x3')
    
    # 5x5 tower: follows 1x1_3
    W_conv1_5x5 = create_weight([5, 5, reduce1x1, map1], 'W_conv1_5x5')
    b_conv1_5x5 = create_bias([map1], 'b_conv1_5x5')
    
    # pooling tower: 1x1 convolution that follows max pooling of the input
    W_conv1_1x1_4 = create_weight([1,1,1,map1], 'W_conv1_1x1_4')
    b_conv1_1x1_4 = create_bias([map1], 'b_conv1_1x1_4')
    
    # Defining Inception Module-2
    # Its input is the concatenation of the four map1-channel towers of module 1,
    # hence 4*map1 input channels for every kernel applied directly to it.
    #
    # 1x1 tower: follows Inception1
    W_conv2_1x1_1 = create_weight([1,1,4*map1,map2], 'W_conv2_1x1_1')
    b_conv2_1x1_1 = create_bias([map2], 'b_conv2_1x1_1')

    # 1x1 reduction in front of the 3x3 convolution: follows Inception1
    W_conv2_1x1_2 = create_weight([1,1,4*map1,reduce1x1], 'W_conv2_1x1_2')
    b_conv2_1x1_2 = create_bias([reduce1x1], 'b_conv2_1x1_2')
    
    # 1x1 reduction in front of the 5x5 convolution: follows Inception1
    W_conv2_1x1_3 = create_weight([1,1,4*map1,reduce1x1], 'W_conv2_1x1_3')
    b_conv2_1x1_3 = create_bias([reduce1x1], 'b_conv2_1x1_3')
    
    # 3x3 tower: follows 1x1_2
    W_conv2_3x3 = create_weight([3,3,reduce1x1,map2], 'W_conv2_3x3')
    b_conv2_3x3 = create_bias([map2], 'b_conv2_3x3')
    
    # 5x5 tower: follows 1x1_3
    W_conv2_5x5 = create_weight([5,5,reduce1x1,map2], 'W_conv2_5x5')
    b_conv2_5x5 = create_bias([map2], 'b_conv2_5x5')
    
    # pooling tower: 1x1 convolution that follows max pooling of Inception1
    W_conv2_1x1_4 = create_weight([1,1,4*map1, map2], 'W_conv2_1x1_4')
    b_conv2_1x1_4 = create_bias([map2], 'b_conv2_1x1_4')
    
    # Defining Fully-connected Layers
    # every convolution and pooling uses stride 1 with 'SAME' padding, so the 28x28
    # spatial size is preserved; module 2 concatenates four towers of map2 channels,
    # which gives 28*28*(4*map2) features per image once flattened
    W_fc1 = create_weight([28*28*(4*map2), num_fc1], 'W_fc1')
    b_fc1 = create_bias([num_fc1], 'b_fc1')
    
    # output layer: num_fc1 hidden units -> num_fc2 logits
    W_fc2 = create_weight([num_fc1, num_fc2], 'W_fc2')
    b_fc2 = create_bias([num_fc2], 'b_fc2')
    
    def model(x, train=True):
        """Run a batch of images through both Inception modules and the dense layers.

        Args:
            x: image tensor fed to the first Inception module (the placeholders in
               this graph are (batch, 28, 28, 1) float32).
            train: when True, dropout is applied to the hidden fully-connected
               layer; when False the layer is used as-is (inference).

        Returns:
            The un-normalised logits, one row per image and num_fc2 columns.
        """
        # Inception Module 1
        # the two "reduce" 1x1 convolutions get a relu of their own because they feed
        # another convolution; the four tower outputs share one relu after the concat
        conv1_1x1_1 = conv2d_s1(x, W_conv1_1x1_1) + b_conv1_1x1_1
        conv1_1x1_2 = tf.nn.relu(conv2d_s1(x, W_conv1_1x1_2) + b_conv1_1x1_2)
        conv1_1x1_3 = tf.nn.relu(conv2d_s1(x, W_conv1_1x1_3) + b_conv1_1x1_3)
        conv1_3x3 = conv2d_s1(conv1_1x1_2, W_conv1_3x3) + b_conv1_3x3
        conv1_5x5 = conv2d_s1(conv1_1x1_3, W_conv1_5x5) + b_conv1_5x5
        maxpool_1 = max_pool_3x3_s1(x)
        conv1_1x1_4 = conv2d_s1(maxpool_1, W_conv1_1x1_4) + b_conv1_1x1_4

        # concatenate all the feature maps along the channel axis and hit them with a relu
        inception_1 = tf.nn.relu(tf.concat([conv1_1x1_1, conv1_3x3, conv1_5x5, conv1_1x1_4], 3))

        # Inception Module 2
        conv2_1x1_1 = conv2d_s1(inception_1, W_conv2_1x1_1) + b_conv2_1x1_1
        # uses module 2's own bias (b_conv2_1x1_2); the module 1 bias b_conv1_1x1_2 has
        # the same shape, so mixing them up does not raise an error
        conv2_1x1_2 = tf.nn.relu(conv2d_s1(inception_1, W_conv2_1x1_2) + b_conv2_1x1_2)
        conv2_1x1_3 = tf.nn.relu(conv2d_s1(inception_1, W_conv2_1x1_3) + b_conv2_1x1_3)
        conv2_3x3 = conv2d_s1(conv2_1x1_2, W_conv2_3x3) + b_conv2_3x3
        conv2_5x5 = conv2d_s1(conv2_1x1_3, W_conv2_5x5) + b_conv2_5x5
        maxpool_2 = max_pool_3x3_s1(inception_1)
        conv2_1x1_4 = conv2d_s1(maxpool_2, W_conv2_1x1_4) + b_conv2_1x1_4

        # concatenate all the feature maps along the channel axis and hit them with a relu
        inception_2 = tf.nn.relu(tf.concat([conv2_1x1_1, conv2_3x3, conv2_5x5, conv2_1x1_4], 3))

        # flatten features for fully-connected layer
        inception_2_flat = tf.reshape(inception_2, [-1, 28*28*4*map2])

        # Fully-connected layers; dropout is only applied in training mode
        h_fc1 = tf.nn.relu(tf.matmul(inception_2_flat, W_fc1) + b_fc1)
        if train:
            h_fc1 = tf.nn.dropout(h_fc1, dropout)

        return tf.matmul(h_fc1, W_fc2) + b_fc2
    
    # training objective: mean softmax cross-entropy between the training-mode logits
    # and the one-hot labels, minimised with Adam
    train_logits = model(tf_X)
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=train_logits, labels=tf_y))
    opt = tf.train.AdamOptimizer(1e-4).minimize(loss)

    # class probabilities for the validation and test placeholders (dropout disabled)
    val_logits = model(tf_val_X, train=False)
    predictions_val = tf.nn.softmax(val_logits)
    test_logits = model(tf_test_X, train=False)
    predictions_test = tf.nn.softmax(test_logits)

    # op that initializes every variable of the graph
    init = tf.global_variables_initializer()

    # saver used to write all variables to disk and to restore them later
    saver = tf.train.Saver()

    # write a checkpoint of the freshly initialized variables right away, so that a
    # file exists at file_path for the restore step further down
    with tf.Session() as sess:
        sess.run(init)
        save_path = saver.save(sess, file_path)
        print("Model saved in path: %s." % save_path)

## Train Model

To train the model, set the number of training steps, create a session, initialize the variables, and run the optimizer op for each batch of training data. To see how the model is progressing, run the op that produces the validation predictions every 100 steps. When training is done, output the test data accuracy and save the model. There is also a flag, `use_previous`, that lets you load a model from `file_path` to continue training.

<div>
    <img src="./inception-module.png" />
    <center><caption>Figure 2. Inception Module</caption></center>
</div>

In [None]:
# create a session bound to the graph built above; it is reused by all the cells
# that follow (initialization, restore, training)
sess = tf.Session(graph=graph)

In [None]:
# initialize variables by running the init op defined in the graph cell; if a
# checkpoint is restored in the next cell, these values are overwritten
sess.run(init)
print("Model initialized.")

In [None]:
# set use_previous=1 to use file_path model
# set use_previous=0 to start model from scratch
use_previous = 1

# when the flag is set, load the variables stored at file_path into the session;
# otherwise keep the freshly initialized values from the previous cell
if use_previous:
    saver.restore(sess, file_path)
    print("Model restored.")

In [None]:
# training the model right now!
num_steps = 20 # 20000

for s in range(num_steps):
    # slide a window of train_batch_size examples through the training set, wrapping around
    offset = (s*train_batch_size) % (len(train_data)-train_batch_size)
    batch_end = offset + train_batch_size
    image_batch = train_data[offset:batch_end, :]
    label_batch = train_labels[offset:batch_end, :]

    # one optimizer step on this mini-batch
    _, loss_value = sess.run([opt, loss], feed_dict={tf_X: image_batch, tf_y: label_batch})

    # every 100 steps, report the accuracy on the held-out validation set
    if s % 100 == 0:
        preds = sess.run(predictions_val, feed_dict={tf_val_X: val_data})

        print("Step: " +str(s))
        print("Validation accuracy: "+str(accuracy(val_labels, preds)))
        print(" ")

    # on the last step: get test accuracy and save the model
    if s == (num_steps-1):
        # predictions for the test set are fetched in chunks of test_batch_size
        # through test_batchs.next_batch and stacked row-wise into `result`
        batch_test_X = test_batchs(test_data)
        n_test_batches = int(len(test_data)/ test_batch_size)

        result = np.array([]).reshape(0, 10)
        for batch_number in range(n_test_batches):
            test_images = batch_test_X.next_batch(test_batch_size)
            preds = sess.run(predictions_test, feed_dict={tf_test_X: test_images})
            result = np.concatenate((result, preds), axis=0)

        print("Test accuracy: "+str(accuracy(test_labels, result)))

        save_path = saver.save(sess, file_path)
        print("Model saved.")

<hr/>