## NO NORMALIZATION 
This notebook shows a CNN model with [[CONV -> RELU] X 2 -> POOL -> Dropout] X 3 -> FC Layer -> Dropout -> FC Layer



Data-> (54,54,3) for 5 digits

In [1]:
import h5py
import tensorflow as tf
import numpy as np
import time
import os

#### Change file name and location below to the name and location of the file you would like to load


In [2]:
data = h5py.File('data/digits_54_54_3.h5','r')



In [3]:
# Reading training data
X_train=data['train_dataset'][:]
y_train=data['train_labels'][:]
X_val=data['valid_dataset'][:]
y_val=data['valid_labels'][:]
X_test=data['test_dataset'][:]
y_test=data['test_labels'][:]
data.close()

In [4]:
print('Training set', X_train.shape, y_train.shape)
print('Validation set', X_val.shape, y_val.shape)
print('Test set', X_test.shape, y_test.shape)

Training set (29401, 54, 54, 3) (29401, 5)
Validation set (4000, 54, 54, 3) (4000, 5)
Test set (13068, 54, 54, 3) (13068, 5)


#### Change file name and location below to the name and location of the file you would like to load


In [5]:
# Reading extra data
data = h5py.File('data/digits_54_54_3.h5','r')
X_extra=data['extra_dataset'][:]
y_extra=data['extra_labels'][:]
X_val_extra=data['valid_extra_dataset'][:]
y_val_extra=data['valid_extra_labels'][:]
data.close()

In [6]:
# Extracting 20000 samples from extra
X_extra1=X_extra[0:20000]
y_extra1=y_extra[0:20000]
X_val_extra1=X_val_extra[0:2000]
y_val_extra1=y_val_extra[0:2000]

In [7]:
# Concatenating the extra data with training
X_train = np.concatenate([X_train,X_extra1])
y_train = np.concatenate([y_train,y_extra1])
X_val = np.concatenate([X_val,X_val_extra1])
y_val = np.concatenate([y_val,y_val_extra1])
print('Train set', X_train.shape, y_train.shape)
print('Validation set', X_val.shape, y_val.shape)

Train set (49401, 54, 54, 3) (49401, 5)
Validation set (6000, 54, 54, 3) (6000, 5)


In [8]:
# Finding number of channels, labels and digits
num_channels=X_train.shape[3]
num_digits=y_train.shape[1]
num_labels=len(np.unique(y_train))

In [9]:
# Convolutional and FC layer definition
import tensorflow as tf
import time


class conv_layer(object):
    def __init__(self, input_x, in_channel, out_channel, kernel_shape,pooling,rand_seed=235,first=False,index=0):
        """
        :param input_x: The input of the conv layer. Should be a 4D array like (batch_num, img_len, img_len, channel_num)
        :param in_channel: The 4-th demension (channel number) of input matrix. For example, in_channel=3 means the input contains 3 channels.
        :param out_channel: The 4-th demension (channel number) of output matrix. For example, out_channel=5 means the output contains 5 channels (feature maps).
        :param kernel_shape: the shape of the kernel. For example, kernal_shape = 3 means you have a 3*3 kernel.
        :param rand_seed: An integer that presents the random seed used to generate the initial parameter value.
        :param index: The index of the layer. It is used for naming only.
        """
             
        assert len(input_x.shape) == 4 and input_x.shape[1] == input_x.shape[2] and input_x.shape[3] == in_channel

        with tf.variable_scope('conv_layer_%d' % index):
            with tf.name_scope('conv_kernel'):
                w_shape = [kernel_shape, kernel_shape, in_channel, out_channel]
                weight = tf.get_variable(name='conv_kernel_%d' % index, shape=w_shape, initializer=tf.contrib.layers.xavier_initializer_conv2d())
                self.weight = weight

            with tf.variable_scope('conv_bias'):
                b_shape = [out_channel]
                bias = tf.get_variable(name='conv_bias_%d' % index, shape=b_shape,initializer=tf.contrib.layers.xavier_initializer_conv2d())
                self.bias = bias

            conv_out = tf.nn.conv2d(input_x, weight, strides=[1, 2, 2, 1],padding="SAME")
            conv_out = tf.nn.relu(conv_out+bias)
          
            if pooling==True:
                conv_out = tf.nn.max_pool(conv_out, [1, 2, 2, 1], [1, 2, 2, 1],'SAME')
            self.cell_out = conv_out

    def output(self):
        return self.cell_out


class fc_layer(object):
    def __init__(self, input_x, in_size, out_size, rand_seed,activation_function=None,relu=False, index=0):
        """
        :param input_x: The input of the FC layer. It should be a flatten vector.
        :param in_size: The length of input vector.
        :param out_size: The length of output vector.
        :param rand_seed: An integer that presents the random seed used to generate the initial parameter value.
        :param keep_prob: The probability of dropout. Default set by 1.0 (no drop-out applied)
        :param activation_function: The activation function for the output. Default set to None.
        :param index: The index of the layer. It is used for naming only.

        """
        with tf.variable_scope('fc_layer_%d' % index):
            with tf.name_scope('fc_kernel'):
                w_shape = [in_size, out_size]
                weight = tf.get_variable(name='fc_kernel_%d' % index, shape=w_shape,
                                         initializer=tf.contrib.layers.xavier_initializer_conv2d())
                self.weight = weight

            with tf.variable_scope('fc_kernel'):
                b_shape = [out_size]
                bias = tf.get_variable(name='fc_bias_%d' % index, shape=b_shape,
                                       initializer=tf.contrib.layers.xavier_initializer_conv2d())
                self.bias = bias

            cell_out = tf.add(tf.matmul(input_x, weight), bias)
            if relu is True:
                cell_out = tf.nn.relu(cell_out+bias)
            
            self.cell_out = cell_out

            

    def output(self):
        return self.cell_out





In [10]:
# Neural net definition
def LeNet(input_x, input_y, nfilter_1, nfilter_2,nfilter_3, nfilter_4,nfilter_5,nfilter_6,
        filter_1,filter_2,filter_3,filter_4,filter_5,filter_6, img_len=54, num_channels=3, l2_norm=0.01, seed=235):
    
    pooling=False

    # Convolutional layer 1 with no normalization
    conv_layer_0 = conv_layer(input_x=input_x,
                              in_channel=num_channels,
                              out_channel=nfilter_1,
                              kernel_shape=filter_1,
                              pooling=False,
                              rand_seed=seed,first=True, index=0)

    # Convolutional layer 2 with no normalization
    conv_layer_1 = conv_layer(input_x=conv_layer_0.output(),
                              in_channel=nfilter_1,
                              out_channel=nfilter_2,
                              kernel_shape=filter_2,
                              pooling=True,
                              rand_seed=seed,first=False,index=1)
     # Dropout layer 1
    dropout_layer_0 = tf.nn.dropout(conv_layer_1.output(), keep_prob=0.9)

   # Convolutional layer 3 with no normalization
    conv_layer_2 = conv_layer(input_x=dropout_layer_0,
                              in_channel=nfilter_2,
                              out_channel=nfilter_3,
                              kernel_shape=filter_3,
                              pooling=False,
                              rand_seed=seed,first=False,index=2)
    
    # Convolutional layer 4 with no normalization
    
    conv_layer_3 = conv_layer(input_x=conv_layer_2.output(),
                              in_channel=nfilter_3,
                              out_channel=nfilter_4,
                              kernel_shape=filter_4,
                              pooling=True,
                              rand_seed=seed,first=False,index=3)
    
     # Dropout layer 2
    dropout_layer_1 = tf.nn.dropout(conv_layer_3.output(), keep_prob=0.9)
    
    # Convolutional layer 5 with no normalization
    conv_layer_4 = conv_layer(input_x=dropout_layer_1,
                              in_channel=nfilter_4,
                              out_channel=nfilter_5,
                              kernel_shape=filter_5,
                              pooling=False,
                              rand_seed=seed,first=False,index=4)
    # Convolutional layer 6 with no normalization
    conv_layer_5 = conv_layer(input_x=conv_layer_4.output(),
                              in_channel=nfilter_5,
                              out_channel=nfilter_6,
                              kernel_shape=filter_6,
                              pooling=True,
                              rand_seed=seed,first=False,index=5)
    
 
    pool_shape = conv_layer_3.output().get_shape()
    img_vector_length = pool_shape[1].value * pool_shape[2].value * pool_shape[3].value
    flatten = tf.reshape(conv_layer_3.output(), shape=[-1, img_vector_length])

    
    # fully connected layer 1
    fc_layer_0 = fc_layer(input_x=flatten,
                          in_size=img_vector_length,
                          out_size=fclayer1_size,
                          rand_seed=seed,
                          activation_function=tf.nn.relu,
                          index=0,
                          relu=True)
    # Dropout layer 3
    dropout_layer_2 = tf.nn.dropout(fc_layer_0.output(), keep_prob=0.5)
    
    # fully connected layer 2
    fc_layer_1 = fc_layer(input_x=fc_layer_0.output(),
                          in_size=fclayer1_size,
                          out_size=fclayer2_size,
                          rand_seed=seed,
                          activation_function=None,
                          index=1,
                          relu=False)
       # output - softmax
    logits_1=fc_layer(input_x=fc_layer_1.output(),
                          in_size=fclayer2_size,
                          out_size=num_labels,
                          rand_seed=seed,
                          activation_function=tf.nn.relu,index=2,
                          relu=False)
    logits_2=fc_layer(input_x=fc_layer_1.output(),
                          in_size=fclayer2_size,
                          out_size=num_labels,
                          rand_seed=seed,
                          activation_function=tf.nn.relu,index=3,
                          relu=False)
    logits_3=fc_layer(input_x=fc_layer_1.output(),
                          in_size=fclayer2_size,
                          out_size=num_labels,
                          rand_seed=seed,
                          activation_function=tf.nn.relu,index=4,
                          relu=False)
    logits_4=fc_layer(input_x=fc_layer_1.output(),
                          in_size=fclayer2_size,
                          out_size=num_labels,
                          rand_seed=seed,
                          activation_function=tf.nn.relu,index=5,
                          relu=False)
    logits_5=fc_layer(input_x=fc_layer_1.output(),
                          in_size=fclayer2_size,
                          out_size=num_labels,
                          rand_seed=seed,
                          activation_function=tf.nn.relu,index=6,
                          relu=False)
    
    y_pred = tf.stack([logits_1.output(), logits_2.output(), logits_3.output(), logits_4.output(), logits_5.output()])
    y_pred = tf.transpose(tf.argmax(y_pred, axis=2))  

    with tf.name_scope('loss'):

        loss1 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_1.output(),labels=input_y[:,0]))
        loss2 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_2.output(),labels=input_y[:,1]))
        loss3 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_3.output(),labels=input_y[:,2]))
        loss4 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_4.output(),labels=input_y[:,3]))
        loss5 = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_5.output(),labels=input_y[:,4]))
        loss = loss1 + loss2 + loss3 + loss4 + loss5 
        tf.summary.scalar('loss', loss)
        print("Iteration done")

    return y_pred,loss

def train_step(loss, learning_rate=1e-3):
    
    #Optimizer function
    with tf.name_scope('train_step'):
        global_step = tf.Variable(0, trainable=False)
        learning_rate=1e-3
        tf.summary.scalar('learning_rate', learning_rate)
        step = tf.train.RMSPropOptimizer(learning_rate).minimize(loss)

    return step

def evaluate(predictions, labels):
    with tf.name_scope('accuracy'):
            correct_prediction = tf.equal(predictions, labels)
            correct_prediction=tf.cast(correct_prediction, tf.float32)
            correct_prediction=tf.reduce_min(correct_prediction,1)
            acc=tf.reduce_mean(correct_prediction)
            tf.summary.scalar('accuracy', acc)
            acc=acc*100
    return acc


In [11]:
# Training function for the LeNet model
def training(X_train, y_train, X_val, y_val,X_test,y_test, nfilter_1, nfilter_2,nfilter_3, nfilter_4,nfilter_5,nfilter_6,filter_1,filter_2,filter_3,filter_4,filter_5,filter_6,img_len=54, num_channels=3, l2_norm=0.01, 
             seed=235,
             learning_rate=1e-2,
             epoch=20,
             batch_size=245,
             verbose=False,
             pre_trained_model=None):
    best_acc_li=[]
    all_acc=[]
    

    with tf.name_scope('inputs'):
        xs = tf.placeholder(shape=[None, 54, 54, 3], dtype=tf.float32)
        ys = tf.placeholder(shape=[None,5 ], dtype=tf.int64)
    

    output, loss = LeNet(xs, ys, nfilter_1, nfilter_2,nfilter_3, nfilter_4,nfilter_5,nfilter_6,
        filter_1,filter_2,filter_3,filter_4,filter_5,filter_6, img_len=54, num_channels=3, l2_norm=0.01, seed=235)
    iters = int(X_train.shape[0] / batch_size)
    print('number of batches for training: {}'.format(iters))

    step = train_step(loss)
    eve = evaluate(output,ys)

    iter_total = 0
    best_acc = 0
    cur_model_name = 'lenet_{}'.format(int(time.time()))

    with tf.Session() as sess:
        merge = tf.summary.merge_all()

        writer = tf.summary.FileWriter("log/{}".format(cur_model_name), sess.graph)
        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())
        
        if pre_trained_model is not None:
            try:
                print("Load the model from: {}".format(pre_trained_model))
                saver.restore(sess, 'model/{}'.format(pre_trained_model))
            except Exception:
                print("Load model Failed!")
                pass

        for epc in range(epoch):
            print("epoch {} ".format(epc + 1))

            for itr in range(iters):
                iter_total += 1

                training_batch_x = X_train[itr * batch_size: (1 + itr) * batch_size]
                training_batch_y = y_train[itr * batch_size: (1 + itr) * batch_size]

                _, cur_loss = sess.run([step, loss], feed_dict={xs: training_batch_x, ys: training_batch_y})
                
               
                if iter_total % 500 == 0:
                    #Validation 
                    valid_acc, merge_result = sess.run([eve, merge], feed_dict={xs: X_val, ys: y_val})
                    print(valid_acc)
                    if verbose:
                        print('{}/{} loss: {} validation accuracy : {}%'.format(
                            batch_size * (itr + 1),
                            X_train.shape[0],
                            cur_loss,
                            valid_acc))
                    all_acc.append(valid_acc)
                    writer.add_summary(merge_result, iter_total)

                    if valid_acc > best_acc:
                        print('Best validation accuracy! iteration:{} accuracy: {}%'.format(iter_total, valid_acc))
                        best_acc = valid_acc
                        best_acc_li.append(best_acc)
                        saver.save(sess, 'model/{}'.format(cur_model_name))
                    # test accuracy for every iteration
                    test_acc = sess.run(eve, feed_dict={xs: X_test, ys: y_test})
                    print('Test Accuracy : {}'.format(test_acc))
        #Test accuracy- final         
        test_acc = sess.run(eve, feed_dict={xs: X_test, ys: y_test})
        print('Test Accuracy : {}'.format(test_acc))

    print("Traning ends. The best valid accuracy is {}. Model named {}.".format(best_acc, cur_model_name))
    return(best_acc_li)

In [12]:

# Convolutional layer units
filter_1 = filter_2 = filter_3 = filter_4 = filter_5 = filter_6 = 5        
nfilter_1 = 48
nfilter_2 = 64                 
nfilter_3 = 128
nfilter_4 = 160    
nfilter_5=nfilter_6=192
# Fully connected layer units
fclayer1_size = fclayer2_size = 3072

In [11]:
start_time = time.time()
best_acc_li=[]
best_acc_li=training(X_train, y_train, X_val, y_val, X_test,y_test, nfilter_1, nfilter_2,nfilter_3, nfilter_4,nfilter_5,nfilter_6,filter_1,filter_2,filter_3,filter_4,filter_5,filter_6,img_len=54, num_channels=3, l2_norm=0.01, 
             seed=235,
             learning_rate=1e-3,
             epoch=60,
             batch_size=300,
             verbose=False,
             pre_trained_model=None)
run_time = time.time() - start_time

Iteration done
number of batches for training: 98
epoch 1 
epoch 2 
epoch 3 
epoch 4 
epoch 5 
epoch 6 
42.05
Best validation accuracy! iteration:500 accuracy: 42.04999923706055%
Test Accuracy : 36.6773796081543
epoch 7 
epoch 8 
epoch 9 
epoch 10 
epoch 11 
48.725
Best validation accuracy! iteration:1000 accuracy: 48.724998474121094%
Test Accuracy : 40.572391510009766
epoch 12 
epoch 13 
epoch 14 
epoch 15 
epoch 16 
48.55
Test Accuracy : 40.87847900390625
epoch 17 
epoch 18 
epoch 19 
epoch 20 
epoch 21 
49.8
Best validation accuracy! iteration:2000 accuracy: 49.79999923706055%
Test Accuracy : 42.347721099853516
epoch 22 
epoch 23 
epoch 24 
epoch 25 
epoch 26 
49.1
Test Accuracy : 41.092742919921875
epoch 27 
epoch 28 
epoch 29 
epoch 30 
epoch 31 
48.975
Test Accuracy : 41.80440902709961
epoch 32 
epoch 33 
epoch 34 
epoch 35 
epoch 36 
49.25
Test Accuracy : 41.429443359375
epoch 37 
epoch 38 
epoch 39 
epoch 40 
epoch 41 
49.3
Test Accuracy : 42.592594146728516
epoch 42 
epoch 43 

In [13]:
print("Run time is ",format(run_time))

Run time is  19223.646522521973
