In [1]:
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle

In [2]:
def readData(pickleFiles):
    ''' read pickle data'''
    data = None
    labels = None
    for pf in pickleFiles:
        with open(pf, 'rb') as f:
            save = pickle.load(f)
            if data == None:
                data = save['data']
                labels = save['labels']
            else:
                data = np.concatenate((data, save['data']))
                labels = np.concatenate((labels, save['labels']))
        del save
    return (data, labels)

In [3]:
# read pickle files for training/testing data
# multiple pickle files are here due to upload size limit(25MB) in docker as images are processed separately
# due to failure to install h5py
train_pickle_files = ['SVHN_train0.pickle', 'SVHN_train1.pickle', 'SVHN_train2.pickle', 'SVHN_train3.pickle', \
                      'SVHN_train4.pickle', 'SVHN_train5.pickle', 'SVHN_train6.pickle']
test_pickle_files = ['SVHN_test0.pickle', 'SVHN_test1.pickle', 'SVHN_test2.pickle']
(train_data, train_labels) = readData(train_pickle_files)
(test_data, test_labels) = readData(test_pickle_files)

In [4]:
def accuracy(predictions, originals):
    ''' accuracy of our model'''
    out = np.argmax(predictions, 2).T
    ne = 0
    for l in range(out.shape[0]):
        for i in range(5):
            if (out[l][i] != originals[l][i]):
                ne = ne+1
                break
    equ = out.shape[0] - ne
    perc = (equ*100)/out.shape[0]
    print "%d OUTPUTS ARE CORRECT OUT OF %d WITH PERCENTAGE: %f" % (equ, out.shape[0], perc)

In [5]:
# hyper parameters
BATCH_SIZE = 64
IMAGE_SIZE = 32
NUM_CHANNELS = 1
DEPTH1 = 16
DEPTH2 = 32
DEPTH3 = 64
SEED = 66478
OUT_LABELS = 11
NUM_EPOCHS=6
train_size = train_labels.shape[0]

In [10]:
# model implementation
graph = tf.Graph()
with graph.as_default():
    # define input and output
    train_data_node = tf.placeholder(tf.float32, shape = (BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))
    train_labels_node = tf.placeholder(tf.int64, shape = (BATCH_SIZE, 5))
    test_data_node = tf.constant(test_data)
    
    # variables for hidden layers
    conv1_weights = tf.Variable(tf.truncated_normal([5, 5, NUM_CHANNELS, DEPTH1], 
                                stddev = 0.1,
                                seed = SEED,
                                dtype = tf.float32))
    conv1_biases = tf.Variable(tf.constant(0.1, shape=[DEPTH1], dtype=tf.float32))
    conv2_weights = tf.Variable(tf.truncated_normal([5, 5, DEPTH1, DEPTH2],
                                stddev = 0.1,
                                seed = SEED,
                                dtype = tf.float32))
    conv2_biases = tf.Variable(tf.constant(0.1, shape=[DEPTH2], dtype=tf.float32))
    conv3_weights = tf.Variable(tf.truncated_normal([5, 5, DEPTH2, DEPTH3],
                                stddev = 0.1,
                                seed = SEED,
                                dtype = tf.float32))
    conv3_biases = tf.Variable(tf.constant(0.1, shape=[DEPTH3], dtype=tf.float32))
    
    #fully connected layers and biases
    fc1_weights = tf.Variable(tf.truncated_normal([DEPTH3, OUT_LABELS],
                                                 stddev = 0.1,
                                                 seed = SEED, dtype=tf.float32))
    fc1_biases = tf.Variable(tf.constant(0.1, shape=[OUT_LABELS], dtype=tf.float32))
    fc2_weights = tf.Variable(tf.truncated_normal([DEPTH3, OUT_LABELS],
                                                 stddev = 0.1,
                                                 seed = SEED, dtype=tf.float32))
    fc2_biases = tf.Variable(tf.constant(0.1, shape=[OUT_LABELS], dtype=tf.float32))
    fc3_weights = tf.Variable(tf.truncated_normal([DEPTH3, OUT_LABELS],
                                                 stddev = 0.1,
                                                 seed = SEED, dtype=tf.float32))
    fc3_biases = tf.Variable(tf.constant(0.1, shape=[OUT_LABELS], dtype=tf.float32))
    fc4_weights = tf.Variable(tf.truncated_normal([DEPTH3, OUT_LABELS],
                                                 stddev = 0.1,
                                                 seed = SEED, dtype=tf.float32))
    fc4_biases = tf.Variable(tf.constant(0.1, shape=[OUT_LABELS], dtype=tf.float32))
    fc5_weights = tf.Variable(tf.truncated_normal([DEPTH3, OUT_LABELS],
                                                 stddev = 0.1,
                                                 seed = SEED, dtype=tf.float32))
    fc5_biases = tf.Variable(tf.constant(0.1, shape=[OUT_LABELS], dtype=tf.float32))
    def model(data, train=False):
        # 1st convolutional layer
        conv = tf.nn.conv2d(data, conv1_weights, strides=[1,1,1,1], padding='VALID')
        relu = tf.nn.relu(tf.nn.bias_add(conv, conv1_biases))
        pool = tf.nn.max_pool(relu, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
        
        # 2nd convolutional layer
        conv = tf.nn.conv2d(pool, conv2_weights, strides=[1,1,1,1], padding='VALID')
        relu = tf.nn.relu(tf.nn.bias_add(conv, conv2_biases))
        pool = tf.nn.max_pool(relu, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
        
        # 3rd convolutional layer
        conv = tf.nn.conv2d(pool, conv3_weights, strides=[1,1,1,1], padding='VALID')
        hidden = tf.nn.relu(tf.nn.bias_add(conv, conv3_biases))
        if train:
            hidden = tf.nn.dropout(hidden, 0.95, seed=SEED)

        shape=hidden.get_shape().as_list()
        reshape=tf.reshape(hidden, [shape[0], shape[1]*shape[2]*shape[3]])
        # fully connected layers
        logits1 = tf.matmul(reshape, fc1_weights) + fc1_biases
        logits2 = tf.matmul(reshape, fc2_weights) + fc2_biases
        logits3 = tf.matmul(reshape, fc3_weights) + fc3_biases
        logits4 = tf.matmul(reshape, fc4_weights) + fc4_biases
        logits5 = tf.matmul(reshape, fc5_weights) + fc5_biases
        return [logits1, logits2, logits3, logits4, logits5]
    train_logits = model(train_data_node, train=True)
    # loss function
    loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(train_logits[0], train_labels_node[:,0])) + \
            tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(train_logits[1], train_labels_node[:,1])) + \
            tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(train_logits[2], train_labels_node[:,2])) + \
            tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(train_logits[3], train_labels_node[:,3])) + \
            tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(train_logits[4], train_labels_node[:,4]))
            
    batch = tf.Variable(0)
    learning_rate = tf.train.exponential_decay(0.01, batch*BATCH_SIZE, train_size, 0.95, staircase=True)
    optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9).minimize(loss, global_step=batch)
    #optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=batch)
    train_prediction = tf.pack([tf.nn.softmax(train_logits[0]),
                                tf.nn.softmax(train_logits[1]),
                                tf.nn.softmax(train_logits[2]),
                                tf.nn.softmax(train_logits[3]),
                                tf.nn.softmax(train_logits[4])])
    test_logits = model(test_data_node, train=False)
    test_prediction = tf.pack([tf.nn.softmax(test_logits[0]),
                                tf.nn.softmax(test_logits[1]),
                                tf.nn.softmax(test_logits[2]),
                                tf.nn.softmax(test_logits[3]),
                                tf.nn.softmax(test_logits[4])])
with tf.Session(graph = graph) as session:
    tf.initialize_all_variables().run()
    print('Initialized')
    # run optimizer on NUM_EPOCHS iterations of training data minimizing loss
    for step in xrange(int(NUM_EPOCHS*train_size) // BATCH_SIZE):
        offset = (step*BATCH_SIZE) % (train_size - BATCH_SIZE)
        batch_data = train_data[offset:(offset + BATCH_SIZE),]
        batch_labels = train_labels[offset:(offset+BATCH_SIZE)]
        feed_dict = {train_data_node:batch_data, train_labels_node:batch_labels}
        _, l, lr,train_out = session.run([optimizer, loss, learning_rate, train_prediction], 
                                               feed_dict=feed_dict)
        if step%100 == 0:
            print "step: %d" % (step)
            accuracy(train_out, batch_labels)
            accuracy(test_prediction.eval(), test_labels)
    test_out = test_prediction.eval()
    accuracy(test_out, test_labels)

Initialized
step: 0
0 OUTPUTS ARE CORRECT OUT OF 64 WITH PERCENTAGE: 0.000000
2 OUTPUTS ARE CORRECT OUT OF 13068 WITH PERCENTAGE: 0.000000
step: 100
4 OUTPUTS ARE CORRECT OUT OF 64 WITH PERCENTAGE: 6.000000
1364 OUTPUTS ARE CORRECT OUT OF 13068 WITH PERCENTAGE: 10.000000
step: 200
10 OUTPUTS ARE CORRECT OUT OF 64 WITH PERCENTAGE: 15.000000
3327 OUTPUTS ARE CORRECT OUT OF 13068 WITH PERCENTAGE: 25.000000
step: 300
17 OUTPUTS ARE CORRECT OUT OF 64 WITH PERCENTAGE: 26.000000
4477 OUTPUTS ARE CORRECT OUT OF 13068 WITH PERCENTAGE: 34.000000
step: 400
20 OUTPUTS ARE CORRECT OUT OF 64 WITH PERCENTAGE: 31.000000
5031 OUTPUTS ARE CORRECT OUT OF 13068 WITH PERCENTAGE: 38.000000
step: 500
31 OUTPUTS ARE CORRECT OUT OF 64 WITH PERCENTAGE: 48.000000
5590 OUTPUTS ARE CORRECT OUT OF 13068 WITH PERCENTAGE: 42.000000
step: 600
27 OUTPUTS ARE CORRECT OUT OF 64 WITH PERCENTAGE: 42.000000
6150 OUTPUTS ARE CORRECT OUT OF 13068 WITH PERCENTAGE: 47.000000
step: 700
22 OUTPUTS ARE CORRECT OUT OF 64 WITH PERCE

In [13]:
# check some image randomly for sanity checking
with graph.as_default():
    print((test_out.shape))
    print(np.argmax(test_out, 2).T[11])
    print(np.argmax(test_out, 2).T[21])

(5, 13068, 11)
[ 2  0 10 10 10]
[ 1  6 10 10 10]


In [14]:
# check robustness of graph by measuring accuracy after adding 0.1 to input data
with tf.Session(graph = graph) as session:
    tf.initialize_all_variables().run()
    print('Initialized')
    for step in xrange(int(NUM_EPOCHS*train_size) // BATCH_SIZE):
        offset = (step*BATCH_SIZE) % (train_size - BATCH_SIZE)
        batch_data = train_data[offset:(offset + BATCH_SIZE),] + 0.1
        batch_labels = train_labels[offset:(offset+BATCH_SIZE)]
        feed_dict = {train_data_node:batch_data, train_labels_node:batch_labels}
        _, l, lr,train_out = session.run([optimizer, loss, learning_rate, train_prediction], 
                                               feed_dict=feed_dict)
        if step%1000 == 0:
            accuracy(train_out, batch_labels)
    accuracy(test_prediction.eval(), test_labels)

Initialized
0 OUTPUTS ARE CORRECT OUT OF 64 WITH PERCENTAGE: 0.000000
31 OUTPUTS ARE CORRECT OUT OF 64 WITH PERCENTAGE: 48.000000
30 OUTPUTS ARE CORRECT OUT OF 64 WITH PERCENTAGE: 46.000000
51 OUTPUTS ARE CORRECT OUT OF 64 WITH PERCENTAGE: 79.000000
8148 OUTPUTS ARE CORRECT OUT OF 13068 WITH PERCENTAGE: 62.000000
