In [73]:
import tensorflow as tf
import glob
import numpy as np

In [82]:
# Training hyper-parameters
learning_rate = 0.001
# Budget expressed in training examples, not optimizer steps: the training
# loop keeps going while (step count * batch_size) is below this value.
training_iters = 15000
batch_size = 100
test_size = 200
dropout = 0.8  # probability that an element is kept

# how often (in training steps) to display the training accuracy status
display_step_count = 10

# input image geometry
height = 60
width = 80
channels = 3

# Network Parameters
# pixels per channel; derived from the geometry above so the two cannot drift apart
n_input = height * width
n_classes = 2 # face or no face

# tf Graph input
# each row of x is one flattened image (n_input pixels for each of the channels)
x = tf.placeholder(tf.float32, [None, n_input*channels])
# each row of y is a one-hot label vector
y = tf.placeholder(tf.float32, [None, n_classes])
# dropout keep probability, supplied through feed_dict at run time
keep_prob = tf.placeholder(tf.float32)

In [83]:
def conv2d(x, w, b, strides=1):
    ''' convolve x with the filters w, add the bias b and apply a relu.

        The same stride is used along both spatial axes and padding is
        'SAME'. '''
    stride_spec = [1, strides, strides, 1]
    convolved = tf.nn.conv2d(x, w, strides=stride_spec, padding='SAME')
    return tf.nn.relu(tf.nn.bias_add(convolved, b))

def maxpool2d(x, k=2):
    ''' max-pool x over k x k windows.

        The window size and the stride are both k along the two middle
        axes, and padding is 'SAME'. '''
    window = [1, k, k, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')

def fullconnect(x, w, b, dropout):
    ''' fully connected layer: relu(x . w + b) followed by dropout.

        `dropout` is handed unchanged to tf.nn.dropout as its second
        argument. '''
    pre_activation = tf.add(tf.matmul(x, w), b)
    activated = tf.nn.relu(pre_activation)
    return tf.nn.dropout(activated, dropout)

In [84]:
# create the network
# this displays output to verify that all of the layers are the correct size
# and that connections have been properly made

def conv_net(x, weights, biases, dropout):
    ''' create the convolutional neural network

        x       -- batch of flattened images; reshaped here to
                   [-1, height, width, channels]
        weights -- dict with the keys 'wc1', 'wc2', 'wd1', 'wd2' and 'out'
        biases  -- dict with the keys 'bc1', 'bc2', 'bd1', 'bd2' and 'out'
        dropout -- forwarded to both fully connected layers

        Returns the output of the final layer (no activation is applied to
        it here). The static shape of most intermediate tensors is printed
        while the graph is built so the layer sizes can be checked by eye. '''

    # Reshape the input picture for convolution layers.
    # The shape comes from the notebook-level height/width/channels settings
    # (the same ones that size the input placeholder) instead of repeating
    # the literal numbers here.
    x = tf.reshape(x, shape=[-1, height, width, channels])
    print(x.get_shape().as_list())

    # convolution and max pool layers
    conv1 = conv2d(x, weights['wc1'], biases['bc1'])
    print(conv1.get_shape().as_list())
    maxp1 = maxpool2d(conv1, k=2)
    print(maxp1.get_shape().as_list())
    conv2 = conv2d(maxp1, weights['wc2'], biases['bc2'])
    print(conv2.get_shape().as_list())
    maxp2 = maxpool2d(conv2, k=2)

    # flatten for the fully connected layers; the row length is taken from
    # the first dimension of the first dense weight matrix
    flat_size = weights['wd1'].get_shape().as_list()[0]
    x_fc = tf.reshape(maxp2, shape=[-1, flat_size])
    print(x_fc.get_shape().as_list())

    # fully connected layers
    fc1 = fullconnect(x_fc, weights['wd1'], biases['bd1'], dropout)
    print(fc1.get_shape().as_list())
    fc2 = fullconnect(fc1, weights['wd2'], biases['bd2'], dropout)
    print(fc2.get_shape().as_list())

    # output layer
    out = tf.add(tf.matmul(fc2, weights['out']), biases['out'])
    print(out.get_shape().as_list())
    return out

In [85]:
def one_hot_encode(input_array, digits):
    ''' convert an array of integer labels into one-hot rows.

        input_array -- array of labels (its .shape[0] gives the row count)
        digits      -- length of every one-hot row

        Returns a list with one list per label: all zeros except for a 1
        at the index given by that label. '''
    def _encode(label):
        row = [0] * digits
        row[label] = 1
        return row

    return [_encode(input_array[idx]) for idx in range(input_array.shape[0])]

In [86]:
def read_and_decode(reader, filename_queue, num_images):
    ''' build the ops that read num_images examples from a tfrecord queue.

        reader         -- object whose read(filename_queue) yields a
                          (key, serialized example) pair
        filename_queue -- queue of record files handed to reader.read
        num_images     -- number of examples to put in the batch

        Returns (images, labels): two lists of num_images tensors. Every
        image is decoded as JPEG, reshaped to [height, width, channels] and
        cast to float32; every label is cast to int32. '''
    images = []
    labels = []

    # layout of one serialized example; identical for every record
    feature_spec = {
        'label': tf.FixedLenFeature([], tf.int64),
        'image_raw': tf.FixedLenFeature([], tf.string)
    }

    for _ in range(num_images):

        # get the next example from the tfrecord
        _key, serialized_example = reader.read(filename_queue)

        # parse the example and store it in features
        features = tf.parse_single_example(serialized_example,
                                           features=feature_spec)

        # Decode and reshape using the notebook-level height/width/channels
        # settings. The previous image_height/image_width/image_channels
        # names are not defined anywhere in this notebook, so building the
        # graph failed with a NameError on a fresh kernel.
        image = tf.image.decode_jpeg(features['image_raw'], channels=channels)
        image = tf.reshape(image, [height, width, channels])
        image = tf.cast(image, tf.float32)
        label = tf.cast(features['label'], tf.int32)

        images.append(image)
        labels.append(label)

    return images, labels

In [87]:
# directories holding the training and test tfrecord files
# (both currently point at the same folder)
train_record_path = '/media/tom/Files/MyPrograms/Python27/tensorflow_CNN/tfrecord_files/'
test_record_path = '/media/tom/Files/MyPrograms/Python27/tensorflow_CNN/tfrecord_files/'

# collect the record files of each split by filename prefix
train_pattern = train_record_path + 'train*.tfrecords'
test_pattern = test_record_path + 'test*.tfrecords'
train_files = glob.glob(train_pattern)
test_files = glob.glob(test_pattern)

# one shuffled filename queue per split (training first, then test)
train_queue, test_queue = [
    tf.train.string_input_producer(record_files, shuffle=True)
    for record_files in (train_files, test_files)
]

# one reader per split
train_reader, test_reader = tf.TFRecordReader(), tf.TFRecordReader()

# ops that yield one batch of decoded images and labels per split
train_image, train_label = read_and_decode(train_reader, train_queue, batch_size)
test_image, test_label = read_and_decode(test_reader, test_queue, test_size)

In [91]:
# Store layers weight & bias
# (the sizes quoted below match the shapes printed when the model is built)
weights = {
    # convolution layer 1
    # 64 filters of 5 x 5 over the 3 input channels:
    # 60 x 80 x 3 input -> 60 x 80 x 64 output
    'wc1': tf.Variable(tf.random_normal([5, 5, 3, 64])),
    # convolution layer 2
    # 32 filters of 3 x 3 over 64 channels:
    # 30 x 40 x 64 input -> 30 x 40 x 32 output
    'wc2': tf.Variable(tf.random_normal([3, 3, 64, 32])),

    # fully connected layer 1
    # flattened input of 32 * 15 * 20 = 9600 values, output of 1024
    'wd1': tf.Variable(tf.random_normal([32*15*20, 1024])),
    # fully connected layer 2
    # input of 1024, output of 256
    'wd2': tf.Variable(tf.random_normal([1024, 256])),

    # final output layer
    # input of 256, output of n_classes for classification
    'out': tf.Variable(tf.random_normal([256, n_classes]))
}
biases = {
    # convolutions - one bias for each output channel
    'bc1': tf.Variable(tf.random_normal([64])),
    'bc2': tf.Variable(tf.random_normal([32])),
    # NOTE: 'bc3' is not referenced by conv_net in this notebook; it is kept
    # only so that any other code indexing biases['bc3'] keeps working.
    'bc3': tf.Variable(tf.random_normal([32])),

    # fully connected - one bias for number of output nodes
    'bd1': tf.Variable(tf.random_normal([1024])),
    'bd2': tf.Variable(tf.random_normal([256])),

    # final layer - one bias for every output class
    'out': tf.Variable(tf.random_normal([n_classes]))
}

# Construct Model
# Pass the keep_prob placeholder, not the Python constant `dropout`, so the
# value supplied through feed_dict is what actually controls dropout
# (`dropout` while training, 1.0 while evaluating). Passing the constant
# baked a fixed keep probability into the graph and left the placeholder
# unused.
pred = conv_net(x, weights, biases, keep_prob)

# Define loss and optimizer
# Keyword arguments are required here: TensorFlow 1.x rejects positional
# logits/labels for this op.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# define the accuracy functions
# a prediction is correct when the arg-max of the logits equals the arg-max
# of the one-hot label
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initializing the Variables
init = tf.global_variables_initializer()

[None, 60, 80, 3]
[None, 60, 80, 64]
[None, 30, 40, 64]
[None, 30, 40, 32]
[None, 9600]
[None, 1024]
[None, 256]
[None, 2]


In [None]:
with tf.Session() as sess:
    sess.run(init)

    # The input pipeline is queue based: start_queue_runners launches the
    # threads that fill the filename queues. Without them every
    # sess.run() that reads a record would block forever on an empty queue.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # try/finally guarantees the reader threads are stopped and joined even
    # when something below raises.
    try:
        # read in the test data so that it can be used without being re-read
        test_images, test_labels = sess.run([test_image, test_label])

        # flatten the test images to match the x placeholder and one-hot
        # encode the labels to match the y placeholder
        # (was `test_data`, a name that is not defined in this notebook)
        test_x = np.array(test_images)
        test_x = np.reshape(test_x, [test_size, n_input*channels])
        test_y = np.array(test_labels)
        test_y = one_hot_encode(test_y, n_classes)
        test_y = np.array(test_y)

        print('training iterations: ' + str(training_iters))

        # `step` counts mini-batches; training stops once roughly
        # training_iters examples have been consumed
        step = 1
        while step * batch_size < training_iters:

            # get the training batch of images and labels
            batch_x, batch_y = sess.run([train_image, train_label])

            # process the training batch data
            batch_x = np.array(batch_x)
            batch_x = np.reshape(batch_x, [batch_size, n_input*channels])
            batch_y = np.array(batch_y)
            batch_y = one_hot_encode(batch_y, n_classes)
            batch_y = np.array(batch_y)

            # Run optimization (backpropagation)
            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y, keep_prob: dropout})

            # we want to periodically display the accuracy of our model
            if step % display_step_count == 0:

                # Calculate the loss and accuracy on the current training
                # batch, which is what the printed labels announce (this
                # used to be computed on the test set). keep_prob is fed as
                # 1. with the intent of switching dropout off here.
                loss, acc = sess.run([cost, accuracy],
                                     feed_dict={x: batch_x, y: batch_y, keep_prob: 1.})
                print ("Iter " + str(step*batch_size) + ", Minibatch Loss= " + \
                      "{:.6f}".format(loss) + ", Training Accuracy= " + \
                      "{:.5f}".format(acc))

            # increment the step for training
            step += 1

        print ("Optimization Finished!")

        # Calculate accuracy for test set
        print ("Testing Accuracy:", sess.run(accuracy, feed_dict={x: test_x, y: test_y, keep_prob: 1.}))

        # here, we could save the model to some format to use later
        # or run predictions on other images
        # I haven't implemented these yet since I don't have direct
        # applications made yet, and these will depend upon the user
    finally:
        # ask the queue-runner threads to stop and wait for them to finish
        coord.request_stop()
        coord.join(threads)
    

In [28]:
# visualize the tensorflow graph
# run this cell and then start TensorBoard with: tensorboard --logdir="CNN_graph"

# the operations in the graph can be given names so that they are easier to understand
# when visualizing in tensorboard

graph = tf.get_default_graph()

# The writer only needs the graph definition, so no Session is opened here.
# Closing the writer flushes the event file to disk; without it the graph may
# not show up in TensorBoard while the kernel is still running.
summary_writer = tf.summary.FileWriter('CNN_graph', graph)
summary_writer.close()