In [1]:
import tensorflow as tf
import numpy as np
import math
import timeit
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
from data_utils import load_CIFAR10

def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=10000,
                     cifar10_dir='cifar-10-batches-py'):
    """
    Load the CIFAR-10 dataset from disk, split it into training, validation
    and test subsets, and zero-center every subset with the mean image of
    the training subset.

    Parameters
    ----------
    num_training : int
        Number of leading samples of the loaded training set kept for training.
    num_validation : int
        Number of samples, taken directly after the training samples, kept
        for validation.
    num_test : int
        Number of leading samples of the loaded test set kept for testing.
    cifar10_dir : str
        Directory handed to ``load_CIFAR10``.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_val, y_val, X_test, y_test)``.

    Raises
    ------
    ValueError
        If any requested subset size is negative.
    IndexError
        If more samples are requested than ``load_CIFAR10`` returned.
    """
    sizes = (num_training, num_validation, num_test)
    if min(sizes) < 0:
        raise ValueError('subset sizes must be non-negative, got %r' % (sizes,))

    # Load the raw CIFAR-10 data
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Slicing would silently return short subsets, so check the bounds
    # explicitly and keep failing loudly on an oversize request.
    val_end = num_training + num_validation
    if val_end > X_train.shape[0]:
        raise IndexError('requested %d training+validation samples but only '
                         '%d are available' % (val_end, X_train.shape[0]))
    if num_test > X_test.shape[0]:
        raise IndexError('requested %d test samples but only %d are available'
                         % (num_test, X_test.shape[0]))

    # Subsample the data: the validation block directly follows the
    # training block inside the loaded training set.
    X_val, y_val = X_train[num_training:val_end], y_train[num_training:val_end]
    X_train, y_train = X_train[:num_training], y_train[:num_training]
    X_test, y_test = X_test[:num_test], y_test[:num_test]

    # Normalize the data: subtract the mean *training* image from every
    # subset. The subtraction is done out of place so it also works when the
    # loader returns integer pixel data (an in-place `-=` would raise a
    # casting error there) and so the loaded arrays are never modified.
    mean_image = np.mean(X_train, axis=0)
    X_train = X_train - mean_image
    X_val = X_val - mean_image
    X_test = X_test - mean_image

    return X_train, y_train, X_val, y_val, X_test, y_test


# Load every split once, then report the shape of each returned array.
X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
for label, array in (
    ('Train data shape: ', X_train),
    ('Train labels shape: ', y_train),
    ('Validation data shape: ', X_val),
    ('Validation labels shape: ', y_val),
    ('Test data shape: ', X_test),
    ('Test labels shape: ', y_test),
):
    print(label, array.shape)

Train data shape:  (49000, 32, 32, 3)
Train labels shape:  (49000,)
Validation data shape:  (1000, 32, 32, 3)
Validation labels shape:  (1000,)
Test data shape:  (10000, 32, 32, 3)
Test labels shape:  (10000,)


In [41]:
# Start from an empty default graph so re-running this cell does not pile
# duplicate layers onto the previous graph.
tf.reset_default_graph()

# Geometry of one input image.
height = 32
width = 32
channels = 3
# NOTE(review): `n_inputs` is not referenced anywhere else in the visible
# cells and ignores `channels` -- confirm whether it can be removed.
n_inputs = height * width

# First convolution layer.
conv1_fmaps = 50
conv1_ksize = 3
conv1_stride = 1
conv1_pad = "SAME"

# Second convolution layer.
conv2_fmaps = 100
conv2_ksize = 3
conv2_stride = 1
conv2_pad = "SAME"

# Pooling keeps the number of feature maps of the layer it follows.
pool2_fmaps = conv2_fmaps

n_fc1 = 64
n_outputs = 10

with tf.name_scope("inputs"):
    X = tf.placeholder(tf.float32, shape=[None, height, width, channels], name="X")
    y = tf.placeholder(tf.int64, shape=[None], name="y")
    # Fed by run_model on every batch; no layer in this graph consumes it.
    is_training = tf.placeholder(tf.bool)

conv1 = tf.layers.conv2d(X, filters=conv1_fmaps, kernel_size=conv1_ksize,
                         strides=conv1_stride, padding=conv1_pad,
                         activation=tf.nn.relu, name="conv1")

pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

conv2 = tf.layers.conv2d(pool1, filters=conv2_fmaps, kernel_size=conv2_ksize,
                         strides=conv2_stride, padding=conv2_pad,
                         activation=tf.nn.relu, name="conv2")

pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
print("pool2",pool2)
# Derive the flattened feature size from pool2's static shape instead of
# hard-coding 8 * 8, so the reshape stays correct if the image size, strides
# or pooling configuration above are changed.
pool2_flat_dim = int(np.prod(pool2.get_shape().as_list()[1:]))
pool2_flat = tf.reshape(pool2, shape=[-1, pool2_flat_dim])
print("pool2_flat",pool2_flat)

with tf.name_scope("fc1"):
    fc1 = tf.layers.dense(pool2_flat, n_fc1, activation=tf.nn.relu, name="fc1")

with tf.name_scope("output"):
    # y_out holds the raw logits; Y_proba is their softmax.
    y_out = tf.layers.dense(fc1, n_outputs, name="output")
    Y_proba = tf.nn.softmax(y_out, name="Y_proba")

# Per-example cross entropy computed from the logits, averaged over the batch.
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y_out, labels=y)
mean_loss = tf.reduce_mean(xentropy)

# No optimizer is defined here: the training cell builds one AdamOptimizer
# per learning rate it tries and minimizes `mean_loss` with it.


pool2 Tensor("max_pooling2d_2/MaxPool:0", shape=(?, 8, 8, 100), dtype=float32)
pool2_flat Tensor("Reshape:0", shape=(?, 6400), dtype=float32)


In [42]:
def run_model(session, predict, loss_val, Xd, yd,
              epochs=10, batch_size=100, print_every=100,
              training=None, plot_losses=False):
    """
    Run `epochs` mini-batch passes over (Xd, yd), optionally training.

    The function feeds the module-level placeholders `X`, `y` and
    `is_training`, so the graph-building cell must have been run first.

    Parameters
    ----------
    session : tf.Session
        Session used to evaluate the graph.
    predict : tf.Tensor
        Per-class scores; the argmax over axis 1 is compared with `y`.
    loss_val : tf.Tensor
        Loss tensor evaluated on every batch.
    Xd, yd : array-like
        Inputs and labels, indexed along their first axis.
    epochs : int
        Number of passes over the data.
    batch_size : int
        Number of samples per mini-batch (the last batch may be smaller).
    print_every : int
        Accepted for backward compatibility; currently unused.
    training : tf.Operation or None
        If given, it is run on every batch and `is_training` is fed True;
        otherwise the call only evaluates.
    plot_losses : bool
        Accepted for backward compatibility; currently unused.

    Returns
    -------
    (total_loss, total_correct)
        Sample-weighted mean loss and accuracy of the last epoch.
    """
    # have tensorflow compute accuracy
    # NOTE: these ops are created on every call, so each call adds a few
    # nodes to the default graph.
    correct_prediction = tf.equal(tf.argmax(predict, 1), y)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    num_samples = Xd.shape[0]

    # shuffle indices once; the same order is reused for every epoch
    train_indicies = np.arange(num_samples)
    np.random.shuffle(train_indicies)

    training_now = training is not None

    # Fetch the loss that was passed in (not the global `mean_loss`) and the
    # per-sample hits. The third fetch is the training op when training and
    # the accuracy tensor otherwise; its value is discarded either way.
    fetches = [loss_val, correct_prediction,
               training if training_now else accuracy]

    num_batches = int(math.ceil(num_samples / batch_size))
    for e in range(epochs):
        # keep track of losses and accuracy
        correct = 0
        losses = []
        # make sure we iterate over the dataset once
        for i in range(num_batches):
            # i * batch_size is always below num_samples, so no wrap-around
            # is needed when picking the batch indices
            start_idx = i * batch_size
            idx = train_indicies[start_idx:start_idx + batch_size]

            # create a feed dictionary for this batch
            feed_dict = {X: Xd[idx, :],
                         y: yd[idx],
                         is_training: training_now}
            # the last batch may hold fewer than batch_size samples
            actual_batch_size = yd[idx].shape[0]

            # have tensorflow compute loss and correct predictions
            # and (if given) perform a training step
            loss, corr, _ = session.run(fetches, feed_dict=feed_dict)

            # weight the batch loss by its size so the epoch mean is exact
            losses.append(loss * actual_batch_size)
            correct += np.sum(corr)

        total_correct = correct / num_samples
        total_loss = np.sum(losses) / num_samples
        print("Epoch {2}, Overall loss = {0:.3g} and accuracy of {1:.3g}"\
              .format(total_loss, total_correct, e + 1))

    return total_loss, total_correct

# Train the network from scratch once per candidate learning rate, then
# score the freshly trained weights on the validation and test splits.
with tf.Session() as sess:
    with tf.device("/cpu:0"): #"/cpu:0" or "/gpu:0" 
        learning_rates = [0.0004, 0.0005]
        print("length of learning rates:",len(learning_rates))
        for x, learning_rate in enumerate(learning_rates):
            # A new Adam optimizer and train op are built for this rate.
            optimizer = tf.train.AdamOptimizer(learning_rate)
            train_step = optimizer.minimize(mean_loss)
            # Re-initialising every variable restarts training for this rate.
            sess.run(tf.global_variables_initializer())
            print('Learning rate=',learning_rate)
            print('Training: ', x)
            run_model(sess, y_out, mean_loss, X_train, y_train,
                      epochs=10, batch_size=100, print_every=100,
                      training=train_step, plot_losses=True)
            print('Validation: ',x)
            run_model(sess, y_out, mean_loss, X_val, y_val,
                      epochs=1, batch_size=100)
            print('Test: ',x)
            run_model(sess, y_out, mean_loss, X_test, y_test,
                      epochs=1, batch_size=100)

length of learning rates: 2
Learning rate= 0.0004
Training:  0
Epoch 1, Overall loss = 2.35 and accuracy of 0.212
Epoch 2, Overall loss = 1.7 and accuracy of 0.36
Epoch 3, Overall loss = 1.34 and accuracy of 0.504
Epoch 4, Overall loss = 1.09 and accuracy of 0.606
Epoch 5, Overall loss = 0.963 and accuracy of 0.656
Epoch 6, Overall loss = 0.872 and accuracy of 0.69
Epoch 7, Overall loss = 0.79 and accuracy of 0.718
Epoch 8, Overall loss = 0.722 and accuracy of 0.743
Epoch 9, Overall loss = 0.664 and accuracy of 0.764
Epoch 10, Overall loss = 0.61 and accuracy of 0.783
Validation:  0
Epoch 1, Overall loss = 1.24 and accuracy of 0.648
Test:  0
Epoch 1, Overall loss = 1.23 and accuracy of 0.649
Learning rate= 0.0005
Training:  1
Epoch 1, Overall loss = 2.26 and accuracy of 0.379
Epoch 2, Overall loss = 1.25 and accuracy of 0.564
Epoch 3, Overall loss = 1.03 and accuracy of 0.641
Epoch 4, Overall loss = 0.88 and accuracy of 0.696
Epoch 5, Overall loss = 0.77 and accuracy of 0.734
Epoch 6, 

In [44]:
# Closing remark of the experiment, emitted as three separate lines.
print("\n".join((
    "The test training accuracy was able to be improved by",
    "increasing the number of maps in each of the convolution",
    "layer as well as increasing the batch size.",
)))

The test training accuracy was able to be improved by
increasing the number of maps in each of the convolution
layer as well as increasing the batch size.
