In [1]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import sys
from IPython.display import display, Image
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import OneHotEncoder
import pickle
import time
## cPickle does not work in Python 3, so the standard pickle module is used instead
#from six.moves import cPickle as pickle
from six.moves import range

# Configure the matplotlib backend to plot inline in IPython
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
%matplotlib inline

import tensorflow as tf

In [2]:
def load_CIFAR_batch(filename, num_classes=10):
  """Load one pickled CIFAR batch file and return images and one-hot labels.

  The pixel rows stored under ``'data'`` are passed through
  ``zero_center_pixels`` (so each batch file is standardised with its own
  statistics), reshaped to ``(N, 3, 32, 32)`` and transposed to
  ``(N, 32, 32, 3)`` float32. The integer labels stored under ``'labels'``
  are one-hot encoded.

  Args:
    filename: path to a pickled CIFAR batch file.
    num_classes: number of label classes; labels are assumed to be the
      integers ``0 .. num_classes - 1`` -- TODO confirm for other datasets.

  Returns:
    Tuple ``(X, Y)``: ``X`` is a float32 array of shape ``(N, 32, 32, 3)``;
    ``Y`` is the ``LabelBinarizer`` encoding of the labels, with one column
    per class when ``num_classes > 2``.
  """
  # NOTE(security): pickle.load can execute arbitrary code while unpickling;
  # only open batch files obtained from a trusted source.
  with open(filename, 'rb') as f:
    datadict = pickle.load(f, encoding='latin')

  X = zero_center_pixels(datadict['data'])
  # -1 lets the row count follow the file instead of assuming 10000 images.
  X = X.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1).astype("float32")

  # Fit on the full class range rather than on this batch's labels, so every
  # batch produces the same columns even if a class is absent from the file.
  binarizer = LabelBinarizer().fit(np.arange(num_classes))
  Y = binarizer.transform(np.array(datadict['labels']))
  return X, Y

In [3]:
def load_CIFAR10(ROOT):
  """Load the five training batches and the test batch found under ROOT.

  Training batches ``data_batch_1`` .. ``data_batch_5`` are read with
  ``load_CIFAR_batch`` and concatenated along the first axis; ``test_batch``
  is read the same way.

  Returns:
    Tuple ``(Xtr, Ytr, Xte, Yte)`` of training images, training labels,
    test images and test labels.
  """
  batch_paths = [os.path.join(ROOT, 'data_batch_%d' % (idx, ))
                 for idx in range(1, 6)]
  # Each call yields an (images, labels) pair; zip(*) regroups them by kind.
  image_parts, label_parts = zip(*(load_CIFAR_batch(path) for path in batch_paths))
  Xtr = np.concatenate(image_parts)
  Ytr = np.concatenate(label_parts)
  Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
  return Xtr, Ytr, Xte, Yte

In [4]:
def get_CIFAR10_data(num_training=50000, num_validation=0, num_test=10000,
                     cifar10_dir='./cifar-10-batches-py'):
    """Load CIFAR-10 from disk and split it into train / validation / test.

    The first ``num_training`` training images form the training split, the
    next ``num_validation`` images form the validation split, and the first
    ``num_test`` test images form the test split.

    Args:
        num_training: number of training examples to keep.
        num_validation: number of validation examples, taken from the
            training data immediately after the training examples.
        num_test: number of test examples to keep.
        cifar10_dir: directory holding the CIFAR batch files.

    Returns:
        Tuple ``(X_train, y_train, X_val, y_val, X_test, y_test)``; the image
        arrays are float32.

    Raises:
        ValueError: if any requested count is negative.
        IndexError: if more examples are requested than were loaded.
    """
    # Load the raw CIFAR-10 data
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    if min(num_training, num_validation, num_test) < 0:
        raise ValueError("num_training, num_validation and num_test must be >= 0")
    # Slices silently truncate, so check the bounds explicitly to keep the
    # out-of-range failure that index-based selection would give.
    val_end = num_training + num_validation
    if val_end > X_train.shape[0]:
        raise IndexError("requested %d training+validation examples but only %d are available"
                         % (val_end, X_train.shape[0]))
    if num_test > X_test.shape[0]:
        raise IndexError("requested %d test examples but only %d are available"
                         % (num_test, X_test.shape[0]))

    # Subsample the data (validation first, before X_train is narrowed).
    X_val = X_train[num_training:val_end]
    y_val = y_train[num_training:val_end]
    X_train = X_train[:num_training]
    y_train = y_train[:num_training]
    X_test = X_test[:num_test]
    y_test = y_test[:num_test]

    # astype already returns a fresh array, so no extra .copy() is needed.
    X_train = X_train.astype("float32")
    X_val = X_val.astype("float32")
    # Previously assigned to a misspelled `x_test`, so the conversion was lost.
    X_test = X_test.astype("float32")

    return X_train, y_train, X_val, y_val, X_test, y_test

In [5]:
def zero_center_pixels(data):
    """Standardise each column of ``data`` to zero mean and standard deviation 0.5.

    Args:
        data: 2-D array-like with one sample per row; statistics are computed
            per column over the rows.

    Returns:
        Array of the same shape holding ``(data - mean) * 0.5 / sigma``.
        Columns whose standard deviation is zero come back as all zeros
        instead of NaN/inf.
    """
    data = np.asarray(data)
    # Accumulate in float64 for accuracy, then store the statistics as float32
    # so that integer or float32 input produces a float32 result.
    old_mean = data.mean(axis=0, dtype=np.float64).astype('float32')
    old_sigma = data.std(axis=0, dtype=np.float64).astype('float32')
    # A constant column has sigma == 0; dividing by 1 instead leaves it at 0.
    old_sigma = np.where(old_sigma == 0, 1.0, old_sigma).astype('float32')
    return (data - old_mean) * 0.5 / old_sigma

In [6]:
# Load every split once, then report the shape of each array.
(train_dataset, train_labels,
 val_dataset, val_labels,
 test_dataset, test_labels) = get_CIFAR10_data(50000, 0, 10000)

for caption, array in [
        ('Train data shape: ', train_dataset),
        ('Train labels shape: ', train_labels),
        ('Validation data shape: ', val_dataset),
        ('Validation labels shape: ', val_labels),
        ('Test data shape: ', test_dataset),
        ('Test labels shape: ', test_labels),
]:
    print(caption, array.shape)



Train data shape:  (50000, 32, 32, 3)
Train labels shape:  (50000, 10)
Validation data shape:  (0, 32, 32, 3)
Validation labels shape:  (0, 10)
Test data shape:  (10000, 32, 32, 3)
Test labels shape:  (10000, 10)


In [7]:
# Disabled cell: everything below is wrapped in a string literal, so none of
# it executes; the trailing ';' suppresses the notebook's echo of the string.
'''
##  this code was repurposed from Week 7 Section Jupyter Notebook
## reshaping the data to 4D Tensor 

image_size = 32
num_labels = 10
num_channels = 3 # RGB

def reformat(dataset, labels):
    dataset = dataset.reshape((-1, image_size, image_size, num_channels)).astype(np.float32)
    labels = (np.arange(num_labels) == labels[:,None]).astype(np.float32)
    return(dataset, labels)

train_dataset, train_labels = reformat(train_dataset, train_labels)
#valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
#print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)
''';

---
- Step 5 -
============
Convolutional Neural Nets
---------

Now we turn to building a CNN using the architecture described in the TensorFlow website tutorial [Deep MNIST for Experts](https://www.tensorflow.org/versions/r0.11/tutorials/mnist/pros/index.html). It will be a small network with two convolutional layers, followed by one fully connected hidden layer and an output layer. We'll limit the depth of this model so that it will run fine on the CPU and give a more elaborate architecture below that you can experiment with. Most production CNNs run on GPUs because they require more computation power. You can also find this example explained in Chapter 5 of [First Contact with TensorFlow](http://www.jorditorres.org/first-contact-with-tensorflow/) and in Chapter 5 of "Getting started with TensorFlow", available on [Proquest through Hollis](http://proquest.safaribooksonline.com.ezp-prod1.hul.harvard.edu/book/programming/machine-learning/9781786468574).

### Other useful references
There are also web books: Michael A. Nielsen's [Neural Networks and Deep Learning](http://neuralnetworksanddeeplearning.com/chap6.html) and Goodfellow et al.'s [Deep Learning](http://www.deeplearningbook.org/) (a preprint, available in HTML). [Colah's blog](http://colah.github.io/) also explains convolutions and has some impressive visualizations.

If you want to experiment hands-on with convolutions, then download [the GIMP](https://www.gimp.org/downloads/) (if you don't already have it), load up some of your favourite images, and then go to Filters $\rightarrow$ Generic $\rightarrow$ Convolution Matrix.  
[Here](https://docs.gimp.org/en/plug-in-convmatrix.html) are some examples.

---

### Choices to make in a CNN
* convolution kernel dimensions: the patch size (e.g. 5x5)
* stride length: 1, 2, something else
* the padding, SAME or VALID
* pooling: average or max pooling, pooling size  


To make the CNN, we will use two other `nn` functions in TensorFlow. The first is for the convolution itself, called [```tf.nn.conv2d()```](https://www.tensorflow.org/versions/r0.11/api_docs/python/nn.html#conv2d). The function takes a 4D input tensor (which is why the images were reshaped to `(N, 32, 32, 3)` when they were loaded), the weight variable, and the stride and padding, which are specified as parameters.

The second function is the pooling function, either [```tf.nn.max_pool```](https://www.tensorflow.org/versions/r0.11/api_docs/python/nn.html#max_pool) or [```tf.nn.avg_pool```](https://www.tensorflow.org/versions/r0.11/api_docs/python/nn.html#avg_pool). These functions take a 4D input tensor, along with the pooling size, the stride and the padding specified as parameters. We will show max_pool in the example.  

Note that between the parameters in convolution and the pooling, we have accumulated a number of hyper-parameters - this gives rise to a large number of options for tuning the best performing model, even without considering regularization/dropout and fully connected layers.

We redefine the weight and bias functions as per the TensorFlow example.

In [8]:
## this code was repurposed from Week 7 Section Jupyter Notebook

def accuracy(predictions, labels):
    """Return the percentage of rows whose arg-max class matches the label's.

    Both arguments hold one row per example and one column per class; the
    column with the largest value in each row is taken as that row's class.
    """
    predicted_classes = np.argmax(predictions, axis=1)
    true_classes = np.argmax(labels, axis=1)
    num_correct = np.sum(predicted_classes == true_classes)
    return 100.0 * num_correct / predictions.shape[0]

def weight_variable(shape):
    """Return a tf.Variable of `shape` initialised from a truncated normal (stddev 0.01)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.01))

def bias_variable(shape):
    """Return a tf.Variable of `shape` with every element initialised to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

In [9]:
## this code was repurposed from Week 7 Section Jupyter Notebook

# Re-define the function to include the keep probability
def run_session(num_epochs, name, k_prob=1.0):
    """Train the network defined in the module-level ``graph`` and print test accuracy.

    Relies on names defined at notebook level: ``graph``, ``batch_size``,
    ``train_dataset``, ``train_labels``, ``test_labels`` and the graph nodes
    ``tf_train_dataset``, ``tf_train_labels``, ``keep_prob``, ``optimizer``,
    ``loss``, ``train_prediction`` and ``test_prediction``.

    Args:
        num_epochs: number of training iterations. Each iteration trains on a
            single minibatch of ``batch_size`` examples, not a full pass over
            the data.
        name: label for the run; currently unused.
        k_prob: value fed to the ``keep_prob`` placeholder while training.
    """
    with tf.Session(graph=graph) as session:
        writer = tf.train.SummaryWriter("/tmp/tensorflowlogs", session.graph)
        try:
            tf.initialize_all_variables().run()
            print("Initialized")
            # Number of distinct start positions the sliding minibatch window can
            # take; guarded so a dataset of exactly one batch does not divide by 0.
            span = train_labels.shape[0] - batch_size
            for step in range(num_epochs):
                offset = (step * batch_size) % span if span > 0 else 0
                batch_data = train_dataset[offset:(offset + batch_size), :]
                batch_labels = train_labels[offset:(offset + batch_size), :]
                feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels, keep_prob : k_prob}
                _, l, predictions = session.run([optimizer, loss, train_prediction], feed_dict=feed_dict)
                if (step % 500 == 0):
                    print("Minibatch loss at epoch {}: {}".format(step, l))
                    print("Minibatch accuracy: {:.1f}".format(accuracy(predictions, batch_labels)))
                    #print("Validation accuracy: {:.1f}".format(accuracy(valid_prediction.eval(), valid_labels)))
            print("Test accuracy: {:.1f}".format(accuracy(test_prediction.eval(), test_labels)))
            #test_preds[name] = test_prediction.eval().ravel()
        finally:
            # Flush and release the event file even if training is interrupted.
            writer.close()

In [10]:
## Build a distorted copy of the test set:
## every image is rotated 90 degrees counterclockwise with np.rot90.

rotated_test_dataset = np.array([np.rot90(image) for image in test_dataset])

In [11]:
## this code was repurposed from Week 7 Section Jupyter Notebook

# Hyper-parameters for the CNN graph built below.
image_size = 32    # height and width of the input images
num_labels = 10    # number of output classes
num_channels = 3 # RGB
batch_size = 16    # examples per training minibatch
patch_size = 5     # side length of the convolution kernels
depth1 = 32        # output channels of the first convolution
depth2 = 64        # output channels of the second convolution
num_hidden = 1024  # units in the fully connected hidden layer

graph = tf.Graph()

with graph.as_default():

    # Input data.
    tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size, image_size, num_channels))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    #tf_valid_dataset = tf.constant(valid_dataset)
    # The test predictions are computed on the rotated copy of the test set.
    tf_test_dataset = tf.constant(rotated_test_dataset)

    # Variables.
    layer1_weights = weight_variable([patch_size, patch_size, num_channels, depth1])
    layer1_biases = bias_variable([depth1])
    layer2_weights = weight_variable([patch_size, patch_size, depth1, depth2])
    layer2_biases = bias_variable([depth2])
    # Two stride-2 poolings leave an (image_size // 4) x (image_size // 4) map
    # when image_size is a multiple of 4. The parentheses matter: without them
    # the expression is evaluated left to right and is only correct by
    # coincidence for some sizes.
    layer3_weights = weight_variable([(image_size // 4) * (image_size // 4) * depth2, num_hidden])
    layer3_biases = bias_variable([num_hidden])
    layer4_weights = weight_variable([num_hidden, num_labels])
    layer4_biases = bias_variable([num_labels])

    keep_prob = tf.placeholder(tf.float32)
    # Model with dropout
    def model(data, proba=keep_prob):
        """Return class logits for `data`, applying dropout with keep probability `proba`."""
        # Convolution
        conv1 = tf.nn.conv2d(data, layer1_weights, [1, 1, 1, 1], padding='SAME') + layer1_biases
        pooled1 = tf.nn.max_pool(tf.nn.relu(conv1), ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1], padding='SAME')
        # Convolution
        conv2 = tf.nn.conv2d(pooled1, layer2_weights, [1, 1, 1, 1], padding='SAME') + layer2_biases
        pooled2 = tf.nn.max_pool(tf.nn.relu(conv2), ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1], padding='SAME')
        # Fully Connected Layer
        # Flatten each example's feature map into one row for the matmul.
        shape = pooled2.get_shape().as_list()
        reshape = tf.reshape(pooled2, [shape[0], shape[1] * shape[2] * shape[3]])
        full3 = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
        # Dropout
        full3 = tf.nn.dropout(full3, proba)
        return(tf.matmul(full3, layer4_weights) + layer4_biases)
  
    # Training computation.
    logits = model(tf_train_dataset, keep_prob)
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, tf_train_labels))

    # Optimizer.
    optimizer = tf.train.AdamOptimizer(1e-4).minimize(loss)

    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(logits)
    #valid_prediction = tf.nn.softmax(model(tf_valid_dataset,1.0))
    # Keep probability 1.0 disables dropout when predicting on the test set.
    test_prediction = tf.nn.softmax(model(tf_test_dataset,1.0))

In [12]:
## this code was repurposed from Week 7 Section Jupyter Notebook
# Time the whole training run; the elapsed wall-clock seconds are printed last.
t0 = time.time()
run_session(num_epochs=100001, name="CNN", k_prob=0.5)
elapsed_seconds = time.time() - t0
print(elapsed_seconds)

Initialized
Minibatch loss at epoch 0: 2.306685209274292
Minibatch accuracy: 0.0
Minibatch loss at epoch 500: 1.8999409675598145
Minibatch accuracy: 25.0
Minibatch loss at epoch 1000: 1.8542370796203613
Minibatch accuracy: 31.2
Minibatch loss at epoch 1500: 1.1044995784759521
Minibatch accuracy: 62.5
Minibatch loss at epoch 2000: 1.5367530584335327
Minibatch accuracy: 56.2
Minibatch loss at epoch 2500: 1.6072032451629639
Minibatch accuracy: 56.2
Minibatch loss at epoch 3000: 1.9589229822158813
Minibatch accuracy: 31.2
Minibatch loss at epoch 3500: 1.6274793148040771
Minibatch accuracy: 25.0
Minibatch loss at epoch 4000: 1.3590004444122314
Minibatch accuracy: 56.2
Minibatch loss at epoch 4500: 1.501413106918335
Minibatch accuracy: 43.8
Minibatch loss at epoch 5000: 0.9400443434715271
Minibatch accuracy: 68.8
Minibatch loss at epoch 5500: 2.337679386138916
Minibatch accuracy: 25.0
Minibatch loss at epoch 6000: 0.9790550470352173
Minibatch accuracy: 68.8
Minibatch loss at epoch 6500: 1.14