### Load Images

In [1]:
import os
from glob import glob

import cv2
import numpy as np
import tensorflow as tf

In [2]:
# Take image filenames and convert them to a single numpy array.
# TODO: use pickle to save the dataset instead
def convert_images(files):
    """Decode every image in `files` with OpenCV and stack them into one array.

    Args:
        files: iterable of image file paths.

    Returns:
        np.ndarray holding the decoded images in the same order as `files`
        (an empty array when `files` is empty).

    Raises:
        IOError: if a file cannot be decoded. cv2.imread signals failure by
            returning None instead of raising, which would otherwise end up
            as a silent hole in the resulting array.
    """
    images = []
    for file in files:
        image = cv2.imread(file)
        if image is None:
            raise IOError("Could not read image: %s" % file)
        images.append(image)
    return np.asarray(images)

In [3]:
# Collect the training images and order them by their numeric id ("<id>.png").
# os.path is used instead of splitting on '\\' so the parsing works with
# both Windows and POSIX path separators.
filenames = glob("./train/*")
filenames = sorted(filenames, key=lambda name: int(os.path.splitext(os.path.basename(name))[0]))

In [4]:
# Decode every file listed in `filenames` into one array (same order as `filenames`).
image_data = convert_images(filenames)

In [5]:
# Sanity check of the loaded array: (num_images, height, width, channels).
image_data.shape

(50000, 32, 32, 3)

### Load Labels

In [6]:
import pandas as pd

In [7]:
# Training labels table; the columns used below are `id` and `label`.
labels_df = pd.read_csv('trainLabels.csv')

In [8]:
# Distinct class names; a name's position in this array is used as its class index.
labels_unique = labels_df.label.unique()

In [36]:
# Peek at the first few rows of the labels table.
labels_df.head()

Unnamed: 0,id,label
0,1,frog
1,2,truck
2,3,truck
3,4,deer
4,5,automobile


In [10]:
# Numeric id of each training image, parsed from its filename ("<id>.png").
# os.path handles both Windows and POSIX separators (splitting on '\\' did not).
train_indeces = [int(os.path.splitext(os.path.basename(name))[0]) for name in filenames]

In [11]:
# Class name of each training image, looked up by the image id.
# Indexing by the `id` column (rather than row position idx-1) stays correct
# even if the CSV rows are not sorted by id or ids are not contiguous.
label_by_id = labels_df.set_index('id')['label']
train_labels = label_by_id.loc[train_indeces].tolist()

['frog',
 'truck',
 'truck',
 'deer',
 'automobile',
 'automobile',
 'bird',
 'horse',
 'ship',
 'cat',
 'deer',
 'horse',
 'horse',
 'bird',
 'truck',
 'truck',
 'truck',
 'cat',
 'bird',
 'frog',
 'deer',
 'cat',
 'frog',
 'frog',
 'bird',
 'frog',
 'cat',
 'dog',
 'deer',
 'airplane',
 'airplane',
 'truck',
 'automobile',
 'cat',
 'deer',
 'airplane',
 'cat',
 'horse',
 'cat',
 'cat',
 'dog',
 'bird',
 'bird',
 'horse',
 'automobile',
 'automobile',
 'automobile',
 'bird',
 'bird',
 'airplane',
 'truck',
 'dog',
 'horse',
 'truck',
 'bird',
 'bird',
 'dog',
 'bird',
 'deer',
 'cat',
 'automobile',
 'automobile',
 'ship',
 'bird',
 'automobile',
 'automobile',
 'deer',
 'truck',
 'horse',
 'ship',
 'dog',
 'truck',
 'frog',
 'horse',
 'cat',
 'automobile',
 'truck',
 'airplane',
 'cat',
 'automobile',
 'cat',
 'dog',
 'deer',
 'dog',
 'horse',
 'horse',
 'deer',
 'horse',
 'truck',
 'deer',
 'bird',
 'cat',
 'ship',
 'airplane',
 'automobile',
 'frog',
 'automobile',
 'automobile',
 

In [12]:
# Map every class name to its position in `labels_unique` (0-9).
label_data = []
for label in train_labels:
    matching_positions = np.where(labels_unique == label)[0]
    label_data.append(matching_positions[0])

## Training

For this project, we'll be utilizing deep learning, specifically [convolutional neural networks](https://en.wikipedia.org/wiki/Convolutional_neural_network). They are excellent for image problems due to their ability to capture spatial information of the pixels. Below is a nice image that describes a general CNN architecture for image classification:

<img src='./cnn.png'>

A good summary can be found [here](http://cs231n.github.io/convolutional-networks/)!

### Create Graph

Here I'll be defining a CNN architecture using the python library [Tensorflow](https://www.tensorflow.org/).

A summary of the network can be described in these steps:

- Data Augmentation (Flipping the image left/right)
- Image Whitening (Simple normalization of input pixels)
- Architecture of Actual Network:
    - **3 Convolutional Layers**
        - kernel_size = 3
        - stride = 1
        - number_outputs = 64 (output channels, i.e. feature maps; the very last convolution outputs 10, one per class)
        - L2 Weight Regularization
        - Relu Activation
        - Batch Norm applied after Relu
    - **1 Pooling Layer**
        - Avg Pooling
        - kernel_size = (3,3)
    - This block (3 convolutions -> pooling) is repeated **5 times**: (Convolutions -> Pooling) -> (Convolutions -> Pooling) -> ...
    - This ensures that each image's height and width are cut down from **32 x 32 to 1 x 1**.
    - Final output (of one image) is flattened to a 10 x 1 tensor which is fed into a final **softmax activation** to predict the probabilities the given image belongs to a certain class.
- Loss: Cross entropy + 1e-6 * (total weight regularization)
- Optimizer: Adam

**Data Augmentation**
- This is an important step because it allows us to "artificially" increase our training set.
- More data = more information the neural network can learn
- Helps increase generalization
- Several augmentations are possible, but for now, we are only using random flip left/right.

**Image Whitening**
- Often used to decorrelate data, in this case our image pixel values
- Examples of techniques: PCA whitening, ZCA whitening
- Attempted to use ZCA whitening, but had worse results than regular normalization for now.
- Used normalization to whiten images:
    - First flattened the whole dataset (all 50,000 images) into one vector and found the mean and standard deviation.
    - Subtracted the mean from every pixel and divided the result by the standard deviation (applied to every image fed to the network).

In [145]:
# Mean over every pixel value of the training set; this number is hard-coded
# in the normalization step of the graph definition.
np.mean(image_data.flatten())

120.70756512369792

In [146]:
# Standard deviation over every pixel value of the training set; this number
# is hard-coded in the normalization step of the graph definition.
np.std(image_data.flatten())

64.150075891121233

In [22]:
# Start from an empty default graph so re-running this cell does not pile up
# duplicate ops/variables.
tf.reset_default_graph()

# Batch of 32x32 images with 3 channels.
inputs = tf.placeholder(tf.float32, (None,32,32,3))
# One integer class index per image. Note `(None)` is just `None` (unknown
# shape), so the rank-1 shape is spelled as a real tuple.
labels = tf.placeholder(tf.int64, (None,), name='labels')
# Switches the batch-norm layers between training and inference behaviour.
is_training = tf.placeholder(tf.bool)
# Default placeholder for a dropout keep probability. NOTE: no layer defined
# in this cell consumes it, so feeding it currently has no effect.
keep_prob = tf.placeholder_with_default(1.0, shape=())
def image_augment(I):
    """Return image `I` after a random left/right flip (the only augmentation used)."""
    return tf.image.random_flip_left_right(I)

#http://ufldl.stanford.edu/tutorial/unsupervised/PCAWhitening/ | mediocre results right now, ignore
def zca_whiten(I, eps=1e-5):
    """ZCA-whiten the colour channels of a single 32x32x3 image.

    Every pixel is treated as one 3-dimensional sample; the 3x3 channel
    covariance is estimated from those samples and used to decorrelate them.

    Args:
        I: one image tensor that reshapes to [-1, 3] and back to [32, 32, 3].
        eps: small constant added to the eigenvalues before the inverse square
            root to avoid dividing by ~0. (The original body referenced an
            undefined `eps`; the default is a starting point -- TODO tune for
            0-255 pixel ranges.)

    Returns:
        float32 tensor of shape [32, 32, 3] with the whitened image.
    """
    Im = tf.cast(tf.reshape(I, shape = [-1,3]),dtype=tf.float32)
    Im = Im - tf.reduce_mean(Im,0)

    # Normalize by the number of pixel samples (rows of Im), not by the image
    # height I.shape[0], which is what the covariance was divided by before.
    num_pixels = tf.cast(tf.shape(Im)[0], tf.float32)
    cov = tf.matmul(Im,Im,transpose_a = True) / num_pixels

    S,U,_ = tf.svd(cov,full_matrices=True)
    # Diagonal scaling matrix (a diagonal matrix needs no transpose).
    D = tf.diag(1.0/tf.sqrt(S+eps))

    # ZCA whitening matrix (3x3): rotate, rescale, rotate back.
    X = tf.matmul(tf.matmul(U,D),tf.transpose(U))

    return tf.reshape(tf.matmul(Im,X),[32,32,3]) # zca whitened image

# Mean / standard deviation over all pixel values of the training set
# (the values printed by np.mean / np.std on image_data).
PIXEL_MEAN = 120.70756512369792
PIXEL_STD = 64.150075891121233

# Apply the random flip to each image of the batch separately.
augmented = tf.map_fn(image_augment, inputs)
# Identity node downstream of the augmentation: feeding images directly into
# `final_input` bypasses the random flip.
final_input = tf.identity(augmented)
# Alternative (currently unused): whitened = tf.map_fn(zca_whiten, final_input)
whitened = (final_input - PIXEL_MEAN) / PIXEL_STD

# Five conv/pool blocks are used because 2^5 = 32 (the image side length).
n_outputs = 64
last_output = n_outputs
pool_kernel = 3

# Five blocks of (conv+BN) x3 followed by average pooling.
NUM_BLOCKS = 5
for i in range(NUM_BLOCKS):
    # The first block reads the normalized images; every later block reads the
    # pooled output of the block before it.
    block_input = whitened if i == 0 else pool

    conv = tf.contrib.layers.conv2d(
        inputs=block_input, num_outputs=n_outputs, kernel_size=3, stride=1,
        weights_regularizer=tf.nn.l2_loss)
    bn1 = tf.contrib.layers.batch_norm(inputs=conv, is_training=is_training)

    conv2 = tf.contrib.layers.conv2d(
        inputs=bn1, num_outputs=n_outputs, kernel_size=3, stride=1,
        weights_regularizer=tf.nn.l2_loss)
    bn2 = tf.contrib.layers.batch_norm(inputs=conv2, is_training=is_training)

    # The last block emits one channel per class and pools with a 2x2 kernel.
    if i == NUM_BLOCKS - 1:
        last_output = 10
        pool_kernel = 2

    conv3 = tf.contrib.layers.conv2d(
        inputs=bn2, num_outputs=last_output, kernel_size=3, stride=1,
        weights_regularizer=tf.nn.l2_loss)
    bn3 = tf.contrib.layers.batch_norm(inputs=conv3, is_training=is_training)

    pool = tf.contrib.layers.avg_pool2d(inputs=bn3, kernel_size=pool_kernel, padding='SAME')
    

# Flatten the final pooled feature map into one vector of class scores per
# image and expose it under the name 'output'.
flat_scores = tf.contrib.layers.flatten(pool)
output = tf.identity(flat_scores, name='output')

# Loss = mean cross entropy + 1e-6 * (sum of the registered regularization terms).
cross_entropy = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=output))
weight_penalty = 1e-6 * tf.losses.get_regularization_loss()
loss = cross_entropy + weight_penalty

optimizer = tf.train.AdamOptimizer(learning_rate=0.0005, beta1=0.9, beta2=0.999)

# Every training step also runs the ops collected in UPDATE_OPS
# (presumably the batch-norm moving-average updates -- verify against TF docs).
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    minimizer = optimizer.minimize(loss)

# Fraction of images whose highest-scoring class equals the label.
predicted_class = tf.argmax(output, 1)
correct = tf.equal(predicted_class, labels)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

# Source: CS342 with Dr. Krähenbühl
variable_sizes = [v.get_shape().num_elements() for v in tf.trainable_variables()]
print("Total number of variables used:", np.sum(variable_sizes))

Total number of variables used: 488532


### Training for 200 Epochs

- An epoch is one full pass over the training set.
- Each epoch we shuffle the dataset and then feed the images into the CNN in batches (the GPU can't fit all images at once).

In [23]:
# Batch size
BS = 64

# Start a session (kept open: later cells reuse `sess` for saving/predicting)
sess = tf.Session()

# Set up training
sess.run(tf.global_variables_initializer())

num_examples = image_data.shape[0]
# Labels as an array so a batch can be selected with an index array.
labels_array = np.asarray(label_data)

for epoch in range(200):
    # Reproducible visiting order for this epoch. Indexing through a
    # permutation (instead of shuffling in place) leaves image_data and
    # label_data untouched, so they stay aligned with `filenames`.
    order = np.random.RandomState(epoch).permutation(num_examples)

    # Go through the entire dataset once. The range covers the trailing
    # partial batch too, so no sample is dropped (the old separate tail slice
    # `[final:len_-1]` skipped the last sample and would have been empty
    # whenever the dataset size was a multiple of BS).
    accuracy_vals, loss_vals = [], []
    for start in range(0, num_examples, BS):
        batch_idx = order[start:start+BS]
        batch_images, batch_labels = image_data[batch_idx], labels_array[batch_idx]
        # Train a single batch
        accuracy_val, loss_val, _ = sess.run([accuracy, loss, minimizer], feed_dict={inputs: batch_images, labels: batch_labels, is_training:True,keep_prob:0.5})
        accuracy_vals.append(accuracy_val)
        loss_vals.append(loss_val)

    # Unweighted mean over batches (the last, smaller batch counts as much as a full one).
    print('[%3d] Accuracy: %0.3f  \t  Loss: %0.3f'%(epoch, np.mean(accuracy_vals), np.mean(loss_vals)))
  


[  0] Accuracy: 0.496  	  Loss: 1.467
[  1] Accuracy: 0.674  	  Loss: 1.059
[  2] Accuracy: 0.741  	  Loss: 0.902
[  3] Accuracy: 0.781  	  Loss: 0.816
[  4] Accuracy: 0.807  	  Loss: 0.759
[  5] Accuracy: 0.823  	  Loss: 0.722
[  6] Accuracy: 0.838  	  Loss: 0.688
[  7] Accuracy: 0.851  	  Loss: 0.661
[  8] Accuracy: 0.862  	  Loss: 0.638
[  9] Accuracy: 0.870  	  Loss: 0.617
[ 10] Accuracy: 0.881  	  Loss: 0.594
[ 11] Accuracy: 0.888  	  Loss: 0.584
[ 12] Accuracy: 0.894  	  Loss: 0.567
[ 13] Accuracy: 0.903  	  Loss: 0.550
[ 14] Accuracy: 0.905  	  Loss: 0.541
[ 15] Accuracy: 0.912  	  Loss: 0.529
[ 16] Accuracy: 0.919  	  Loss: 0.516
[ 17] Accuracy: 0.920  	  Loss: 0.510
[ 18] Accuracy: 0.925  	  Loss: 0.502
[ 19] Accuracy: 0.928  	  Loss: 0.494
[ 20] Accuracy: 0.932  	  Loss: 0.487
[ 21] Accuracy: 0.934  	  Loss: 0.479
[ 22] Accuracy: 0.938  	  Loss: 0.471
[ 23] Accuracy: 0.940  	  Loss: 0.468
[ 24] Accuracy: 0.944  	  Loss: 0.460
[ 25] Accuracy: 0.945  	  Loss: 0.455
[ 26] Accura

### Save Model

In [40]:
# Source: CS342 with Dr. Krähenbühl
def save(output_file, graph=None, session=None):
    """Write the graph and its variables into a single zip archive.

    The checkpoint files are first written to a temporary directory and then
    packed into `output_file`; the temporary directory is always removed.

    Args:
        output_file: path of the archive to create.
        graph: graph to save; defaults to the current default graph.
        session: session holding the variable values; defaults to the current
            default session.

    Raises:
        ValueError: if no session is given and there is no default session.
    """
    from zipfile import ZipFile
    from os import path
    from shutil import rmtree
    from tempfile import mkdtemp

    # Same fallbacks as load(); previously graph=None crashed on as_default().
    if graph is None:
        graph = tf.get_default_graph()
    if session is None:
        session = tf.get_default_session()
    if session is None:
        raise ValueError("save() needs a session that holds the variables to write")

    tmp_dir = mkdtemp()
    try:
        tmp_output = path.join(tmp_dir, path.basename(output_file))
        with graph.as_default():
            saver = tf.train.Saver(allow_empty=True)
            saver.save(session, tmp_output, write_state=False)

        # The context manager closes the archive even if a write fails.
        with ZipFile(output_file, 'w') as of:
            for f in glob(tmp_output+'.*'):
                of.write(f, path.basename(f))
    finally:
        # Best-effort cleanup of the temporary checkpoint files, also on errors.
        rmtree(tmp_dir, ignore_errors=True)

In [41]:
# Archive the current default graph together with the variable values held by `sess`.
graph = tf.get_default_graph()
save('./week1_model.tfg',graph,sess)

### Load Model





In [3]:
def load(input_file, graph=None, session=None):
    """Restore a model archive written by save() and return its graph.

    The archive is unpacked into a temporary directory (always removed again),
    the single `.meta` file inside is imported into `graph`, and the variable
    values are restored into `session`.

    Args:
        input_file: path of the zip archive to read.
        graph: graph to import into; defaults to the current default graph.
        session: session to restore the variables into; defaults to the current
            default session, or a new session bound to `graph` if there is
            none. NOTE: a session created here is not returned, so pass your
            own session if you need to run the restored model.

    Returns:
        The graph the model was imported into.

    Raises:
        IOError: if the archive does not contain exactly one `.meta` file.
    """
    from os import path
    from shutil import rmtree
    from tempfile import mkdtemp
    from zipfile import ZipFile

    tmp_dir = mkdtemp()
    try:
        with ZipFile(input_file, 'r') as f:
            f.extractall(tmp_dir)

        # Find the model name
        meta_files = glob(path.join(tmp_dir, '*.meta'))
        if len(meta_files) < 1:
            raise IOError( "[E] No meta file found, giving up" )
        if len(meta_files) > 1:
            raise IOError( "[E] More than one meta file found, giving up" )

        meta_file = meta_files[0]
        # Strip only the trailing extension; str.replace would also rewrite a
        # '.meta' occurring anywhere else in the temporary path.
        model_file = path.splitext(meta_file)[0]

        if graph is None:
            graph = tf.get_default_graph()
        if session is None:
            session = tf.get_default_session()
        if session is None:
            # Bind the new session to the graph being restored.
            session = tf.Session(graph=graph)

        # Load the model in TF
        with graph.as_default():
            saver = tf.train.import_meta_graph(meta_file)
            if saver is not None:
                saver.restore(session, model_file)
    finally:
        # Remove the extracted files on success and on every error path.
        rmtree(tmp_dir, ignore_errors=True)
    return graph

In [4]:
# Restore the archived model. No session is passed, so load() falls back to the
# default session or creates its own; that session is not returned to the caller.
graph = load('./week1_model.tfg')

INFO:tensorflow:Restoring parameters from C:\Users\kurti\AppData\Local\Temp\tmprzj26v47\week1_model.tfg


### Make Test Predictions

In [24]:
# Collect the test images and order them by their numeric id ("<id>.png").
# os.path is used instead of splitting on '\\' so the parsing works with
# both Windows and POSIX path separators.
test_files = glob('./test/*')
test_files = sorted(test_files, key=lambda name: int(os.path.splitext(os.path.basename(name))[0]))

In [45]:
# Decode every file listed in `test_files` into one array (same order as `test_files`).
image_test = convert_images(test_files)

In [46]:
# Sanity check of the loaded array: (num_images, height, width, channels).
image_test.shape

(300000, 32, 32, 3)

In [47]:
with graph.as_default():
    test_batch = 3000 # batch shape
    # Build the argmax op once; creating it inside the loop would add a new
    # node to the graph for every batch.
    predict_op = tf.argmax(output, 1)
    predictions = []
    for i in range(0,image_test.shape[0],test_batch):
        if i%90000==0:
            print(i)
        batch_images = image_test[i:i+test_batch]
        # Feed `final_input` (downstream of the augmentation) so the test
        # images are not randomly flipped.
        batch_pred = sess.run(predict_op, feed_dict = {final_input: batch_images,is_training:False})
        # extend() copes with a final batch shorter than test_batch, where
        # indexing range(test_batch) would raise IndexError.
        predictions.extend(batch_pred)

0
90000
180000
270000


In [48]:
# Turn the predicted class indices back into class names and build the
# submission table (ids are 1-based, in test-file order). Building the frame
# directly avoids the scratch column 0 and also works for an empty prediction list.
predicted_idx = np.asarray(predictions, dtype=np.int64)
test_classes = labels_unique[predicted_idx]
test_df = pd.DataFrame(
    {
        'id': np.arange(1, len(test_classes) + 1, dtype=np.int64),
        'label': test_classes,
    },
    columns=['id', 'label'],
)
test_df

Unnamed: 0,id,label
0,1,deer
1,2,airplane
2,3,automobile
3,4,ship
4,5,bird
5,6,cat
6,7,airplane
7,8,dog
8,9,bird
9,10,dog


In [49]:
# Write the id/label table to disk without the DataFrame index column.
test_df.to_csv('cnn_week1.csv',index=False)

In [5]:
# Names of all nodes (ops) in the default graph, in graph-definition order.
graph_def = tf.get_default_graph().as_graph_def()
[node.name for node in graph_def.node]

['Placeholder',
 'labels',
 'Placeholder_1',
 'PlaceholderWithDefault/input',
 'PlaceholderWithDefault',
 'map/Shape',
 'map/strided_slice/stack',
 'map/strided_slice/stack_1',
 'map/strided_slice/stack_2',
 'map/strided_slice',
 'map/TensorArray',
 'map/TensorArrayUnstack/Shape',
 'map/TensorArrayUnstack/strided_slice/stack',
 'map/TensorArrayUnstack/strided_slice/stack_1',
 'map/TensorArrayUnstack/strided_slice/stack_2',
 'map/TensorArrayUnstack/strided_slice',
 'map/TensorArrayUnstack/range/start',
 'map/TensorArrayUnstack/range/delta',
 'map/TensorArrayUnstack/range',
 'map/TensorArrayUnstack/TensorArrayScatter/TensorArrayScatterV3',
 'map/Const',
 'map/TensorArray_1',
 'map/while/Enter',
 'map/while/Enter_1',
 'map/while/Merge',
 'map/while/Merge_1',
 'map/while/Less/Enter',
 'map/while/Less',
 'map/while/LoopCond',
 'map/while/Switch',
 'map/while/Switch_1',
 'map/while/Identity',
 'map/while/Identity_1',
 'map/while/TensorArrayReadV3/Enter',
 'map/while/TensorArrayReadV3/Enter_1

In [14]:
# Fetch a tensor from the default graph by its auto-generated name
# (the displayed result is a bool tensor of unknown shape).
tf.get_default_graph().get_tensor_by_name('Placeholder_1:0')

<tf.Tensor 'Placeholder_1:0' shape=<unknown> dtype=bool>