In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import Image
from rlx.ml import Batches, show_image_mosaic
from rlx import utils
from time import time
from rlx.utils import humanbytes
import tflearn, psutil, gc
%matplotlib inline
# Reclaim unreachable objects before reporting how much RAM is currently free.
gc.collect()
# Single-argument print(...) emits the same text on Python 2 and is also
# valid Python 3 (the bare `print "..."` statement is Python-2-only).
print("free mem %s" % humanbytes(psutil.virtual_memory().free))



## Exercise 1: Get CIFAR-10

download the dataset from https://www.cs.toronto.edu/~kriz/cifar.html and create 

- a function to load selected batches. The argument `batches` is a list. Images must be `numpy arrays` with **pixel values between 0 and 1**
- a function to make train/test splits. `train_pct` sets the fraction of images used for train (for instance, `0.8` is 80%). `shuffle=True` means that train and test are random partitions (if `False`, the train partition is the first part of the data).

with `train_pct=.8` and a single batch loaded, variable names and shapes must be as follows:


        VARIABLE NAME   SHAPE
        
        imgs            (10000, 32, 32, 3)
        labels          (10000,)
        onehot          (10000, 10)

        train_imgs      (8000, 32, 32, 3)
        train_labels    (8000,)
        train_ohlabs    (8000, 10)

        test_imgs       (2000, 32, 32, 3)
        test_labels     (2000,)
        test_ohlabs     (2000, 10)

In [None]:
def load_cifar(batches = [1,2,3,4,5]):
    """Load the selected CIFAR-10 batches (exercise skeleton: fill in the ``...``).

    Args:
        batches: list of batch identifiers to load; defaults to ``[1,2,3,4,5]``.
            NOTE: this is a mutable default argument, so it must not be
            mutated inside the function.

    Returns:
        Tuple ``(imgs, labels, ohlabs)``. Per the exercise statement, ``imgs``
        must be a numpy array with pixel values between 0 and 1; for a single
        batch the expected shapes are (10000, 32, 32, 3), (10000,) and
        (10000, 10) respectively.
    """
    
    cifar10_dir ="/mnt/cifar-10-batches-py/"   # this is where you downloaded CIFAR10

    def onehot_labels(labels):
        # Index the 10x10 identity matrix by label: row i is the one-hot
        # vector for class i.
        return np.eye(10)[labels]

    # Placeholders to be replaced by the student's implementation.
    imgs   = ...
    labels = ...
    ohlabs = ...
    
    return imgs, labels, ohlabs

def train_test_split(imgs, labels, ohlabs, train_pct=.8, shuffle=True):
    """Split images and labels into train and test partitions (exercise skeleton).

    Args:
        imgs: image array, as returned by ``load_cifar``.
        labels: integer labels aligned with ``imgs``.
        ohlabs: one-hot labels aligned with ``imgs``.
        train_pct: fraction of the data assigned to train (``0.8`` is 80%).
        shuffle: per the exercise statement, ``True`` means train and test are
            random partitions; otherwise train is the first part of the data.

    Returns:
        Tuple ``(train_imgs, train_labels, train_ohlabs,
        test_imgs, test_labels, test_ohlabs)`` -- note that the return order
        differs from the assignment order below.
    """

    # Placeholders to be replaced by the student's implementation.
    train_imgs   = ...
    train_ohlabs = ...
    train_labels = ...

    test_imgs   = ...
    test_ohlabs = ...
    test_labels = ...

    return train_imgs, train_labels, train_ohlabs, test_imgs, test_labels, test_ohlabs

Load the data with your functions and create the TRAIN/TEST split. **We will use only batch 1** and randomly select **80%** for **TRAIN** and the rest for **TEST**.

In [None]:
# Load only batch 1 and split it with the train_test_split defaults
# (train_pct=.8, shuffle=True).
imgs, labels, ohlabs = load_cifar(batches = [1])
d = train_test_split(imgs, labels, ohlabs)
train_imgs, train_labels, train_ohlabs, test_imgs, test_labels, test_ohlabs = d

# Short display names for the ten CIFAR-10 classes, in label order.
cnames = ["plane", "car", "bird", "cat", "deer", "dog", "frog", "horse", "boat", "truck"]

# Sanity report of shapes and pixel range. Each print takes one pre-formatted
# string, so the output text matches the former Python 2 print statements
# while also being valid Python 3. Shapes are wrapped in a 1-tuple because a
# bare tuple on the right of % would be unpacked as multiple arguments.
print("imgs   %s min %s max %s" % (imgs.shape, np.min(imgs), np.max(imgs)))
print("labels %s" % (labels.shape,))
print("onehot %s" % (ohlabs.shape,))
print("train_imgs   %s" % (train_imgs.shape,))
print("train_labels %s" % (train_labels.shape,))
print("train_ohlabs %s" % (train_ohlabs.shape,))
print("test_imgs    %s" % (test_imgs.shape,))
print("test_labels  %s" % (test_labels.shape,))
print("test_ohlabs  %s" % (test_ohlabs.shape,))
gc.collect()
print("free mem %s" % humanbytes(psutil.virtual_memory().free))

In [None]:
# Visual sanity check of the training images and their labels, using the
# helper imported from rlx.ml (its exact layout is defined in that library).
show_image_mosaic(train_imgs, train_labels)

## Exercise 2: Create TF vars for following network

| layer   | input_size  | output_size | filter_size  | stride | n_filters |activation| var sizes  | params |
| ------- |:-----------:|:-----------:|:------------:|:------:|:---------:|:--------:|:--------------:|:------:|
| conv1   | 32x32x3     | 32x32x15    | 5x5          |1       | 15        | relu     | W1 = [5,5,3,15]<br/> b1 = [15]||
| conv2   | 32x32x15    | 16x16x18    | 5x5          |2       | 18        | relu     | W2 = [5,5,15,18]<br/> b2 = [18]||
| conv3   | 16x16x18    | 8x8x20      | 3x3          |2       | 20        | relu     | W3 = [3,3,18,20]<br/> b3 = [20]||
| maxpool | 8x8x20      | 4x4x20      |              |        |           |          | | k = 2 |
| fc      | 4x4x20      |    100      |              |        |           | relu     | W4 = [320,100] <br/>b4 = [100]||
| dropout | 100         |   100       |              |        |           |          | | pkeep = .75 |
| output  | 100         |   10        |              |        |           | softmax  | W5 = [100,10] <br/>b5 = [10]||


In [None]:
# Hyper-parameters of the network laid out in the Exercise 2 table.

# Filter side length for conv1..conv3, and the width of the fully connected layer.
f1_size = 5
f2_size = 5
f3_size = 3
fc_size = 100

# Number of filters produced by each convolutional layer.
c1_nfilters = 15
c2_nfilters = 18
c3_nfilters = 20

# Stride applied by each convolutional layer.
c1_stride = 1
c2_stride = 2
c3_stride = 2


In [None]:
def get_TF_vars():
    """Reset the default graph and create the network's inputs and parameters.

    Exercise skeleton: every ``...`` is to be replaced by the student.

    Returns:
        Tuple ``(X, Y, W1, b1, W2, b2, W3, b3, W4, b4, W5, b5)``: the data
        inputs followed by five weight/bias pairs. Presumably these map to
        conv1, conv2, conv3, fc and output in that order -- confirm against
        the layer table of this exercise.
    """
    # Start from an empty graph so re-running the cell does not accumulate
    # variables from earlier calls.
    tf.reset_default_graph()

    with tf.name_scope("data"):
        X  = ...
        Y  = ...

    with tf.name_scope("weights_biases"):

        W1 = ...  
        b1 = ...

        W2 = ...
        b2 = ...

        W3 = ...
        b3 = ...

        W4 = ...
        b4 = ...

        W5 = ...
        b5 = ...
    
    return X, Y, W1, b1, W2, b2, W3, b3, W4, b4, W5, b5



In [None]:
# Build the inputs and parameters on a freshly reset default graph and keep
# the returned handles in the notebook-global `tf_vars`.
tf_vars = get_TF_vars()

In [None]:
# Show the variables currently registered in the graph (cell output).
# NOTE(review): tf.all_variables is deprecated in later TF 1.x releases in
# favour of tf.global_variables -- confirm against the installed version.
tf.all_variables()

## Exercise 3: create TF graph

- use dropout `pkeep` as given in the argument.
- use `tf.nn.relu`, `tf.matmul`, `tf.nn.dropout`, `tf.nn.softmax` for layers
- use `tf.nn.softmax_cross_entropy_with_logits` to compute the loss (it applies softmax internally, so feed it the pre-softmax logits rather than `y_hat`)
- use `tf.train.AdamOptimizer` as optimizer with `learning_rate` from the argument.

In [None]:
def get_TF_graph(tf_vars, pkeep=0.75, learning_rate=0.001):
    """Assemble layers, loss, accuracy and optimizer on top of ``tf_vars``.

    Exercise skeleton: every ``...`` is to be replaced by the student.

    Args:
        tf_vars: the 12-tuple returned by ``get_TF_vars``.
        pkeep: keep probability for the dropout layer.
        learning_rate: learning rate for the optimizer.

    Returns:
        Tuple ``(C1, FC, y_hat, loss, train_step, accuracy)``.
    """
    
    X, Y, W1, b1, W2, b2, W3, b3, W4, b4, W5, b5 = tf_vars
        
    with tf.name_scope("layers"):
        
        C1 = ...

        C2 = ...

        C3 = ...

        FC = ...

        y_hat = ...

    with tf.name_scope("loss"):
        # NOTE(review): tf.nn.softmax_cross_entropy_with_logits expects
        # unnormalised logits; if y_hat is already a softmax output, pass the
        # pre-softmax tensor here instead -- confirm in the implementation.
        cross_entropy = ...
        # Mean cross-entropy over the fed examples, scaled by 100.
        loss = tf.reduce_mean(cross_entropy)*100

    with tf.name_scope("accuracy"):
        # A prediction is correct when the arg-max of y_hat matches the
        # arg-max of the one-hot target Y along axis 1.
        correct_pred = tf.equal(tf.argmax(y_hat, 1), tf.argmax(Y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    with tf.name_scope("optimizer"):
        train_step = ...
        
    return C1, FC, y_hat, loss, train_step, accuracy

In [None]:
# Rebuild inputs/parameters on a reset graph, then attach layers, loss,
# optimizer and accuracy using the default pkeep and learning_rate.
tf_vars = get_TF_vars()
C1, FC, y_hat, loss, train_step, accuracy = get_TF_graph(tf_vars)

## Exercise 4: create optimizer loop

In [None]:
def fit (X_train, y_train, X_test, y_test, 
         model_name, loss, train_step, accuracy, 
         batch_size, n_epochs, log_freq):
    """Run the training loop (exercise skeleton: implement the marked section).

    Args:
        X_train, y_train: training inputs and targets (the notebook passes
            the images and their one-hot labels).
        X_test, y_test: held-out inputs and targets, same layout.
        model_name: name returned unchanged to the caller; a later cell
            restores the model from ``"models/" + model_name + ".tf"``, so the
            implementation is expected to save a checkpoint there.
        loss, train_step, accuracy: graph nodes from ``get_TF_graph``.
        batch_size, n_epochs: mini-batch size and number of passes
            (presumably over the training data -- confirm).
        log_freq: presumably how often, in steps, metrics are logged -- confirm.

    Returns:
        Tuple ``(log_train, log_test, model_name)``. ``log_train`` and
        ``log_test`` must be created by the student code; ``plot_results``
        reads their ``time`` and ``accuracy`` columns and calls ``.rolling``
        on ``log_train``.
    """
    
    from rlx import ml

    # NOTE: tf_vars is read from the notebook's global scope, not passed in;
    # it must belong to the same graph as loss/train_step/accuracy.
    X, Y, W1, b1, W2, b2, W3, b3, W4, b4, W5, b5 = tf_vars    
    
    # -----------------
    # YOUR CODE HERE
    # -----------------
        
    return log_train, log_test, model_name

def plot_results(log_train, log_test):
    """Plot train and test accuracy against elapsed time.

    Both logs must expose ``time`` and ``accuracy`` columns; ``log_train``
    must also support ``.rolling`` (presumably pandas DataFrames -- confirm).
    The train curve is drawn twice: a 10-row rolling mean (opaque) and the
    raw values (faint). The title shows the last logged accuracy of each log.
    """
    # Smooth the training log; rows lacking a full 10-row window are NaN
    # and get dropped.
    smoothed = log_train.rolling(window=10).mean()
    smoothed = smoothed.dropna()

    plt.plot(smoothed.time, smoothed.accuracy, label="train", lw=2, color="blue")
    plt.plot(log_test.time, log_test.accuracy, label="test", lw=2, color="red")
    # Legend sits outside the axes on the right; only the two labelled
    # curves above are listed.
    plt.legend(bbox_to_anchor=(1,.5), loc="center left")
    plt.plot(log_train.time, log_train.accuracy, color="blue", alpha=.3)

    plt.grid()
    plt.xlabel("elapsed time (secs)")
    plt.ylabel("accuracy")
    # Horizontal reference line at accuracy 0.5.
    plt.axhline(0.5, color="black")
    plt.xlim(0, log_train.time.max()+1)

    last_train_acc = log_train.accuracy.values[-1]
    last_test_acc = log_test.accuracy.values[-1]
    plt.title("final train_acc=%.4f, test_acc=%.4f"%(last_train_acc, last_test_acc))
        

### run optimization

In [None]:
# Rebuild the graph from scratch so training starts from freshly created
# variables, then train on the train split and evaluate on the test split.
tf_vars = get_TF_vars()
C1, FC, y_hat, loss, train_step, accuracy = get_TF_graph(tf_vars)

# One-hot labels are passed as targets; model_name comes back for later cells.
log_train, log_test, model_name = fit(train_imgs, train_ohlabs, test_imgs, test_ohlabs,
                         "cnn_cifar10", loss, train_step, accuracy,
                         batch_size=100, n_epochs=10, log_freq=80)

In [None]:
# Accuracy-vs-time curves for the run above.
plot_results(log_train, log_test)

## Exercise 5: show confusion matrix (test), misses, filters and sample activations

In [None]:
# Unpack the 12 handles returned by get_TF_vars(). That tuple has no `lr`
# entry, so listing one here made the unpacking fail with a ValueError.
X, Y, W1, b1, W2, b2, W3, b3, W4, b4, W5, b5 = tf_vars

In [None]:
# Restore the trained parameters from disk and evaluate the whole test set in
# a single run. The checkpoint path must match whatever fit() saved.
# NOTE(review): if dropout with pkeep < 1 is baked into the graph, these test
# predictions are computed with dropout still active -- confirm.
with tf.Session() as sess:
    saver = tf.train.Saver()
    saver.restore(sess, "models/"+model_name+".tf")    
    # Fetch conv1 and fc activations, the predictions and all five weight
    # tensors; only X is fed.
    C1_activations, FC_activations, test_preds, w1,w2,w3,w4,w5 = \
                sess.run([C1,FC, y_hat,W1,W2,W3,W4,W5], feed_dict={X:test_imgs})


### show confusion matrix for test imgs

In [None]:
# TODO (exercise): build and display the confusion matrix for the test set,
# e.g. from test_preds against test_labels.
...

### show first layer filters

In [None]:
# TODO (exercise): visualise the first-layer filters (w1 holds the fetched W1).
...

### show some misses

In [None]:
# TODO (exercise): display some misclassified test images.
...

### show conv1 activations for a random image

In [None]:
# TODO (exercise): display the conv1 activations (C1_activations) for one
# randomly chosen test image.
...