In [None]:
import tensorflow as tf
import tflearn
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import Image
from rlx.ml import Batches, show_image_mosaic
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.normalization import local_response_normalization
from tflearn.layers.estimator import regression
from rlx.utils import humanbytes
from datetime import datetime
import psutil
import gc
%matplotlib inline

In [None]:
# Print the free system memory reported by psutil, passed through the
# rlx `humanbytes` helper (presumably a human-readable size formatter).
print "free mem", humanbytes(psutil.virtual_memory().free)

## Exercise 1: load weights for first convolutional layer as trained with ImageNet

get the pretrained weights for alexnet from https://www.cs.toronto.edu/~guerzhoy/tf_alexnet/

you should get weights such as the following:

In [None]:
# Display the reference picture stored next to the notebook; it must stay the
# last expression of the cell so that it is rendered as the cell output.
Image(filename='imgs/alexnet_filters1.png')

In [None]:
def get_alexnet_weights():
    """Exercise stub: load and return the pretrained AlexNet weights.

    The body is intentionally left for the student. Other cells call
    ``w.keys()`` on the result and read ``w["conv1"][0]`` / ``w["conv1"][1]``
    (likewise for ``"conv2"`` and ``"conv3"``), storing them in variables
    named ``*_w`` and ``*_b``, so the returned object must support that
    access pattern.
    """
    # ---------------------
    # YOUR CODE HERE
    w = ...
    return w

In [None]:
w = get_alexnet_weights()

print w.keys()
print "free mem", humanbytes(psutil.virtual_memory().free)

conv1_w = w["conv1"][0]
conv1_b = w["conv1"][1]
conv2_w = w["conv2"][0]
conv2_b = w["conv2"][1]
conv3_w = w["conv3"][0]
conv3_b = w["conv3"][1]
print conv1_w.shape, conv1_b.shape
print conv2_w.shape, conv2_b.shape
print conv3_w.shape, conv3_b.shape

conv2_w = np.concatenate((conv2_w,conv2_w),axis=2)
print conv2_w.shape

In [None]:
def display_imgs(w, figsize=(6,6)):
    """Plot every filter stored along the last axis of ``w`` as a small image.

    Parameters
    ----------
    w : array indexed as ``w[:, :, :, i]``
        Filter bank; one image is drawn per index ``i`` of the last axis.
    figsize : tuple
        Size handed to ``plt.figure``.

    Notes
    -----
    * Values are min-max rescaled to [0, 1] over the whole array before
      plotting. The division is forced to floating point so integer arrays
      are not floor-divided under Python 2.
    * A constant array (max == min) is drawn as all zeros instead of
      dividing by zero and producing NaNs.
    * The grid has 10 columns and at least 10 rows (the original layout);
      rows are only added when there are more than 100 filters, so that
      every filter gets a valid subplot index.
    """
    plt.figure(figsize=figsize)

    n_filters = w.shape[-1]
    n_cols = 10
    # Ceiling division; never fewer than the original 10 rows.
    n_rows = max(10, -(-n_filters // n_cols))

    lo = np.min(w)
    span = np.max(w) - lo
    if span == 0:
        # Constant input: nothing to rescale, avoid 0/0.
        w = np.zeros(w.shape)
    else:
        w = (w - lo) / float(span)

    for i in range(n_filters):
        plt.subplot(n_rows, n_cols, i+1)
        plt.imshow(w[:,:,:,i], interpolation="none")
        plt.axis("off")

# Plot the conv1 weights read from the pretrained weights, one image per filter.
display_imgs(conv1_w)

## Exercise 2: Load CIFAR-10


In [None]:
def load_cifar(batches = [1,2,3,4,5]):
    """Exercise stub: load the requested CIFAR-10 batches.

    ``batches`` lists the batch numbers to load (1 to 5 by default). The body
    is left to the student and must define the three returned names. The
    checking cell prints ``.shape`` of each one and the min/max of ``imgs``;
    judging by the names they are images, labels and one-hot labels
    (presumably -- confirm when implementing).

    NOTE(review): the default for ``batches`` is a mutable list shared between
    calls; do not modify it in place.
    """

    # --------------------------
    # YOUR CODE HERE
    # --------------------------
    ...
    
    return imgs, labels, ohlabs

def train_test_split(imgs, labels, ohlabs, train_pct=.8, shuffle=True):
    """Exercise stub: split the three arrays into a train part and a test part.

    Takes the three values returned by ``load_cifar`` and returns six values:
    the train versions of the three inputs followed by the test versions, in
    that order. ``train_pct`` (default .8) and ``shuffle`` (default True) are
    presumably the fraction of samples assigned to the train part and whether
    samples are shuffled before splitting -- the body is left to the student.
    """

    # --------------------------
    # YOUR CODE HERE
    # --------------------------
    ...

    return train_imgs, train_labels, train_ohlabs, test_imgs, test_labels, test_ohlabs

In [None]:
# Load only batch 1 and split it with the default train_pct / shuffle settings.
imgs, labels, ohlabs = load_cifar(batches = [1])
d = train_test_split(imgs, labels, ohlabs)
train_imgs, train_labels, train_ohlabs, test_imgs, test_labels, test_ohlabs = d

# Short class names, one per CIFAR-10 class (ten entries).
cnames = ["plane", "car", "bird", "cat", "deer", "dog", "frog", "horse", "boat", "truck"]

# Sanity check: shapes of everything that was loaded, plus the value range of
# the images.
print "imgs  ", imgs.shape, "min", np.min(imgs), "max", np.max(imgs)
print "labels", labels.shape
print "onehot", ohlabs.shape
print "train_imgs  ", train_imgs.shape
print "train_labels", train_labels.shape
print "train_ohlabs", train_ohlabs.shape
print "test_imgs   ", test_imgs.shape
print "test_labels ", test_labels.shape
print "test_ohlabs ", test_ohlabs.shape
# Force a garbage-collection pass before reporting how much memory is left.
gc.collect()
print "free mem", humanbytes(psutil.virtual_memory().free)

In [None]:
# Visual check of the training images together with their labels
# (`show_image_mosaic` is a helper imported from rlx.ml).
show_image_mosaic(train_imgs, train_labels)

## Exercise 3: Build `tflearn` model

use the same network as in the previous problemset:

| layer   | input_size  | output_size | filter_size  | stride | n_filters |activation| var sizes  | params |
| ------- |:-----------:|:-----------:|:------------:|:------:|:---------:|:--------:|:--------------:|:------:|
| conv1   | 32x32x3     | 32x32x15    | 5x5          |1       | 15        | relu     | W1 = [5,5,3,15]<br/> b = [15]||
| conv2   | 32x32x15    | 16x16x18    | 5x5          |2       | 18        | relu     | W2 = [5,5,15,18]<br/> b = [18]||
| conv3   | 16x16x18    | 8x8x20      | 3x3          |2       | 20        | relu     | W3 = [3,3,18,20]<br/> b =[20]||
| maxpool | 8x8x20      | 4x4x20      |              |        |           |          | | k = 2 |
| fc      | 4x4x20      |    100      |              |        |           | relu     | W4 = [320,100] <br/>b=[100]||
| dropout | 100         |   100       |              |        |           |          | | pkeep = .75 |
| output  | 100         |   10        |              |        |           | softmax  | W5 = [100,10] <br/>b=[10]||

In [None]:
def get_model():
    """Exercise stub: build the tflearn model described in the table above.

    The default TensorFlow graph is reset first, so every call starts from an
    empty graph and variable handles obtained from a previously built model do
    not belong to the new one. ``network`` and ``model`` are left for the
    student to define. Other cells call ``set_weights``, ``get_weights`` and
    ``fit`` on the returned object and look up trainable variables named
    ``conv1/W:0``, ``conv1/b:0``, ``conv2/W:0`` and ``conv2/b:0``.
    """
    tf.reset_default_graph()
    num_classes=10
    
    # --------------------------
    # YOUR CODE HERE
    # --------------------------
    network = ...
    model   = ...
    
    return model
# Runs when the cell is executed (not when get_model is called): report free memory.
print "free mem", humanbytes(psutil.virtual_memory().free)

## Exercise 4: scale Alexnet weights for the first  layer

use [`skimage.transform.resize`](http://scikit-image.org/docs/dev/api/skimage.transform.html?highlight=resize#skimage.transform.resize). Weights in  `conv1` need to be transformed from  `[11,11,3,96]` to `[5,5,3,96]` by resizing **EACH** filter. Weights in `conv2` **DO NOT** need to be resized. Why?

In [None]:
def scale_conv1_weights(w):
    """Exercise stub: return the conv1 filters resized to 5x5.

    ``w`` is the object returned by ``get_alexnet_weights``. The result buffer
    is preallocated with zeros and shape ``[5,5,3,96]``; the student's code is
    expected to fill it, using ``skimage.transform.resize`` on each filter as
    the exercise text asks. As written (before the exercise is solved) the
    function returns the all-zero buffer.
    """
    # Imported here so that skimage is only needed for this exercise.
    from skimage.transform import resize
    conv1_ws = np.zeros([5,5,3,96])
    
    # --------------------------
    # YOUR CODE HERE
    # --------------------------
    
    return conv1_ws

In [None]:
# Reload the pretrained weights, rescale the conv1 filters and check the
# resulting shape.
w = get_alexnet_weights()
conv1_ws = scale_conv1_weights(w)
print conv1_ws.shape

In [None]:
# Plot the rescaled conv1 filters, one image per filter.
display_imgs(conv1_ws)

## Exercise 5: select random filters from the first two AlexNet layers and set the weights in your model

- for layer 1 you need to select filters so that [5,5,3,96] becomes [5,5,3,15]
- for layer 2 you need to select filters so that [5,5,48,256] becomes [5,5,15,18] $\rightarrow$ **NEED TO SELECT THE FILTERS IN L2 CORRESPONDING TO THOSE SELECTED IN L1**. If filter number `i` is selected in the first layer, then you must choose between filters in `[:,:,i/2,:]` in layer 2. We use `i/2` assuming that layer 2's correct size is `[5,5,96,256]`, obtained by collating a copy of `[5,5,48,256]` with itself.



### Part A: select random filters from AlexNet

In [None]:
def select_filters(n_filters_layer_1, n_filters_layer_2):
    """Exercise stub: pick a subset of AlexNet conv1/conv2 filters.

    Reloads the pretrained weights, rescales conv1 with
    ``scale_conv1_weights`` and reads the conv1 bias and the conv2
    weights/bias. The selection itself is left to the student.

    Parameters
    ----------
    n_filters_layer_1, n_filters_layer_2 : int
        Number of filters of the model's first and second conv layers (the
        calling cell reads them from the last dimension of ``conv1/W:0`` and
        ``conv2/W:0``).

    Returns
    -------
    tuple
        ``(selected_conv1_ws, selected_conv1_b, selected_conv2_w,
        selected_conv2_b)``.

    NOTE(review): ``conv2_w`` is used here exactly as stored in ``w``; it is
    not joined with a copy of itself inside this function.
    """
    w        = get_alexnet_weights()
    conv1_ws = scale_conv1_weights(w)
    
    conv1_b  = w["conv1"][1]
    conv2_w  = w["conv2"][0]
    conv2_b  = w["conv2"][1]
    
    # --------------------------
    # YOUR CODE HERE
    # --------------------------
    selected_conv1_ws = ...
    selected_conv1_b  = ...

    selected_conv2_w  = ...
    selected_conv2_b  = ...

    return selected_conv1_ws, selected_conv1_b, selected_conv2_w, selected_conv2_b
    

check your code. observe how we use `tflearn.variables.get_all_trainable_variable()` to get all TF variables and discover the number of filters for the first two layers

In [None]:
# Build a fresh model and index its trainable variables by name.
# NOTE(review): `vars` shadows the Python builtin of the same name; other cells
# and the exercise text refer to it by this name.
model = get_model()
vars = {i.name:i for i in tflearn.variables.get_all_trainable_variable()}
# The last dimension of each conv weight variable is its number of filters.
n_filters_layer_1 = vars["conv1/W:0"].shape.as_list()[-1]
n_filters_layer_2 = vars["conv2/W:0"].shape.as_list()[-1]
print "filters in conv1:", n_filters_layer_1
print "filters in conv2:", n_filters_layer_2

# Select that many pretrained filters for each of the two layers.
r = select_filters(n_filters_layer_1, n_filters_layer_2)
selected_conv1_ws, selected_conv1_b, selected_conv2_w, selected_conv2_b = r

print "conv1 shapes:", selected_conv1_ws.shape, selected_conv1_b.shape
print "conv2 shapes:", selected_conv2_w.shape, selected_conv2_b.shape

display_imgs(selected_conv1_ws)

### Part B: set weights in filters from selected filters

use `model.set_weights` and the `vars` dictionary

In [None]:
def set_conv1_conv2_weights(model, selected_conv1_ws, selected_conv1_b, selected_conv2_w, selected_conv2_b):
    """Exercise stub: copy the selected filters into the model's conv1/conv2.

    Builds a name -> variable dictionary from the trainable variables
    currently registered with tflearn; the student is expected to call
    ``model.set_weights`` once for each of ``conv1/W:0``, ``conv1/b:0``,
    ``conv2/W:0`` and ``conv2/b:0``.

    Returns the same ``model`` object that was passed in.
    """
    
    # Local name -> variable lookup (shadows the builtin `vars` inside this function).
    vars = {i.name:i for i in tflearn.variables.get_all_trainable_variable()}

    # --------------------------
    # YOUR CODE HERE
    # --------------------------
    model.set_weights( ... ) # for conv1/W:0
    model.set_weights( ... ) # for conv1/b:0
    model.set_weights( ... ) # for conv2/W:0
    model.set_weights( ... ) # for conv2/b:0

    return model

check your code

In [None]:
# Build a fresh model and load the previously selected filters into it.
model = get_model()
model = set_conv1_conv2_weights(model, selected_conv1_ws, selected_conv1_b, selected_conv2_w, selected_conv2_b)

# get_model() resets the default graph, so the name -> variable lookup is
# rebuilt for the model that was just created.
vars = {i.name:i for i in tflearn.variables.get_all_trainable_variable()}

# Each line prints True when the weights read back from the model match the
# selected ones.
print "check conv1/W", np.allclose(model.get_weights(vars["conv1/W:0"]), selected_conv1_ws)
print "check conv1/b", np.allclose(model.get_weights(vars["conv1/b:0"]), selected_conv1_b)
print "check conv2/W", np.allclose(model.get_weights(vars["conv2/W:0"]), selected_conv2_w)
print "check conv2/b", np.allclose(model.get_weights(vars["conv2/b:0"]), selected_conv2_b)

## Exercise 6: train model

if you are lucky with the filter selection, the model will train faster!!

In [None]:
# Fresh model for the training run.
# NOTE(review): get_model() resets the default graph, so variable handles
# collected from an earlier model do not belong to this one.
model = get_model()

# Draw a new selection of pretrained filters and load it into conv1/conv2.
r = select_filters(n_filters_layer_1, n_filters_layer_2)
selected_conv1_ws, selected_conv1_b, selected_conv2_w, selected_conv2_b = r

model = set_conv1_conv2_weights(model, selected_conv1_ws, selected_conv1_b, selected_conv2_w, selected_conv2_b)

# Name for this run, stamped with month-day_hour:minute.
model_name = "alexnet_cifar10_" + datetime.now().strftime("%m-%d_%H:%M")
print model_name

# --------------------------
# YOUR CODE HERE
# --------------------------
model.fit(...)


#### weights in layer 1 must have changed very little

In [None]:
# `get_model()` resets the default TF graph, so variable handles collected
# before the trained model was built belong to a discarded graph. Look the
# variables up again in the current graph before reading the trained weights.
vars = {i.name:i for i in tflearn.variables.get_all_trainable_variable()}
w1 = model.get_weights(vars["conv1/W:0"])
display_imgs(w1)

In [None]:
# Plot the filters that were loaded into conv1 before training, for comparison.
display_imgs(selected_conv1_ws)