In [75]:
import tensorflow as tf
import numpy as np
import scipy
from scipy.io import loadmat

from PIL import Image

import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
%matplotlib inline
import pdb

## 1. Constants

In [38]:
# Shape of the image tensor fed to the network: height x width x channels.
OUT_HEIGHT = 600
OUT_WIDTH = 800
OUT_CHANNELS = 3

## 2. Load VGG model. 

The first step is to load the pre-trained VGG convolutional neural network and rebuild it as a native TensorFlow model.

In [15]:
# Relative path to the pre-trained VGG-19 weights, stored as a MATLAB .mat file.
VGG_PATH = "pretrained_models/imagenet-vgg-verydeep-19.mat"
# Read the .mat file; `loadmat` returns a dict keyed by MATLAB variable name.
vgg = loadmat(VGG_PATH)
# Keep only the per-layer records and not the metadata.
# The [0] presumably strips the outer singleton dimension MATLAB adds — TODO confirm.
layers = vgg['layers'][0]

Now `layers` is an array with one entry per layer of the network; each convolutional entry holds that layer's pre-trained weights and biases as arrays. The order of the layers can be found [here](https://gist.github.com/ksimonyan/3785162f95cd2d5fee77#file-readme-md). 

To load the VGG model as a TensorFlow model, I'll need to do the following:

1. Load the pre-trained weights for each layer
2. Construct a `tf.nn.conv2d` op for each 2D convolutional layer, based on the loaded weights
3. Apply the `relu` activation to each of these convolutional layers
4. Remove the max-pooling layers in the VGG network, and replace them with average-pooling layers
5. Omit the fully-connected layers in the VGG model. 

In [110]:
# encapsulate the functions needed to load the VGG model in a class
class VGGLoader:
    """Rebuild the convolutional part of VGG-19 as a dict of TensorFlow ops.

    The pre-trained weights are read from a MATLAB ``.mat`` file and embedded
    in the graph as constants. Every pooling stage is an average pool and the
    fully-connected layers of the original network are left out.

    Parameters
    ----------
    vgg_path : str
        Path to the ``.mat`` file holding the pre-trained weights.
    out_height, out_width, out_channels : int
        Shape of the (single-image) input variable created by `get_model`.
    """

    # Ordered (name, index) pairs describing the graph to build. `index` is
    # the position of the convolution inside the .mat file's `layers` array;
    # None marks an average-pooling stage, which has no weights to load.
    _ARCHITECTURE = (
        ('conv1_1', 0), ('conv1_2', 2), ('avgpool1', None),
        ('conv2_1', 5), ('conv2_2', 7), ('avgpool2', None),
        ('conv3_1', 10), ('conv3_2', 12), ('conv3_3', 14), ('conv3_4', 16),
        ('avgpool3', None),
        ('conv4_1', 19), ('conv4_2', 21), ('conv4_3', 23), ('conv4_4', 25),
        ('avgpool4', None),
        ('conv5_1', 28), ('conv5_2', 30), ('conv5_3', 32), ('conv5_4', 34),
        ('avgpool5', None),
    )

    def __init__(self, vgg_path, out_height, out_width, out_channels):
        self.vgg_path = vgg_path
        self.out_height = out_height
        self.out_width = out_width
        self.out_channels = out_channels

    def _load_layers(self):
        """Read the .mat file and store its layer records in `self.layers`."""
        vgg = loadmat(self.vgg_path)
        # keep just the per-layer records, not the file metadata
        self.layers = vgg['layers'][0]

    def _get_weights(self, layer, expected_name=None):
        """Return ``(weight, bias)`` for the layer at index `layer`.

        When `expected_name` is given, the name stored in the record is
        compared against it and a ``ValueError`` is raised on mismatch, so a
        wrong index fails loudly instead of silently loading other weights.
        """
        record = self.layers[layer][0][0]
        if expected_name is not None:
            layer_name = record[0][0]
            if layer_name != expected_name:
                raise ValueError('layer %d is named %r, expected %r'
                                 % (layer, layer_name, expected_name))
        weight = record[2][0][0]
        bias = record[2][0][1]
        return weight, bias

    def _conv2d(self, prev_layer, layer, layer_name=None):
        """Return a stride-1, SAME-padded convolution of `prev_layer` plus bias,
        using the pre-trained weights stored at index `layer`."""
        weight, bias = self._get_weights(layer, layer_name)
        W = tf.constant(weight)
        # flatten the bias to 1-D so it broadcasts over the channel axis
        b = tf.constant(np.reshape(bias, (bias.size,)))
        # The kernel is passed positionally: its keyword is `filter` in older
        # TensorFlow releases and `filters` in newer ones.
        return tf.nn.conv2d(
            prev_layer, W, strides=[1, 1, 1, 1], padding='SAME') + b

    def _relu(self, conv2d_layer):
        """Apply a ReLU activation to `conv2d_layer`."""
        return tf.nn.relu(conv2d_layer)

    def _conv2d_relu(self, prev_layer, layer, layer_name=None):
        """Convolution with the pre-trained weights followed by a ReLU."""
        return self._relu(self._conv2d(prev_layer, layer, layer_name))

    def _avgpool(self, prev_layer):
        """Return a 2x2, stride-2, SAME-padded average pool of `prev_layer`."""
        return tf.nn.avg_pool(prev_layer, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    ########################################
    # Key method - Load and return the VGG model as a Tensorflow Model
    ########################################
    def get_model(self):
        """Load the weights and build the graph.

        Returns
        -------
        dict
            Maps ``'input'`` and every name in `_ARCHITECTURE` to the
            corresponding TensorFlow variable / op.

        Raises
        ------
        ValueError
            If a layer record in the file does not carry the expected name.
        """
        # Load and save the pre-trained weights (re-read on every call)
        self._load_layers()

        model = {}
        model['input'] = tf.Variable(
            np.zeros((1, self.out_height, self.out_width, self.out_channels)),
            dtype='float32')

        # chain the layers: each op consumes the output of the previous one
        previous = model['input']
        for name, index in self._ARCHITECTURE:
            if index is None:
                previous = self._avgpool(previous)
            else:
                previous = self._conv2d_relu(previous, index, name)
            model[name] = previous
        return model

In [111]:
# Configure the loader with the weights file and the target image shape.
loader = VGGLoader(
    vgg_path=VGG_PATH,
    out_height=OUT_HEIGHT,
    out_width=OUT_WIDTH,
    out_channels=OUT_CHANNELS,
)

In [112]:
# Build the graph: get_model() loads the weights and returns a dict mapping
# 'input' and each layer name ('conv1_1' ... 'avgpool5') to its TensorFlow op.
# NOTE(review): the NameError saved below this cell does not match the current
# class (`_relu` receives `conv2d_layer` as a parameter) — presumably stale
# output from an earlier version; re-run to confirm.
model = loader.get_model()

NameError: global name 'conv2d_layer' is not defined

In [108]:
def load_vgg_model(path, image_height=None, image_width=None, color_channels=None):
    """
    Returns a model for the purpose of 'painting' the picture.
    Takes only the convolution layer weights and wrap using the TensorFlow
    Conv2d, Relu and AveragePooling layer. VGG actually uses maxpool but
    the paper indicates that using AveragePooling yields better results.
    The last few fully connected layers are not used.

    Parameters:
        path: location of the pre-trained VGG weights (.mat file).
        image_height, image_width, color_channels: shape of the input
            variable. Each one defaults to the notebook-level constant
            IMAGE_HEIGHT / IMAGE_WIDTH / COLOR_CHANNELS when left as None,
            so existing `load_vgg_model(path)` calls keep working.

    Returns:
        dict mapping 'input' and each layer name ('conv1_1' ... 'avgpool5')
        to the corresponding TensorFlow variable / op.

    Raises:
        AssertionError: if a layer record does not carry the expected name.

    Here is the detailed configuration of the VGG model:
        0 is conv1_1 (3, 3, 3, 64)
        1 is relu
        2 is conv1_2 (3, 3, 64, 64)
        3 is relu    
        4 is maxpool
        5 is conv2_1 (3, 3, 64, 128)
        6 is relu
        7 is conv2_2 (3, 3, 128, 128)
        8 is relu
        9 is maxpool
        10 is conv3_1 (3, 3, 128, 256)
        11 is relu
        12 is conv3_2 (3, 3, 256, 256)
        13 is relu
        14 is conv3_3 (3, 3, 256, 256)
        15 is relu
        16 is conv3_4 (3, 3, 256, 256)
        17 is relu
        18 is maxpool
        19 is conv4_1 (3, 3, 256, 512)
        20 is relu
        21 is conv4_2 (3, 3, 512, 512)
        22 is relu
        23 is conv4_3 (3, 3, 512, 512)
        24 is relu
        25 is conv4_4 (3, 3, 512, 512)
        26 is relu
        27 is maxpool
        28 is conv5_1 (3, 3, 512, 512)
        29 is relu
        30 is conv5_2 (3, 3, 512, 512)
        31 is relu
        32 is conv5_3 (3, 3, 512, 512)
        33 is relu
        34 is conv5_4 (3, 3, 512, 512)
        35 is relu
        36 is maxpool
        37 is fullyconnected (7, 7, 512, 4096)
        38 is relu
        39 is fullyconnected (1, 1, 4096, 4096)
        40 is relu
        41 is fullyconnected (1, 1, 4096, 1000)
        42 is softmax
    """
    # Explicit arguments win; otherwise fall back to the notebook globals
    # (looked up only when needed, so they may be defined after this cell).
    if image_height is None:
        image_height = IMAGE_HEIGHT
    if image_width is None:
        image_width = IMAGE_WIDTH
    if color_channels is None:
        color_channels = COLOR_CHANNELS

    vgg = scipy.io.loadmat(path)

    vgg_layers = vgg['layers']

    def _weights(layer, expected_layer_name):
        """
        Return the weights and bias from the VGG model for a given layer.
        """
        record = vgg_layers[0][layer][0][0]
        W = record[2][0][0]
        b = record[2][0][1]
        layer_name = record[0][0]

        assert layer_name == expected_layer_name, (
            'layer %d is named %r, expected %r'
            % (layer, layer_name, expected_layer_name))
        return W, b

    def _relu(conv2d_layer):
        """
        Return the RELU function wrapped over a TensorFlow layer. Expects a
        Conv2d layer input.
        """
        return tf.nn.relu(conv2d_layer)

    def _conv2d(prev_layer, layer, layer_name):
        """
        Return the Conv2D layer using the weights, biases from the VGG
        model at 'layer'.
        """
        W, b = _weights(layer, layer_name)
        W = tf.constant(W)
        # flatten the bias to 1-D so it broadcasts over the channel axis
        b = tf.constant(np.reshape(b, (b.size,)))
        # The kernel is passed positionally: its keyword is `filter` in older
        # TensorFlow releases and `filters` in newer ones.
        return tf.nn.conv2d(
            prev_layer, W, strides=[1, 1, 1, 1], padding='SAME') + b

    def _conv2d_relu(prev_layer, layer, layer_name):
        """
        Return the Conv2D + RELU layer using the weights, biases from the VGG
        model at 'layer'.
        """
        return _relu(_conv2d(prev_layer, layer, layer_name))

    def _avgpool(prev_layer):
        """
        Return the AveragePooling layer.
        """
        return tf.nn.avg_pool(prev_layer, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    # Ordered (name, index) pairs; index None marks an average-pooling stage
    # that replaces the corresponding maxpool of the original network.
    architecture = (
        ('conv1_1', 0), ('conv1_2', 2), ('avgpool1', None),
        ('conv2_1', 5), ('conv2_2', 7), ('avgpool2', None),
        ('conv3_1', 10), ('conv3_2', 12), ('conv3_3', 14), ('conv3_4', 16),
        ('avgpool3', None),
        ('conv4_1', 19), ('conv4_2', 21), ('conv4_3', 23), ('conv4_4', 25),
        ('avgpool4', None),
        ('conv5_1', 28), ('conv5_2', 30), ('conv5_3', 32), ('conv5_4', 34),
        ('avgpool5', None),
    )

    # Constructs the graph model.
    graph = {}
    graph['input'] = tf.Variable(
        np.zeros((1, image_height, image_width, color_channels)), dtype='float32')
    previous = graph['input']
    for name, index in architecture:
        if index is None:
            previous = _avgpool(previous)
        else:
            previous = _conv2d_relu(previous, index, name)
        graph[name] = previous
    return graph

In [109]:
# Input image shape; load_vgg_model reads these names as globals when called.
IMAGE_HEIGHT, IMAGE_WIDTH, COLOR_CHANNELS = 600, 800, 3
# Build the graph; as the cell's last expression, the returned dict is displayed.
load_vgg_model(VGG_PATH)

{'avgpool1': <tf.Tensor 'AvgPool:0' shape=(1, 300, 400, 64) dtype=float32>,
 'avgpool2': <tf.Tensor 'AvgPool_1:0' shape=(1, 150, 200, 128) dtype=float32>,
 'avgpool3': <tf.Tensor 'AvgPool_2:0' shape=(1, 75, 100, 256) dtype=float32>,
 'avgpool4': <tf.Tensor 'AvgPool_3:0' shape=(1, 38, 50, 512) dtype=float32>,
 'avgpool5': <tf.Tensor 'AvgPool_4:0' shape=(1, 19, 25, 512) dtype=float32>,
 'conv1_1': <tf.Tensor 'Relu:0' shape=(1, 600, 800, 64) dtype=float32>,
 'conv1_2': <tf.Tensor 'Relu_1:0' shape=(1, 600, 800, 64) dtype=float32>,
 'conv2_1': <tf.Tensor 'Relu_2:0' shape=(1, 300, 400, 128) dtype=float32>,
 'conv2_2': <tf.Tensor 'Relu_3:0' shape=(1, 300, 400, 128) dtype=float32>,
 'conv3_1': <tf.Tensor 'Relu_4:0' shape=(1, 150, 200, 256) dtype=float32>,
 'conv3_2': <tf.Tensor 'Relu_5:0' shape=(1, 150, 200, 256) dtype=float32>,
 'conv3_3': <tf.Tensor 'Relu_6:0' shape=(1, 150, 200, 256) dtype=float32>,
 'conv3_4': <tf.Tensor 'Relu_7:0' shape=(1, 150, 200, 256) dtype=float32>,
 'conv4_1': <tf.T

In [93]:
# Inspect the name field of the record at layer index 0 (the same indexing
# load_vgg_model uses for `layer_name`, with layer = 0).
# NOTE(review): the saved output below reads u'relu1_1', whereas the layer table
# in load_vgg_model lists index 0 as conv1_1 — presumably the output comes from
# an earlier run with a different index; re-run to confirm.
vgg['layers'][0][0][0][0][0][0]

u'relu1_1'

In [None]:
# Scratch cell: the weight and bias lookups used inside load_vgg_model.
# NOTE(review): `vgg_layers` and `layer` are only defined as locals inside
# load_vgg_model in the cells shown here, so this cell presumably raises
# NameError on a fresh kernel — define them first or remove the cell.
vgg_layers[0][layer][0][0][2][0][0]
vgg_layers[0][layer][0][0][2][0][1]