In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
from PIL import Image
from collections import OrderedDict
from functools import partial
from random import shuffle
import matplotlib.pyplot as plt
import os
import re

In [2]:
# Side length of the square images; used for both spatial dimensions below
image_size = 224
# presumably (batch, height, width, channels) with a single image per batch — TODO confirm
input_shape = (1, image_size, image_size, 3)

# The VGG network requires the images to be zero mean.
# These are the mean values of the ImageNet training set,
# which will be used to make the images zero mean.
vgg_mean = [123.68, 116.779, 103.939]

# Hyperparameters used to define the final loss
# NOTE(review): which of these scales the content term and which the style
# term is not visible in this part of the notebook — confirm at the call site
alpha = 1e3
beta = 1e-3

## Downloading and loading pretrained VGGNet Model

In [3]:
def load_weights(weights_file, end_layer):
    """
    Load convolution kernels and biases from an ``.npz`` archive, stopping
    once ``end_layer`` layers are complete.

    Args:
        weights_file: Path to an ``.npz`` archive whose convolution entries
            are keyed ``conv<group>_<id>_W`` (kernel) and
            ``conv<group>_<id>_b`` (bias).
        end_layer: Number of complete layers (kernel and bias both loaded)
            to read before stopping.

    Returns:
        An OrderedDict, in sorted key order, mapping each layer name (the
        archive key without its two-character suffix, e.g. ``conv1_1``) to
        ``{'weights': array, 'bias': array}``. Archive entries that are not
        convolution parameters are ignored.
    """
    layers = OrderedDict()
    n_complete = 0

    # An NpzFile reads an array only when it is indexed. Walking the sorted
    # key names (rather than materialising every (key, array) pair up front)
    # means arrays past end_layer are never read, and the context manager
    # closes the archive when we are done.
    with np.load(weights_file) as weights:
        for k in sorted(weights.files):
            # If we have loaded the requested number of layers we stop
            if n_complete >= end_layer:
                break

            if re.search(r'conv\d+_\d+_W', k) is not None:
                field = 'weights'
            elif re.search(r'conv\d+_\d+_b', k) is not None:
                field = 'bias'
            else:
                # Not a convolution parameter: skip it instead of leaving an
                # empty entry behind that has neither 'weights' nor 'bias'
                continue

            w = weights[k]
            layer = layers.setdefault(k[:-2], {})

            if field == 'weights':
                # Blank line that visually separates the layers in the log
                print()

            was_complete = len(layer) == 2
            layer[field] = w
            if len(layer) == 2 and not was_complete:
                n_complete += 1

            print('Loading the weights for the layer {} and shape {}'.format(k,w.shape))

    return layers

In [4]:
# NOTE(review): leftover inspection cell — it only displays the os.path module,
# and its stored output exposes a machine-specific install path. Consider
# deleting this cell and clearing its output before sharing the notebook.
os.path

<module 'posixpath' from '/usr/local/bin/../Cellar/python/3.7.4_1/bin/../Frameworks/Python.framework/Versions/3.7/lib/python3.7/posixpath.py'>

In [5]:
# The pretrained weights have to be fetched by hand; stop early with
# download instructions when the file is not where we expect it
assert_msg = (
    'You need to download the vgg16_weights.npz'
    ' file by visiting https://www.cs.toronto.edu/~frossard/vgg16/vgg16_weights.npz'
    ' and place that in a folder called vgg in your project directory'
)
assert os.path.exists(os.path.join('vgg', 'vgg16_weights.npz')), assert_msg

# Keep only the first 7 complete convolution layers
vgg_layers = load_weights('vgg/vgg16_weights.npz', 7)


Loading the weights for the layer conv1_1_W and shape (3, 3, 3, 64)
Loading the weights for the layer conv1_1_b and shape (64,)

Loading the weights for the layer conv1_2_W and shape (3, 3, 64, 64)
Loading the weights for the layer conv1_2_b and shape (64,)

Loading the weights for the layer conv2_1_W and shape (3, 3, 64, 128)
Loading the weights for the layer conv2_1_b and shape (128,)

Loading the weights for the layer conv2_2_W and shape (3, 3, 128, 128)
Loading the weights for the layer conv2_2_b and shape (128,)

Loading the weights for the layer conv3_1_W and shape (3, 3, 128, 256)
Loading the weights for the layer conv3_1_b and shape (256,)

Loading the weights for the layer conv3_2_W and shape (3, 3, 256, 256)
Loading the weights for the layer conv3_2_b and shape (256,)

Loading the weights for the layer conv3_3_W and shape (3, 3, 256, 256)
Loading the weights for the layer conv3_3_b and shape (256,)


In [6]:
# Quick check of the container type returned by load_weights
type(vgg_layers)

collections.OrderedDict

In [8]:
# NOTE(review): this displays every loaded weight array, which bloats the
# saved notebook; showing only the layer names and array shapes would be
# enough to verify the load.
vgg_layers

OrderedDict([('conv1_1',
              {'weights': array([[[[ 4.80015397e-01, -1.72696680e-01,  3.75577137e-02, ...,
                         -1.27135560e-01, -5.02991639e-02,  3.48965675e-02],
                        [ 5.50379455e-01,  2.08774377e-02,  9.88311544e-02, ...,
                         -8.48205537e-02, -5.11389151e-02,  3.74943428e-02],
                        [ 4.29470569e-01,  1.17273867e-01,  3.40129584e-02, ...,
                         -1.32241577e-01, -5.33475243e-02,  7.57738389e-03]],
               
                       [[ 4.08547401e-01, -1.70375049e-01, -4.96297423e-03, ...,
                         -1.22360572e-01, -2.76450396e-01, -3.90796512e-02],
                        [ 4.40074533e-01,  4.73412387e-02,  5.13819456e-02, ...,
                         -9.88498852e-02, -2.96195745e-01, -7.04357103e-02],
                        [ 3.73466998e-01,  1.62062630e-01,  1.70863140e-03, ...,
                         -1.48207128e-01, -2.35300660e-01, -6.30356818e-02]]

## Define functions

In [10]:
def define_inputs(input_shape):
    """
    Create the graph inputs for style transfer.

    Args:
        input_shape: Shape shared by the two placeholders and the variable.

    Returns:
        A dict with float32 placeholders under 'content' and 'style', and a
        trainable float32 variable under 'generated' that starts from
        tf.random_normal_initializer() and holds the image being optimised.
    """

    # The two fed images only differ by name, so build them in one pass
    inputs = {
        name: tf.placeholder(name=name, shape=input_shape, dtype=tf.float32)
        for name in ('content', 'style')
    }

    inputs['generated'] = tf.get_variable(
        name='generated',
        shape=input_shape,
        dtype=tf.float32,
        initializer=tf.random_normal_initializer(),
        trainable=True,
    )

    return inputs

def define_tf_weights(layers=None):
    """
    Define the TensorFlow variables holding the VGG weights and biases.

    For every layer, two non-trainable float32 variables named 'weights' and
    'bias' are created inside a variable scope named after the layer, and
    initialised from the layer's arrays.

    Args:
        layers: Optional mapping of layer name to a dict with 'weights' and
            'bias' arrays. When omitted, the notebook-level ``vgg_layers``
            is used, which keeps existing zero-argument calls working.

    Raises:
        KeyError: If a layer entry lacks 'weights' or 'bias'.
    """

    # Fall back to the notebook global only when no mapping was supplied, so
    # the function can also be driven with an explicit set of layers
    if layers is None:
        layers = vgg_layers

    for k, w_dict in layers.items():
        w, b = w_dict['weights'], w_dict['bias']
        with tf.variable_scope(k):
            tf.get_variable(name='weights', initializer=tf.constant(w, dtype=tf.float32), trainable=False)
            tf.get_variable(name='bias', initializer=tf.constant(b, dtype=tf.float32), trainable=False)

In [11]:
def get_vgg_pooling_indices(sorted_layer_ids):
    """
    Find the positions after which a pooling layer should be applied.

    A pooling layer appears at the end of each convolution group (layer names
    look like conv<group>_<id>), so a position is reported whenever the next
    layer belongs to a different group.

    Args:
        sorted_layer_ids: Sequence of layer names in network order.

    Returns:
        A list of integer positions into ``sorted_layer_ids``; each one is
        the last layer of a group that is followed by another group. The
        final group never yields a position because no later layer marks its
        end. An empty input gives an empty list.

    Raises:
        ValueError: If a layer name contains no group number.
    """
    def _group_of(layer_id):
        # Read the whole first run of digits so that group numbers with more
        # than one digit (e.g. conv10_1) are parsed correctly
        found = re.match(r'\D*(\d+)', layer_id)
        if found is None:
            raise ValueError('Cannot find a group number in layer id {!r}'.format(layer_id))
        return int(found.group(1))

    groups = [_group_of(k) for k in sorted_layer_ids]

    # Position i closes a group when the layer right after it is in another group
    return [i for i in range(len(groups) - 1) if groups[i] != groups[i + 1]]

In [12]:
# Iterating an OrderedDict yields its keys, i.e. the layer names in load order
pool_inds = get_vgg_pooling_indices(list(vgg_layers))
print('pooling indices are: {}'.format(pool_inds))

pooling indices are: [1, 3]


In [13]:
def build_vggnet(inp, layer_ids, pool_inds, on_cpu=False):
    """
    Compute the outputs of the VGG convolution stack for ``inp``.

    Each layer applies a stride-1 'SAME' convolution, adds the bias and
    applies ReLU, using the 'weights' and 'bias' variables that already
    exist under the variable scope named after the layer. After the layers
    selected by ``pool_inds`` a 2x2, stride-2 average pooling is applied.

    Args:
        inp: Input tensor fed to the first layer.
        layer_ids: Layer names, in network order; each is also the variable
            scope that holds the layer's parameters.
        pool_inds: Collection identifying the layers followed by pooling,
            given as positions in ``layer_ids`` (as returned by
            get_vgg_pooling_indices) and/or as layer names.
        on_cpu: Unused; kept so existing callers keep working.

    Returns:
        An OrderedDict of output tensors in computation order, keyed by the
        layer name, plus one entry per pooling step keyed by the layer name
        with 'conv' replaced by 'pool'.
    """
    outputs = OrderedDict()

    out = inp

    for idx, lid in enumerate(layer_ids):
        with tf.variable_scope(lid, reuse=tf.AUTO_REUSE):
            print('Computing outputs for the layer {}'.format(lid))
            w, b = tf.get_variable('weights'), tf.get_variable('bias')
            out = tf.nn.conv2d(filter=w, input=out, strides=[1,1,1,1], padding='SAME')
            out = tf.nn.relu(tf.nn.bias_add(value=out, bias=b))
            outputs[lid] = out

        # Layer names are strings while the pooling indices are integer
        # positions, so testing the name alone never matches and pooling
        # would be skipped; compare the position too.
        if idx in pool_inds or lid in pool_inds:
            pool_name = lid.replace('conv','pool')
            with tf.name_scope(pool_name):
                # The tensor is passed positionally because the name of this
                # first argument differs between TensorFlow releases
                out = tf.nn.avg_pool(out, ksize=[1,2,2,1], strides=[1, 2, 2, 1], padding='SAME')
                outputs[pool_name] = out

    return outputs

## Loss function

In [14]:
def define_content_loss(inputs, layer_ids, pool_inds, c_weight):
    """
    Compute the content loss between the content image and the generated image.

    Both images are passed through the VGG stack and only the last output of
    each pass is compared: the loss is ``c_weight`` times the mean of half
    the squared difference of those two tensors.

    Args:
        inputs: Dict holding the 'content' and 'generated' tensors.
        layer_ids: Layer names forwarded to build_vggnet.
        pool_inds: Pooling selection forwarded to build_vggnet.
        c_weight: Multiplier applied to the loss.

    Returns:
        The weighted content loss tensor.
    """
    content_feats = build_vggnet(inputs["content"], layer_ids, pool_inds)
    generated_feats = build_vggnet(inputs["generated"], layer_ids, pool_inds)

    # Only the deepest output of each pass takes part in the comparison
    last_content = list(content_feats.values())[-1]
    last_generated = list(generated_feats.values())[-1]

    difference = last_content - last_generated
    return c_weight * tf.reduce_mean(0.5 * difference**2)

In [15]:
def define_style_matrix(layer_out):
    """
    Build the style matrix of a layer output.

    The output is flattened to rows of length C (its last dimension) and
    multiplied by its own transpose, so entry (i, j) sums the products of
    the activations of filter i and filter j over all rows. With C channels
    the result is a C x C matrix describing how each filter's activations
    correlate with those of every other filter.
    """
    channel_count = layer_out.get_shape().as_list()[-1]

    # One row per position, one column per channel
    flattened = tf.reshape(layer_out, [-1, channel_count])

    return tf.matmul(flattened, flattened, transpose_a=True)


In [16]:
def define_style_loss(inputs, layer_ids, pool_inds, s_weight, layer_weights=None):
    """
    Compute the style loss between the style image and the generated image.

    Both images go through the VGG stack, a style matrix is computed for
    every output, and the mean squared difference of matching style matrices
    is accumulated over the outputs.

    Args:
        inputs: Dict holding the 'style' and 'generated' tensors.
        layer_ids: Layer names forwarded to build_vggnet.
        pool_inds: Pooling selection forwarded to build_vggnet.
        s_weight: Multiplier applied to the summed loss.
        layer_weights: Optional per-output weights, indexed by output
            position. When given, each term is scaled by its weight times
            0.5; when None, each term is scaled by 1/len(layer_ids).

    Returns:
        The weighted style loss tensor.
    """
    style_feats = build_vggnet(inputs["style"], layer_ids, pool_inds)
    generated_feats = build_vggnet(inputs["generated"], layer_ids, pool_inds)

    style_grams = [define_style_matrix(feat) for feat in style_feats.values()]
    generated_grams = [define_style_matrix(feat) for feat in generated_feats.values()]

    per_output_terms = []
    for idx, (style_gram, generated_gram) in enumerate(zip(style_grams, generated_grams)):
        if layer_weights is None:
            # Uniform weighting across the requested layers
            term = (1.0/len(layer_ids)) * tf.reduce_mean((style_gram - generated_gram)**2)
        else:
            # Caller-supplied weighting, looked up by output position
            term = tf.gather(layer_weights, idx) * 0.5 * \
                tf.reduce_mean((style_gram - generated_gram)**2)
        per_output_terms.append(term)

    return s_weight * tf.reduce_sum(per_output_terms)

## Define optimizer

In [17]:
def define_optimize(loss, learning_rate = 5.0):
    """
    Create the operation that minimises ``loss`` with the Adam optimizer.

    Args:
        loss: Loss tensor to minimise.
        learning_rate: Step size handed to the optimizer.

    Returns:
        The operation produced by the optimizer's ``minimize`` call.
    """
    adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
    return adam.minimize(loss)