# Using TensorFlow for Stylenet/NeuralStyle
---------------------------------------

We use two images, an original image and a style image, and try to render the original image in the style of the style image.

Reference paper:
https://arxiv.org/abs/1508.06576

Need to download the model 'imagenet-vgg-verydeep-19.mat' from:
http://www.vlfeat.org/matconvnet/models/beta16/imagenet-vgg-verydeep-19.mat

We start by loading the necessary libraries and clearing any prior computational graph.

In [1]:
import os
import scipy.misc
import scipy.io
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import ops
# Clear the default graph so re-running this notebook does not build on a prior graph
ops.reset_default_graph()

Start a graph session

In [2]:
# Single session shared by every later cell that calls sess.run(...)
sess = tf.Session()

Load image files and set algorithm parameters and weights.

In [20]:
# Image Files
original_image_file = 'temp/book_cover.jpg'
style_image_file = 'temp/starry_night.jpg'

# Saved VGG Network path
# Set the VGG19_MAT_PATH environment variable to point at your own copy of the
# downloaded .mat file; the original author's location is kept as the default.
vgg_path = os.environ.get(
    'VGG19_MAT_PATH',
    '/home/nick/Documents/tensorflow/vgg_19_models/imagenet-vgg-verydeep-19.mat')

# Default Arguments
original_image_weight = 5.0    # Multiplier on the original-image loss
style_image_weight = 500.0     # Multiplier on the summed style-layer losses
regularization_weight = 100    # Multiplier on the total variation loss
learning_rate = 0.001          # Step size handed to the optimizer
generations = 5000             # Number of training iterations
output_generations = 250       # Print the loss / save an image every this many iterations
beta1 = 0.9   # For the Adam optimizer
beta2 = 0.999 # For the Adam optimizer

Read in the images.

In [21]:
# Read in images
original_image = scipy.misc.imread(original_image_file)
style_image = scipy.misc.imread(style_image_file)

# Rescale the style image so that its width matches the original image's width.
# The ratio is forced to float: imresize reads a float `size` as a fraction of the
# current size but an int as a percentage, so integer division (Python 2) would
# silently produce the wrong scale.
target_shape = original_image.shape
style_image = scipy.misc.imresize(style_image, float(target_shape[1]) / style_image.shape[1])

VGG-19 Layer setup (From the paper https://arxiv.org/abs/1508.06576 )

In [22]:
# VGG-19 Layer Setup
# From paper: five blocks of conv/relu pairs (2, 2, 4, 4 and 4 pairs respectively),
# with a pooling layer after each of the first four blocks.
vgg_layers = [
    layer_name
    for block, pair_count in enumerate((2, 2, 4, 4, 4), start=1)
    for layer_name in (
        ['{}{}_{}'.format(op, block, position)
         for position in range(1, pair_count + 1)
         for op in ('conv', 'relu')]
        + (['pool{}'.format(block)] if block < 5 else [])
    )
]

Extract weights and matrix means

In [23]:
def extract_net_info(path_to_params):
    """Load a saved network .mat file and pull out what the notebook needs.

    Args:
        path_to_params: path of the .mat file, handed to scipy.io.loadmat.

    Returns:
        A (mat_mean, network_weights) tuple: the file's 'normalization' matrix
        averaged over its first two axes, and the first row of its 'layers' entry.
    """
    contents = scipy.io.loadmat(path_to_params)
    # The matrix sits three levels deep inside the loaded 'normalization' entry
    averaged = np.mean(contents['normalization'][0][0][0], axis=(0, 1))
    layer_records = contents['layers'][0]
    return averaged, layer_records

Create the VGG-19 Network

In [24]:
def vgg_network(network_weights, init_image):
    """Stack the operations listed in `vgg_layers` on top of `init_image`.

    Args:
        network_weights: per-layer parameter records, indexed in step with
            `vgg_layers`; only the entries at convolution positions are read.
        init_image: tensor fed into the first layer.

    Returns:
        dict mapping every layer name in `vgg_layers` to that layer's output tensor.
    """
    layer_outputs = {}
    current = init_image

    for index, layer_name in enumerate(vgg_layers):
        kind = layer_name[0]
        if kind == 'c':
            # Convolution: unpack the stored kernel and bias for this position
            kernel, bias = network_weights[index][0][0][0][0]
            # Swap the kernel's first two axes before handing it to TensorFlow
            kernel = np.transpose(kernel, (1, 0, 2, 3))
            convolved = tf.nn.conv2d(current, tf.constant(kernel), (1, 1, 1, 1), 'SAME')
            current = tf.nn.bias_add(convolved, bias.reshape(-1))
        elif kind == 'r':
            current = tf.nn.relu(current)
        else:
            # Any other name is treated as a 2x2 max-pool with stride 2
            current = tf.nn.max_pool(current, (1, 2, 2, 1), (1, 2, 2, 1), 'SAME')
        layer_outputs[layer_name] = current
    return layer_outputs

Here we define which layers apply to the original or style image

In [25]:
# Layer whose activations are matched against the original image
original_layer = 'relu4_2'
# The first relu of each of the five blocks is matched against the style image
style_layers = ['relu{}_1'.format(block) for block in range(1, 6)]

Get network parameters

In [26]:
# Load the normalization mean and the per-layer parameters from the saved VGG file
normalization_mean, network_weights = extract_net_info(vgg_path)

# Prepend a leading dimension of 1 to each image's shape; these are the shapes
# used for the placeholders and the combined-image variable
shape = (1,) + original_image.shape
style_shape = (1,) + style_image.shape
# Filled in by later cells: layer name -> values computed from the original / style image
original_features = {}
style_features = {}

Define placeholder and VGG network

In [27]:
# Placeholder with the batched original-image shape, and a VGG graph built on top of it
image = tf.placeholder('float', shape=shape)
vgg_net = vgg_network(network_weights, image)

Normalize original image

In [28]:
# Subtract the normalization mean and wrap the image in a leading batch dimension
original_minus_mean = original_image - normalization_mean
original_norm = np.array([original_minus_mean])
# Record the network's activations at `original_layer` for the original image
original_features[original_layer] = sess.run(vgg_net[original_layer], feed_dict={image: original_norm})

Get style image network

In [29]:
# Rebuild the network on a placeholder sized for the style image
image = tf.placeholder('float', shape=style_shape)
vgg_net = vgg_network(network_weights, image)

# Mean-subtract the style image and give it a leading batch dimension
style_minus_mean = style_image - normalization_mean
style_norm = np.array([style_minus_mean])

for layer in style_layers:
    activations = sess.run(vgg_net[layer], feed_dict={image: style_norm})
    # Flatten to (positions, channels) so the Gram matrix is channels x channels
    flattened = np.reshape(activations, (-1, activations.shape[3]))
    # Gram matrix of the activations, scaled by the total number of activations
    style_features[layer] = np.matmul(flattened.T, flattened) / flattened.size

Make Combined Image

In [30]:
# Start the combined image from random normal noise scaled by 0.256
# NOTE(review): no seed is passed, so the starting image differs between runs
initial = tf.random_normal(shape) * 0.256
# The image itself is the variable being optimized
image = tf.Variable(initial)
# Build the network on the variable so the layer outputs depend on the image
vgg_net = vgg_network(network_weights, image)

Define our loss as (Original Image Loss + Style Loss + Total Variation Loss)

In [31]:
# Loss
# Loss from Original Image: mean squared difference between the combined image's
# activations and the original image's activations at `original_layer`
# (2 * l2_loss is the sum of squares; dividing by the element count gives the mean).
original_loss = original_image_weight * (2 * tf.nn.l2_loss(vgg_net[original_layer] - original_features[original_layer]) /
                original_features[original_layer].size)

# Loss from Style Image: for every style layer, compare the Gram matrix of the
# combined image's activations with the one precomputed from the style image.
style_loss = 0
style_losses = []
for style_layer in style_layers:
    layer = vgg_net[style_layer]
    feats, height, width, channels = [x.value for x in layer.get_shape()]
    size = height * width * channels
    features = tf.reshape(layer, (-1, channels))
    style_gram_matrix = tf.matmul(tf.transpose(features), features) / size
    style_expected = style_features[style_layer]
    style_losses.append(2 * tf.nn.l2_loss(style_gram_matrix - style_expected) / style_expected.size)
style_loss += style_image_weight * tf.reduce_sum(style_losses)

# To smooth the results, we add in total variation loss.
# Each squared-difference term is divided by the number of elements in its own
# difference tensor. `image` was created with the static `shape`, so these counts
# are plain Python integers and need no session run.
total_var_y = shape[0] * (shape[1] - 1) * shape[2] * shape[3]  # elements in image[:,1:,:,:]
total_var_x = shape[0] * shape[1] * (shape[2] - 1) * shape[3]  # elements in image[:,:,1:,:]
first_term = regularization_weight * 2
# Differences between neighbouring entries along axis 1
second_term_numerator = tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:])
second_term = second_term_numerator / total_var_y
# Differences between neighbouring entries along axis 2
third_term = (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) / total_var_x)
total_variation_loss = first_term * (second_term + third_term)

# Combined Loss
loss = original_loss + style_loss + total_variation_loss

Now we extract the style layer information

In [32]:
# Rebuild the style-loss pieces for a single layer so they can be inspected on their own
style_layer = 'relu2_1'
layer = vgg_net[style_layer]
feats, height, width, channels = [x.value for x in layer.get_shape()]
size = height * width * channels
features = tf.reshape(layer, (-1, channels))
style_gram_matrix = tf.matmul(tf.transpose(features), features) / size
style_expected = style_features[style_layer]
# Keep this layer's loss under its own name rather than appending it to `style_losses`:
# `loss` has already been built from that list, so an append here never affected
# training and only made the list grow by one entry each time the cell was re-run.
inspected_style_loss = 2 * tf.nn.l2_loss(style_gram_matrix - style_expected) / style_expected.size

Declare optimization and initialize the variables in the graph.

In [33]:
# Declare Optimization Algorithm
# Adam, using the learning rate and beta values from the parameters cell.
# NOTE(review): the training cell further down re-declares `optimizer` and
# `train_step` with plain gradient descent, so its loop runs that train_step
# rather than this one — confirm which optimizer is intended.
optimizer = tf.train.AdamOptimizer(learning_rate, beta1, beta2)
train_step = optimizer.minimize(loss)

# Initialize Variables and start Training
sess.run(tf.global_variables_initializer())

For sanity and checking purposes, let's look at the matrices of intermediate images for the style layers.

In [34]:
for style_layer in style_layers:
    layer = vgg_net[style_layer]
    # Static dimensions of this layer's output; the first one is not part of `size`
    feats, height, width, channels = [dim.value for dim in layer.get_shape()]
    size = height * width * channels

    # Header, current layer values, element count, then a blank separator line
    print('-------Layer: {} -------'.format(style_layer))
    print(sess.run(layer))
    print(size)
    print('')

-------Layer: relu1_1 -------
[[[[  5.46172917e-01   9.92441326e-02   1.65350467e-01 ...,
      1.01470089e+00   6.88668132e-01   6.07679486e-01]
   [  1.04633188e+00   1.42677456e-01   2.02847883e-01 ...,
      1.04049456e+00   4.46966410e-01   5.67616999e-01]
   [  9.44447458e-01   9.57916826e-02   5.35283238e-02 ...,
      1.01360738e+00   4.82194543e-01   4.57370222e-01]
   ..., 
   [  7.98409760e-01   2.62508690e-01   1.63674399e-01 ...,
      1.08287823e+00   6.82613015e-01   5.21779716e-01]
   [  2.50822067e-01   2.98981339e-01   1.77775383e-01 ...,
      9.34505463e-01   7.71890640e-01   4.95255679e-01]
   [  7.59016812e-01   2.45658815e-01   2.49062672e-01 ...,
      9.47369337e-01   4.31696802e-01   5.18461406e-01]]

  [[  3.50127578e-01   6.55320138e-02   1.13581538e-01 ...,
      8.30636561e-01   5.90469778e-01   4.08085167e-01]
   [  9.41003978e-01   1.56615585e-01   1.50822163e-01 ...,
      8.58183980e-01   2.27086306e-01   3.19344580e-01]
   [  7.40134895e-01   1.219328

In [35]:
# Declare Optimization Algorithm
# NOTE(review): this replaces the Adam `train_step` declared in an earlier cell
# with plain gradient descent — confirm which optimizer is intended.
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_step = optimizer.minimize(loss)

# Initialize Variables and start Training
sess.run(tf.global_variables_initializer())
for i in range(generations):
    sess.run(train_step)

    # Only report and save on every `output_generations`-th generation
    generation = i + 1
    if generation % output_generations != 0:
        continue

    print('Generation {} out of {}, loss: {}'.format(generation, generations,sess.run(loss)))
    # Drop the batch dimension and add the mean back before writing the image out
    image_eval = sess.run(image)
    best_image_add_mean = image_eval.reshape(shape[1:]) + normalization_mean
    # NOTE(review): the file name uses the zero-based index, one less than the printed generation
    output_file = 'temp_output_{}.jpg'.format(i)
    scipy.misc.imsave(output_file, best_image_add_mean)

Generation 100 out of 1000, loss: 197868048.0
Generation 200 out of 1000, loss: 105772656.0
Generation 300 out of 1000, loss: 73410864.0
Generation 400 out of 1000, loss: 57265060.0
Generation 500 out of 1000, loss: 47589056.0
Generation 600 out of 1000, loss: 41183492.0
Generation 700 out of 1000, loss: 36635288.0
Generation 800 out of 1000, loss: 33242754.0
Generation 900 out of 1000, loss: 30612820.0
Generation 1000 out of 1000, loss: 28516306.0


Plot images:

Save final image

In [None]:
# Evaluate the optimized image variable
image_eval = sess.run(image)
# Drop the leading batch dimension and add the normalization mean back
best_image_add_mean = image_eval.reshape(shape[1:]) + normalization_mean
# Write the result to the current working directory
output_file = 'final_output.jpg'
scipy.misc.imsave(output_file, best_image_add_mean)