# Using TensorFlow for Stylenet/NeuralStyle
---------------------------------------

We use two images, an original image and a style image and try to make the original image in the style of the style image.

Reference paper:
https://arxiv.org/abs/1508.06576

Need to download the model 'imagenet-vgg-verydee-19.mat' from:
http://www.vlfeat.org/matconvnet/models/beta16/imagenet-vgg-verydeep-19.mat

We start by loading the necessary libraries and clearing any prior computational graph.

In [323]:
import os
import scipy.misc
import scipy.io
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import ops
ops.reset_default_graph()

In [324]:
# Start a graph session
sess = tf.Session()

In [325]:
# Image Files
original_image_file = 'temp/book_cover.jpg'
style_image_file = 'temp/starry_night.jpg'

# Saved VGG Network path
vgg_path = '/home/nick/Documents/tensorflow/vgg_19_models/imagenet-vgg-verydeep-19.mat'

# Default Arguments
original_image_weight = 0.0005
style_image_weight = 0.02
regularization_weight = 50.0
learning_rate = 0.1
generations = 500
output_generations = 100

In [326]:
# Read in images
original_image = scipy.misc.imread(original_image_file)
style_image = scipy.misc.imread(style_image_file)

# Get shape of target and make the style image the same
target_shape = original_image.shape
style_image = scipy.misc.imresize(style_image, target_shape[1] / style_image.shape[1])

In [327]:
# VGG-19 Layer Setup
# From paper
vgg_layers = ['conv1_1', 'relu1_1',
              'conv1_2', 'relu1_2', 'pool1',
              'conv2_1', 'relu2_1',
              'conv2_2', 'relu2_2', 'pool2',
              'conv3_1', 'relu3_1',
              'conv3_2', 'relu3_2',
              'conv3_3', 'relu3_3',
              'conv3_4', 'relu3_4', 'pool3',
              'conv4_1', 'relu4_1',
              'conv4_2', 'relu4_2',
              'conv4_3', 'relu4_3',
              'conv4_4', 'relu4_4', 'pool4',
              'conv5_1', 'relu5_1',
              'conv5_2', 'relu5_2',
              'conv5_3', 'relu5_3',
              'conv5_4', 'relu5_4']

In [328]:
# Extract weights and matrix means
def extract_net_info(path_to_params):
    vgg_data = scipy.io.loadmat(path_to_params)
    normalization_matrix = vgg_data['normalization'][0][0][0]
    mat_mean = np.mean(normalization_matrix, axis=(0,1))
    network_weights = vgg_data['layers'][0]
    return(mat_mean, network_weights)

In [329]:
# Create the VGG-19 Network
def vgg_network(network_weights, init_image):
    network = {}
    image = init_image

    for i, layer in enumerate(vgg_layers):
        if layer[1] == 'c':
            weights, bias = network_weights[i][0][0][0][0]
            weights = np.transpose(weights, (1, 0, 2, 3))
            bias = bias.reshape(-1)
            conv_layer = tf.nn.conv2d(image, tf.constant(weights), (1, 1, 1, 1), 'SAME')
            image = tf.nn.bias_add(conv_layer, bias)
        elif layer[1] == 'r':
            image = tf.nn.relu(image)
        else:
            image = tf.nn.max_pool(image, (1, 2, 2, 1), (1, 2, 2, 1), 'SAME')
        network[layer] = image
    return(network)

In [330]:
# Here we define which layers apply to the original or style image
original_layer = 'relu4_2'
style_layers = ['relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1']

In [331]:
# Get network parameters
normalization_mean, network_weights = extract_net_info(vgg_path)

shape = (1,) + original_image.shape
style_shape = (1,) + style_image.shape
original_features = {}
style_features = {}

In [332]:
# Get network parameters
image = tf.placeholder('float', shape=shape)
vgg_net = vgg_network(network_weights, image)

In [333]:
# Normalize original image
original_minus_mean = original_image - normalization_mean
original_norm = np.array([original_minus_mean])
original_features[original_layer] = sess.run(vgg_net[original_layer], feed_dict={image: original_norm})

In [334]:
# Get style image network
image = tf.placeholder('float', shape=style_shape)
vgg_net = vgg_network(network_weights, image)
style_minus_mean = style_image - normalization_mean
style_norm = np.array([style_minus_mean])

for layer in style_layers:
    layer_output = sess.run(vgg_net[layer], feed_dict={image: style_norm})
    layer_output = np.reshape(layer_output, (-1, layer_output.shape[3]))
    style_gram_matrix = np.matmul(layer_output.T, layer_output) / layer_output.size
    style_features[layer] = style_gram_matrix

In [335]:
# Make Combined Image
initial = tf.random_normal(shape) * 0.05
image = tf.Variable(initial)
vgg_net = vgg_network(network_weights, image)

In [336]:
# Loss
original_loss = original_image_weight * (2 * tf.nn.l2_loss(vgg_net[original_layer] - original_features[original_layer]) /
                original_features[original_layer].size)

# Loss from Style Image
style_loss = 0
style_losses = []
for style_layer in style_layers:
    layer = vgg_net[style_layer]
    feats, height, width, channels = [x.value for x in layer.get_shape()]
    size = height * width * channels
    features = tf.reshape(layer, (-1, channels))
    style_gram_matrix = tf.matmul(tf.transpose(features), features) / size
    style_expected = style_features[style_layer]
    #style_temp_loss = sess.run(2 * tf.nn.l2_loss(style_gram_matrix - style_expected) / style_expected.size)
    #print('Layer: {}, Loss: {}'.format(style_layer, style_temp_loss))
    style_losses.append(2 * tf.nn.l2_loss(style_gram_matrix - style_expected) / style_expected.size)
style_loss += style_image_weight * tf.reduce_sum(style_losses)

# To Smooth the resuts, we add in total variation loss       
total_var_x = sess.run(tf.reduce_prod(image[:,1:,:,:].get_shape()))
total_var_y = sess.run(tf.reduce_prod(image[:,:,1:,:].get_shape()))
first_term = regularization_weight * 2
second_term_numerator = tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:])
second_term = second_term_numerator / total_var_y
third_term = (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) / total_var_x)
total_variation_loss = first_term * (second_term + third_term)

# Combined Loss
loss = original_loss + style_loss + total_variation_loss

Layer: relu1_1, Loss: 15014316.0
Layer: relu2_1, Loss: 15014316.0
Layer: relu3_1, Loss: 15014316.0
Layer: relu4_1, Loss: 15014316.0
Layer: relu5_1, Loss: 15014316.0


In [357]:
sess.run(layer)

array([[[[ 492.43005371,  458.42831421,  576.28674316]]]], dtype=float32)

In [358]:
size

3

In [359]:
sess.run(features)

array([[ 492.43005371,  458.42831421,  576.28674316]], dtype=float32)

In [360]:
sess.run(tf.matmul(tf.transpose(features), features) / size)

array([[  80829.1171875,   75247.9609375,   94593.6328125],
       [  75247.9609375,   70052.171875 ,   88062.0546875],
       [  94593.6328125,   88062.0546875,  110702.1328125]], dtype=float32)

In [366]:
style_expected

array([[ 3354.70092773,  3552.03027344,  4014.83984375],
       [ 3552.03027344,  3760.96704102,  4250.99951172],
       [ 4014.83984375,  4250.99951172,  4804.88085938]], dtype=float32)

In [300]:
style_layer = 'relu2_1'
layer = vgg_net[style_layer]
feats, height, width, channels = [x.value for x in layer.get_shape()]
size = height * width * channels
features = tf.reshape(layer, (-1, channels))
style_gram_matrix = tf.matmul(tf.transpose(features), features) / size
style_expected = style_features[style_layer]
style_losses.append(2 * tf.nn.l2_loss(style_gram_matrix - style_expected) / style_expected.size)

In [315]:
sess.run(layer)

array([[[[ 100.72416687,  106.6307373 ,  120.61589813]]]], dtype=float32)

In [316]:
style_expected

array([[ 2249.29736328,  2318.18383789,  2775.89428711],
       [ 2318.18383789,  2419.2878418 ,  2915.45068359],
       [ 2775.89428711,  2915.45068359,  3578.22509766]], dtype=float32)

In [317]:
sess.run(style_gram_matrix)

array([[ 1970.56860352,  2058.08105469,  2480.35449219],
       [ 2058.08105469,  2149.82885742,  2589.22558594],
       [ 2480.35449219,  2589.22558594,  3127.53442383]], dtype=float32)

In [318]:
sess.run(features)

array([[  71.19725037,   73.60102081,   92.87404633],
       [  65.66284943,   67.88374329,   85.64056396],
       [  70.32759857,   72.70740509,   91.70826721],
       [  68.9703064 ,   71.29055023,   89.97322845],
       [ 100.72416687,  105.67219543,  120.61589813],
       [  68.57144928,   70.89800262,   89.44815826],
       [  63.51005936,   65.65606689,   82.81027985],
       [  99.80554199,  106.6307373 ,  119.45858002],
       [ 100.27510834,  106.16234589,  120.04843903],
       [  64.67858124,   66.85785675,   84.34563446],
       [  65.95037079,   68.1950531 ,   85.99183655],
       [  66.53562927,   68.81031799,   86.76571655]], dtype=float32)

In [319]:
features.get_shape()

TensorShape([Dimension(12), Dimension(3)])

In [364]:
for style_layer in style_layers:
    print('-------Layer: {} -------'.format(style_layer))
    layer = vgg_net[style_layer]
    print(sess.run(layer))
    feats, height, width, channels = [x.value for x in layer.get_shape()]
    size = height * width * channels
    print(size)
    print('')

-------Layer: relu1_1 -------
[[[[ 0.08247361  0.04152144  0.10092877]
   [ 0.08947127  0.08691224  0.05242458]
   [ 0.14437069  0.07853114  0.07240488]
   ..., 
   [ 0.1007843   0.09986139  0.12209319]
   [ 0.05227378  0.10274027  0.09586678]
   [ 0.02098998  0.03495882  0.01644966]]

  [[ 0.04201204  0.09918579  0.10414404]
   [ 0.11072802  0.01286861  0.12579992]
   [ 0.11812787  0.07101136  0.07419724]
   ..., 
   [ 0.06674157  0.07611842  0.07811055]
   [ 0.1128375   0.09529858  0.05741931]
   [ 0.05386398  0.10804675  0.02621546]]

  [[ 0.08730088  0.07363851  0.08691902]
   [ 0.06481196  0.03303457  0.04696261]
   [ 0.06705463  0.07307769  0.08912132]
   ..., 
   [ 0.06230537  0.07266171  0.11867781]
   [ 0.08606199  0.04909804  0.1145003 ]
   [ 0.10217204  0.07850321  0.0741777 ]]

  ..., 
  [[ 0.07669023  0.13257091  0.11716817]
   [ 0.05869968  0.09233315  0.10477214]
   [ 0.06873928  0.13356456  0.07592094]
   ..., 
   [ 0.11683203  0.08175176  0.06290048]
   [ 0.06680899  0

In [344]:
sess.run(layer)

array([[[[ 50.61659241,  76.2098999 ,  65.95883179]]]], dtype=float32)

In [345]:
sess.run(loss)

3.3216602e+09

In [346]:
sess.run(style_loss)

537399.12

In [347]:
sess.run(total_variation_loss)

3.3211228e+09

In [355]:
sess.run(original_loss)

68.958351

In [349]:
print(total_var_y, total_var_x)

446946 446550


In [337]:
# Declare Optimization Algorithm
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_step = optimizer.minimize(loss)

# Initialize Variables and start Training
sess.run(tf.global_variables_initializer())

In [338]:
# Declare Optimization Algorithm
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_step = optimizer.minimize(loss)

# Initialize Variables and start Training
sess.run(tf.global_variables_initializer())
for i in range(generations):
    
    sess.run(train_step)

    # Print update and save temporary output
    if (i+1) % output_generations == 0:
        print('Generation {} out of {}'.format(i + 1, generations))
        image_eval = sess.run(image)
        best_image_add_mean = image_eval.reshape(shape[1:]) + normalization_mean
        output_file = 'temp_output_{}.jpg'.format(i)
        scipy.misc.imsave(output_file, best_image_add_mean)

Generation 100 out of 500
Generation 200 out of 500
Generation 300 out of 500
Generation 400 out of 500
Generation 500 out of 500


In [340]:
sess.run(image)

array([[[[-0.04712168,  0.03435499,  0.02002956],
         [-0.03250763,  0.00673179, -0.0147724 ],
         [ 0.02896713, -0.00411567,  0.08262652],
         ..., 
         [-0.1370527 , -0.01855541,  0.00446089],
         [-0.06849232, -0.04610289,  0.01645049],
         [-0.05741588,  0.01752681, -0.03479913]],

        [[-0.08423983,  0.02016895, -0.04017523],
         [ 0.02667994, -0.01776898, -0.04314912],
         [-0.01220408,  0.01039219,  0.10092995],
         ..., 
         [ 0.02354718,  0.00214768,  0.03652928],
         [ 0.00175504, -0.02626217, -0.01919887],
         [ 0.02099239,  0.02947534, -0.02159922]],

        [[ 0.03742211,  0.03676733,  0.03237842],
         [-0.05757976,  0.01306623, -0.03120742],
         [ 0.01698635,  0.04152049,  0.05278337],
         ..., 
         [ 0.00586959,  0.06157336,  0.01941444],
         [ 0.00551952, -0.04034816, -0.06150487],
         [-0.04799243,  0.0349594 , -0.10273691]],

        ..., 
        [[ 0.01167455, -0.08570861,

In [341]:
sess.run(tf.reduce_min(image))

-134365.08

In [342]:
sess.run(tf.reduce_max(image))

76.2099

In [None]:
# Save final image
image_eval = sess.run(image)
best_image_add_mean = image_eval.reshape(shape[1:]) + normalization_mean
output_file = 'final_output.jpg'
scipy.misc.imsave(output_file, best_image_add_mean)