In [13]:
%matplotlib inline

In [2]:
from functools import reduce
from sys import stderr

import matplotlib.pyplot as plt
import numpy as np
import scipy.io
import scipy.misc
import tensorflow as tf

import vgg

# Key into the network dict whose activation is compared against the
# content image's activation when building the content loss.
CONTENT_LAYER = 'relu4_2'

# Keys into the network dict whose activations are turned into Gram matrices
# and matched against those of the style image(s).
STYLE_LAYERS = (
    'relu1_1',
    'relu2_1',
    'relu3_1',
    'relu4_1',
    'relu5_1',
)

In [15]:
def imread(path):
    """Load an image file as a ``float64`` array.

    Args:
        path: Location of the image; passed unchanged to
            ``scipy.misc.imread``.

    Returns:
        The decoded image converted to ``float64``. A 2-D (grayscale) result
        is replicated into three identical channels along a new last axis,
        and a result with four channels keeps only its first three.
    """
    # ``np.float`` was merely an alias of the builtin ``float`` and has been
    # removed from recent NumPy releases; ``np.float64`` is the same dtype.
    img = scipy.misc.imread(path).astype(np.float64)
    if img.ndim == 2:
        # grayscale: stack the single plane three times
        img = np.dstack((img, img, img))
    elif img.ndim == 3 and img.shape[2] == 4:
        # four channels (presumably RGBA): drop the last one so downstream
        # code sees three channels
        img = img[:, :, :3]
    return img

def imsave(path, img):
    """Write ``img`` to ``path`` as 8-bit image data.

    The values are clamped to the closed interval [0, 255] and cast to
    ``uint8`` before being handed to ``scipy.misc.imsave``.
    """
    eight_bit = np.clip(img, 0, 255).astype(np.uint8)
    scipy.misc.imsave(path, eight_bit)
    
def _conv_layer(input, weights, bias):
    """Convolve ``input`` with fixed ``weights`` and add ``bias``.

    ``weights`` is wrapped in ``tf.constant`` and applied with
    ``tf.nn.conv2d`` using unit strides and ``'SAME'`` padding; the result of
    ``tf.nn.bias_add`` on that convolution is returned.
    """
    kernel = tf.constant(weights)
    convolved = tf.nn.conv2d(input, kernel, strides=(1, 1, 1, 1),
                             padding='SAME')
    return tf.nn.bias_add(convolved, bias)


def _pool_layer(input):
    """Apply ``tf.nn.max_pool`` to ``input``.

    The same ``(1, 2, 2, 1)`` tuple is used for both the window size and the
    strides, with ``'SAME'`` padding.
    """
    window = (1, 2, 2, 1)
    return tf.nn.max_pool(input, ksize=window, strides=window, padding='SAME')

def _tensor_size(tensor):
    """Return the product of the dimensions in ``tensor``'s static shape.

    Args:
        tensor: Any object whose ``get_shape()`` yields items exposing a
            numeric ``.value`` attribute.

    Returns:
        The product of all ``.value`` entries; ``1`` when the shape has no
        dimensions.
    """
    # ``reduce`` is no longer a builtin on Python 3; functools provides it on
    # both Python 2.6+ and Python 3, so import it explicitly.
    from functools import reduce
    from operator import mul
    return reduce(mul, (d.value for d in tensor.get_shape()), 1)


In [4]:
# --- input files ---------------------------------------------------------
network = "imagenet-vgg-verydeep-19.mat"    # handed to vgg.net()
content_dir = "examples/1-content.jpg"      # read with imread()
styles_dir = ["examples/1-style.jpg"]       # one path per style image

# Placeholder only: the optimisation cell assigns `initial` itself before
# reading it.
initial = None

# --- loss weights --------------------------------------------------------
content_weight = 5.0            # multiplies the content loss
style_weight = 100.0            # multiplies every style image's loss
style_blend_weights = [1]       # extra per-style-image factor, same order as styles_dir
tv_weight = 100.0               # multiplies the total-variation loss

# --- optimisation --------------------------------------------------------
learning_rate = 10.0            # passed to the Adam optimizer
iterations = 200                # number of optimizer steps

# --- reporting -----------------------------------------------------------
print_iterations = 50           # print the loss breakdown every N iterations
checkpoint_iterations = 50      # evaluate / keep the best image every N iterations

In [6]:
# Load the content image and every style image from disk.
content = imread(content_dir)
styles = [imread(style_path) for style_path in styles_dir]
target_shape = content.shape

# Rescale each style image by the ratio of the content image's axis-1 length
# to the style image's own axis-1 length (times style_scale).
for idx, style_img in enumerate(styles):
    style_scale = 1.0
    resize_factor = style_scale * target_shape[1] / style_img.shape[1]
    styles[idx] = scipy.misc.imresize(style_img, resize_factor)

In [7]:
# This cell runs in three stages:
#   1. evaluate the content image's activation at CONTENT_LAYER,
#   2. evaluate each style image's Gram matrices at STYLE_LAYERS,
#   3. optimise an image variable against content + style + total-variation
#      losses, keeping the lowest-loss image seen at a checkpoint in `output`.

# Image shapes with a leading axis of length 1 prepended; used to size the
# placeholders and the optimised image variable.
shape = (1,) + content.shape
style_shapes = [(1,) + style.shape for style in styles]
content_features = {}
style_features = [{} for _ in styles]

# compute content features in feedforward mode
g = tf.Graph()
with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
    image = tf.placeholder('float', shape=shape)
    net, mean_pixel = vgg.net(network, image)
    content_pre = np.array([vgg.preprocess(content, mean_pixel)])
    content_features[CONTENT_LAYER] = net[CONTENT_LAYER].eval(
            feed_dict={image: content_pre})

# compute style features in feedforward mode
# (a fresh graph per style image, with a placeholder sized to that image)
for i in range(len(styles)):
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=style_shapes[i])
        net, _ = vgg.net(network, image)
        style_pre = np.array([vgg.preprocess(styles[i], mean_pixel)])
        for layer in STYLE_LAYERS:
            features = net[layer].eval(feed_dict={image: style_pre})
            # flatten everything except the last axis, then form the
            # size-normalised Gram matrix of the flattened activations
            features = np.reshape(features, (-1, features.shape[3]))
            gram = np.matmul(features.T, features) / features.size
            style_features[i][layer] = gram

# make stylized image using backpropagation
with tf.Graph().as_default():
    # init image: the variable being optimised starts as scaled random noise
    initial = tf.random_normal(shape) * 0.256
    image = tf.Variable(initial)
    net, _ = vgg.net(network, image)

    # content loss: squared difference to the stored content activation,
    # divided by the number of elements in that activation
    content_loss = content_weight * (2 * tf.nn.l2_loss(
            net[CONTENT_LAYER] - content_features[CONTENT_LAYER]) /
            content_features[CONTENT_LAYER].size)

    # style loss: per style image, sum over STYLE_LAYERS of the squared
    # difference between the image's Gram matrix and the stored one
    style_loss = 0
    for i in range(len(styles)):
        style_losses = []
        for style_layer in STYLE_LAYERS:
            layer = net[style_layer]
            # a generator is used instead of map(lambda i: ...) so the loop
            # index `i` is not shadowed
            _, height, width, number = (d.value for d in layer.get_shape())
            size = height * width * number
            feats = tf.reshape(layer, (-1, number))
            gram = tf.matmul(tf.transpose(feats), feats) / size
            style_gram = style_features[i][style_layer]
            style_losses.append(2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
        style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)

    # total variation denoising: penalise differences between neighbouring
    # entries along axis 1 and along axis 2 of the image variable
    tv_y_size = _tensor_size(image[:,1:,:,:])
    tv_x_size = _tensor_size(image[:,:,1:,:])
    tv_loss = tv_weight * 2 * (
            (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                tv_y_size) +
            (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                tv_x_size))
    # overall loss
    loss = content_loss + style_loss + tv_loss

    # optimizer setup
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    def print_progress(i, last=False):
        """Write the iteration counter, and periodically the losses, to stderr.

        The counter line is written on every call. The four loss values are
        written when ``last`` is true or when ``i`` is a multiple of
        ``print_iterations`` (if that setting is non-zero).
        """
        stderr.write('Iteration %d/%d\n' % (i + 1, iterations))
        if last or (print_iterations and i % print_iterations == 0):
            stderr.write('  content loss: %g\n' % content_loss.eval())
            stderr.write('    style loss: %g\n' % style_loss.eval())
            stderr.write('       tv loss: %g\n' % tv_loss.eval())
            stderr.write('    total loss: %g\n' % loss.eval())
    # optimization
    best_loss = float('inf')
    best = None
    with tf.Session() as sess:
        # tf.initialize_all_variables() is deprecated in favour of this call
        sess.run(tf.global_variables_initializer())
        for i in range(iterations):
            last_step = (i == iterations - 1)
            print_progress(i, last=last_step)
            train_step.run()

            # at each checkpoint (and on the final step) keep the image if
            # its loss is the lowest evaluated so far
            if (checkpoint_iterations and i % checkpoint_iterations == 0) or last_step:
                this_loss = loss.eval()
                if this_loss < best_loss:
                    best_loss = this_loss
                    best = image.eval()
                    output = vgg.unprocess(best.reshape(shape[1:]), mean_pixel)


Instructions for updating:
Use `tf.global_variables_initializer` instead.


Iteration 1/200
  content loss: 2.13958e+06
    style loss: 5.859e+07
       tv loss: 26.1588
    total loss: 6.07297e+07
Iteration 2/200
Iteration 3/200
Iteration 4/200
Iteration 5/200
Iteration 6/200
Iteration 7/200
Iteration 8/200
Iteration 9/200
Iteration 10/200
Iteration 11/200
Iteration 12/200
Iteration 13/200
Iteration 14/200
Iteration 15/200
Iteration 16/200
Iteration 17/200
Iteration 18/200
Iteration 19/200
Iteration 20/200
Iteration 21/200
Iteration 22/200
Iteration 23/200
Iteration 24/200
Iteration 25/200
Iteration 26/200
Iteration 27/200
Iteration 28/200
Iteration 29/200
Iteration 30/200
Iteration 31/200
Iteration 32/200
Iteration 33/200
Iteration 34/200
Iteration 35/200
Iteration 36/200
Iteration 37/200
Iteration 38/200
Iteration 39/200
Iteration 40/200
Iteration 41/200
Iteration 42/200
Iteration 43/200
Iteration 44/200
Iteration 45/200
Iteration 46/200
Iteration 47/200
Iteration 48/200
Iteration 49/200
Iteration 50/200
Iteration 51/200
  content loss: 1.45329e+06
    styl

In [16]:
# Write the image kept by the optimisation cell to disk.
imsave(path='output.jpg', img=output)