In [1]:
import tensorflow as tf
import numpy as np
from sys import stderr
from PIL import Image
import scipy
import scipy.misc
import scipy.io

In [2]:
# VGG-19 layer names in forward order, one parenthesised group per stage.
# net_preloaded() uses each name's position as the index into the weights
# array loaded from the .mat file, so the order here must not change.
VGG19_LAYERS = (
    ('conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1')
    + ('conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2')
    + ('conv3_1', 'relu3_1', 'conv3_2', 'relu3_2',
       'conv3_3', 'relu3_3', 'conv3_4', 'relu3_4', 'pool3')
    + ('conv4_1', 'relu4_1', 'conv4_2', 'relu4_2',
       'conv4_3', 'relu4_3', 'conv4_4', 'relu4_4', 'pool4')
    + ('conv5_1', 'relu5_1', 'conv5_2', 'relu5_2',
       'conv5_3', 'relu5_3', 'conv5_4', 'relu5_4')
)

# Layers whose activations define the content loss.
CONTENT_LAYERS = (
    'relu4_2',
    'relu5_2',
)

# Layers whose Gram matrices define the style loss.
STYLE_LAYERS = (
    'relu1_1',
    'relu2_1',
    'relu3_1',
    'relu4_1',
    'relu5_1',
)

# Python 3 no longer has a builtin reduce(); functools.reduce is the same
# function on both Python 2 and Python 3.
from functools import reduce

def load_net(data_path):
    """Load pretrained VGG-19 data from a MATLAB .mat file.

    :param data_path: path handed to scipy.io.loadmat
    :returns: tuple (weights, mean_pixel) where `weights` is the first row of
        the file's 'layers' entry and `mean_pixel` is the mean over the first
        two axes of the first field of the 'normalization' struct
        (presumably the training-set average image - confirm against the
        .mat layout)
    :raises ValueError: if the file lacks any of the 'layers', 'classes' or
        'normalization' entries
    """
    mat = scipy.io.loadmat(data_path)
    for required_key in ('layers', 'classes', 'normalization'):
        if required_key not in mat:
            raise ValueError("You're using the wrong VGG19 data. Please follow the instructions in the README to download the correct data.")
    average_image = mat['normalization'][0][0][0]
    # Collapse the spatial axes so one value per channel remains.
    per_channel_mean = np.mean(average_image, axis=(0, 1))
    layer_weights = mat['layers'][0]
    return layer_weights, per_channel_mean

def net_preloaded(weights, input_image, pooling):
    """Build the VGG-19 forward pass on top of `input_image`.

    :param weights: per-layer weight records as returned by load_net();
        entry `i` belongs to the i-th name in VGG19_LAYERS
    :param input_image: tensor fed into the first layer
    :param pooling: pooling mode forwarded to _pool_layer()
    :returns: dict mapping every name in VGG19_LAYERS to that layer's output
    """
    activations = {}
    tensor = input_image
    for index, layer_name in enumerate(VGG19_LAYERS):
        layer_type = layer_name[:4]
        if layer_type == 'pool':
            tensor = _pool_layer(tensor, pooling)
        elif layer_type == 'relu':
            tensor = tf.nn.relu(tensor)
        elif layer_type == 'conv':
            kernels, bias = weights[index][0][0][0][0]
            # matconvnet stores kernels as [width, height, in_channels, out_channels],
            # tensorflow expects [height, width, in_channels, out_channels]:
            # swap the first two axes.
            kernels = np.transpose(kernels, (1, 0, 2, 3))
            tensor = _conv_layer(tensor, kernels, bias.reshape(-1))
        activations[layer_name] = tensor

    # Every layer name must have produced exactly one entry.
    assert len(activations) == len(VGG19_LAYERS)
    return activations

def _conv_layer(input, weights, bias):
    """Apply a stride-1, SAME-padded 2-D convolution followed by a bias add.

    :param input: input tensor
    :param weights: kernel array, wrapped in a tf.constant
    :param bias: bias vector added to the convolution output
    :returns: the biased convolution output tensor
    """
    kernel = tf.constant(weights)
    convolved = tf.nn.conv2d(
        input,
        kernel,
        strides=(1, 1, 1, 1),
        padding='SAME',
    )
    return tf.nn.bias_add(convolved, bias)


def _pool_layer(input, pooling):
    """Downsample `input` with a 2x2 window and stride 2, SAME padding.

    :param input: input tensor
    :param pooling: 'avg' selects average pooling; any other value selects
        max pooling
    :returns: the pooled tensor
    """
    pool_op = tf.nn.avg_pool if pooling == 'avg' else tf.nn.max_pool
    return pool_op(
        input,
        ksize=(1, 2, 2, 1),
        strides=(1, 2, 2, 1),
        padding='SAME',
    )

def preprocess(image, mean_pixel):
    """Centre `image` by subtracting `mean_pixel`; inverse of unprocess()."""
    centered = image - mean_pixel
    return centered


def unprocess(image, mean_pixel):
    """Undo preprocess() by adding `mean_pixel` back onto `image`."""
    restored = image + mean_pixel
    return restored

In [28]:
def stylize(network, initial, initial_noiseblend, content, styles, preserve_colors, iterations,
        content_weight, content_weight_blend, style_weight, style_layer_weight_exp, style_blend_weights, tv_weight,
        learning_rate, beta1, beta2, epsilon, pooling,
        print_iterations=None, checkpoint_iterations=None):
    """
    Stylize images.

    Computes VGG-19 feature targets for `content` and for every image in
    `styles`, then optimizes an image variable with Adam against the sum of a
    content loss, a Gram-matrix style loss and a total-variation loss.

    This function yields tuples (iteration, image); `iteration` is None
    if this is the final image (the last iteration).  Other tuples are yielded
    every `checkpoint_iterations` iterations.  The yielded image is the
    lowest-loss image seen at any checkpoint so far, which is not necessarily
    the image of the current iteration.

    :param network: path of the VGG-19 .mat file, passed to load_net()
    :param initial: image array to start from, or None to start from noise
    :param initial_noiseblend: weight of the random noise mixed into
        `initial` (the image itself is weighted 1 - initial_noiseblend);
        not used when `initial` is None
    :param content: content image array; its shape fixes the output shape
    :param styles: sequence of style image arrays
    :param preserve_colors: when equal to True, the output keeps the chroma
        of `content` and takes only the luma of the stylized image
    :param iterations: number of optimizer steps
    :param content_weight: multiplier of the content loss
    :param content_weight_blend: share of the content loss given to
        'relu4_2'; 'relu5_2' receives 1 - content_weight_blend
    :param style_weight: multiplier of the style loss
    :param style_layer_weight_exp: factor by which each successive style
        layer's weight grows before the weights are normalized to sum to 1
    :param style_blend_weights: one multiplier per entry of `styles`
    :param tv_weight: multiplier of the total-variation loss
    :param learning_rate: Adam learning rate
    :param beta1: Adam beta1
    :param beta2: Adam beta2
    :param epsilon: Adam epsilon
    :param pooling: pooling mode passed to net_preloaded()
    :param print_iterations: if truthy, losses are written to stderr every
        `print_iterations` iterations (and always on the last one)
    :param checkpoint_iterations: if truthy, an image is yielded every
        `checkpoint_iterations` iterations (and always on the last one)
    :rtype: iterator[tuple[int|None,image]]
    """
    shape = (1,) + content.shape
    style_shapes = [(1,) + style.shape for style in styles]
    content_features = {}
    style_features = [{} for _ in styles]

    vgg_weights, vgg_mean_pixel = load_net(network)

    # per-layer style weights form a geometric progression
    layer_weight = 1.0
    style_layers_weights = {}
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] = layer_weight
        layer_weight *= style_layer_weight_exp

    # normalize style layer weights
    layer_weights_sum = 0
    for style_layer in STYLE_LAYERS:
        layer_weights_sum += style_layers_weights[style_layer]
    for style_layer in STYLE_LAYERS:
        style_layers_weights[style_layer] /= layer_weights_sum

    # compute content features in feedforward mode
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=shape)
        net = net_preloaded(vgg_weights, image, pooling)
        content_pre = np.array([preprocess(content, vgg_mean_pixel)])
        for layer in CONTENT_LAYERS:
            content_features[layer] = net[layer].eval(feed_dict={image: content_pre})

    # compute style features in feedforward mode
    for i in range(len(styles)):
        g = tf.Graph()
        with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
            image = tf.placeholder('float', shape=style_shapes[i])
            net = net_preloaded(vgg_weights, image, pooling)
            style_pre = np.array([preprocess(styles[i], vgg_mean_pixel)])
            for layer in STYLE_LAYERS:
                features = net[layer].eval(feed_dict={image: style_pre})
                # flatten the spatial positions: one row per position, one column per channel
                features = np.reshape(features, (-1, features.shape[3]))
                gram = np.matmul(features.T, features) / features.size
                style_features[i][layer] = gram

    initial_content_noise_coeff = 1.0 - initial_noiseblend

    # make stylized image using backpropogation
    with tf.Graph().as_default():
        # The starting point is Gaussian noise scaled by 0.256, optionally
        # blended with the preprocessed `initial` image.
        if initial is None:
            initial = tf.random_normal(shape) * 0.256
        else:
            initial = np.array([preprocess(initial, vgg_mean_pixel)])
            initial = initial.astype('float32')
            initial = (initial) * initial_content_noise_coeff + (tf.random_normal(shape) * 0.256) * (1.0 - initial_content_noise_coeff)
        image = tf.Variable(initial)
        net = net_preloaded(vgg_weights, image, pooling)

        # content loss
        content_layers_weights = {}
        content_layers_weights['relu4_2'] = content_weight_blend
        content_layers_weights['relu5_2'] = 1.0 - content_weight_blend

        content_loss = 0
        content_losses = []
        for content_layer in CONTENT_LAYERS:
            content_losses.append(content_layers_weights[content_layer] * content_weight * (2 * tf.nn.l2_loss(
                    net[content_layer] - content_features[content_layer]) /
                    content_features[content_layer].size))
        content_loss += reduce(tf.add, content_losses)

        # style loss
        style_loss = 0
        for i in range(len(styles)):
            style_losses = []
            for style_layer in STYLE_LAYERS:
                layer = net[style_layer]
                _, height, width, number = (dim.value for dim in layer.get_shape())
                size = height * width * number
                feats = tf.reshape(layer, (-1, number))
                gram = tf.matmul(tf.transpose(feats), feats) / size
                style_gram = style_features[i][style_layer]
                style_losses.append(style_layers_weights[style_layer] * 2 * tf.nn.l2_loss(gram - style_gram) / style_gram.size)
            style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)

        # total variation denoising
        tv_y_size = _tensor_size(image[:,1:,:,:])
        tv_x_size = _tensor_size(image[:,:,1:,:])
        tv_loss = tv_weight * 2 * (
                (tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) /
                    tv_y_size) +
                (tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) /
                    tv_x_size))
        # overall loss
        loss = content_loss + style_loss + tv_loss

        # optimizer setup
        train_step = tf.train.AdamOptimizer(learning_rate, beta1, beta2, epsilon).minimize(loss)

        def print_progress():
            stderr.write('  content loss: %g\n' % content_loss.eval())
            stderr.write('    style loss: %g\n' % style_loss.eval())
            stderr.write('       tv loss: %g\n' % tv_loss.eval())
            stderr.write('    total loss: %g\n' % loss.eval())

        # optimization
        best_loss = float('inf')
        best = None
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            stderr.write('Optimization started...\n')
            if print_iterations:
                print_progress()
            for i in range(iterations):
                stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
                train_step.run()

                last_step = (i == iterations - 1)
                if last_step or (print_iterations and i % print_iterations == 0):
                    print_progress()

                if (checkpoint_iterations and i % checkpoint_iterations == 0) or last_step:
                    this_loss = loss.eval()
                    improved = this_loss < best_loss
                    if improved:
                        best_loss = this_loss
                    # A NaN loss never compares as smaller, so without the
                    # `best is None` case the first checkpoint could leave
                    # `best` unset and crash on reshape below.
                    if improved or best is None:
                        best = image.eval()

                    img_out = unprocess(best.reshape(shape[1:]), vgg_mean_pixel)

                    if preserve_colors and preserve_colors == True:
                        original_image = np.clip(content, 0, 255)
                        styled_image = np.clip(img_out, 0, 255)

                        # Luminosity transfer steps:
                        # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
                        # 2. Convert stylized grayscale into YUV (YCbCr)
                        # 3. Convert original image into YUV (YCbCr)
                        # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
                        # 5. Convert recombined image from YUV back to RGB

                        # 1
                        styled_grayscale = rgb2gray(styled_image)
                        styled_grayscale_rgb = gray2rgb(styled_grayscale)

                        # 2
                        styled_grayscale_yuv = np.array(Image.fromarray(styled_grayscale_rgb.astype(np.uint8)).convert('YCbCr'))

                        # 3
                        original_yuv = np.array(Image.fromarray(original_image.astype(np.uint8)).convert('YCbCr'))

                        # 4
                        rows, cols, _ = original_image.shape
                        combined_yuv = np.empty((rows, cols, 3), dtype=np.uint8)
                        combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
                        combined_yuv[..., 1] = original_yuv[..., 1]
                        combined_yuv[..., 2] = original_yuv[..., 2]

                        # 5
                        img_out = np.array(Image.fromarray(combined_yuv, 'YCbCr').convert('RGB'))


                    yield (
                        (None if last_step else i),
                        img_out
                    )


In [29]:
def _tensor_size(tensor):
    """Return the number of elements in `tensor`.

    Multiplies together the `.value` of every dimension reported by
    `tensor.get_shape()`; a tensor with no dimensions yields 1.
    """
    element_count = 1
    for dimension in tensor.get_shape():
        element_count = element_count * dimension.value
    return element_count

def rgb2gray(rgb):
    """Collapse the last axis of `rgb` to luma using weights (0.299, 0.587, 0.114).

    Only the first three entries of the last axis are used, so a fourth
    (e.g. alpha) channel is ignored.
    """
    luma_weights = [0.299, 0.587, 0.114]
    color_channels = rgb[..., :3]
    return np.dot(color_channels, luma_weights)

def gray2rgb(gray):
    """Expand a 2-D array into a float32 array with three identical channels.

    :param gray: 2-D array
    :returns: float32 array of shape gray.shape + (3,) with `gray` copied
        into every channel
    """
    rows, cols = gray.shape
    stacked = np.empty((rows, cols, 3), dtype=np.float32)
    for channel in (2, 1, 0):
        stacked[:, :, channel] = gray
    return stacked

In [30]:
def imgread(path):
    """Read an image file and return it as a float64 array with three channels.

    Uses scipy.misc.imread, like the rest of this notebook, so it needs a
    SciPy version that still provides that function.

    :param path: path of the image file
    :returns: float64 array; a 2-D (grayscale) image is replicated into three
        channels and a 4-channel image loses its last (alpha) channel
    """
    # The bare name `imread` was never imported, and the `np.float` alias no
    # longer exists in current NumPy; np.float64 is the dtype it stood for.
    img = scipy.misc.imread(path).astype(np.float64)
    if len(img.shape) == 2:
        # grayscale
        img = np.dstack((img, img, img))
    elif img.shape[2] == 4:
        # PNG with alpha channel
        img = img[:, :, :3]
    return img


def imgsave(path, img):
    """Write `img` to `path` after clamping it to the 0-255 range.

    :param path: destination file path
    :param img: image array; values are clipped to [0, 255] and cast to uint8
    """
    pixels = np.clip(img, 0, 255).astype(np.uint8)
    picture = Image.fromarray(pixels)
    picture.save(path, quality=95)


In [50]:
# Read the content photo and the style reference image(s) from the working
# directory.
content_image = scipy.misc.imread('content.jpg')
style_images = list(map(scipy.misc.imread, ['style.jpg']))

In [None]:
# Resize every style image by the single scale factor that makes its width
# equal to style_scale times the content image's width.
target_shape = content_image.shape
style_scale = 1.0
for i in range(len(style_images)):
    style_images[i] = scipy.misc.imresize(style_images[i], style_scale * target_shape[1] / style_images[i].shape[1])

# Every style image contributes equally to the style loss.
style_blend_weights = [1.0/len(style_images) for _ in style_images]
initial = content_image
counter = 0

for iteration, image in stylize(
    network='imagenet-vgg-verydeep-19.mat',
    initial=initial,
    # A noise blend of 1 gives `initial` a weight of 0: the run starts from pure noise.
    initial_noiseblend=1,
    content=content_image,
    styles=style_images,
    preserve_colors=None,
    iterations=1000,
    content_weight=5e0,
    content_weight_blend=1,
    style_weight=5e2,
    style_layer_weight_exp=1,
    style_blend_weights=style_blend_weights,
    tv_weight=1e2,
    learning_rate=1e1,
    beta1=0.9,
    beta2=0.999,
    epsilon=1e-08,
    pooling='max',
    # True behaves as 1 in the modulo test, so losses are printed every iteration.
    print_iterations=True,
    # An image is yielded, and therefore saved below, on every iteration.
    checkpoint_iterations=1
):
    counter += 1
    scipy.misc.imsave('output' + str(counter) +  '.jpg', image)
        


Optimization started...
  content loss: 1.14971e+06
    style loss: 4.40616e+08
       tv loss: 26.1934
    total loss: 4.41766e+08
Iteration    1/1000
  content loss: 1.07131e+06
    style loss: 4.13418e+08
       tv loss: 20821.9
    total loss: 4.1451e+08
Iteration    2/1000
  content loss: 1.25081e+06
    style loss: 3.28639e+08
       tv loss: 47886.7
    total loss: 3.29938e+08
Iteration    3/1000
  content loss: 1.96858e+06
    style loss: 2.14123e+08
       tv loss: 84674.1
    total loss: 2.16176e+08
Iteration    4/1000
  content loss: 3.3458e+06
    style loss: 1.45174e+08
       tv loss: 133645
    total loss: 1.48654e+08
Iteration    5/1000
  content loss: 4.25563e+06
    style loss: 1.30845e+08
       tv loss: 179874
    total loss: 1.35281e+08
Iteration    6/1000
  content loss: 4.19832e+06
    style loss: 1.07054e+08
       tv loss: 211425
    total loss: 1.11464e+08
Iteration    7/1000
  content loss: 4.14657e+06
    style loss: 8.60712e+07
       tv loss: 234273
    to

    total loss: 8.67479e+06
Iteration   65/1000
  content loss: 5.06567e+06
    style loss: 3.21062e+06
       tv loss: 323180
    total loss: 8.59947e+06
Iteration   66/1000
  content loss: 5.0521e+06
    style loss: 3.15359e+06
       tv loss: 322191
    total loss: 8.52788e+06
Iteration   67/1000
  content loss: 5.04826e+06
    style loss: 3.08825e+06
       tv loss: 321293
    total loss: 8.45781e+06
Iteration   68/1000
  content loss: 5.04288e+06
    style loss: 3.0284e+06
       tv loss: 320432
    total loss: 8.39171e+06
Iteration   69/1000
  content loss: 5.02887e+06
    style loss: 2.97849e+06
       tv loss: 319564
    total loss: 8.32692e+06
Iteration   70/1000
  content loss: 5.01914e+06
    style loss: 2.9266e+06
       tv loss: 318760
    total loss: 8.2645e+06
Iteration   71/1000
  content loss: 5.0142e+06
    style loss: 2.87269e+06
       tv loss: 318011
    total loss: 8.2049e+06
Iteration   72/1000
  content loss: 5.00195e+06
    style loss: 2.82793e+06
       tv los

    total loss: 6.41244e+06
Iteration  130/1000
  content loss: 4.53881e+06
    style loss: 1.57882e+06
       tv loss: 277955
    total loss: 6.39559e+06
Iteration  131/1000
  content loss: 4.53288e+06
    style loss: 1.56878e+06
       tv loss: 277373
    total loss: 6.37903e+06
Iteration  132/1000
  content loss: 4.52638e+06
    style loss: 1.55961e+06
       tv loss: 276788
    total loss: 6.36278e+06
Iteration  133/1000
  content loss: 4.52064e+06
    style loss: 1.54994e+06
       tv loss: 276209
    total loss: 6.34679e+06
Iteration  134/1000
  content loss: 4.51436e+06
    style loss: 1.54104e+06
       tv loss: 275626
    total loss: 6.33104e+06
Iteration  135/1000
  content loss: 4.50849e+06
    style loss: 1.53199e+06
       tv loss: 275048
    total loss: 6.31553e+06
Iteration  136/1000
  content loss: 4.50277e+06
    style loss: 1.52303e+06
       tv loss: 274473
    total loss: 6.30027e+06
Iteration  137/1000
  content loss: 4.49704e+06
    style loss: 1.51432e+06
       

    total loss: 5.73752e+06
Iteration  195/1000
  content loss: 4.25485e+06
    style loss: 1.23327e+06
       tv loss: 244701
    total loss: 5.73282e+06
Iteration  196/1000
  content loss: 4.26762e+06
    style loss: 1.20267e+06
       tv loss: 244379
    total loss: 5.71466e+06
Iteration  197/1000
  content loss: 4.25916e+06
    style loss: 1.207e+06
       tv loss: 243911
    total loss: 5.71007e+06
Iteration  198/1000
  content loss: 4.24946e+06
    style loss: 1.21361e+06
       tv loss: 243402
    total loss: 5.70647e+06
Iteration  199/1000
  content loss: 4.25400e+06
    style loss: 1.19569e+06
       tv loss: 243010
    total loss: 5.6927e+06
Iteration  200/1000
  content loss: 4.24566e+06
    style loss: 1.19563e+06
       tv loss: 242539
    total loss: 5.68383e+06
Iteration  201/1000
  content loss: 4.24453e+06
    style loss: 1.19466e+06
       tv loss: 242106
    total loss: 5.68129e+06
Iteration  202/1000
  content loss: 4.23829e+06
    style loss: 1.19247e+06
       tv 

    total loss: 5.38274e+06
Iteration  260/1000
  content loss: 4.09411e+06
    style loss: 1.05586e+06
       tv loss: 219328
    total loss: 5.36929e+06
Iteration  261/1000
  content loss: 4.07627e+06
    style loss: 1.06598e+06
       tv loss: 218852
    total loss: 5.3611e+06
Iteration  262/1000
  content loss: 4.08017e+06
    style loss: 1.0631e+06
       tv loss: 218558
    total loss: 5.36183e+06
Iteration  263/1000
  content loss: 4.08004e+06
    style loss: 1.06274e+06
       tv loss: 218245
    total loss: 5.36103e+06
Iteration  264/1000
  content loss: 4.07173e+06
    style loss: 1.06958e+06
       tv loss: 217841
    total loss: 5.35915e+06
Iteration  265/1000
  content loss: 4.06623e+06
    style loss: 1.07823e+06
       tv loss: 217503
    total loss: 5.36197e+06
Iteration  266/1000
  content loss: 4.08669e+06
    style loss: 1.07394e+06
       tv loss: 217341
    total loss: 5.37797e+06
Iteration  267/1000
  content loss: 4.03404e+06
    style loss: 1.16868e+06
       tv

    total loss: 5.19858e+06
Iteration  325/1000
  content loss: 4.01209e+06
    style loss: 1.02899e+06
       tv loss: 202116
    total loss: 5.2432e+06
Iteration  326/1000
  content loss: 3.94793e+06
    style loss: 1.05243e+06
       tv loss: 201232
    total loss: 5.20159e+06
Iteration  327/1000
  content loss: 3.96679e+06
    style loss: 1.0135e+06
       tv loss: 201111
    total loss: 5.1814e+06
Iteration  328/1000
  content loss: 3.99051e+06
    style loss: 1.04145e+06
       tv loss: 201092
    total loss: 5.23306e+06
Iteration  329/1000
  content loss: 3.95782e+06
    style loss: 1.03314e+06
       tv loss: 200469
    total loss: 5.19143e+06
Iteration  330/1000
  content loss: 3.94752e+06
    style loss: 1.01935e+06
       tv loss: 200134
    total loss: 5.16701e+06
Iteration  331/1000
  content loss: 3.99829e+06
    style loss: 1.00046e+06
       tv loss: 200322
    total loss: 5.19907e+06
Iteration  332/1000
  content loss: 3.93419e+06
    style loss: 1.03511e+06
       tv 

    total loss: 5.00900e+06
Iteration  391/1000
  content loss: 3.89446e+06
    style loss: 927522
       tv loss: 187924
    total loss: 5.00991e+06
Iteration  392/1000
  content loss: 3.85082e+06
    style loss: 994450
       tv loss: 187385
    total loss: 5.03266e+06
Iteration  393/1000
  content loss: 3.91903e+06
    style loss: 968202
       tv loss: 187726
    total loss: 5.07496e+06
Iteration  394/1000
  content loss: 3.81567e+06
    style loss: 1.184e+06
       tv loss: 186750
    total loss: 5.18642e+06
Iteration  395/1000
  content loss: 3.96297e+06
    style loss: 1.27722e+06
       tv loss: 187710
    total loss: 5.4279e+06
Iteration  396/1000
  content loss: 3.75209e+06
    style loss: 2.05621e+06
       tv loss: 186077
    total loss: 5.99438e+06
Iteration  397/1000
  content loss: 4.02701e+06
    style loss: 2.02237e+06
       tv loss: 188141
    total loss: 6.23752e+06
Iteration  398/1000
  content loss: 3.74005e+06
    style loss: 2.19279e+06
       tv loss: 186176
  

    style loss: 1.27038e+06
       tv loss: 181161
    total loss: 5.22222e+06
Iteration  457/1000
  content loss: 3.87638e+06
    style loss: 932179
       tv loss: 181957
    total loss: 4.99052e+06
Iteration  458/1000
  content loss: 3.86778e+06
    style loss: 986994
       tv loss: 181959
    total loss: 5.03674e+06
Iteration  459/1000
  content loss: 3.78743e+06
    style loss: 1.18749e+06
       tv loss: 181225
    total loss: 5.15615e+06
Iteration  460/1000
  content loss: 3.88265e+06
    style loss: 962425
       tv loss: 181980
    total loss: 5.02705e+06
Iteration  461/1000
  content loss: 3.8414e+06
    style loss: 922598
       tv loss: 181455
    total loss: 4.94546e+06
Iteration  462/1000
  content loss: 3.81184e+06
    style loss: 1.05342e+06
       tv loss: 181077
    total loss: 5.04633e+06
Iteration  463/1000
  content loss: 3.87164e+06
    style loss: 981237
       tv loss: 181659
    total loss: 5.03454e+06
Iteration  464/1000
  content loss: 3.83809e+06
    style 

    style loss: 896878
       tv loss: 172448
    total loss: 4.83857e+06
Iteration  524/1000
  content loss: 3.80067e+06
    style loss: 870009
       tv loss: 172674
    total loss: 4.84336e+06
Iteration  525/1000
  content loss: 3.7355e+06
    style loss: 1.00274e+06
       tv loss: 172029
    total loss: 4.91027e+06
Iteration  526/1000
  content loss: 3.81163e+06
    style loss: 939932
       tv loss: 172549
    total loss: 4.92411e+06
Iteration  527/1000
  content loss: 3.7383e+06
    style loss: 971075
       tv loss: 171944
    total loss: 4.88131e+06
Iteration  528/1000
  content loss: 3.79262e+06
    style loss: 895842
       tv loss: 172244
    total loss: 4.8607e+06
Iteration  529/1000
  content loss: 3.76246e+06
    style loss: 970598
       tv loss: 171949
    total loss: 4.90501e+06
Iteration  530/1000
  content loss: 3.76977e+06
    style loss: 971382
       tv loss: 171919
    total loss: 4.91307e+06
Iteration  531/1000
  content loss: 3.7736e+06
    style loss: 921040


  content loss: 3.74436e+06
    style loss: 3.82096e+06
       tv loss: 183833
    total loss: 7.74916e+06
Iteration  591/1000
  content loss: 4.22572e+06
    style loss: 6.65379e+06
       tv loss: 188020
    total loss: 1.10675e+07
Iteration  592/1000
  content loss: 3.67999e+06
    style loss: 1.07016e+07
       tv loss: 186761
    total loss: 1.45683e+07
Iteration  593/1000
  content loss: 4.09667e+06
    style loss: 4.4061e+06
       tv loss: 192886
    total loss: 8.69566e+06
Iteration  594/1000
  content loss: 4.08041e+06
    style loss: 4.1054e+06
       tv loss: 196928
    total loss: 8.38274e+06
Iteration  595/1000
  content loss: 3.82729e+06
    style loss: 4.67845e+06
       tv loss: 199018
    total loss: 8.70477e+06
Iteration  596/1000
  content loss: 4.2261e+06
    style loss: 4.17469e+06
       tv loss: 204088
    total loss: 8.60488e+06
Iteration  597/1000
  content loss: 4.03186e+06
    style loss: 3.8396e+06
       tv loss: 203918
    total loss: 8.07538e+06
Iteratio