In [1]:
!nvidia-smi

Fri Aug 23 18:05:15 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 430.40       Driver Version: 410.79       CUDA Version: 10.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   73C    P0    70W / 149W |      0MiB / 11441MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [2]:
from keras.preprocessing.image import load_img, save_img, img_to_array
from scipy.optimize import fmin_l_bfgs_b
import numpy as np
import time

from keras.applications import vgg19
from keras import backend as K

Using TensorFlow backend.


In [0]:
base_image_path = '/content/puc.jpg'
style_reference_image_path = '/content/van_gogh.jpg'
result_prefix = 'b'

In [0]:
iterations = 10
# these are the weights of the different loss components
total_variation_weight = 1.0 
style_weight = 1.0 
content_weight = 0.025

In [0]:
# dimensions of the generated picture - redimension to fixed height (400) and 
# proportional width
width, height = load_img(base_image_path).size
img_nrows = 400
img_ncols = int(width * img_nrows / height)

In [0]:
# util function to open, resize and format pictures into appropriate tensors

def preprocess_image(image_path):
    img = load_img(image_path, target_size=(img_nrows, img_ncols))
    img = img_to_array(img)
    # add +1 dimension on array for working with batch of images
    img = np.expand_dims(img, axis=0)
    # preprocesses an numpy or tensor array encoding a batch of images.
    # will convert the images from RGB to BGR,
    # then will zero-center each color channel with
    # respect to the ImageNet dataset,
    # without scaling
    img = vgg19.preprocess_input(img)
    return img

In [0]:
# util function to convert a tensor into a valid image

def deprocess_image(x):
    if K.image_data_format() == 'channels_first':                               
        x = x.reshape((3, img_nrows, img_ncols))
        x = x.transpose((1, 2, 0))
    else:
        x = x.reshape((img_nrows, img_ncols, 3))
    # Remove zero-center by mean pixel
    x[:, :, 0] += 103.939
    x[:, :, 1] += 116.779
    x[:, :, 2] += 123.68
    # 'BGR'->'RGB'
    x = x[:, :, ::-1]
    x = np.clip(x, 0, 255).astype('uint8')
    return x

In [0]:
# get tensor representations of our images
base_image = K.variable(preprocess_image(base_image_path))
style_reference_image = K.variable(preprocess_image(style_reference_image_path))

In [9]:
# this will contain our generated image
if K.image_data_format() == 'channels_first':
    combination_image = K.placeholder((1, 3, img_nrows, img_ncols))
else:
    combination_image = K.placeholder((1, img_nrows, img_ncols, 3))

W0823 18:05:18.021585 140117651953536 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.



In [0]:
# combine the 3 images into a single Keras tensor
input_tensor = K.concatenate([base_image,
                              style_reference_image,
                              combination_image], axis=0)

In [11]:
# build the VGG19 network with our 3 images as input
# the model will be loaded with pre-trained ImageNet weights.
# we also scrap the top layers. 
# the top layers are the layers that involve flattening the network so that 
# we can create a couple dense layers that output classification.
# we don't need these here, we only need layers involving convolutions, 
# so we toss those top layers away
model = vgg19.VGG19(input_tensor=input_tensor,
                    weights='imagenet', include_top=False)

print('Model loaded.')

model.layers

W0823 18:05:18.055385 140117651953536 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0823 18:05:18.058183 140117651953536 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0823 18:05:18.100676 140117651953536 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3976: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.

W0823 18:05:18.730839 140117651953536 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.

W0823 18:05:18.732833 140117651953536 d

Model loaded.


[<keras.engine.input_layer.InputLayer at 0x7f6f6029c710>,
 <keras.layers.convolutional.Conv2D at 0x7f6f6028ec88>,
 <keras.layers.convolutional.Conv2D at 0x7f6f6028ea90>,
 <keras.layers.pooling.MaxPooling2D at 0x7f6f8b437320>,
 <keras.layers.convolutional.Conv2D at 0x7f6f602a5ef0>,
 <keras.layers.convolutional.Conv2D at 0x7f6f5f9fd6d8>,
 <keras.layers.pooling.MaxPooling2D at 0x7f6f5fa19a90>,
 <keras.layers.convolutional.Conv2D at 0x7f6f5fa192e8>,
 <keras.layers.convolutional.Conv2D at 0x7f6f5f9c3550>,
 <keras.layers.convolutional.Conv2D at 0x7f6f5f9c3f28>,
 <keras.layers.convolutional.Conv2D at 0x7f6f5f975e48>,
 <keras.layers.pooling.MaxPooling2D at 0x7f6f5f9a7048>,
 <keras.layers.convolutional.Conv2D at 0x7f6f5f988978>,
 <keras.layers.convolutional.Conv2D at 0x7f6f5f9401d0>,
 <keras.layers.convolutional.Conv2D at 0x7f6f5f952a20>,
 <keras.layers.convolutional.Conv2D at 0x7f6f5f8eaef0>,
 <keras.layers.pooling.MaxPooling2D at 0x7f6f5f91be10>,
 <keras.layers.convolutional.Conv2D at 0x7f6f5

# VGG19 Model

#![Image of VGG19](https://www.researchgate.net/profile/Clifford_Yang/publication/325137356/figure/fig2/AS:670371271413777@1536840374533/llustration-of-the-network-architecture-of-VGG-19-model-conv-means-convolution-FC-means.jpg)

In [12]:
# get the symbolic outputs of each "key" layer (we gave them unique names).
outputs_dict = dict([(layer.name, layer.output) for layer in model.layers])
outputs_dict

{'block1_conv1': <tf.Tensor 'block1_conv1/Relu:0' shape=(3, 400, 615, 64) dtype=float32>,
 'block1_conv2': <tf.Tensor 'block1_conv2/Relu:0' shape=(3, 400, 615, 64) dtype=float32>,
 'block1_pool': <tf.Tensor 'block1_pool/MaxPool:0' shape=(3, 200, 307, 64) dtype=float32>,
 'block2_conv1': <tf.Tensor 'block2_conv1/Relu:0' shape=(3, 200, 307, 128) dtype=float32>,
 'block2_conv2': <tf.Tensor 'block2_conv2/Relu:0' shape=(3, 200, 307, 128) dtype=float32>,
 'block2_pool': <tf.Tensor 'block2_pool/MaxPool:0' shape=(3, 100, 153, 128) dtype=float32>,
 'block3_conv1': <tf.Tensor 'block3_conv1/Relu:0' shape=(3, 100, 153, 256) dtype=float32>,
 'block3_conv2': <tf.Tensor 'block3_conv2/Relu:0' shape=(3, 100, 153, 256) dtype=float32>,
 'block3_conv3': <tf.Tensor 'block3_conv3/Relu:0' shape=(3, 100, 153, 256) dtype=float32>,
 'block3_conv4': <tf.Tensor 'block3_conv4/Relu:0' shape=(3, 100, 153, 256) dtype=float32>,
 'block3_pool': <tf.Tensor 'block3_pool/MaxPool:0' shape=(3, 50, 76, 256) dtype=float32>,
 

# Compute the neural style loss

## Gram Matrix
is the dot product (aka "produto escalar") between the feature matrix and its transpose.

![Gram matrix calculation](http://ankitmathur.me/classes/final_files/image04.jpg)

The terms of this matrix are proportional to the covariances of corresponding sets of features, and thus captures information about which features tend to activate together. By only capturing these aggregate statistics across the image, they are blind to the specific arrangement of objects inside the image. **This is what allows them to capture information about style independent of content.**


In [0]:
# the gram matrix of an image tensor (feature-wise outer product)

def gram_matrix(x):
    assert K.ndim(x) == 3
    if K.image_data_format() == 'channels_first':                                
        features = K.batch_flatten(x)
    else:
        features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))    
    gram = K.dot(features, K.transpose(features)) 
    return gram

## Style Loss

is the (scaled, squared) Frobenius norm of the difference between the Gram matrices of the style and combination images.

![Style loss](http://ankitmathur.me/classes/final_files/image05.jpg)


This function is designed to **maintain the style of the reference image in the generated image**.
It's based on the gram matrices (which capture style) of feature maps from the style reference image and from the generated image.




In [0]:
def style_loss(style, combination):
    assert K.ndim(style) == 3
    assert K.ndim(combination) == 3
    S = gram_matrix(style)
    C = gram_matrix(combination)
    channels = 3
    size = img_nrows * img_ncols
    return K.sum(K.square(S - C)) / (4.0 * (channels ** 2) * (size ** 2))

## Content Loss

is the (scaled, squared) Euclidean distance between feature representations of the content and combination images.

![Content loss equation](http://ankitmathur.me/classes/final_files/image02.jpg)

This function is designed to **maintain the "content" of the base image in the generated image**.


In [0]:
def content_loss(base, combination):
    return K.sum(K.square(combination - base))

## Total Variation Loss

was designed to keep the generated image locally coherent, encouraging spatial smoothness

In [0]:
def total_variation_loss(x):
    assert K.ndim(x) == 4
    if K.image_data_format() == 'channels_first':
        a = K.square(
            x[:, :, :img_nrows - 1, :img_ncols - 1] - x[:, :, 1:, :img_ncols - 1])
        b = K.square(
            x[:, :, :img_nrows - 1, :img_ncols - 1] - x[:, :, :img_nrows - 1, 1:])
    else:
        a = K.square(
            x[:, :img_nrows - 1, :img_ncols - 1, :] - x[:, 1:, :img_ncols - 1, :])
        b = K.square(
            x[:, :img_nrows - 1, :img_ncols - 1, :] - x[:, :img_nrows - 1, 1:, :])
    return K.sum(K.pow(a + b, 1.25))

## Combining Loss Functions

So now it's time to merge all the loss functions together. **We'll assign each of them a weight that determines how much each will influence the overall cost**. Assigning heavier loss to the style will result in more style, whereas heavier content loss will result in the output being more true to the original content.

In [17]:
loss = K.variable(0.0)
layer_features = outputs_dict['block5_conv2']

# content loss just gets the content for the two features for calculating
base_image_features = layer_features[0, :, :, :]
combination_features = layer_features[2, :, :, :]
loss += content_weight * content_loss(base_image_features,
                                      combination_features)

# style loss actually iterates through the layers, and multiplies by the 
# weighting per layer here
feature_layers = ['block1_conv1', 'block2_conv1',
                  'block3_conv1', 'block4_conv1',
                  'block5_conv1']
for layer_name in feature_layers:
    layer_features = outputs_dict[layer_name]
    style_reference_features = layer_features[1, :, :, :]
    combination_features = layer_features[2, :, :, :]
    sl = style_loss(style_reference_features, combination_features)
    loss += (style_weight / len(feature_layers)) * sl
    
# total loss
loss += total_variation_weight * total_variation_loss(combination_image) 

W0823 18:05:19.747708 140117651953536 variables.py:2429] Variable += will be deprecated. Use variable.assign_add if you want assignment to the variable value or 'x = x + y' if you want a new python Tensor object.


In [18]:
# define gradients of the total loss relative to the combination image, 
# and use these gradients to iteratively improve upon our combination image 
# to minimise the loss

grads = K.gradients(loss, combination_image)

outputs = [loss]
if isinstance(grads, (list, tuple)):
    outputs += grads
else:
    outputs.append(grads)

f_outputs = K.function([combination_image], outputs)

W0823 18:05:20.029025 140117651953536 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_grad.py:1205: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [0]:
def eval_loss_and_grads(x):
    if K.image_data_format() == 'channels_first':
        x = x.reshape((1, 3, img_nrows, img_ncols))
    else:
        x = x.reshape((1, img_nrows, img_ncols, 3))
    outs = f_outputs([x])
    loss_value = outs[0]
    if len(outs[1:]) == 1:
        grad_values = outs[1].flatten().astype('float64')
    else:
        grad_values = np.array(outs[1:]).flatten().astype('float64')
    return loss_value, grad_values

## ``Evaluator`` class

makes it possible to compute loss and gradients in one pass while retrieving them via two separate functions, "loss" and "grads". This is done because ``scipy.optimize`` requires separate functions for loss and gradients, but computing them separately would be inefficient.

In [0]:
class Evaluator(object):

    def __init__(self):
        self.loss_value = None
        self.grads_values = None

    def loss(self, x):
        assert self.loss_value is None
        loss_value, grad_values = eval_loss_and_grads(x)
        self.loss_value = loss_value
        self.grad_values = grad_values
        return self.loss_value

    def grads(self, x):
        assert self.loss_value is not None
        grad_values = np.copy(self.grad_values)
        self.loss_value = None
        self.grad_values = None
        return grad_values

In [21]:
evaluator = Evaluator()

# run scipy-based optimization (L-BFGS) over the pixels of the generated image
# so as to minimize the neural style loss
x = preprocess_image(base_image_path)

for i in range(iterations):
    print('Start of iteration', i)
    start_time = time.time()
    x, min_val, info = fmin_l_bfgs_b(evaluator.loss, x.flatten(),
                                     fprime=evaluator.grads, maxfun=20)
    print('Current loss value:', min_val)
    # save current generated image
    img = deprocess_image(x.copy())
    fname = result_prefix + '_at_iteration_%d.png' % i
    save_img(fname, img)
    end_time = time.time()
    print('Image saved as', fname)
    print('Iteration %d completed in %ds' % (i, end_time - start_time))

Start of iteration 0
Current loss value: 4132342300.0
Image saved as b_at_iteration_0.png
Iteration 0 completed in 22s
Start of iteration 1
Current loss value: 2777454000.0
Image saved as b_at_iteration_1.png
Iteration 1 completed in 17s
Start of iteration 2
Current loss value: 2350677500.0
Image saved as b_at_iteration_2.png
Iteration 2 completed in 17s
Start of iteration 3
Current loss value: 2169016000.0
Image saved as b_at_iteration_3.png
Iteration 3 completed in 17s
Start of iteration 4
Current loss value: 2068068200.0
Image saved as b_at_iteration_4.png
Iteration 4 completed in 17s
Start of iteration 5
Current loss value: 1995752000.0
Image saved as b_at_iteration_5.png
Iteration 5 completed in 17s
Start of iteration 6
Current loss value: 1951018100.0
Image saved as b_at_iteration_6.png
Iteration 6 completed in 17s
Start of iteration 7
Current loss value: 1914645100.0
Image saved as b_at_iteration_7.png
Iteration 7 completed in 17s
Start of iteration 8
Current loss value: 1884814

## Reference 

https://www.researchgate.net/figure/llustration-of-the-network-architecture-of-VGG-19-model-conv-means-convolution-FC-means_fig2_325137356

https://machinelearningmastery.com/a-gentle-introduction-to-channels-first-and-channels-last-image-formats-for-deep-learning/

https://towardsdatascience.com/neural-networks-intuitions-2-dot-product-gram-matrix-and-neural-style-transfer-5d39653e7916

https://machinelearningmastery.com/how-to-load-convert-and-save-images-with-the-keras-api/

https://github.com/keras-team/keras-applications/blob/master/keras_applications/vgg19.py

https://github.com/hunter-heidenreich/ML-Open-Source-Implementations

https://www.quora.com/What-is-the-Gram-Matrix-of-a-set-of-images

https://harishnarayanan.org/writing/artistic-style-transfer/

https://www.tensorflow.org/api_docs/python/tf/keras

https://keras.io/examples/neural_style_transfer/