# Style transfer using VGG16 network

* `A Neural Algorithm of Artistic Style`, [arXiv:1508.06576](https://arxiv.org/abs/1508.06576)
* Leon A. Gatys, Alexander S. Ecker, and Matthias Bethge


* `models/research/slim/nets`을 이용하여 만듦
* `image_size=224`로 고정
* 논문과는 달리 원래 vgg모델처럼 `max_pooling`을 사용 (논문은 `average_pooling` 사용)
  * `02_style_transfer.ipynb`에서는 vgg를 직접 수정하여 사용
* loss는 논문에 나온 그대로 사용 (조금 맘에 안들지만)
* hyperparameter들은 cs20 코드를 참조함
* input_image는 우리집 고양이
* style_image는 [Starry Night](https://en.wikipedia.org/wiki/The_Starry_Night)

In [None]:
#import sys
#sys.path.append("$HOME/models/research/slim/")

import os
import time

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from PIL import Image

import tensorflow as tf

# Short alias for the TF-Slim API.
slim = tf.contrib.slim

# Expose only GPU 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Session configuration with GPU memory growth enabled.
gpu_options = tf.GPUOptions(allow_growth=True)
sess_config = tf.ConfigProto(gpu_options=gpu_options)

### Hyperparameters setting

In [None]:
# Input image locations.
input_data_path = '../input_data/'
content_image_name = 'my_cat1.jpg'
style_image_name = 'Gogh_The_Starry_Night.jpg'

# Side length of the square input image; `L` is a short alias.
image_size = 224
L = image_size

# Share of the random image when blending it with the content image.
noise_ratio = 0.4

# Per-layer style weights, normalized so that they sum to one.
raw_style_weights = np.array([0.5, 1.0, 1.5, 3.0, 4.0])
style_loss_weight = raw_style_weights / raw_style_weights.sum()

# Relative weights of the content and style terms in the total loss.
content_weight = 1.0
style_weight = 100.0

# Optimization schedule.
learning_rate = 1.0
max_steps = 500
print_steps = 100

### Load a VGG16 graph

In [None]:
from nets import vgg

In [None]:
# Placeholders are used only to feed the preprocessed numpy images in.
image_shape = [1, L, L, 3]
content_image_p = tf.placeholder(tf.float32, image_shape)
style_image_p = tf.placeholder(tf.float32, image_shape)

# Variables holding the content image, the style image and the image
# that is being optimized.
content_image = tf.get_variable(
    name='content_image',
    shape=image_shape,
    initializer=tf.zeros_initializer())
style_image = tf.get_variable(
    name='style_image',
    shape=image_shape,
    initializer=tf.zeros_initializer())
noise_initializer = tf.random_uniform_initializer(minval=-20, maxval=20)
generated_image = tf.get_variable(
    name='generated_image',
    shape=image_shape,
    initializer=noise_initializer)

# Copy the fed placeholder values into the corresponding variables.
content_image_op = content_image.assign(content_image_p)
style_image_op = style_image.assign(style_image_p)

# The starting image is the content image blended with random noise.
noise_part = generated_image * noise_ratio
content_part = content_image_p * (1.0 - noise_ratio)
generated_image_op = generated_image.assign(noise_part + content_part)

# After each update the generated image is clipped to the range a
# mean-subtracted pixel in [0, 255] can take, channel by channel.
norm_means = np.array([123.68, 116.779, 103.939])
min_vals = -norm_means
max_vals = 255. - norm_means
clipped_image = tf.clip_by_value(generated_image,
                                 clip_value_min=min_vals,
                                 clip_value_max=max_vals)
generated_image_clipping = generated_image.assign(clipped_image)

In [None]:
# Build VGG16 three times (content, style, generated) inside one
# AUTO_REUSE scope and keep only the end-points dict of each call.
with tf.variable_scope('', reuse=tf.AUTO_REUSE) as scope:
  feature_maps_c, feature_maps_s, feature_maps_g = [
      vgg.vgg_16(network_input, num_classes=None, is_training=False)[1]
      for network_input in (content_image, style_image, generated_image)
  ]

In [None]:
# Dump the graph definition for TensorBoard. No session is needed for this:
# the default graph is written directly, which also avoids creating a
# session without `sess_config` before the training session exists.
# NOTE(review): a default-configured first session may initialize the GPU
# without allow_growth -- confirm against the TensorFlow GPU options docs.
writer = tf.summary.FileWriter("./graphs/01_style_transfer",
                               tf.get_default_graph())
writer.close()

## Build a model

### Collect feature maps

* content layers
  * `conv4_2`: key name -> 'vgg_16/conv4/conv4_2'
* style layers
  * `conv1_1`: key name -> 'vgg_16/conv1/conv1_1'
  * `conv2_1`: key name -> 'vgg_16/conv2/conv2_1'
  * `conv3_1`: key name -> 'vgg_16/conv3/conv3_1'
  * `conv4_1`: key name -> 'vgg_16/conv4/conv4_1'
  * `conv5_1`: key name -> 'vgg_16/conv5/conv5_1'

In [None]:
# End-point keys of the layers used for the content and style terms.
content_layer_name = 'vgg_16/conv4/conv4_2'
style_layer_names = [
    'vgg_16/conv1/conv1_1',
    'vgg_16/conv2/conv2_1',
    'vgg_16/conv3/conv3_1',
    'vgg_16/conv4/conv4_1',
    'vgg_16/conv5/conv5_1',
]

# Feature map of the content image at the content layer.
content_layers = feature_maps_c[content_layer_name]

# Feature maps of the style image at every style layer.
style_layers = [feature_maps_s[key] for key in style_layer_names]

# Feature maps of the generated image: the content layer first,
# followed by the style layers in the same order as `style_layers`.
generated_layers = [feature_maps_g[content_layer_name]]
generated_layers += [feature_maps_g[key] for key in style_layer_names]

### content loss

In [None]:
def content_loss(P, F, scope):
  """Calculate the content loss between the feature maps of the
  content image and of the generated image.

  The loss is the mean squared error over all feature-map elements.
  (The paper uses 0.5 * sum of squared differences instead; the mean
  differs from it only by a constant factor.)

  Args:
    P: the feature maps of the content image
    F: the feature maps of the generated image
    scope: name scope under which the loss ops are created

  Returns:
    loss: content loss (mean squared error, scalar Tensor)
  """
  assert F.shape == P.shape
  with tf.name_scope(scope, default_name='content_loss'):
    # The squared error is symmetric, so which tensor is passed as
    # `labels` does not change the value.
    loss = tf.losses.mean_squared_error(labels=P, predictions=F)
  return loss

### style loss

In [None]:
def style_loss(style_layers, generated_layers, scope):
  """Calculate the style loss between the gram matrices of the feature
  maps of the style image and of the generated image.

  The per-layer losses are combined with the module-level
  `style_loss_weight` array (one weight per layer, in layer order).

  Args:
    style_layers: list of the feature maps of the style image
    generated_layers: list of the feature maps of the generated image
    scope: name scope under which the loss ops are created

  Returns:
    loss: weighted sum over layers of the mean squared error between
      the gram matrices
  """
  def _gram_matrix(feature_map):
    """Calculate the gram matrix for the feature map.

    Args:
      feature_map: 4-rank Tensor [1, height, width, channels]
        - F = 2-rank Tensor [h * w, channels]

    Returns:
      gram_matrix: 2-rank Tensor [c, c] (F.transpose x F), divided by h * w
    """
    _, h, w, c = feature_map.get_shape().as_list()
    # Flatten the spatial positions; the batch dimension is 1, so it
    # is absorbed by the reshape without a separate squeeze.
    F = tf.reshape(feature_map, [h * w, c])
    # Normalize by the number of spatial positions.
    gram_matrix = tf.matmul(tf.transpose(F), F) / (h * w)
    return gram_matrix

  def _style_loss_one_layer(feature_map_s, feature_map_g):
    """Calculate the style loss for one layer.

    Args:
      feature_map_s: the feature map of the style image
        - G: the gram matrix of the feature_map_s
      feature_map_g: the feature map of the generated image
        - A: the gram matrix of the feature_map_g

    Returns:
      loss: style loss for one layer (mean squared error of G and A)
    """
    assert feature_map_s.shape == feature_map_g.shape
    G = _gram_matrix(feature_map_s)
    A = _gram_matrix(feature_map_g)
    return tf.losses.mean_squared_error(G, A)

  assert len(style_layers) == len(generated_layers)

  with tf.name_scope(scope, default_name='style_loss'):
    loss = 0.0
    layer_pairs = zip(style_layers, generated_layers)
    for i, (feature_map_s, feature_map_g) in enumerate(layer_pairs):
      loss_one = _style_loss_one_layer(feature_map_s, feature_map_g)
      loss += loss_one * style_loss_weight[i]

  return loss

### Total loss

In [None]:
# Content term: content image vs. generated image at the content layer
# (the first entry of `generated_layers`).
loss_c = content_loss(P=content_layers,
                      F=generated_layers[0],
                      scope='content_loss')

# Style term: style image vs. generated image at the style layers
# (the remaining entries of `generated_layers`).
loss_s = style_loss(style_layers=style_layers,
                    generated_layers=generated_layers[1:],
                    scope='style_loss')

# Weighted sum of both terms.
with tf.variable_scope('total_loss'):
  weighted_content = content_weight * loss_c
  weighted_style = style_weight * loss_s
  total_loss = weighted_content + weighted_style

### Define an optimizer

In [None]:
# Only the generated image is optimized; the VGG16 weights are not in
# `var_list`, so this op does not update them.
opt = tf.train.AdamOptimizer(learning_rate=learning_rate)
# `var_list` is documented as a list/tuple of variables, so wrap the
# single variable explicitly.
train_op = opt.minimize(total_loss, var_list=[generated_image])

### Restore VGG16 weights using `tf.train.Saver.restore`

### Download the VGG16 checkpoint: 

```
$ CHECKPOINT_DIR='./checkpoints'
$ mkdir ${CHECKPOINT_DIR}
$ cd ${CHECKPOINT_DIR}
$ wget http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz
$ tar -xvf vgg_16_2016_08_28.tar.gz
$ rm vgg_16_2016_08_28.tar.gz
```

### Read an image

In [None]:
def vgg_preprocessing(image):
  """vgg image preprocessing
  The image is converted to RGB, resized to 224 x 224 and the
  per-channel means are subtracted.

  _R_MEAN = 123.68
  _G_MEAN = 116.779
  _B_MEAN = 103.939

  Args:
    image (PIL image): image with shape [height, width, channels]

  Returns:
    image (np.float32): np.array with shape [1, 224, 224, 3] applied by mean_image_subtraction
  """
  # Force three channels so grayscale / RGBA / CMYK inputs do not break
  # the channel-wise arithmetic or the [1, 224, 224, 3] shape.
  image = image.convert('RGB').resize((224, 224))
  # np.array (not asarray) gives a fresh writable float32 copy.
  pixels = np.array(image, dtype=np.float32)
  # Channel order is R, G, B; the means broadcast over the last axis.
  pixels -= np.array([123.68, 116.779, 103.939], dtype=np.float32)
  return np.expand_dims(pixels, axis=0)

In [None]:
# Join directory and file name as separate components so the path is
# correct whether or not `input_data_path` ends with a separator.
content_image_ = Image.open(os.path.join(input_data_path, content_image_name))
style_image_ = Image.open(os.path.join(input_data_path, style_image_name))

# Convert the PIL images into mean-subtracted arrays for the network.
content_image_ = vgg_preprocessing(content_image_)
style_image_ = vgg_preprocessing(style_image_)

In [None]:
def print_image(image):
  """Display a mean-subtracted image with matplotlib.

  The per-channel means are added back and the result is clipped to
  [0, 255] before plotting. The input array is left unchanged.

  Args:
    image: 4-rank np.array [1, h, w, 3]
  """
  # np.squeeze returns a view of the caller's array; astype makes a
  # copy so that adding the means back does not modify the input.
  pixels = np.squeeze(image, axis=0).astype(np.float32)
  pixels += np.array([123.68, 116.779, 103.939], dtype=np.float32)
  pixels = np.clip(pixels, 0, 255).astype('uint8')

  plt.axis('off')
  plt.imshow(pixels)
  plt.show()

In [None]:
# Trainable variables under the 'vgg_16' scope; these are the variables
# handed to the Saver that restores the pretrained checkpoint.
v = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='vgg_16')

In [None]:
# The saver only handles the VGG16 variables collected in `v`.
saver = tf.train.Saver(var_list=v)

with tf.Session(config=sess_config) as sess:
  sess.run(tf.global_variables_initializer())
  # Load the images into their variables and build the initial image.
  sess.run([content_image_op, style_image_op, generated_image_op],
           feed_dict={content_image_p: content_image_,
                      style_image_p: style_image_})

  # Fetch the value returned by the clipping assign itself; reading the
  # variable in the same run call could return the unclipped value.
  generated_image_ = sess.run(generated_image_clipping)
  print_image(content_image_)
  print_image(style_image_)
  print_image(generated_image_) # initial_image = content_image + small noise

  # use saver object to load variables from the saved model
  saver.restore(sess, "../checkpoints/vgg_16.ckpt")

  start_time = time.time()
  for step in range(max_steps):
    # The order in which fetches of one run call are evaluated is not
    # defined, so the update and the clipping are run separately: first
    # the optimizer step, then the clip of the updated image.
    _, loss_, loss_c_, loss_s_ = \
        sess.run([train_op, total_loss, loss_c, loss_s])
    generated_image_ = sess.run(generated_image_clipping)
    if (step+1) % print_steps == 0:
      duration = time.time() - start_time
      start_time = time.time()
      print("step: {}  total_loss: {}  loss_c: {}  loss_s: {}  duration: {}".format((step+1), loss_, loss_c_, loss_s_, duration))
      print_image(generated_image_)

  print('training done!')

In [None]:
def save_image(image, content_image_name, style_image_name):
  """Save a mean-subtracted image as a JPEG file.

  The per-channel means are added back and the result is clipped to
  [0, 255]. The file is written to the current working directory as
  '<content name>_<style name>.jpg' (names without directory and
  extension). The input array is left unchanged.

  Args:
    image: 4-rank np.array [1, h, w, 3]
    content_image_name: (string) filename of content image
    style_image_name: (string) filename of style image
  """
  # np.squeeze returns a view of the caller's array; astype makes a
  # copy so that adding the means back does not modify the input.
  pixels = np.squeeze(image, axis=0).astype(np.float32)
  pixels += np.array([123.68, 116.779, 103.939], dtype=np.float32)
  pixels = np.clip(pixels, 0, 255).astype('uint8')

  # The pixels are already uint8 in [0, 255]; scaling them by 255 again
  # would wrap around modulo 256 and corrupt the colours.
  pil_image = Image.fromarray(pixels)

  content_stem = os.path.splitext(os.path.basename(content_image_name))[0]
  style_stem = os.path.splitext(os.path.basename(style_image_name))[0]
  filename = content_stem + '_' + style_stem + '.jpg'
  pil_image.save(filename)

In [None]:
# Write the last fetched generated image to disk; the output file name
# is derived from the content and style image names.
save_image(generated_image_, content_image_name, style_image_name)