In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
%matplotlib inline
# Notebook-wide matplotlib defaults: every new figure is 12 x 12 inches
mpl.rcParams['figure.figsize'] = (12,12)
# ...and axes are drawn without grid lines unless a plot enables them explicitly
mpl.rcParams['axes.grid'] = False
import time
import IPython.display as display
import PIL.Image

import tensorflow as tf
from keras.preprocessing import image

In [None]:
def load_image(input_path, size):
    """Load an image file, convert it to an array and normalise it.

    Args:
        input_path: Path of the image file to read.
        size: Forwarded to ``load_img`` as ``target_size``.

    Returns:
        The pixel array with a leading axis of length 1 prepended and every
        value divided by 255.
    """
    keras_image = tf.keras.preprocessing.image
    pixels = keras_image.img_to_array(keras_image.load_img(input_path, target_size=size))
    # Prepend the batch axis, then scale the 0-255 pixel values in place
    batch = pixels[np.newaxis, ...]
    batch /= 255
    return batch


def tensor_to_image(tensor):
    """Convert an image tensor with values in [0, 1] back into a PIL image.

    Args:
        tensor: An object exposing ``.numpy()`` (for example a ``tf.Tensor``
            or ``tf.Variable``), or anything ``np.asarray`` accepts. It may
            carry a leading batch axis of length 1.

    Returns:
        The ``PIL.Image.Image`` built from the 8-bit pixel data.

    Raises:
        AssertionError: If the input has more than 3 dimensions and its first
            dimension is not 1.
    """
    pixels = tensor.numpy() if hasattr(tensor, 'numpy') else np.asarray(tensor)
    # Scale to 0-255 and saturate before the cast: casting an out-of-range float
    # straight to uint8 wraps around (e.g. 382 -> 126) and produces speckled
    # pixels instead of pure black/white.
    pixels = np.clip(pixels * 255, 0, 255).astype(np.uint8)
    if np.ndim(pixels) > 3:
        # Only a single-image batch can be turned into one picture
        assert pixels.shape[0] == 1, 'expected a batch containing exactly one image'
        pixels = pixels[0]
    return PIL.Image.fromarray(pixels)

In [None]:
class StyleContentModel():
    """Feature extractor that returns style and content representations from VGG19.

    Calling an instance on an image batch yields a dict with two entries:
    ``'style'`` maps each style layer name to the Gram matrix of that layer's
    output, and ``'content'`` maps each content layer name to its raw output.
    """

    def __init__(self):
        # Names of the VGG19 layers whose outputs are used
        self.style_layers = ['block1_conv1', 'block2_conv1', 'block3_conv1', 'block4_conv1', 'block5_conv1']
        self.content_layers = ['block5_conv2']

        self.num_style_layers = len(self.style_layers)
        self.vgg = self.get_vgg_model()
        self.vgg.trainable = False

    def __call__(self, inputs):
        """Run ``inputs`` through VGG19 and collect the style/content features.

        ``inputs`` is multiplied by 255 before the VGG19 preprocessing step
        (``load_image`` in this notebook produces values divided by 255).
        """
        scaled = inputs * 255
        features = self.vgg(tf.keras.applications.vgg19.preprocess_input(scaled))

        # The model outputs are ordered style layers first, content layers after
        split_at = self.num_style_layers
        style_grams = [self.gram_matrix(feature) for feature in features[:split_at]]
        content_features = features[split_at:]

        return {
            'style': dict(zip(self.style_layers, style_grams)),
            'content': dict(zip(self.content_layers, content_features)),
        }

    def get_vgg_model(self):
        """Build a Keras model exposing the chosen VGG19 layer outputs.

        VGG19 is created through the Keras API with ImageNet weights and without
        its top layers; the returned model outputs the style layers followed by
        the content layers.
        """
        backbone = tf.keras.applications.vgg19.VGG19(include_top=False, weights='imagenet')
        backbone.trainable = False

        layer_names = self.style_layers + self.content_layers
        layer_outputs = [backbone.get_layer(layer_name).output for layer_name in layer_names]
        return tf.keras.Model(backbone.input, layer_outputs)

    def gram_matrix(self, input_tensor):
        """Compute the Gram matrix of a rank-4 feature tensor.

        The last axis is correlated with itself while axes 1 and 2 are summed
        out (presumably height and width), and the result is divided by the
        product of those two axis lengths. The output has shape
        (batch, channels, channels).
        """
        shape = tf.shape(input_tensor)
        num_locations = tf.cast(shape[1] * shape[2], tf.float32)
        channel_products = tf.linalg.einsum('bijc,bijd->bcd', input_tensor, input_tensor)
        return channel_products / num_locations

In [None]:
def compute_loss(model, base_image, style_targets, content_targets, style_weight, content_weight):
    """Compute the total loss of ``base_image`` against the style and content targets.

    Args:
        model: Callable returning ``{'style': {...}, 'content': {...}}`` for an image.
        base_image: The image currently being optimised.
        style_targets: Target value per style layer name.
        content_targets: Target value per content layer name.
        style_weight: Weight applied to the averaged style loss.
        content_weight: Weight applied to the averaged content loss.

    Returns:
        Tuple ``(total_loss, style_loss, content_loss)`` where the total is the
        sum of the two weighted parts.
    """
    def summed_mse(outputs, targets):
        # Mean squared difference per layer, added up over all layers
        return tf.add_n([tf.reduce_mean((outputs[name] - targets[name]) ** 2) for name in outputs])

    features = model(base_image)
    style_features, content_features = features['style'], features['content']

    # Dividing the weight by the layer count turns the per-layer sum into an average
    style_loss = summed_mse(style_features, style_targets) * (style_weight / len(style_features))
    content_loss = summed_mse(content_features, content_targets) * (content_weight / len(content_features))

    return style_loss + content_loss, style_loss, content_loss

@tf.function()
def compute_grads(params):
    """Compute the gradient of the total loss with respect to the base image.

    Args:
        params: Keyword arguments for ``compute_loss``; ``params['base_image']``
            is the object the gradient is taken with respect to.

    Returns:
        Tuple ``(grads, losses)`` where ``losses`` is exactly what
        ``compute_loss`` returned and its first element is the differentiated loss.
    """
    with tf.GradientTape() as recorder:
        losses = compute_loss(**params)

    total_loss = losses[0]
    image_grads = recorder.gradient(total_loss, params['base_image'])
    return image_grads, losses

In [None]:
def run_style_transfer(style_path, content_path, num_iteration, style_weight, content_weight, display_interval):
    """Optimise a noise image towards the content of one image and the style of another.

    Args:
        style_path: Path of the style image.
        content_path: Path of the content image; its size fixes the output size.
        num_iteration: Maximum number of optimisation steps.
        style_weight: Weight of the style loss.
        content_weight: Weight of the content loss.
        display_interval: Every this many steps the current image is shown,
            saved as ``output_<step>.jpg`` and the losses are printed.

    Returns:
        A ``tf.Variable`` holding a copy of the image that produced the lowest
        total loss, or ``None`` if no step recorded a loss (for example
        ``num_iteration == 0``).

    Side effects:
        Writes ``output_<step>.jpg`` files to the working directory and displays
        intermediate images, the last optimised image and the total run time.
    """
    # PIL reports (width, height); the arrays below are laid out (height, width).
    # tf.keras is used here so the lookup matches load_image().
    size = tf.keras.preprocessing.image.load_img(content_path).size[::-1]
    noise_image = np.random.uniform(-20, 20, (1, size[0], size[1], 3)).astype(np.float32) / 255
    content_image = load_image(content_path, size)
    style_image = load_image(style_path, size)

    model = StyleContentModel()
    style_targets = model(style_image)['style']
    content_targets = model(content_image)['content']

    # A noise image is used as the starting point of the generated image.
    # The content image or the style image could be used as the base instead.
    base_image = tf.Variable(noise_image)

    opt = tf.optimizers.Adam(learning_rate=0.02, beta_1=0.99, epsilon=1e-1)

    params = {
        'model': model,
        'base_image': base_image,
        'style_targets': style_targets,
        'content_targets': content_targets,
        'style_weight': style_weight,
        'content_weight': content_weight
    }

    best_loss = float('inf')
    best_snapshot = None

    start = time.time()
    for i in range(num_iteration):
        grads, all_loss = compute_grads(params)
        loss, style_loss, content_loss = (float(value) for value in all_loss)

        # The loss was measured on the image *before* this step's update, so the
        # copy has to be taken now. Keeping a reference to `base_image` itself
        # would just alias the variable that keeps changing.
        if loss < best_loss:
            best_loss = loss
            best_snapshot = tf.identity(base_image)

        opt.apply_gradients([(grads, base_image)])
        # Pixels are normalised to [0, 1] (load_image divides by 255 and the
        # model multiplies by 255 again), so that is the valid range to clip to.
        base_image.assign(tf.clip_by_value(base_image, clip_value_min=0.0, clip_value_max=1.0))

        # Stop optimising as soon as the loss no longer decreases
        if loss > best_loss:
            tensor_to_image(base_image).save('output_' + str(i+1) + '.jpg')
            break

        if (i + 1) % display_interval == 0:
            progress_image = tensor_to_image(base_image)
            display.clear_output(wait=True)
            display.display(progress_image)
            progress_image.save('output_' + str(i+1) + '.jpg')
            print(f'Train step: {i+1}')
            print('Total loss: {:.4e}, Style loss: {:.4e}, Content loss: {:.4e}'.format(loss, style_loss, content_loss))

    elapsed = time.time() - start
    display.clear_output(wait=True)
    display.display(tensor_to_image(base_image))
    # Printed after the final display: clear_output(wait=True) would otherwise
    # erase this line as soon as the image is rendered.
    print('Total time: {:.4f}s'.format(elapsed))

    if best_snapshot is None:
        return None
    return tf.Variable(best_snapshot)

In [None]:
# Input images: the style source and the content source
style_path = '../input/neural-image-transfer/StarryNight.jpg'
content_path = '../input/neural-image-transfer/FlindersStStation.jpg'

# Optimisation settings: step budget / reporting cadence, then the loss weights
num_iteration, display_interval = 5000, 100
style_weight, content_weight = 1e-2, 1e3

best_image = run_style_transfer(
    style_path=style_path,
    content_path=content_path,
    num_iteration=num_iteration,
    style_weight=style_weight,
    content_weight=content_weight,
    display_interval=display_interval,
)