### Styletransfer mit Keras

Voraussetzung:
https://dl.dropboxusercontent.com/u/108721752/imagenet-vgg-verydeep-19.mat

In [None]:
import scipy.io
import scipy.misc
import tensorflow as tf
import numpy as np
import time
from IPython.display import Image
from functools import reduce

In [None]:
import keras

from keras.models import Sequential, Model
from keras.layers.core import Flatten, Dense, Dropout, Activation
from keras.layers.convolutional import Conv2D, MaxPooling2D, ZeroPadding2D
from keras.optimizers import SGD
from keras.utils.vis_utils import plot_model

In [None]:
%matplotlib inline
import matplotlib.image as mpimg
import matplotlib.pyplot as plt

#### Quell- und Stilbild

In [None]:
# File names of the two input images: one is loaded as the content image,
# the other as the style image.
content_file_name = 'london.jpg'
style_file_name = '1-style.jpg'

In [None]:
# Read the content image from disk and show it as loaded.
image_content = scipy.misc.imread(content_file_name)
plt.figure(figsize=(11, 11))
plt.imshow(image_content)
# Convert to float32 and prepend an axis of length 1 in front of the image axes.
image_content = image_content.astype('float32').reshape(
    (1,) + image_content.shape)

In [None]:
# Read the style image from disk and show it as loaded.
image_style = scipy.misc.imread(style_file_name)
plt.figure(figsize=(11, 11))
plt.imshow(image_style)
# Convert to float32 and prepend an axis of length 1 in front of the image axes.
image_style = image_style.astype('float32').reshape(
    (1,) + image_style.shape)

In [None]:
# Print the shapes of both image arrays after the leading axis was added.
print(image_content.shape)
print(image_style.shape)

#### VGG19 laden

In [None]:
# Load the VGG-19 .mat file; `data` is indexed by 'layers' and 'normalization'
# further down in this notebook.
data = scipy.io.loadmat('./imagenet-vgg-verydeep-19.mat')

#### Ein paar Hilfsfunktionen für das KNN

In [None]:
def _conv_layer(input, weights, bias):
    """Convolve `input` with fixed `weights` and add `bias`.

    The kernel is embedded as a TensorFlow constant; the convolution uses
    stride 1 in every dimension and 'SAME' padding.
    """
    kernel = tf.constant(weights)
    convolved = tf.nn.conv2d(
        input, kernel, strides=(1, 1, 1, 1), padding='SAME')
    return tf.nn.bias_add(convolved, bias)

def _pool_layer(input):
    """Apply max pooling with a (1, 2, 2, 1) window and stride, 'SAME' padding."""
    window = (1, 2, 2, 1)
    return tf.nn.max_pool(input, ksize=window, strides=window, padding='SAME')

def preprocess(image, mean_pixel):
    """Return `image` minus `mean_pixel`, cast to float32."""
    centered = image - mean_pixel
    return centered.astype(np.float32)

def unprocess(image, mean_pixel):
    """Return `image` plus `mean_pixel`, cast to float32 (inverse of preprocess)."""
    restored = image + mean_pixel
    return restored.astype(np.float32)

#### KNN mit Tensorflow (tf.nn) erzeugen

In [None]:
def net(input_image):
    """Build the VGG-19 conv/relu/pool stack with tf.nn ops on `input_image`.

    Kernels and biases are read from the module-level `data` (the loaded
    .mat file). Returns a dict mapping every layer name to the tensor
    produced by that layer.
    """
    layer_names = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3',
        'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3',
        'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3',
        'relu5_3', 'conv5_4', 'relu5_4'
    )
    mat_layers = data['layers'][0]
    activations = {}
    tensor = input_image
    for idx, layer_name in enumerate(layer_names):
        print(idx, layer_name)
        # The first four characters of the name select the layer type.
        prefix = layer_name[:4]
        if prefix == 'conv':
            kernels, bias = mat_layers[idx][0][0][0][0]
            # Swap the first two kernel axes (presumably the .mat file stores
            # width before height -- confirm against the file format).
            kernels = np.transpose(kernels, (1, 0, 2, 3))
            tensor = _conv_layer(tensor, kernels, bias.reshape(-1))
        elif prefix == 'relu':
            tensor = tf.nn.relu(tensor)
        elif prefix == 'pool':
            tensor = _pool_layer(tensor)
        activations[layer_name] = tensor
    assert len(activations) == len(layer_names)
    return activations

#### KNN mit Keras erzeugen

In [None]:
def keras_model(input_image):
    """Build the VGG-19 conv/relu/pool stack as a channels-first Keras model.

    Kernels and biases are read from the module-level `data` (the loaded
    .mat file) and installed as the initial weights of each Conv2D layer.

    Args:
        input_image: 4-D array indexed as (batch, height, width, channels);
            only its shape is used, to declare the model's input shape as
            (channels, height, width).

    Returns:
        A `Sequential` model whose conv/relu/pool layers carry the VGG layer
        names ('conv1_1', 'relu1_1', ...), so intermediate outputs can be
        fetched with `model.get_layer(name)`.
    """
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3',
        'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3',
        'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3',
        'relu5_3', 'conv5_4', 'relu5_4'
    )
    weights = data['layers'][0]
    model = Sequential()
    for i, name in enumerate(layers):
        print(i, name)
        kind = name[:4]
        if kind == 'conv':
            kernels, bias = weights[i][0][0][0][0]
            print(kernels.shape, bias.shape)
            # Same axis swap as in the tf.nn version of the network.
            kernels_keras = np.transpose(kernels, (1, 0, 2, 3))
            # Flatten the bias like net() does; this works for both row- and
            # column-shaped bias arrays, unlike indexing with bias[0].
            bias = bias.reshape(-1)

            # Explicit 1-pixel zero padding followed by an unpadded 3x3
            # convolution keeps the spatial size unchanged.
            pad_kwargs = {'data_format': "channels_first"}
            if i == 0:
                # The first layer must declare the (channels, height, width)
                # input shape.
                pad_kwargs['input_shape'] = (
                    input_image.shape[3],
                    input_image.shape[1],
                    input_image.shape[2],
                )
            model.add(ZeroPadding2D((1, 1), **pad_kwargs))

            # Debug output for the padding layer that was just added.
            # (model.layers[i] would point at an unrelated earlier layer,
            # because each conv entry adds two Keras layers.)
            padding_layer = model.layers[-1]
            print(padding_layer.input)
            print(padding_layer.output)
            model.add(Conv2D(bias.size, (3, 3), name=name,
                             data_format="channels_first",
                             weights=[kernels_keras, bias]))
        elif kind == 'relu':
            model.add(Activation('relu', name=name))
        elif kind == 'pool':
            # `padding` is the Keras 2 name of the former `border_mode`.
            model.add(MaxPooling2D((2, 2), strides=(2, 2), padding='same',
                                   data_format="channels_first", name=name))

    return model

#### Berechnet den Pixel-Wert-Durchschnitt des VGG19. Dieser Wert wird für die Normalisierung genutzt.

In [None]:
# Show the shape of the normalization array stored in the .mat file.
data['normalization'][0][0][0].shape

In [None]:
# Average the stored normalization array over its first two axes.
# Presumably this yields one mean value per colour channel -- confirm
# against the shape shown for data['normalization'][0][0][0].
mean = data['normalization'][0][0][0]
mean_pixel = np.mean(mean, axis=(0, 1))

In [None]:
# Display the computed mean pixel.
mean_pixel

#### Das Zielbild wird vorverarbeitet

In [None]:
# Subtract the mean pixel from the content image (float32 result).
content_pre = preprocess(image_content, mean_pixel)

### Aktivitätsberechnung des VGG19 Netzwerks auf dem content_pre Bild

In [None]:
# Display the shape of the preprocessed content image.
content_pre.shape

In [None]:
# Build the Keras network; the input shape is taken from the content image.
content_keras_model = keras_model(content_pre)

In [None]:
# Display the model object.
content_keras_model

In [None]:
# Build the tf.nn version of the network on the preprocessed content image.
# NOTE(review): `content` is not referenced again in the visible notebook;
# the content features below come from the Keras model -- confirm it is needed.
content = net(content_pre)

In [None]:
# Print a layer-by-layer summary of the Keras model.
content_keras_model.summary()

#### Layer für die Nutzung als Inhalts-Feature auswählen

In [None]:
# Names of the layers whose outputs serve as content features.
CONTENT_LAYERS = ('conv1_1', 'conv2_1', 'conv4_1', 'conv4_2')
# Containers for the content features, keyed by layer name.
content_features = {}
content_keras_features ={}

In [None]:
for layer in CONTENT_LAYERS:
    # Sub-model that ends at the requested layer.
    tmpmodel = Model(
        inputs=content_keras_model.input,
        outputs=content_keras_model.get_layer(layer).output,
    )
    # Move the channel axis to position 1 for the channels-first model,
    # then move it back to the last position in the result.
    channels_first_input = np.transpose(content_pre, (0, 3, 1, 2))
    layer_output = tmpmodel.predict(channels_first_input)
    content_keras_features[layer] = np.transpose(layer_output, (0, 2, 3, 1))
    print('content_keras_feaures shape:', layer, content_keras_features[layer].shape)

#### Im neuen Zielbild werden die Inhalts-Features verglichen

In [None]:
# Use the features computed with the Keras model as the content targets.
content_features=content_keras_features

#### Layer für die Nutzung als Style-Feature auswählen

In [None]:
# Names of the layers whose outputs serve as style features.
STYLE_LAYERS = ('relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1')
# Subtract the mean pixel from the style image and build a Keras model
# whose input shape is taken from it.
style_pre = preprocess(image_style, mean_pixel)
style_keras_model = keras_model(style_pre)
# Gram matrices of the style image, keyed by layer name.
style_keras_features ={}

In [None]:
for layer in STYLE_LAYERS:
    # Sub-model that ends at the requested layer.
    style_layer_output = style_keras_model.get_layer(layer).output
    tmpmodel = Model(inputs=style_keras_model.input, outputs=style_layer_output)

    # Channels-first in, channels-last out.
    channels_first_style = np.transpose(style_pre, (0, 3, 1, 2))
    features = np.transpose(tmpmodel.predict(channels_first_style), (0, 2, 3, 1))
    print('features shape:', layer, features.shape)

    # Collapse all leading axes so that each row holds the values of the last axis.
    features = features.reshape((-1, features.shape[3]))
    print('reshaped features shape:', layer, features.shape)

    # Gram matrix of the flattened features, divided by the element count.
    gram = np.matmul(features.T, features) / features.size
    style_keras_features[layer] = gram
    print('gram shape:', layer, gram.shape)
    print('**************')

#### Im Zielbild werden die Features des Style-Bildes verglichen

In [None]:
# Use the Gram matrices computed with the Keras model as the style targets.
style_features=style_keras_features

#### Erzeuge ein neues initiales Bild mit Random Noise

In [None]:
# No start image is supplied, so the random-noise branch below is the one taken.
initial = None
if initial is not None:
    # Start from a given image: subtract the mean pixel and wrap it in an
    # outer array axis.
    initial = np.array([preprocess(initial, mean_pixel)]).astype('float32')
else:
    # NOTE(review): `noise` is not used anywhere in the visible notebook.
    noise = np.random.normal(size=image_content.shape,
                             scale=np.std(image_content) * 0.1)
    # Start from TensorFlow random-normal noise, scaled by 0.256.
    initial = tf.random_normal(image_content.shape) * 0.256

#### Das neue Bild als TensorFlow-Variable

In [None]:
# The image being optimized is a TensorFlow variable initialized from `initial`.
image = tf.Variable(initial)
# Build the tf.nn network on top of that variable; returns a dict of layer tensors.
image_net = net(image)

#### Initiale Parameter setzen

In [None]:
# Weights of the three loss terms in the total loss.
content_weight= 5e0
style_weight= 1e2
tv_weight = 1e2
# Learning rate passed to the Adam optimizer.
learning_rate = 1e1
# Number of optimization steps.
iterations =  1000
# A checkpoint image is written every `checkpoint_iterations` steps.
checkpoint_iterations = 20
# The loss values are printed every `print_iterations` steps.
print_iterations = 10

#### Erzeugen der Content-Verlustfunktion
It measures how much the content features of the TensorFlow variable "image" deviate from the content features of the content image.

In [None]:
# One normalized squared-error term per content layer.
content_losses = []
for content_layer in CONTENT_LAYERS:
    target_features = content_features[content_layer]
    feature_diff = image_net[content_layer] - target_features
    # tf.nn.l2_loss halves the sum of squares; the factor 2 undoes that
    # before dividing by the number of feature values.
    content_losses.append(
        2 * tf.nn.l2_loss(feature_diff) / target_features.size)

# Weighted sum over all content layers.
content_loss = content_weight * reduce(tf.add, content_losses)

In [None]:
# Display the content loss tensor.
content_loss

#### Erzeuge die Style-Verlustfunktion
It measures how much the style Gram matrices of the TensorFlow variable "image" deviate from the style Gram matrices of the style image.

In [None]:
# One normalized squared-error term between Gram matrices per style layer.
style_losses = []
for style_layer in STYLE_LAYERS:
    layer = image_net[style_layer]
    # Static dimensions of the layer output; the first one is not needed.
    _, height, width, number = [dim.value for dim in layer.get_shape()]
    size = height * width * number
    # Flatten to rows of `number` values and build the Gram matrix.
    feats = tf.reshape(layer, (-1, number))
    gram = tf.matmul(tf.transpose(feats), feats) / size
    style_gram = style_features[style_layer]
    gram_diff = gram - style_gram
    style_losses.append(2 * tf.nn.l2_loss(gram_diff) / style_gram.size)

# Weighted sum over all style layers.
style_loss = style_weight * reduce(tf.add, style_losses)

In [None]:
# Display the style loss tensor.
style_loss

In [None]:
def _tensor_size(tensor):
    from operator import mul
    return reduce(mul, (d.value for d in tensor.get_shape()), 1)

#### Erzeugung einer "Total Variation" Verlustfunktion

In [None]:
# Slices of the image shifted against each other by one position along
# axis 1 and along axis 2 respectively.
shifted_y = image[:, 1:, :, :]
unshifted_y = image[:, :image_content.shape[1] - 1, :, :]
shifted_x = image[:, :, 1:, :]
unshifted_x = image[:, :, :image_content.shape[2] - 1, :]

# Element counts used to normalize the two difference terms.
tv_y_size = _tensor_size(shifted_y)
tv_x_size = _tensor_size(shifted_x)

# Sum of the normalized squared neighbour differences, weighted by tv_weight.
tv_loss = tv_weight * 2 * (
    tf.nn.l2_loss(shifted_y - unshifted_y) / tv_y_size
    + tf.nn.l2_loss(shifted_x - unshifted_x) / tv_x_size
)

In [None]:
# Print the TensorFlow variable that holds the image being optimized.
print(image)

#### Definition der Verlustfunktion

In [None]:
# Total loss: sum of the (already weighted) content, style and total-variation terms.
loss = content_loss + style_loss + tv_loss

#### Nutzung des AdamOptimizers, um die Verlustfunktion zu minimieren

In [None]:
# Training op: one Adam step that minimizes the total loss.
train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)

In [None]:
def imsave(path, img):
    """Clip `img` to the range [0, 255], cast it to uint8 and write it to `path`."""
    clipped = np.clip(img, 0, 255)
    scipy.misc.imsave(path, clipped.astype(np.uint8))

#### Optimierung starten

In [None]:
# optimization
# Optimization: run `iterations` Adam steps on the image variable, keep the
# image with the lowest total loss seen at a checkpoint, write checkpoint
# images to disk and print the loss terms at regular intervals.
import os

best_loss = float('inf')
best = None

# Checkpoints go to ./checks/<content file name without extension>/.
# The directory is created once up front; an already existing directory is
# fine, any other failure (e.g. missing permissions) is reported immediately.
checkpoint_dir = os.path.join(
    './checks', os.path.splitext(os.path.basename(content_file_name))[0])
os.makedirs(checkpoint_dir, exist_ok=True)

init_op = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init_op)
    for i in range(iterations):
        print(i)
        is_last_step = (i == iterations - 1)

        # the next step is the actual tensorflow training/minimization step
        train_op.run()

        # save the images after some iterations
        if i % checkpoint_iterations == 0 or is_last_step:
            this_loss = loss.eval()
            if this_loss < best_loss:
                best_loss = this_loss

                # this is the best image so far
                best = image.eval()
                print("new minimum found")
                newimg = unprocess(best.reshape(image_content.shape[1:]), mean_pixel)
                img = np.clip(newimg, 0, 255).astype(np.uint8)
                plt.figure(figsize=(11, 11))
                plt.imshow(img)
                plt.show()

            # save a check point of the best image so far; `best` is still
            # None if no finite loss has been observed yet (e.g. NaN loss)
            if best is not None:
                timestr = time.strftime("%Y%m%d_%H%M%S")
                filename_cp = os.path.join(checkpoint_dir, timestr + '.jpg')
                cp = unprocess(best.reshape(image_content.shape[1:]), mean_pixel)
                imsave(filename_cp, cp)

        if i % print_iterations == 0 or is_last_step:
            # Evaluate all four loss tensors in a single graph run instead of
            # one forward pass per tensor.
            content_value, style_value, tv_value, total_value = sess.run(
                [content_loss, style_loss, tv_loss, loss])
            print('epoch %d/%d' % (i + 1, iterations))
            print('  content loss: %g' % content_value)
            print('    style loss: %g' % style_value)
            print('       tv loss: %g' % tv_value)
            print('    total loss: %g' % total_value)

    if best is None:
        raise RuntimeError(
            'optimization produced no result: the loss never dropped below '
            'infinity (NaN loss or iterations == 0)')
    output = unprocess(best.reshape(image_content.shape[1:]), mean_pixel)

In [None]:
# Write the final image next to the notebook as 'output_<content file name>'.
imsave('output_'+content_file_name, output)

In [None]:
# Display the saved output image inline.
Image(filename = 'output_'+content_file_name)