# Neural style

图片风格转换就是将一幅图片的内容与另外一幅图片的风格融合在一起，形成一幅独特的照片。  

图片

## 摘要：  
1 使用预训练好的VGG网络提取图片特征：从低层到高层的多个特征层（relu1_1 ~ relu5_1）用于重建图像风格，较高层的特征（relu4_2、relu5_2）用于重建图片内容  
2 创建一幅随机噪声组成的图片，通过构建的content loss 和 style loss 迭代优化这幅图片，最终作为输出结果  
Note：content 和 style 图像的特征只计算一次；新建待优化的图片之后，每次迭代计算该图片在 content 和 style 层的特征，计算当前的 loss，并通过反向传播更新该图片的像素值，最终该图片即为输出结果

## 优缺点  
优点：可以端到端的分离和重组任意图像的内容和风格，用Gram矩阵来作为图像的风格表示，从而可以量化两幅图片的风格差异，具有创造力  
缺点: 每转换一次，都需要针对新的输入重新迭代优化输出图片（优化的是图片像素值），速度有待优化

## 实验：  
### 1 定义用于内容和风格重建的特征层

In [1]:
# Network layers whose activations are compared with the content image.
CONTENT_LAYERS = (
    'relu4_2',
    'relu5_2',
)
# Network layers whose Gram matrices are compared with the style image(s).
STYLE_LAYERS = (
    'relu1_1',
    'relu2_1',
    'relu3_1',
    'relu4_1',
    'relu5_1',
)

### 2 前向提取content feature  

content： 输入的 content 图片
vgg.preprocess： 对 content 图片做预处理，减去 ImageNet 的像素均值  


In [None]:
# Feed-forward pass over the content image: build a throw-away graph, run the
# preprocessed image through the network and cache each content layer's
# activations in content_features.
g = tf.Graph()
with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
    image = tf.placeholder('float', shape=shape)
    net = vgg.net_preloaded(vgg_weights, image, pooling)
    # Wrapping the preprocessed image in a list gives the array a leading
    # axis of length 1.
    content_pre = np.array([vgg.preprocess(content, vgg_mean_pixel)])
    feed = {image: content_pre}
    for layer in CONTENT_LAYERS:
        content_features[layer] = sess.run(net[layer], feed_dict=feed)

### 3 前向提取 style feature  

用Gram矩阵来表示图像的风格：Gram矩阵可以看作同一层中不同feature map之间的（未减去均值的）协方差矩阵，即各feature map两两之间的内积

In [None]:
# Feed-forward pass over every style image: each image gets its own graph,
# and for each style layer the Gram matrix of the activations is cached in
# style_features[i][layer].
for i, style_image in enumerate(styles):
    g = tf.Graph()
    with g.as_default(), g.device('/cpu:0'), tf.Session() as sess:
        image = tf.placeholder('float', shape=style_shapes[i])
        net = vgg.net_preloaded(vgg_weights, image, pooling)
        style_pre = np.array([vgg.preprocess(style_image, vgg_mean_pixel)])
        feed = {image: style_pre}
        for layer in STYLE_LAYERS:
            features = sess.run(net[layer], feed_dict=feed)
            # Flatten every axis except the last (index 3) so that each
            # column holds one feature map.
            channels = features.shape[3]
            features = features.reshape(-1, channels)
            # Gram matrix: inner products between feature maps, normalised
            # by the total number of activations.
            gram = np.matmul(features.T, features) / features.size
            style_features[i][layer] = gram

### 4 新建空的图片，迭代优化  

image ： 新建待优化的图片，也就是最终的融合后的图片  
content_weight_blend ： 不同层的loss在总content loss 中的权重  
tf.nn.l2_loss(net[content_layer] - content_features[content_layer]) ：content图片和待优化图片在相同feature map 上的平方误差损失  
tf.nn.l2_loss(gram - style_gram) ： style图片和待优化图片在相同层的Gram矩阵之间的平方误差损失  
total variation denoising ： 用于使输出的图片看起来更自然  
loss ： 最终待优化的损失函数

In [None]:
# make stylized image using backpropagation: the image itself is the only
# variable created here; the loss is the sum of a content term, a style term
# and a total-variation term.
with tf.Graph().as_default():
    # NOTE(review): `noise` is not referenced again in this cell -- confirm
    # whether anything else still needs it.
    noise = np.random.normal(size=shape, scale=np.std(content) * 0.1)
    initial = tf.random_normal(shape) * 0.256

    image = tf.Variable(initial)
    net = vgg.net_preloaded(vgg_weights, image, pooling)

    # content loss: content_weight_blend splits the weight between the two
    # content layers.
    content_layers_weights = {
        'relu4_2': content_weight_blend,
        'relu5_2': 1.0 - content_weight_blend,
    }

    content_loss = 0
    content_losses = []
    for content_layer in CONTENT_LAYERS:
        target = content_features[content_layer]
        layer_weight = content_layers_weights[content_layer]
        # Squared error against the cached content activations, divided by
        # the number of activations in the layer.
        squared_error = 2 * tf.nn.l2_loss(net[content_layer] - target)
        content_losses.append(
            layer_weight * content_weight * (squared_error / target.size))
    content_loss += reduce(tf.add, content_losses)

    # style loss: compare the Gram matrix of the image being optimised with
    # the cached Gram matrix of each style image, layer by layer.
    style_loss = 0
    for i in range(len(styles)):
        style_losses = []
        for style_layer in STYLE_LAYERS:
            layer = net[style_layer]
            _, height, width, number = [dim.value for dim in layer.get_shape()]
            size = height * width * number
            feats = tf.reshape(layer, (-1, number))
            gram = tf.matmul(tf.transpose(feats), feats) / size
            style_gram = style_features[i][style_layer]
            gram_diff = gram - style_gram
            style_losses.append(
                style_layers_weights[style_layer] * 2 * tf.nn.l2_loss(gram_diff) /
                style_gram.size)
        style_loss += style_weight * style_blend_weights[i] * reduce(tf.add, style_losses)

    # total variation denoising: penalise differences between neighbouring
    # pixels along axis 1 (y) and axis 2 (x).
    tv_y_size = _tensor_size(image[:,1:,:,:])
    tv_x_size = _tensor_size(image[:,:,1:,:])
    tv_y_term = tf.nn.l2_loss(image[:,1:,:,:] - image[:,:shape[1]-1,:,:]) / tv_y_size
    tv_x_term = tf.nn.l2_loss(image[:,:,1:,:] - image[:,:,:shape[2]-1,:]) / tv_x_size
    tv_loss = tv_weight * 2 * (tv_y_term + tv_x_term)

    # overall loss
    loss = content_loss + style_loss + tv_loss

### 5 优化输出结果  
Luminosity transfer steps : 当 preserve_colors 为 True 时使用——只取风格化结果的亮度（Y 通道），色度（Cb、Cr 通道）仍取自原始 content 图片，从而保留原图的颜色  


In [None]:
def _rgb_to_ycbcr(rgb):
    """Cast *rgb* to uint8 and return it as a YCbCr array (converted by PIL)."""
    return np.array(Image.fromarray(rgb.astype(np.uint8)).convert('YCbCr'))


# optimizer setup
optimizer = tf.train.AdamOptimizer(learning_rate, beta1, beta2, epsilon)
train_step = optimizer.minimize(loss)

# optimization: remember the lowest-loss image seen at any checkpoint
best_loss = float('inf')
best = None
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    stderr.write('Optimization started...\n')
    for i in range(iterations):
        stderr.write('Iteration %4d/%4d\n' % (i + 1, iterations))
        sess.run(train_step)

        # Only evaluate the loss on checkpoint iterations and on the final
        # iteration.
        last_step = (i == iterations - 1)
        at_checkpoint = checkpoint_iterations and i % checkpoint_iterations == 0
        if not (at_checkpoint or last_step):
            continue

        this_loss = sess.run(loss)
        if this_loss < best_loss:
            best_loss = this_loss
            best = sess.run(image)
        img_out = vgg.unprocess(best.reshape(shape[1:]), vgg_mean_pixel)

        if preserve_colors and preserve_colors == True:
            original_image = np.clip(content, 0, 255)
            styled_image = np.clip(img_out, 0, 255)

            # Luminosity transfer steps:
            # 1. Convert stylized RGB->grayscale according to Rec.601 luma (0.299, 0.587, 0.114)
            # 2. Convert stylized grayscale into YUV (YCbCr)
            # 3. Convert original image into YUV (YCbCr)
            # 4. Recombine (stylizedYUV.Y, originalYUV.U, originalYUV.V)
            # 5. Convert recombined image from YUV back to RGB

            # 1
            styled_grayscale = rgb2gray(styled_image)
            styled_grayscale_rgb = gray2rgb(styled_grayscale)

            # 2
            styled_grayscale_yuv = _rgb_to_ycbcr(styled_grayscale_rgb)

            # 3
            original_yuv = _rgb_to_ycbcr(original_image)

            # 4: channel 0 (Y) from the stylized image, channels 1-2 from
            # the original image
            rows, cols, _ = original_image.shape
            combined_yuv = np.empty((rows, cols, 3), dtype=np.uint8)
            combined_yuv[..., 0] = styled_grayscale_yuv[..., 0]
            combined_yuv[..., 1:3] = original_yuv[..., 1:3]

            # 5
            img_out = np.array(Image.fromarray(combined_yuv, 'YCbCr').convert('RGB'))
