In [11]:
import numpy as np
import tensorflow as tf
import scipy.io
import matplotlib.pyplot as plt
import scipy.misc

%matplotlib inline

In [23]:
def load_model(path, image_height=300, image_width=400, color_channels=3):
    """
    Returns a model for the purpose of 'painting' the picture.

    Takes only the convolution layer weights stored in the .mat file and wraps
    them as constants in TensorFlow conv2d + relu ops. VGG actually uses
    maxpool but the paper indicates that using AveragePooling yields better
    results, so every pooling stage here is an average pool.
    The last few fully connected layers are not used.

    Here is the detailed configuration of the VGG model:
        0 is conv1_1 (3, 3, 3, 64)
        1 is relu
        2 is conv1_2 (3, 3, 64, 64)
        3 is relu
        4 is maxpool
        5 is conv2_1 (3, 3, 64, 128)
        6 is relu
        7 is conv2_2 (3, 3, 128, 128)
        8 is relu
        9 is maxpool
        10 is conv3_1 (3, 3, 128, 256)
        11 is relu
        12 is conv3_2 (3, 3, 256, 256)
        13 is relu
        14 is conv3_3 (3, 3, 256, 256)
        15 is relu
        16 is conv3_4 (3, 3, 256, 256)
        17 is relu
        18 is maxpool
        19 is conv4_1 (3, 3, 256, 512)
        20 is relu
        21 is conv4_2 (3, 3, 512, 512)
        22 is relu
        23 is conv4_3 (3, 3, 512, 512)
        24 is relu
        25 is conv4_4 (3, 3, 512, 512)
        26 is relu
        27 is maxpool
        28 is conv5_1 (3, 3, 512, 512)
        29 is relu
        30 is conv5_2 (3, 3, 512, 512)
        31 is relu
        32 is conv5_3 (3, 3, 512, 512)
        33 is relu
        34 is conv5_4 (3, 3, 512, 512)
        35 is relu
        36 is maxpool
        37 is fullyconnected (7, 7, 512, 4096)
        38 is relu
        39 is fullyconnected (1, 1, 4096, 4096)
        40 is relu
        41 is fullyconnected (1, 1, 4096, 1000)
        42 is softmax

    Arguments:
        path -- location of the pretrained weights, handed to scipy.io.loadmat
        image_height, image_width, color_channels -- shape of the input
            variable; the defaults (300, 400, 3) are the values that used to
            be hard-coded, so existing calls behave as before

    Returns:
        dict mapping 'input', every 'convX_Y' and every 'avgpoolN' to its
        tensor. 'input' is a float32 tf.Variable of shape
        (1, image_height, image_width, color_channels) initialised to zeros;
        it is the only variable, all weights are constants.
    """
    vgg_layers = scipy.io.loadmat(path)['layers']

    def _weights(layer, expected_layer_name):
        """Return (W, b) stored at index `layer`, checking the stored name."""
        entry = vgg_layers[0][layer][0][0]
        layer_name = entry[0][0]
        assert layer_name == expected_layer_name, (
            "layer %d is %r, expected %r" % (layer, layer_name, expected_layer_name))
        wb = entry[2]
        return wb[0][0], wb[0][1]

    def _conv2d_relu(prev_layer, layer, layer_name):
        """Convolution with the stored weights of `layer`, plus bias, then relu."""
        W, b = _weights(layer, layer_name)
        kernel = tf.constant(W)
        # the bias is flattened so that it broadcasts over the channel axis
        bias = tf.constant(np.reshape(b, (b.size)))
        conv = tf.nn.conv2d(prev_layer, filter=kernel, strides=[1, 1, 1, 1], padding='SAME') + bias
        return tf.nn.relu(conv)

    def _avgpool(prev_layer):
        """2x2 average pooling with stride 2."""
        return tf.nn.avg_pool(prev_layer, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")

    # (graph key, index in the .mat layer list); None marks a pooling stage
    architecture = [
        ('conv1_1', 0), ('conv1_2', 2), ('avgpool1', None),
        ('conv2_1', 5), ('conv2_2', 7), ('avgpool2', None),
        ('conv3_1', 10), ('conv3_2', 12), ('conv3_3', 14), ('conv3_4', 16), ('avgpool3', None),
        ('conv4_1', 19), ('conv4_2', 21), ('conv4_3', 23), ('conv4_4', 25), ('avgpool4', None),
        ('conv5_1', 28), ('conv5_2', 30), ('conv5_3', 32), ('conv5_4', 34), ('avgpool5', None),
    ]

    graph = {}
    graph['input'] = tf.Variable(
        np.zeros((1, image_height, image_width, color_channels)), dtype='float32')

    # each stage consumes the output of the stage before it
    previous = graph['input']
    for name, mat_index in architecture:
        if mat_index is None:
            previous = _avgpool(previous)
        else:
            previous = _conv2d_relu(previous, mat_index, name)
        graph[name] = previous

    return graph

In [88]:
def compute_content_cost(a_C, a_G):
    """
    Content cost between the content activations and the generated activations.

    a_C -- activations of the content image at the chosen layer
    a_G -- activations of the generated image at the same layer; its static
           shape (read through get_shape()) must have four entries

    Returns the sum of squared differences divided by 4 * n_H * n_W * n_C,
    where n_H, n_W, n_C are the last three entries of a_G's shape.
    """
    _, height, width, channels = a_G.get_shape().as_list()
    squared_error = tf.reduce_sum(tf.square(a_C - a_G))
    normalizer = 4 * height * width * channels
    return squared_error / normalizer

In [89]:
def compute_style_cost(a_S, a_G):
    """
    Style cost of one layer: squared distance between two Gram matrices.

    a_S -- activations of the style image, 4 dimensions (m, n_H, n_W, n_C)
    a_G -- activations of the generated image, same shape as a_S

    Returns sum((G_S - G_G)^2) / (4 * (n_H * n_W * n_C)^2), where each Gram
    matrix is (n_C, n_C).
    """
    assert a_S.shape == a_G.shape
    _, height, width, channels = a_S.shape

    def _gram(activations):
        # unroll to (channels, positions), then multiply by its own transpose
        unrolled = tf.transpose(tf.reshape(activations, [-1, channels]))
        return tf.matmul(unrolled, tf.transpose(unrolled))

    gram_style = _gram(a_S)
    gram_generated = _gram(a_G)
    assert gram_style.shape == [channels, channels]

    normalizer = 4 * (height * width * channels) ** 2
    return tf.reduce_sum(tf.square(gram_style - gram_generated)) / normalizer

def style_cost(model, sess, layers):
    """
    Weighted sum of the per-layer style costs.

    model  -- mapping from layer name to that layer's output tensor
    sess   -- session used to evaluate each layer; the value it returns is the
              fixed style target, so the caller must have loaded the style
              image into the model input beforehand
    layers -- iterable of (layer_name, coeff) pairs

    Returns the sum over layers of coeff * compute_style_cost(target, tensor);
    0 when `layers` is empty.
    """
    def _weighted_layer_cost(layer_name, coeff):
        activation = model[layer_name]
        # evaluated now -> constant target; the tensor itself stays symbolic
        target = sess.run(activation)
        return coeff * compute_style_cost(target, activation)

    return sum(_weighted_layer_cost(name, weight) for name, weight in layers)

In [90]:
def generate_noise_image(content_image, noise_ratio=0.6, seed=None):
    """
    Generates a noisy image by adding random noise to the content_image

    content_image -- array with 4 dimensions (batch, H, W, C); only H, W and C
                     are used to size the noise
    noise_ratio   -- weight of the noise in the blend; the content image gets
                     (1 - noise_ratio)
    seed          -- optional seed for a private RandomState, making the result
                     reproducible. With the default None the global numpy
                     random state is used, exactly as before.

    Returns noise * noise_ratio + content_image * (1 - noise_ratio), where the
    noise is uniform in [-20, 20) with shape (1, H, W, C).
    """
    _, H, W, C = content_image.shape

    # np.random and RandomState expose the same uniform() API
    rng = np.random if seed is None else np.random.RandomState(seed)
    noise_image = rng.uniform(-20, 20, (1, H, W, C)).astype('float32')

    # Set the input_image to be a weighted average of the content_image and a noise_image
    input_image = noise_image * noise_ratio + content_image * (1 - noise_ratio)

    return input_image

# Per-channel means, shaped (1, 1, 1, 3) so they broadcast over a (1, H, W, 3) image
MEANS = np.array([[[[123.68, 116.779, 103.939]]]])
def reshape_and_normalize_image(image):
    """
    Prepare an input image (content or style) for the pretrained VGG network.

    A leading batch axis is added, giving shape (1,) + image.shape, and MEANS
    is subtracted to match the input the network expects.
    """
    batched = np.expand_dims(image, axis=0)
    return batched - MEANS

def save_image(path, image):
    """
    Write a generated image to disk.

    path  -- destination file; its parent directory is created when missing
    image -- normalized image with a leading batch axis; only image[0] is saved

    MEANS is added back, values are clipped to [0, 255] and cast to uint8
    before writing.
    """
    import os

    # Un-normalize the image so that it looks good
    image = image + MEANS

    # Clip to the valid pixel range and drop the batch axis
    image = np.clip(image[0], 0, 255).astype('uint8')

    # Writing into a directory that does not exist yet would otherwise fail
    directory = os.path.dirname(path)
    if directory and not os.path.isdir(directory):
        os.makedirs(directory)

    # scipy.misc.imsave is deprecated since SciPy 1.0 and removed in 1.2:
    # keep using it where it exists, otherwise fall back to matplotlib.
    writer = getattr(getattr(scipy, 'misc', None), 'imsave', None)
    if writer is None:
        writer = plt.imsave
    writer(path, image)

In [91]:
# Input locations: content image and style image
CONTENT_IMAGE_PATH = './images/cat.jpg'
STYLE_IMAGE_PATH = './images/sandstone.jpg'

def _load_normalized_image(path):
    """Read the image at `path` and pass it through reshape_and_normalize_image."""
    return reshape_and_normalize_image(scipy.misc.imread(path))

# Load and normalize both source images
content_image = _load_normalized_image(CONTENT_IMAGE_PATH)
style_image = _load_normalized_image(STYLE_IMAGE_PATH)

# Starting point of the optimisation: the content image blended with noise
generate_image = generate_noise_image(content_image)
# Optional previews:
# plt.imshow(content_image[0, :])
# plt.imshow(style_image[0, :])
# plt.imshow(generate_image[0, :])

`imread` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imread`` instead.
  
`imread` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imread`` instead.
  # This is added back by InteractiveShellApp.init_path()


In [93]:
# path of VGG model
MODEL_PATH = "./pretrained-model/imagenet-vgg-verydeep-19"

# hyperparameters (previously inline magic numbers)
CONTENT_COST_LAYER = 'conv4_1'
STYLE_COST_LAYERS = [
    ('conv2_1', 0.2),
    ('conv3_1', 0.3),
]
CONTENT_WEIGHT = 10
STYLE_WEIGHT = 10
LEARNING_RATE = 2.
NUM_ITERATIONS = 200
REPORT_EVERY = 20

# reset graph
tf.reset_default_graph()
model = load_model(MODEL_PATH)
sess = tf.Session()

# content cost: a_C is the evaluated (fixed) activation of the content image,
# a_G is the same layer kept symbolic so it follows the generated image
sess.run(model['input'].assign(content_image))
content_cost_layer = model[CONTENT_COST_LAYER]
a_C = sess.run(content_cost_layer)
a_G = content_cost_layer
J_content = compute_content_cost(a_C, a_G)

# style cost: the style image must be in the input while the targets are evaluated
sess.run(model['input'].assign(style_image))
J_style = style_cost(model, sess, STYLE_COST_LAYERS)

# total cost
J = CONTENT_WEIGHT * J_content + STYLE_WEIGHT * J_style

# train
train_step = tf.train.AdamOptimizer(LEARNING_RATE).minimize(J)

# Initialises the optimizer's variables; this also resets model['input'],
# so the start image is assigned afterwards.
sess.run(tf.global_variables_initializer())

# assign
sess.run(model['input'].assign(generate_image))

for i in range(NUM_ITERATIONS):
    sess.run(train_step)

    if i % REPORT_EVERY == 0:
        # Keep the snapshot under its own name: overwriting `generate_image`
        # made a re-run of this cell start from the previous result instead of
        # the noisy start image.
        snapshot = sess.run(model['input'])
        save_image("./output/" + str(i) + ".png", snapshot)
        cost, c_cost, s_cost = sess.run([J, J_content, J_style])
        #
        print("Iteration " + str(i) + ": ")
        print('cost: ' + str(cost))
        print('content cost: ' + str(c_cost))
        print('style cost: ' + str(s_cost))
        

`imsave` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imwrite`` instead.


Iteration 0: 
cost: 19961408.0
content cost: 23843.004
style cost: 1972297.8
Iteration 20: 
cost: 2669879.2
content cost: 27984.865
style cost: 239003.06
Iteration 40: 
cost: 1032638.75
content cost: 28986.387
style cost: 74277.484
Iteration 60: 
cost: 623812.7
content cost: 28499.387
style cost: 33881.883
Iteration 80: 
cost: 483494.0
content cost: 27744.078
style cost: 20605.322
Iteration 100: 
cost: 418169.66
content cost: 27054.69
style cost: 14762.275
Iteration 120: 
cost: 380498.3
content cost: 26465.908
style cost: 11583.922
Iteration 140: 
cost: 355386.0
content cost: 25951.76
style cost: 9586.84
Iteration 160: 
cost: 337093.94
content cost: 25500.324
style cost: 8209.07
Iteration 180: 
cost: 323031.4
content cost: 25115.693
style cost: 7187.446


In [62]:
# Fetch the current value of the model's input variable and save it as the final image
generate_image = sess.run(model['input'])
save_image('./output/cat.jpg', generate_image)

`imsave` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imwrite`` instead.
  
