## Implementing neural style transfer model

This technique is outlined in the paper - <i>A Neural Algorithm of Artistic Style</i>. Based on the paper, I require a conv net which is trained on a large volume of images and has subsequently learned how to accurately classify images of different classes. I decided to use the VGG-19 model as described in the paper. I am using a [pretrained model](http://www.vlfeat.org/matconvnet/pretrained/#downloading-the-pre-trained-models) and am using a variation of a helper function found on [Github](https://github.com/JudasDie) to quickly read in the model. 

In [1]:
import scipy.io
import cv2
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import latex
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
np.random.seed(10)

%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

In [27]:
# Global variables for image height,width
n_h = 400
n_w = 300

In [28]:
# Initial images with valid shapes used for testing and first run through model
# Creating a randomly generated image

gen_img = np.random.rand(n_w,n_h,3)
# Loading content and style images
content_img = cv2.imread("../../data/neural_style/content_1.jpg") # christmas tree with presents
content_img = cv2.resize(content_img, (n_w,n_h), interpolation=cv2.INTER_AREA)
style_img = cv2.imread("../../data/neural_style/style_1.jpg") # bright painting
style_img = cv2.resize(style_img, (n_w,n_h), interpolation=cv2.INTER_AREA)
# Adjusting shapes
gen_img.shape = (1,n_w,n_h,3)
content_img.shape = (1,n_w,n_h,3)
style_img.shape = (1,n_w,n_h,3)

In [35]:
# Getting the output shapes for each layer of the model
tf.reset_default_graph()
with tf.Session() as sess:
    model = load_vgg_model("../../vgg/imagenet-vgg-verydeep-19.mat",n_w,n_h,3)
    sess.run(model["input"].assign(gen_img))
    cv11 = sess.run(model["conv1_1"])
    _,cv11w,cv11h,cv11c = cv11.shape
    print("conv1_1:",cv11.shape)
    cv21 = sess.run(model["conv2_1"])
    _,cv21w,cv21h,cv21c = cv21.shape
    print("conv2_1:",cv21.shape)
    cv31 = sess.run(model["conv3_1"])
    _,cv31w,cv31h,cv31c = cv31.shape
    print("conv3_1:",cv31.shape)
    cv41 = sess.run(model["conv4_1"])
    _,cv41w,cv41h,cv41c = cv41.shape
    print("conv4_1:",cv41.shape)
    cv51 = sess.run(model["conv5_1"])
    _,cv51w,cv51h,cv51c = cv51.shape
    print("conv5_1:",cv51.shape)
    cv42 = sess.run(model["conv4_2"])
    print("conv4_2:",cv42.shape)
    _,cv42w,cv42h,cv42c = cv42.shape

conv1_1: (1, 300, 400, 64)
conv2_1: (1, 150, 200, 128)
conv3_1: (1, 75, 100, 256)
conv4_1: (1, 38, 50, 512)
conv5_1: (1, 19, 25, 512)
conv4_2: (1, 38, 50, 512)


## Building content and style cost functions

### Style cost function

$$ J_{content}(C,G) = \frac{1}{2} || a^{[l](C)} - a^{[l](G)} ||^2$$

### Content cost function

$$ E_l(S,G) = \frac{1}{4 (n_h n_w)^2 n_c^2} || g^{[l](S)} - g^{[l](G)} ||^2 $$

$$ J_{style} = \sum_{l=0}^{L} w_l * E_l $$

### Total cost

$$  J_{total} = \alpha * J_{content} + \beta * J_{style} $$

$$ \frac{\alpha}{\beta} = 1*10^{-3} $$


Terms:
- C: content image
- S: style image
- G: generated image
- g: gram-matrix between two channels of a given activation
- a<sup>(l)(x)</sup>: activation for layer l of image x
- w<sub>l</sub>: weighting factors for each layers contribution to the style loss

As described in the paper mentioned above, I will be matching the content representation on layer "conv4_2". I will be matching the style representation on the layers "conv1_1", "conv2_1", "conv3_1", "conv4_1", "conv5_1" with each receiving an equal weighting.

#### Content cost function

In [3]:
# Content cost function
def content_cost(cont_act,gen_act):
    """
    cont_act: content image activation for layer "conv4_2", shape (1,38,50,512)
    gen_act: generated image activation for layer "conv4_2"
    """
    m,n_w,n_h,n_c = cont_act.get_shape().as_list()
    cont_act_u = tf.reshape(cont_act,(n_w*n_h,n_c)) # shape:(1,n_h*n_w,n_c)
    gen_act_u = tf.reshape(gen_act,(n_w*n_h,n_c))
    content_diff = tf.square(tf.subtract(cont_act_u,gen_act_u))
    content_cost = (1/2) * tf.reduce_sum(content_diff)
    return content_cost

In [30]:
# Testing content cost function
tf.reset_default_graph()
with tf.Session() as sess:
    model = load_vgg_model("../../vgg/imagenet-vgg-verydeep-19.mat",n_w,n_h,3)
    # Computation graph
    Content = tf.placeholder(tf.float32,(None,cv42w,cv42h,cv42c))
    Gen = tf.placeholder(tf.float32,(None,cv42w,cv42h,cv42c))
    J_content = content_cost(Content, Gen)
    # Getting input data
    sess.run(model["input"].assign(content_img)) # getting content image
    aContent = sess.run(model["conv4_2"])
    sess.run(model["input"].assign(gen_img))
    aGen = sess.run(model["conv4_2"])
    # Running graph
    cost_content = sess.run(J_content,feed_dict={Content:aContent,Gen:aGen})
    print("Content cost:",cost_content)

Content cost: 141082440000.0


#### Style cost function

In [7]:
# Returns the gram matrix for a specific matrix of shape (n_c,n_h*n_w)
def gram_matrix(act):
    act_t = tf.transpose(act)
    return tf.matmul(act,act_t)

In [12]:
# Computes style loss for a single layer
def single_layer_style_cost(style_act,gen_act):
    m,n_w,n_h,n_c = style_act.get_shape().as_list()
    style_act_t = tf.transpose(tf.reshape(style_act, (n_h*n_w, n_c))) # shape: (1,n_c,n_w*n_h)
    gen_act_t = tf.transpose(tf.reshape(gen_act, (n_h*n_w, n_c)))
    
    gram_style = gram_matrix(style_act_t)
    gram_gen = gram_matrix(gen_act_t)
    style_diff = tf.square(tf.subtract(gram_style,gram_gen))
    style_cost = (1/(4 * (n_h*n_w)**2 * n_c**2)) * tf.reduce_sum(style_diff)
    return style_cost

In [32]:
# The style cost weights each layers activations equally
# Takes a list of activations for each layer corresponding to the generated image and style image (list of tf objs)
def style_cost(style_acts,gen_acts):
    total_cost = 0
    weight = 1 / len(style_acts)
    for i in range(len(style_acts)):
        temp_cost = single_layer_style_cost(style_acts[i],gen_acts[i])
        total_cost += (weight * temp_cost)
        
    return total_cost

In [33]:
# Testing the style cost function - with only two distinct layers of activations
tf.reset_default_graph()
with tf.Session() as sess:
    model = load_vgg_model("../../vgg/imagenet-vgg-verydeep-19.mat",n_w,n_h,3)
    # Computation graph
    Gen_1 = tf.placeholder(tf.float32,(None,cv11w,cv11h,cv11c))
    Gen_2 = tf.placeholder(tf.float32,(None,cv21w,cv21h,cv21c))
    Style_1 = tf.placeholder(tf.float32,(None,cv11w,cv11h,cv11c))
    Style_2 = tf.placeholder(tf.float32,(None,cv21w,cv21h,cv21c))
    # Getting input data
    sess.run(model["input"].assign(gen_img))
    agen_1 = sess.run(model["conv1_1"])
    agen_2 = sess.run(model["conv2_1"])
    sess.run(model["input"].assign(style_img))
    astyle_1 = sess.run(model["conv1_1"])
    astyle_2 = sess.run(model["conv2_1"])
    # Running graph
    J_style = style_cost([Style_1,Style_2],[Gen_1,Gen_2])
    cost_content = sess.run(J_style,feed_dict={Gen_1:agen_1,Gen_2:agen_2,Style_1:astyle_1,Style_2:astyle_2})
    print("Content cost:",cost_content)

Content cost: 233276720.0


#### Full cost function

In [15]:
# Full cost function
def cost_function(style_cost,content_cost,alpha=0.0001,beta=1):
    return alpha * content_cost + beta * style_cost

#### Placeholder initializer

In [36]:
# Initializing placeholders for the generated,style,and content images
# takes in a list of sizes(s) which holds the sizes (width,height,channels) for each layer - 
# Sizes in order of content img sizes first and then style sizes in cronological order
def get_placeholders(s):
    # Content placeholders
    cont = tf.placeholder(tf.float32,shape=(None,s[0],s[1],s[2]))
    gen_cont = tf.placeholder(tf.float32,shape=(None,s[0],s[1],s[2]))
    # Style placeholders:
    # "conv1_1"
    style_1 = tf.placeholder(tf.float32,shape=(None,s[2],s[4],s[5]))
    gen_1 = tf.placeholder(tf.float32,shape=(None,s[2],s[4],s[5]))
    # "conv2_1"
    style_2 = tf.placeholder(tf.float32,shape=(None,s[6],s[7],s[8]))
    gen_2 = tf.placeholder(tf.float32,shape=(None,s[6],s[7],s[8]))
    # "conv3_1"
    style_3 = tf.placeholder(tf.float32,shape=(None,s[9],s[10],s[11]))
    gen_3 = tf.placeholder(tf.float32,shape=(None,s[9],s[10],s[11]))
    # "conv4_1"
    style_4 = tf.placeholder(tf.float32,shape=(None,s[12],s[13],s[14]))
    gen_4 = tf.placeholder(tf.float32,shape=(None,s[12],s[13],s[14]))
    # "conv5_1"
    style_5 = tf.placeholder(tf.float32,shape=(None,s[15],s[16],s[17]))
    gen_5 = tf.placeholder(tf.float32,shape=(None,s[15],s[16],s[17]))
    
    return [cont,gen_cont,style_1,gen_1,style_2,gen_2,style_3,gen_3,style_4,gen_4,style_5,gen_5]

In [39]:
# Testing placeholder function
aS = [cv42w,cv42h,cv42c,cv11w,cv11h,cv11c,cv21w,cv21h,cv21c,cv31w,cv31h,cv31c,cv41w,cv41h,cv41c,cv51w,cv51h,cv51c]
tf.reset_default_graph()
with tf.Session() as sess:
    acts = get_placeholders(aS)
    print(acts[0].shape)
    print(acts[1].shape)
    print(acts[2].shape)
    print(acts[3].shape)

(?, 38, 50, 512)
(?, 38, 50, 512)
(?, 512, 400, 64)
(?, 512, 400, 64)


### Tensorflow model

In [52]:
# sizes: list of sizes as described in the get_placeholders function
# model: the VGG conv-net
# shape: specifies the shape of the images (width,height)
def model(gen_img,cont_img,style_img,sizes,shape,lr=0.0001,epochs=10,print_cost=True):
    tf.reset_default_graph() # reset graph
    costs = []
    cont_img.shape = (1,shape[0],shape[1],3) # making sure image shapes are correct
    style_img.shape = (1,shape[0],shape[1],3)
    gen_img.shape = (1,shape[0],shape[1],3)
    
    # Computation graph
    cont_act,gen_act,style_1,gen_1,style_2,gen_2,style_3,gen_3,style_4,gen_4,style_5,gen_5 = get_placeholders(sizes)
    cont_cost = content_cost(cont_act,gen_act)
    styl_cost = style_cost([style_1,style_2,style_3,style_4,style_5],[gen_1,gen_2,gen_3,gen_4,gen_5])
    cost = cost_function(styl_cost,cont_cost)
    optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(cost)
    init = tf.global_variables_initializer()
    
    with tf.Session() as sess:
        model = load_vgg_model("../../vgg/imagenet-vgg-verydeep-19.mat",shape[0],shape[1],3)
        sess.run(init)
        # Get the content activations
        sess.run(model["input"].assign(content_img))
        acont_act = sess.run(model["conv4_2"])
        # Get the style activations
        sess.run(model["input"].assign(style_img))
        astyle_1_act = sess.run(model["conv1_1"])
        astyle_2_act = sess.run(model["conv2_1"])
        astyle_3_act = sess.run(model["conv3_1"])
        astyle_4_act = sess.run(model["conv4_1"])
        astyle_5_act = sess.run(model["conv5_1"])
        # setting the model for the generated image
        sess.run(model["input"].assign(gen_img)) 
        
        for epoch in range(1,epochs+1):
            # Getting generated image activations
            agen_cont = sess.run(model["conv4_2"])
            agen_1_act = sess.run(model["conv1_1"])
            agen_2_act = sess.run(model["conv2_1"])
            agen_3_act = sess.run(model["conv3_1"])
            agen_4_act = sess.run(model["conv4_1"])
            agen_5_act = sess.run(model["conv5_1"])
            
            # Running graph
            _,temp_cost = sess.run([optimizer,cost], feed_dict={cont_act:acont_act,gen_act:agen_cont,style_1:astyle_1_act,gen_1:agen_1_act,style_2:astyle_2_act,gen_2:agen_2_act,style_3:astyle_3_act,gen_3:agen_3_act,style_4:astyle_4_act,gen_4:agen_4_act,style_5:astyle_5_act,gen_5:agen_5_act})
            costs.append(temp_cost)
            
            if print_cost == True and epoch % 1 == 0: # always print
                print("cost at epoch {}: {}".format(epoch,temp_cost))
    
        generated_img = sess.run(model['input'])
        return generated_img

In [44]:
# Conv layer output sizes
aSizes = [cv42w,cv42h,cv42c,cv11w,cv11h,cv11c,cv21w,cv21h,cv21c,cv31w,cv31h,cv31c,cv41w,cv41h,cv41c,cv51w,cv51h,cv51c]

In [None]:
agen_img = model(gen_img,content_img,style_img,aSizes,[n_w,n_h],lr=0.0001,epochs=10)

#### Model loader helper function

In [4]:
# Loads the VGG model based on pretrained parameters
def load_vgg_model(path,img_height,img_width,num_channels):
    vgg = scipy.io.loadmat(path)
    vgg_layers = vgg['layers']
    
    def _weights(layer, expected_layer_name): # Return weights and bias from VGG model for layer
        wb = vgg_layers[0][layer][0][0][2]
        W = wb[0][0]
        b = wb[0][1]
        layer_name = vgg_layers[0][layer][0][0][0][0]
        assert layer_name == expected_layer_name
        return W, b

    def _relu(conv2d_layer): # Return relu function
        return tf.nn.relu(conv2d_layer)

    def _conv2d(prev_layer, layer, layer_name): # Return Conv2d layer using trained weights
        W, b = _weights(layer, layer_name)
        W = tf.constant(W)
        b = tf.constant(np.reshape(b, (b.size)))
        return tf.nn.conv2d(prev_layer, filter=W, strides=[1, 1, 1, 1], padding='SAME') + b

    def _conv2d_relu(prev_layer, layer, layer_name): # Returns Conv+Relu using pretrained weights
        return _relu(_conv2d(prev_layer, layer, layer_name))

    def _avgpool(prev_layer): # Returns AVGPool layer
        return tf.nn.avg_pool(prev_layer, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    # Constructs the graph
    graph = {}
    graph['input']   = tf.Variable(np.zeros((1, img_height,img_width,num_channels)), dtype = 'float32')
    graph['conv1_1']  = _conv2d_relu(graph['input'], 0, 'conv1_1')
    graph['conv1_2']  = _conv2d_relu(graph['conv1_1'], 2, 'conv1_2')
    graph['avgpool1'] = _avgpool(graph['conv1_2'])
    graph['conv2_1']  = _conv2d_relu(graph['avgpool1'], 5, 'conv2_1')
    graph['conv2_2']  = _conv2d_relu(graph['conv2_1'], 7, 'conv2_2')
    graph['avgpool2'] = _avgpool(graph['conv2_2'])
    graph['conv3_1']  = _conv2d_relu(graph['avgpool2'], 10, 'conv3_1')
    graph['conv3_2']  = _conv2d_relu(graph['conv3_1'], 12, 'conv3_2')
    graph['conv3_3']  = _conv2d_relu(graph['conv3_2'], 14, 'conv3_3')
    graph['conv3_4']  = _conv2d_relu(graph['conv3_3'], 16, 'conv3_4')
    graph['avgpool3'] = _avgpool(graph['conv3_4'])
    graph['conv4_1']  = _conv2d_relu(graph['avgpool3'], 19, 'conv4_1')
    graph['conv4_2']  = _conv2d_relu(graph['conv4_1'], 21, 'conv4_2')
    graph['conv4_3']  = _conv2d_relu(graph['conv4_2'], 23, 'conv4_3')
    graph['conv4_4']  = _conv2d_relu(graph['conv4_3'], 25, 'conv4_4')
    graph['avgpool4'] = _avgpool(graph['conv4_4'])
    graph['conv5_1']  = _conv2d_relu(graph['avgpool4'], 28, 'conv5_1')
    graph['conv5_2']  = _conv2d_relu(graph['conv5_1'], 30, 'conv5_2')
    graph['conv5_3']  = _conv2d_relu(graph['conv5_2'], 32, 'conv5_3')
    graph['conv5_4']  = _conv2d_relu(graph['conv5_3'], 34, 'conv5_4')
    graph['avgpool5'] = _avgpool(graph['conv5_4'])
    
    return graph

### Creating a series of images which relate to the holidays