In [16]:
import tensorflow as tf
import numpy as np
import functools
import vgg19
from vgg19 import *
from utils import *

In [2]:
import scipy.misc, numpy as np, os, sys

# Load data

In [None]:
import boto3
import botocore
from io import BytesIO
from PIL import Image

In [None]:
# Name of the S3 bucket that the image helpers below read from.
BUCKET_NAME = 'fast-style-transfer'

In [None]:
# boto3 resource-level S3 client; credentials are resolved by boto3 itself.
s3 = boto3.resource('s3')
# Handle on the project bucket, used for listing object keys.
my_bucket = s3.Bucket(BUCKET_NAME)

### Get image

In [None]:
def get_train_image(image_path):
    """Download one image from the S3 bucket and return it as a NumPy array.

    Args:
        image_path: Object key inside ``BUCKET_NAME``, e.g.
            "train2014-images/COCO_train2014_000000000049.jpg".

    Returns:
        ``np.ndarray`` with the decoded pixels, exactly as produced by
        ``np.asarray(Image.open(...))``. No resizing or colour-mode
        conversion is applied here.
    """
    with BytesIO() as files:
        s3.Bucket(BUCKET_NAME).download_fileobj(image_path, files)
        # The download wrote into the buffer, so rewind before decoding.
        files.seek(0)
        # Materialise the pixels while the buffer is still open: PIL loads
        # image data lazily and would fail on a closed buffer.
        image = np.asarray(Image.open(files))
    return image

### Get image keys

In [None]:
def get_content_train_images(prefix):
    """Return the keys of every object in ``my_bucket`` whose key starts with ``prefix``.

    Args:
        prefix: Key prefix to filter on, e.g. 'train2014-images/'.

    Returns:
        List of object keys, in the order the bucket listing yields them.
    """
    return [obj.key for obj in my_bucket.objects.filter(Prefix=prefix)]

In [None]:
# Style reference image, downloaded from S3 and decoded to a NumPy array.
style_target = get_train_image("train2014-images/COCO_train2014_000000000049.jpg")
# Keys (not pixel data) of the content images under the training prefix;
# the images themselves are fetched per batch inside the training loop.
content_targets = get_content_train_images('train2014-images/')

# Graph functions

In [7]:
# Standard deviation of the truncated-normal initialiser used for all
# convolution filters (see _conv_init_vars).
WEIGHTS_INIT_STDEV = .1

### Convolutional layer with instance normalisation

In [8]:
def _conv_layer(net, num_filters, filter_size, strides, relu=True):
    """Apply a SAME-padded 2-D convolution followed by instance normalisation.

    Args:
        net: Input tensor; its static shape must have four dimensions.
        num_filters: Number of output channels.
        filter_size: Side length of the square kernel.
        strides: Stride used along both spatial axes.
        relu: When truthy, a ReLU is applied after normalisation.

    Returns:
        The transformed tensor.
    """
    kernel = _conv_init_vars(net, num_filters, filter_size)
    out = tf.nn.conv2d(net, kernel, [1, strides, strides, 1], padding='SAME')
    out = _instance_norm(out)
    return tf.nn.relu(out) if relu else out

In [9]:
def _instance_norm(net, train=True):
    """Normalise each sample over axes 1 and 2, then apply a learned affine map.

    Args:
        net: Tensor whose static shape has four dimensions; the last one is
            used as the channel count for the scale/shift variables.
        train: Accepted for interface compatibility; not used by the body.

    Returns:
        ``scale * (net - mean) / sqrt(variance + 1e-3) + shift`` where
        ``shift`` starts at zeros and ``scale`` at ones.
    """
    # Unpacking into four names keeps the implicit rank-4 check.
    _, _, _, channels = [dim.value for dim in net.get_shape()]
    mean, variance = tf.nn.moments(net, [1, 2], keep_dims=True)
    # One trainable offset and gain per channel (shift is created first).
    shift = tf.Variable(tf.zeros([channels]))
    scale = tf.Variable(tf.ones([channels]))
    eps = 1e-3
    net_hat = (net - mean) / (variance + eps) ** (.5)
    return scale * net_hat + shift

### Transpose convolutional layer with instance normalisation

In [10]:
def _conv_tranpose_layer(net, num_filters, filter_size, strides):
    """Upsample with a transposed convolution, then instance-norm and ReLU.

    Args:
        net: Input tensor; its static shape (including the batch dimension)
            is read to build the output shape.
        num_filters: Number of output channels.
        filter_size: Side length of the square kernel.
        strides: Upsampling factor applied to both spatial axes.

    Returns:
        Tensor of shape (batch, rows * strides, cols * strides, num_filters).
    """
    kernel = _conv_init_vars(net, num_filters, filter_size, transpose=True)

    n, in_rows, in_cols, _ = [dim.value for dim in net.get_shape()]
    # conv2d_transpose needs the output shape spelled out explicitly.
    out_shape = tf.stack([n, int(in_rows * strides), int(in_cols * strides), num_filters])

    upsampled = tf.nn.conv2d_transpose(net, kernel, out_shape, [1, strides, strides, 1], padding='SAME')
    return tf.nn.relu(_instance_norm(upsampled))

### Weight initialization

In [11]:
def _conv_init_vars(net, out_channels, filter_size, transpose=False):
    """Create the filter variable for a (transposed) convolution on ``net``.

    Args:
        net: Input tensor; the last of its four static dimensions is taken
            as the input channel count.
        out_channels: Number of channels the layer should produce.
        filter_size: Side length of the square kernel.
        transpose: When truthy, the two channel axes are swapped, which is
            the layout used with ``tf.nn.conv2d_transpose``.

    Returns:
        A float32 ``tf.Variable`` initialised from a truncated normal with
        standard deviation ``WEIGHTS_INIT_STDEV`` and op seed 1.
    """
    _, _, _, in_channels = [dim.value for dim in net.get_shape()]
    if transpose:
        channel_dims = [out_channels, in_channels]
    else:
        channel_dims = [in_channels, out_channels]
    weights_shape = [filter_size, filter_size] + channel_dims

    initial = tf.truncated_normal(weights_shape, stddev=WEIGHTS_INIT_STDEV, seed=1)
    return tf.Variable(initial, dtype=tf.float32)

### Residual block

In [12]:
def _residual_block(net, filter_size=3):
    """Add a two-convolution residual branch (128 filters each) to ``net``.

    The first convolution is followed by a ReLU, the second is not; the
    branch output is summed with the unmodified input.
    """
    hidden = _conv_layer(net, 128, filter_size, 1)
    residual = _conv_layer(hidden, 128, filter_size, 1, relu=False)
    return net + residual

### Graph construction

In [13]:
def image_maker(image):
    """Build the image transformation network on top of ``image``.

    Layout: three convolutions (the last two with stride 2), five residual
    blocks, two stride-2 transposed convolutions, and a final 9x9
    convolution down to 3 channels.

    Returns:
        ``tanh(output) * 150 + 127.5``.
    """
    # Downsampling convolutions.
    net = _conv_layer(image, 32, 9, 1)
    net = _conv_layer(net, 64, 3, 2)
    net = _conv_layer(net, 128, 3, 2)

    # Residual trunk.
    for _ in range(5):
        net = _residual_block(net, 3)

    # Upsampling back to the input resolution.
    net = _conv_tranpose_layer(net, 64, 3, 2)
    net = _conv_tranpose_layer(net, 32, 3, 2)
    net = _conv_layer(net, 3, 9, 1, relu=False)

    return tf.nn.tanh(net) * 150 + 255./2

## Optimize

### Settings

In [14]:
# Path to the pretrained VGG-19 weights file handed to vgg19.net.
vgg19_path = '../vgg19/imagenet-vgg-verydeep-19.mat'
# Number of passes of the training loop.
epochs = 2
# Number of content images per training step; also fixes the placeholder shape.
batch_size = 4
# Multipliers applied to the content, style and total-variation loss terms.
content_weight = 7.5*2
style_weight = 1e2
tv_weight = 2e2
# Step size passed to the Adam optimizer.
learning_rate = 1e-3
# Names of the VGG outputs whose Gram matrices define the style loss.
STYLE_LAYERS = ('relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1')
# Name of the VGG output compared directly for the content loss.
CONTENT_LAYER = 'relu4_2'

### Compute style features

In [17]:
# Gram matrix of the style image at each style layer, keyed by layer name.
style_features = {}

with tf.Graph().as_default(), tf.Session() as sess:
    # Add a leading batch axis so the single image looks like a batch of one.
    style_target_reshape = np.array([style_target])
    style_image = tf.placeholder(tf.float32, shape = style_target_reshape.shape, name = 'style_image')
    style_image_proc = vgg19.preprocess(style_image)
    network = vgg19.net(vgg19_path, style_image_proc)

    # Fetch every style layer in one run so the forward pass through the
    # network is executed once instead of once per layer.
    layer_outputs = sess.run([network[layer] for layer in STYLE_LAYERS],
                             feed_dict={style_image: style_target_reshape})

    for layer, features in zip(STYLE_LAYERS, layer_outputs):
        # Flatten batch and spatial axes: one row per position, one column per channel.
        features = np.reshape(features, (-1, features.shape[3]))
        # Channel-by-channel Gram matrix, normalised by the number of activations.
        gram_matrix = np.matmul(features.T, features) / features.size
        style_features[layer] = gram_matrix

### Build the content, style and total-variation losses, then train

In [27]:
# Fixed shape of one training batch: (batch, height, width, channels).
batch_shape = (batch_size, 256, 256 ,3)

# Upper bound on the number of batches processed per epoch. 2 keeps the
# short trial run; set to None to use every full batch in content_targets.
MAX_BATCHES_PER_EPOCH = 2

# Where the trained variables are written once training finishes.
CHECKPOINT_PATH = '../saved_model/model1.ckpt'


def _load_content_image(image_key, height, width):
    """Fetch one content image and coerce it to a (height, width, 3) float32 array.

    get_train_image returns the picture at its stored size and colour mode,
    which need not match the placeholder, so it is converted to RGB and
    resized before use.
    """
    pixels = get_train_image(image_key)
    resized = Image.fromarray(pixels).convert('RGB').resize((width, height), Image.BILINEAR)
    return np.asarray(resized, dtype=np.float32)


with tf.Graph().as_default(), tf.Session() as sess:
    X_content = tf.placeholder(tf.float32, shape=batch_shape, name='X_content')
    X_proc = vgg19.preprocess(X_content)

    # Content target: activations of the unmodified input at CONTENT_LAYER.
    content_features = {}
    content_net = vgg19.net(vgg19_path, X_proc)
    content_features[CONTENT_LAYER] = content_net[CONTENT_LAYER]

    # Stylised image produced by the transform network, pushed through the
    # same preprocessing and feature network as the content input.
    generated_image = image_maker(X_content/255.0)
    generated_image_proc = vgg19.preprocess(generated_image)

    generated_image_net = vgg19.net(vgg19_path, generated_image_proc)
    content_size = tf.size(content_features[CONTENT_LAYER], out_type=tf.float32)
    content_loss = content_weight * (tf.nn.l2_loss(generated_image_net[CONTENT_LAYER] - content_features[CONTENT_LAYER])) / content_size

    # Style loss: squared distance between the Gram matrices of the generated
    # batch and the precomputed style Gram matrix, summed over style layers.
    style_losses = []
    for style_layer in STYLE_LAYERS:
        layer = generated_image_net[style_layer]
        layer_shape = layer.get_shape().as_list()
        size = layer_shape[1] * layer_shape[2] * layer_shape[3]
        feats = tf.reshape(layer, (layer_shape[0], layer_shape[1] * layer_shape[2], layer_shape[3]))
        feats_T = tf.transpose(feats, perm=[0,2,1])
        grams = tf.matmul(feats_T, feats) / size
        # (C, C) style Gram matrix; broadcasts against the (batch, C, C) grams.
        style_gram = style_features[style_layer]
        style_losses.append(2 * tf.nn.l2_loss(grams - style_gram))

    style_loss = style_weight * functools.reduce(tf.add, style_losses) / batch_size

    # Total variation denoising: penalise differences between vertically and
    # horizontally adjacent pixels of the generated image.
    tv_size = tf.size(generated_image, out_type=tf.float32) - 1
    y_tv = tf.nn.l2_loss(generated_image[:,1:,:,:] - generated_image[:,:-1,:,:])
    x_tv = tf.nn.l2_loss(generated_image[:,:,1:,:] - generated_image[:,:,:-1,:])
    tv_loss = tv_weight*2*(x_tv/tv_size + y_tv/tv_size)

    loss = content_loss + style_loss + tv_loss

    # Optimizer
    optimize = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    sess.run(tf.global_variables_initializer())

    # Only full batches are used; the optional cap never exceeds what the
    # dataset can supply, so no all-zero batch is ever fed to the network.
    num_examples = len(content_targets)
    batch_iterations = num_examples // batch_size
    if MAX_BATCHES_PER_EPOCH is None:
        steps_per_epoch = batch_iterations
    else:
        steps_per_epoch = min(batch_iterations, MAX_BATCHES_PER_EPOCH)

    for epoch in range(epochs):
        for i in range(steps_per_epoch):
            print('i:',i)
            curr = i * batch_size
            step = curr + batch_size
            X_batch = np.zeros(batch_shape, dtype=np.float32)
            for j, img_p in enumerate(content_targets[curr:step]):
                X_batch[j] = _load_content_image(img_p, batch_shape[1], batch_shape[2])
            _, loss_value = sess.run([optimize, loss], feed_dict={X_content: X_batch})
            print('loss:', loss_value)
        print('epoch:', epoch)

    # Make sure the target directory exists so the save cannot fail after
    # training has already completed.
    os.makedirs(os.path.dirname(CHECKPOINT_PATH), exist_ok=True)
    saver = tf.train.Saver()
    res = saver.save(sess, CHECKPOINT_PATH)

i: 0
loss: 83601610000000.0
i: 1
loss: 40560260000000.0
epoch: 0
i: 0
loss: 30803210000000.0
i: 1
loss: 25640850000000.0
epoch: 1


In [22]:
# Scratch: small 2x2 integer array for a quick NumPy broadcasting check.
a = np.array([[1, 2], [3, 4]])

In [23]:
a

array([[1, 2],
       [3, 4]])

In [24]:
# Adding a tuple to an ndarray is not concatenation: NumPy treats (4,) as a
# one-element array and broadcasts it, so 4 is added to every element of a.
f = (4,) + a

In [25]:
f

array([[5, 6],
       [7, 8]])