In [14]:
import os

import cv2 as cv
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow import keras

from gram_matrix import gram_matrix

In [None]:
# VGG-19 is used instead of VGG-16 for better performance.
#
# Full pretrained network:
#   - include_top=True keeps the dense layers that follow block5_pool
#   - weights='imagenet' loads the weights pretrained on ImageNet
vgg = keras.applications.VGG19(
    weights='imagenet',
    include_top=True,
)

In [None]:
# Print the name and output shape of every layer of the full VGG-19.
# The shape is read from `layer.output.shape` rather than
# `layer.output_shape`, because the latter attribute is no longer available
# on layers in Keras 3.
for layer in vgg.layers:
    print(f'{layer.name} --> {layer.output.shape}')

In [None]:
# Name of the VGG-19 layer used for the content representation.
content_layers = ['block4_conv2']

# Names of the VGG-19 layers used for the style representation:
# the first convolution of each of the five blocks.
style_layers = [f'block{block}_conv1' for block in range(1, 6)]

In [None]:
def load_vgg_layers(layers):
    """Build a frozen VGG-19 model that outputs the requested layers.

    Args:
        layers: iterable of VGG-19 layer names whose outputs are wanted.

    Returns:
        A keras.Model mapping the VGG-19 input to the outputs of the
        requested layers, in the order the names were given.
    """
    # convolutional base only (no dense head), pretrained on ImageNet
    base = keras.applications.VGG19(weights='imagenet', include_top=False)
    # the pretrained weights must not be updated
    base.trainable = False

    selected = []
    for name in layers:
        selected.append(base.get_layer(name).output)

    return keras.Model([base.input], selected)
    

In [17]:
def load_img(path, max_dim=512):
    """Read an image file into a batched float32 tensor.

    The image is decoded with 3 channels, converted to float32 and resized
    so that its longer side is `max_dim` pixels, keeping the aspect ratio.

    Args:
        path: path of the image file to read.
        max_dim: target length in pixels of the longer side (default 512,
            the value that was previously hard-coded).

    Returns:
        A float32 tensor of shape (1, height, width, 3).
    """
    # read image and convert to float
    raw = tf.io.read_file(path)
    img = tf.image.decode_image(raw, channels=3)
    img = tf.image.convert_image_dtype(img, tf.float32)

    # scale factor that brings the longer side to max_dim;
    # tf.reduce_max is used instead of the builtin max(), which has to
    # iterate over the tensor in Python and only works in eager mode
    shape = tf.cast(tf.shape(img)[:-1], tf.float32)
    long_dim = tf.reduce_max(shape)
    scale = max_dim / long_dim

    # resize image and add the leading batch axis
    new_shape = tf.cast(shape * scale, tf.int32)
    img = tf.image.resize(img, new_shape)
    return img[tf.newaxis, :]

In [None]:
def _read_square_image(path, size=(256, 256)):
    """Read an image with OpenCV, resize it and convert it to float32.

    Args:
        path: path of the image file to read.
        size: (width, height) passed to cv.resize (default 256 by 256).

    Returns:
        The resized image converted with tf.image.convert_image_dtype to
        float32, since the VGG model expects float32 inputs. The channel
        order is whatever cv.imread produced (OpenCV reads images as BGR).

    Raises:
        FileNotFoundError: if OpenCV cannot read the file. cv.imread
            returns None in that case instead of raising, which would
            otherwise surface as an unrelated error inside cv.resize.
    """
    pixels = cv.imread(path)
    if pixels is None:
        raise FileNotFoundError(f'could not read image: {path}')
    pixels = cv.resize(src=pixels, dsize=size)
    return tf.image.convert_image_dtype(pixels, dtype=tf.float32)


# load content and style images as 256 by 256 float32 images
content_img = _read_square_image('content.jpg')
style_img = _read_square_image('style.jpg')

In [22]:
# load content and style images

# Directory holding the images. The default is the original machine-specific
# location; set the PICYOURSTYLE_RES_DIR environment variable to run the
# notebook elsewhere without editing this cell.
RES_DIR = os.environ.get(
    'PICYOURSTYLE_RES_DIR',
    '/Users/sophiexie/CS Projects/PicYourStyle/res',
)

content_img = load_img(os.path.join(RES_DIR, 'content.jpg'))
style_img = load_img(os.path.join(RES_DIR, 'style.jpg'))

In [23]:
# model that returns the activations of the style layers
get_img_style = load_vgg_layers(style_layers)

# activations of the style image, with pixel values multiplied by 255
style_outputs = get_img_style(style_img * 255)

# report shape and mean activation for every style layer
for idx, layer_name in enumerate(style_layers):
    activation = style_outputs[idx]
    print(layer_name)
    print('shape: ', np.shape(activation))
    print('mean: ', np.mean(activation))

block1_conv1
shape:  (1, 361, 512, 64)
mean:  43.533783
block2_conv1
shape:  (1, 180, 256, 128)
mean:  171.75464
block3_conv1
shape:  (1, 90, 128, 256)
mean:  139.94604
block4_conv1
shape:  (1, 45, 64, 512)
mean:  587.8751
block5_conv1
shape:  (1, 22, 32, 512)
mean:  36.141705


In [None]:
# display graphs inside notebook
%matplotlib inline

plt.subplot(1, 2, 1)
plt.imshow(cv.cvtColor(np.array(content_img), cv.COLOR_BGR2RGB))
plt.subplot(1, 2, 2)
plt.imshow(cv.cvtColor(np.array(style_img), cv.COLOR_BGR2RGB))
plt.show()

In [None]:
# Adam optimizer.
# beta_1 controls the running average of the gradients: 0.9 corresponds to
# averaging over roughly the last 10 iterations, and a higher beta_1 means
# averaging over more iterations.
opt = tf.optimizers.Adam(
    beta_1=0.9,
    epsilon=0.1,
    learning_rate=0.01,
)


In [None]:
# total loss = weighted sum of content and style loss
def total_loss(content_output, style_output, content_target, style_target,
               content_weight=0.001, style_weight=0.001):
    """Combine the content loss and the style loss into one scalar.

    Args:
        content_output: content-layer activations of the generated image.
        style_output: sequence of style-layer activations of the generated
            image, one entry per style layer.
        content_target: content-layer activations to match.
        style_target: sequence of style-layer activations to match, paired
            with `style_output` entry by entry.
        content_weight: multiplier of the content loss (default 0.001).
        style_weight: multiplier of the style loss (default 0.001).

    Returns:
        content_weight * content_loss + style_weight * style_loss, where
        each loss is a mean squared difference.
    """
    # content loss: mean squared difference of the content activations
    content_loss = tf.reduce_mean((content_output - content_target) ** 2)

    # style loss: match each style output with its corresponding target and
    # sum the per-layer mean squared differences.
    # NOTE(review): the activations are compared directly, which needs the
    # output and target of a layer to have compatible shapes. gram_matrix is
    # imported by the notebook but never used; presumably the style terms
    # were meant to be Gram matrices -- confirm.
    style_loss = tf.add_n([
        tf.reduce_mean((output_ - target_) ** 2)
        for output_, target_ in zip(style_output, style_target)
    ])

    # combine losses: the style weight multiplies the style loss (it was
    # previously added to it, leaving the style loss unweighted)
    return content_weight * content_loss + style_weight * style_loss


In [None]:
def load_vgg():
    """Return a feature extractor for the content and style layers.

    The extractor is built with load_vgg_layers from content_layers followed
    by style_layers.

    Returns:
        A callable taking an image and returning a pair
        (content_output, style_outputs): the activations of the first
        content layer, and the list of activations of the style layers.
    """
    extractor = load_vgg_layers(content_layers + style_layers)
    num_content = len(content_layers)

    def extract(image):
        image = tf.convert_to_tensor(image)
        # the model needs a leading batch axis; add it when it is missing
        if image.shape.rank == 3:
            image = image[tf.newaxis, ...]
        features = extractor(image)
        # outputs follow the order content layers, then style layers
        return features[0], list(features[num_content:])

    return extract


vgg_model = load_vgg()
# The images are passed directly: wrapping them in np.array([...]) would
# stack a second batch axis onto images that already have one.
content_target = vgg_model(content_img * 255)[0]
style_target = vgg_model(style_img * 255)[1]

In [None]:
def train_model(image, epoch):
    """Run one optimisation step on `image`, updating it in place.

    Args:
        image: tf.Variable holding the image being optimised; it is
            updated by the optimizer and then clipped to [0, 1].
        epoch: index of the current step, used to decide when to print
            the loss.
    """
    # record only the forward pass; the tape must be instantiated
    with tf.GradientTape() as tape:
        # pass image to model
        output = vgg_model(image * 255)
        # calculate total loss
        loss = total_loss(output[0], output[1], content_target, style_target)

    # gradient of the loss with respect to the image itself
    gradient = tape.gradient(loss, image)
    # optimize
    opt.apply_gradients([(gradient, image)])
    # keep pixel values in the valid [0, 1] range
    image.assign(tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0))

    # check loss every 100 epochs
    if epoch % 100 == 0:
        tf.print(f'Loss = {loss}')

In [None]:
EPOCHS = 10

# The optimised image starts as a float32 copy of the content image.
content = tf.image.convert_image_dtype(content_img, tf.float32)
# The model needs exactly one leading batch axis: add it only when the
# image does not have one yet, instead of always wrapping it in a list.
if content.shape.rank == 3:
    content = content[tf.newaxis, ...]
content = tf.Variable(content)

for i in range(EPOCHS):
    train_model(content, i)