In [22]:
import tensorflow as tf
from tensorflow import keras
from gram_matrix import gram_matrix
import cv2 as cv
# import matplotib as plt
import numpy as np

In [2]:
# the VGG-19 model is chosen over VGG-16 for better performance

# call pretrained VGG19 model
# def load_vgg_19():
    # include_top includes the dense layers after the block5_pool layer
    # model uses pretrained weights from imagenet
vgg = keras.applications.VGG19(include_top=True, weights='imagenet')

2023-05-29 19:43:54.652480: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels.h5


In [3]:
# print layers and output shape for each layer
for layer in vgg.layers:
    print(f'{layer.name} --> {layer.output_shape}')

input_1 --> [(None, 224, 224, 3)]
block1_conv1 --> (None, 224, 224, 64)
block1_conv2 --> (None, 224, 224, 64)
block1_pool --> (None, 112, 112, 64)
block2_conv1 --> (None, 112, 112, 128)
block2_conv2 --> (None, 112, 112, 128)
block2_pool --> (None, 56, 56, 128)
block3_conv1 --> (None, 56, 56, 256)
block3_conv2 --> (None, 56, 56, 256)
block3_conv3 --> (None, 56, 56, 256)
block3_conv4 --> (None, 56, 56, 256)
block3_pool --> (None, 28, 28, 256)
block4_conv1 --> (None, 28, 28, 512)
block4_conv2 --> (None, 28, 28, 512)
block4_conv3 --> (None, 28, 28, 512)
block4_conv4 --> (None, 28, 28, 512)
block4_pool --> (None, 14, 14, 512)
block5_conv1 --> (None, 14, 14, 512)
block5_conv2 --> (None, 14, 14, 512)
block5_conv3 --> (None, 14, 14, 512)
block5_conv4 --> (None, 14, 14, 512)
block5_pool --> (None, 7, 7, 512)
flatten --> (None, 25088)
fc1 --> (None, 4096)
fc2 --> (None, 4096)
predictions --> (None, 1000)


In [50]:
# content layers
content_layers = ['block4_conv2']

# style layers
style_layers = ['block1_conv1', 'block2_conv1', 'block3_conv1', 'block4_conv1', 'block5_conv1']

In [62]:
def load_vgg_layers(layers):
    vgg = keras.applications.VGG19(include_top=False, weights='imagenet')
    # weights are immutable
    vgg.trainable = False

    outputs = [vgg.get_layer(layer).output for layer in layers]

    # build model
    model = keras.Model([vgg.input], outputs)
    return model
    

In [54]:
def load_img(path):
  # set max dimensions
  max_dim = 512

  # read image and convert to float
  img = cv.imread(path)
  img = tf.image.decode_image(img, channels=3)
  img = tf.image.convert_image_dtype(img, tf.float32)

  # scale image
  shape = tf.cast(tf.shape(img)[:-1], tf.float32)
  long_dim = max(shape)
  scale = max_dim / long_dim

  # resize image
  new_shape = tf.cast(shape * scale, tf.int32)
  img = tf.image.resize(img, new_shape)
  img = img[tf.newaxis, :]
  return img

In [52]:
# load content image as 256 by 256 image
content_img = cv.resize(src=cv.imread('content.jpg'), dsize=(256, 256))
content_img.shape
# represent images using float32 since VGG model expects float32 inputs
content_img = tf.image.convert_image_dtype(content_img, dtype=tf.float32)
content_img

# load style image
style_img = cv.resize(src=cv.imread('style.jpg'), dsize=(256, 256))
# convert style image into float32
style_img = tf.image.convert_image_dtype(style_img, dtype=tf.float32)
style_img

<tf.Tensor: shape=(256, 256, 3), dtype=float32, numpy=
array([[[0.6313726 , 0.6627451 , 0.48235297],
        [0.6156863 , 0.58431375, 0.33333334],
        [0.5529412 , 0.4666667 , 0.16862746],
        ...,
        [0.6627451 , 0.45098042, 0.2627451 ],
        [0.63529414, 0.44705886, 0.3254902 ],
        [0.61960787, 0.54901963, 0.52156866]],

       [[0.72156864, 0.6784314 , 0.4039216 ],
        [0.6313726 , 0.5411765 , 0.21568629],
        [0.6509804 , 0.52156866, 0.15686275],
        ...,
        [0.74509805, 0.46274513, 0.27450982],
        [0.69411767, 0.43137258, 0.3019608 ],
        [0.72156864, 0.6156863 , 0.56078434]],

       [[0.6901961 , 0.5764706 , 0.21568629],
        [0.6862745 , 0.54509807, 0.14901961],
        [0.7137255 , 0.5568628 , 0.14509805],
        ...,
        [0.7490196 , 0.40784317, 0.21960786],
        [0.6627451 , 0.34117648, 0.19215688],
        [0.7490196 , 0.59607846, 0.48627454]],

       ...,

       [[0.41176474, 0.69411767, 0.75294125],
        [0.38

In [55]:
# load content and style images
content_img = load_img('content.jpg')
style_img = load_img('style.jpg')

In [66]:
# extract style layers
get_img_style = load_vgg_layers(style_layers)

# style outputs
style_outputs = get_img_style(style_img * 255)

# examine each layer output
for layer, output_ in zip(style_layers, style_outputs):
    print(layer)
    print('shape: ', np.shape(output_))
    print('mean: ', np.mean(output_))

block1_conv1
shape:  (1, 361, 512, 64)
mean:  43.533783
block2_conv1
shape:  (1, 180, 256, 128)
mean:  171.75464
block3_conv1
shape:  (1, 90, 128, 256)
mean:  139.94604
block4_conv1
shape:  (1, 45, 64, 512)
mean:  587.8751
block5_conv1
shape:  (1, 22, 32, 512)
mean:  36.141705


In [27]:
# display graphs inside notebook
%matplotlib inline

plt.subplot(1, 2, 1)
plt.imshow(cv.cvtColor(np.array(content_img), cv.COLOR_BGR2RGB))
plt.subplot(1, 2, 2)
plt.imshow(cv.cvtColor(np.array(style_img), cv.COLOR_BGR2RGB))
plt.show()

ModuleNotFoundError: No module named 'matplotlib'

In [45]:
# Adam optimizer
# beta_1 = 0.9 means that we average over the last 10 iterations' gradients
# higher beta_1 = averaging over more iterations
opt = tf.optimizers.Adam(learning_rate = 0.01, epsilon=0.1, beta_1=0.9)


In [46]:
# total loss = weighted average of content and style loss
def total_loss(content_output, style_output, content_target, style_target):
    # content weight
    content_weight = 0.001

    # style weight
    style_weight = 0.001

    # content loss
    content_loss = tf.reduce_mean((content_output - content_target) ** 2)

    # style loss
    # match each style output with corresponding target
    style_loss = tf.add_n([tf.reduce_mean(((output_ - target_) ** 2)) for output_, target_ in zip(style_output, style_target)])

    # combine losses
    total_loss = content_weight * content_loss + style_weight + style_loss

    return total_loss


In [47]:
vgg_model = load_vgg()
content_target = vgg_model(np.array([content_img * 255]))[0]
style_target = vgg_model(np.aray([style_img * 255]))[1]

calculating gram matrix: 
original shape: 
KerasTensor(type_spec=TensorSpec(shape=(4,), dtype=tf.int32, name=None), inferred_value=[None, None, None, 64], name='tf.compat.v1.shape_50/Shape:0', description="created by layer 'tf.compat.v1.shape_50'")
flattened tensor: 
KerasTensor(type_spec=TensorSpec(shape=(2,), dtype=tf.int32, name=None), inferred_value=[None, None], name='tf.compat.v1.shape_51/Shape:0', description="created by layer 'tf.compat.v1.shape_51'")
gram matrix: 
calculating gram matrix: 
original shape: 
KerasTensor(type_spec=TensorSpec(shape=(4,), dtype=tf.int32, name=None), inferred_value=[None, None, None, 128], name='tf.compat.v1.shape_52/Shape:0', description="created by layer 'tf.compat.v1.shape_52'")
flattened tensor: 
KerasTensor(type_spec=TensorSpec(shape=(2,), dtype=tf.int32, name=None), inferred_value=[None, None], name='tf.compat.v1.shape_53/Shape:0', description="created by layer 'tf.compat.v1.shape_53'")
gram matrix: 
calculating gram matrix: 
original shape: 


InvalidArgumentError: Exception encountered when calling layer 'tf.reshape_29' (type TFOpLambda).

{{function_node __wrapped__Reshape_device_/job:localhost/replica:0/task:0/device:CPU:0}} Input to reshape is a tensor with 131072 values, but the requested shape has 256 [Op:Reshape]

Call arguments received by layer 'tf.reshape_29' (type TFOpLambda):
  • tensor=tf.Tensor(shape=(1, 16, 16, 512), dtype=float32)
  • shape=['tf.Tensor(shape=(), dtype=int32)', 'tf.Tensor(shape=(), dtype=int32)']
  • name=None

In [32]:
def train_model(image, epoch):
    with tf.GradientTape as tape:
        # pass image to model
        output = vgg_model(image * 255)
        # calculate total loss
        loss = total_loss(output[0], output[1], content_target, style_target)
        gradient = tape.grandient(loss, image)
        # optimize
        opt.apply_gradients([(gradient, image)])
        image.assign(tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0))

        # check loss every 100 epochs
        if epoch % 100 == 0:
            tf.print(f'Loss = {loss}')

In [33]:
EPOCHS = 10
content = tf.image.convert_image_dtype(content_img, tf.float32)
content = tf.Variable([content])

for i in range(EPOCHS):
    train_model(content, i)

AttributeError: __enter__