In [1]:
import tensorflow as tf
import keras
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Neural style transfer : Transfering the style (color , textures or visual patterns) from a reference image to the target image

Content Loss :
Consider an original image of a cat and a generated image created using neural style transfer. To calculate the content loss, we compare the activations of a layer in the convnet for both the original cat image and the generated image. If the activations of this layer are similar for both images, it suggests that the generated image preserves the content of the original cat image.

Style loss:
Consider a reference image with a distinct artistic style, such as a painting by Van Gogh. To transfer the style of this painting to another image using neural style transfer, we calculate the style loss by comparing the Gram matrices of activations across multiple layers of the convnet for both the reference image and the generated image. By preserving similar feature correlations, the generated image adopts the artistic style of the reference image.

The Gram matrix is a mathematical representation of the correlations between the feature maps of a given layer in a convolutional neural network, used in style transfer algorithms to capture the style or texture of an image.

In [3]:
# Neural Transfer Learning can be implemented using any pretrained covnets( We are using VGG19 here)

In [4]:
base_image_path = r"artifacts\building.jpg"
reference_image_path = r"artifacts\building.jpg" 

In [5]:
base_image_path

'artifacts\\building.jpg'

In [6]:
original_width,original_height = keras.utils.load_img(base_image_path).size

In [7]:
img_height = 400
img_width = round(original_width*img_height / original_height) # Dimensions of the generated image

In [8]:
# Auxilary functions for preprocessing and deprocessing images(modified for VGG19 preprocessing)

def preprocess_image(image_path): # Function for resizing, then formating image to array and preprocess according to input required by the model 
    img = keras.utils.load_img(image_path,target_size=(img_height,img_width)) # Resizing the image
    img = keras.utils.img_to_array(img) # Converting the image to an array
    img = np.expand_dims(img, axis=0) # Adding a dimension to the array
    img = keras.applications.vgg19.preprocess_input(img)
    return img

def deprocess_image(img): # Function to convert array to image
    img = img.reshape((img_height,img_width,3)) # 3 for 3 channels
    # Zero centering by removing the mean pixel value from Imagenet(Reversing the preprocessing steps of the vgg16 preprocess step)
    img[:,:,0] += 103.939
    img[:,:,1] += 116.779
    img[:,:,2] += 123.68
    img = img[:,:,::-1] # Converting image from 'BGR' to 'RGB'('RBG' to 'BRG' reversal part of preprocess step of vgg19)
    img = np.clip(img,0,255).astype('uint8')
    return img

In [9]:
from keras.models import Model

In [10]:
# Setting the model

model = keras.applications.vgg19.VGG19(weights='imagenet',include_top=False)
output_dict = dict([(layer.name,layer.output) for layer in model.layers])
feature_extractor = Model(inputs=model.inputs,outputs=output_dict) # Feature extractor defined to return a dictionary with activation values for each target layer

In [11]:
# Function for computing the content loss
def content_loss(base_img,combination_img):
    return tf.reduce_sum(tf.square(combination_img - base_img))

In [12]:
# Functions for gram matrix computation and style loss calculation
def gram_matrix(x):
    x = tf.transpose(x,(2,0,1))
    features = tf.reshape(x,(tf.shape(x)[0],-1))
    gram = tf.matmul(features,tf.transpose(features))
    return gram

def style_loss(style_img, combination_img):
    S = gram_matrix(style_img)
    C = gram_matrix(combination_img)
    channels = 3
    size = img_height * img_width
    return tf.reduce_sum(tf.square(S - C)) / (4.0 * (channels ** 2) * (size ** 2))

In [13]:
# Function for variation loss calculation ( It operates on the pixels of the generated combination image. Encourages spatial continuity in the generated image to avoid overly pixelated results)

def total_variation_loss(x):

    a = tf.square(
        x[:, : img_height - 1, : img_width - 1, :] - x[:, 1:, : img_width - 1, :]
    )
    b = tf.square(
        x[:, : img_height - 1, : img_width - 1, :] - x[:, : img_height - 1, 1:, :]
    )
    
    return tf.reduce_sum(tf.pow(a + b, 1.25))

In [14]:
# The total loss will be the weighted average of the three losses

In [15]:
# Style layers will be a list of layers at both top and bottom part of the model
# Content layer will be a sinle upper layer

In [16]:
model.summary()

In [17]:
style_layer_names = [
    "block1_conv1", "block2_conv1", "block3_conv1", "block4_conv1", "block5_conv1" 
]

content_layer_name = "block5_conv2"

#Weight defined for each loss type contribution to the total loss
total_variation_weight = 1e-6 # Contribution weight of the total variation loss
style_weight = 1e-6 # Contribution weight of the style loss
content_weight = 2.5e-6 # Contribution weight of the content loss

def compute_loss(combination_image, base_image, style_reference_image):
    input_tensor = tf.concat(
        [base_image, style_reference_image, combination_image], axis=0)
    features = feature_extractor(input_tensor) # Extracting the activation values and layer name dictionary for all the 3 images
    loss = tf.zeros(shape=()) # Loss initialization 
    layer_features = features[content_layer_name]
    base_image_features = layer_features[0, :, :, :]
    combination_features = layer_features[2, :, :, :]
    loss = loss + content_weight * content_loss( # Content Loss calculation using the feature maps of the 3 images in their corresponding layers
        base_image_features, combination_features
    )

    for layer_name in style_layer_names: # Style loss calculation
        layer_features = features[layer_name]
        style_reference_features = layer_features[1, :, :, :]
        combination_features = layer_features[2, :, :, :]
        style_loss_value = style_loss(
        style_reference_features, combination_features)
        loss += (style_weight / len(style_layer_names)) * style_loss_value
        
    loss += total_variation_weight * total_variation_loss(combination_image) # Total loss calculation
    return loss



In [18]:
from keras.optimizers import SGD,schedules
from keras.optimizers.schedules import ExponentialDecay

In [22]:
# Using learning rate schedule( modifies the learning rate)

@tf.function
def compute_loss_and_grads(
    combination_image, base_image, style_reference_image
):
    with tf.GradientTape() as tape:
        loss = compute_loss(combination_image, base_image, style_reference_image)
    grads = tape.gradient(loss,combination_image)
    return loss,grads

optimizer = SGD(ExponentialDecay( # Start with 100.0 as learning rate and reduce it by 4 percent every 100 steps
    initial_learning_rate=100, decay_steps=100, decay_rate=0.96
))

In [23]:
base_image = preprocess_image(base_image_path)
style_reference_image = preprocess_image(reference_image_path)
combination_image = tf.Variable(preprocess_image(base_image_path)) # Using a variable to store the combination image(copy of base image) since we will be updating it during training

iterations = 500

for i in range(1,iterations+1):
    loss,grads = compute_loss_and_grads(combination_image, base_image, style_reference_image)
    optimizer.apply_gradients([(grads,combination_image)]) # Updating the combination image using the gradient output reducing the total loss
    if i % 100 == 0:
        print(f"Iteration {i}: loss={loss:.2f}")
        img = deprocess_image(combination_image.numpy())
        fname = f"artifacts\combination_image_at_iteration_{i}.png"
        keras.utils.save_img(fname,img) # Saving the combined image at regular intervals

Iteration 100: loss=2819.10
Iteration 200: loss=2819.10
Iteration 300: loss=2819.10
Iteration 400: loss=2819.10
Iteration 500: loss=2819.10
