# 1. Simple style transfer

In this section we will directly apply the style transfer to the image by treating the image pixels as weights and optimizing them. First we will load our content and our style image:

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.transforms import ToTensor,ToPILImage,Normalize
from utils.model import construct_style_loss_model,construct_decoder_from_encoder
from utils.losses import content_gatyes,style_gatyes,style_mmd_polynomial,adaIN,style_mmd_gaussian
from utils.utility import normalize
import cv2
from copy import deepcopy
from torchvision.models import vgg19,VGG19_Weights
from PIL import Image
import torch.optim as optim
from tqdm import tqdm
%load_ext autoreload
%autoreload 2

In [2]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("On device: ", device)

On device:  cuda


In [3]:
# Size both images are resized to right after loading (passed to PIL's Image.resize).
IMAGE_SIZE = (512, 512)

# Image that provides the content of the result.
CONTENT_IMAGE_PATH = "./content_frames/864.jpg"

# Image that provides the style of the result.
STYLE_IMAGE_PATH = "./style_frames/1051.jpg"

In [12]:
def _load_image_tensor(path):
    """Load the image at ``path`` as an RGB tensor resized to IMAGE_SIZE.

    ToTensor() returns the image as [C x H x W]; the trailing permute(0, 2, 1)
    swaps the two spatial axes, so the returned tensor is [C x W x H].
    The optimization loop applies the same permute again before saving, which
    undoes the swap.

    NOTE(review): torchvision models conventionally consume [.. x H x W], so the
    network sees a transposed image here -- confirm this is intended.
    """
    rgb_image = Image.open(path).convert('RGB').resize(IMAGE_SIZE)
    return ToTensor()(rgb_image).permute(0, 2, 1)


content_image = _load_image_tensor(CONTENT_IMAGE_PATH)
style_image = _load_image_tensor(STYLE_IMAGE_PATH)

In [151]:
# Next we load the model. We use the standard VGG19 that ships with torchvision,
# keep only its convolutional feature extractor (no classification head) and
# prepend a normalization layer so the input matches the statistics of the
# data the model was trained on.

# Load the pretrained model. `weights` is a keyword-only parameter in the
# current torchvision API; passing it positionally is deprecated.
pretrained_vgg19 = vgg19(weights=VGG19_Weights.DEFAULT)

# Drop the classification head: only the `features` sub-network is needed.
vgg_features = pretrained_vgg19.features

# Prepend the normalization layer. It becomes layer (0), so every VGG layer
# index is shifted by one relative to `vgg19().features`.
vgg = nn.Sequential(
    Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    *vgg_features,
)

# Let's print the model; the printed indices are the ones used to pick layers below.
vgg



Sequential(
  (0): Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
  (1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (2): ReLU(inplace=True)
  (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (4): ReLU(inplace=True)
  (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (7): ReLU(inplace=True)
  (8): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (9): ReLU(inplace=True)
  (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (11): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (12): ReLU(inplace=True)
  (13): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (14): ReLU(inplace=True)
  (15): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (16): ReLU(inplace=True)
  (17): Conv2d(256, 256, kernel_size=(3, 3),

In [152]:
# Next we define which layers are used as content and style layers.
# The indices refer to the printed vgg model above, so index (6) selects
# "Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))".
# In theory not only conv layers can be used; some papers also use the ReLU layers in between.
CONTENT_LAYERS = [3, 6]
STYLE_LAYERS = [6, 8, 11]

# Every chosen layer gets a weight (default 1.0 for each layer).
# These lists must have the same length as the corresponding layer lists.
CONTENT_LAYERS_WEIGHTS = [1.0, 1.0]
STYLE_LAYERS_WEIGHTS = [1.0, 1.0, 1.0]

# Fail early if a layer list and its weight list got out of sync.
if len(CONTENT_LAYERS) != len(CONTENT_LAYERS_WEIGHTS):
    raise AssertionError(
        "CONTENT_LAYERS and CONTENT_LAYERS_WEIGHTS have to have the same length but were {0} and {1} respectively".format(
            len(CONTENT_LAYERS), len(CONTENT_LAYERS_WEIGHTS)
        )
    )
if len(STYLE_LAYERS) != len(STYLE_LAYERS_WEIGHTS):
    raise AssertionError(
        "STYLE_LAYERS and STYLE_LAYERS_WEIGHTS have to have the same length but were {0} and {1} respectively".format(
            len(STYLE_LAYERS), len(STYLE_LAYERS_WEIGHTS)
        )
    )

In [153]:
# Build the style loss model from the VGG layers and the chosen layer indices.
# It is called with a tuple (image, [], []) -- the image plus two empty lists --
# and returns a tuple (output, content features, style features), where the two
# lists hold the features of the chosen content and style layers respectively.
style_loss_model = construct_style_loss_model(
    vgg,
    CONTENT_LAYERS,
    STYLE_LAYERS,
)
style_loss_model

range(0, 28)


Sequential(
  (Model layer: 0 | Content layer: False | Style layer: False): Parallel(
    (layer): Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
  )
  (Model layer: 1 | Content layer: False | Style layer: False): Parallel(
    (layer): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
  (Model layer: 2 | Content layer: False | Style layer: False): Parallel(
    (layer): ReLU(inplace=True)
  )
  (Model layer: 3 | Content layer: True | Style layer: False): Parallel(
    (layer): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
  (Model layer: 4 | Content layer: False | Style layer: False): Parallel(
    (layer): ReLU(inplace=True)
  )
  (Model layer: 5 | Content layer: False | Style layer: False): Parallel(
    (layer): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (Model layer: 6 | Content layer: True | Style layer: True): Parallel(
    (layer): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1)

In [154]:
# Switch to eval mode (in case the model contains e.g. Dropout layers), freeze
# all model weights, and move the model to the target device.
style_loss_model = style_loss_model.eval()
style_loss_model.requires_grad_(False)
style_loss_model = style_loss_model.to(device)

# The images have to live on the same device as the model.
content_image, style_image = content_image.to(device), style_image.to(device)

In [155]:
# Start the optimization from a copy of the content image, which gives better
# results than starting from random noise (torch.rand_like(content_image)).
# The image pixels are the only parameters handed to the optimizer.
img = nn.Parameter(
    content_image.clone().to(device)
)
optimizer = optim.LBFGS(params=[img], lr=0.5)

In [156]:
# The targets never change during the optimization, so compute them once up
# front: content features of the content image, style features of the style image.
# unsqueeze(0) adds the batch dimension; no gradients are needed for the targets.
with torch.no_grad():
    content_batch = content_image.unsqueeze(0)
    _, content_features_target, _ = style_loss_model((content_batch, [], []))

    style_batch = style_image.unsqueeze(0)
    _, _, style_features_target = style_loss_model((style_batch, [], []))

In [157]:
# Factor the summed style loss is multiplied with before it is added to the content loss.
STYLE_WEIGHT = 1e7

# Loss applied to the content features.
LOSS_CONTENT = content_gatyes

# Loss applied to the style features.
# Possible values: style_gatyes, style_mmd_polynomial, style_mmd_gaussian, adaIN
#   - style_mmd_gaussian does not work well
#   - you might have to lower STYLE_WEIGHT when choosing adaIN
LOSS_STYLE = style_gatyes

In [158]:
# LBFGS works a bit differently from other PyTorch optimizers: optimizer.step()
# takes a closure that re-evaluates the loss and backpropagates it, and the
# optimizer may call that closure several times within a single step.
def compute_losses():
    """Closure for LBFGS: evaluate the total loss of the current image and backpropagate it.

    Reads the notebook-level ``img``, ``optimizer``, ``style_loss_model``,
    ``content_features_target``, ``style_features_target``, the layer weight
    lists, ``LOSS_CONTENT``, ``LOSS_STYLE`` and ``STYLE_WEIGHT``.

    Side effects:
        Clamps ``img`` in place to [0, 1], resets the optimizer's gradients and
        runs ``backward()`` on the total loss.

    Returns:
        float: content loss plus ``STYLE_WEIGHT`` times the style loss.
    """
    # Clip all values of the image to the range [0,1]
    with torch.no_grad():
        img.clamp_(0, 1)

    # Reset the gradients accumulated by the previous evaluation.
    optimizer.zero_grad()

    # Features of the current image at the chosen content and style layers
    # (unsqueeze(0) adds the batch dimension).
    _, content_features, style_features = style_loss_model((img.unsqueeze(0), [], []))

    # Weighted sum of the per-layer content losses.
    # normalize() comes from utils.utility; its result is passed straight to the loss function.
    content_loss = 0.0
    for features, target, weight in zip(content_features, content_features_target, CONTENT_LAYERS_WEIGHTS):
        content_loss += weight * LOSS_CONTENT(normalize(features, target)).mean()

    # Weighted sum of the per-layer style losses, scaled by the global style weight.
    style_loss = 0.0
    for features, target, weight in zip(style_features, style_features_target, STYLE_LAYERS_WEIGHTS):
        style_loss += weight * LOSS_STYLE(normalize(features, target)).mean()
    style_loss *= STYLE_WEIGHT

    loss = content_loss + style_loss
    loss.backward()

    # Reuse the tensor that was backpropagated instead of rebuilding the sum.
    return loss.item()

In [159]:
# File the current image is written to after every optimization step
# (the file is overwritten each time).
SAVING_PATH = "./result.jpg"

In [160]:
# Number of optimizer.step() calls. LBFGS evaluates the closure several times
# per step, so the loss is computed more often than this.
NUM_OPTIMIZER_STEPS = 10

for step in tqdm(range(NUM_OPTIMIZER_STEPS)):

    optimizer.step(compute_losses)

    with torch.no_grad():
        # Clip all values of the image to the range [0,1]
        img.clamp_(0, 1)

        # Take a detached CPU copy for saving. permute(0, 2, 1) undoes the
        # spatial-axis swap applied when the images were loaded. The image has
        # no batch dimension, so there is nothing to squeeze.
        snapshot = img.detach().permute(0, 2, 1).cpu()

    # Overwrite the result file after every step so progress can be inspected while running.
    ToPILImage()(snapshot).save(SAVING_PATH)


  0%|          | 0/10 [00:00<?, ?it/s]

content loss:  tensor(0., device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(33.2265, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(2.1302e-08, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(33.2219, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(1.2586e-05, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(33.0099, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.0885, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(25.0858, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.1399, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(20.1362, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.2217, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(15.9718, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.2950, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(13.4848, device='cuda:0', grad_fn=<MulBackward0>)
co

 10%|█         | 1/10 [00:02<00:18,  2.07s/it]

content loss:  tensor(1.0796, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(1.7218, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(1.0841, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(1.5385, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(1.0843, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(1.3958, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(1.0814, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(1.2831, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(1.0786, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(1.1952, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(1.0753, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(1.1171, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(1.0761, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(1.0457, device='cuda:0', grad_fn=<MulBackward0>)
content loss:

 20%|██        | 2/10 [00:03<00:14,  1.87s/it]

content loss:  tensor(1.0564, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.5972, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(1.0549, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.5810, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(1.0528, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.5673, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(1.0502, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.5505, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(1.0475, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.5384, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(1.0442, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.5258, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(1.0416, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.5162, device='cuda:0', grad_fn=<MulBackward0>)
content loss:

 30%|███       | 3/10 [00:05<00:12,  1.83s/it]

content loss:  tensor(1.0012, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.4211, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.9987, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.4162, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.9966, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.4135, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.9947, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.4097, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.9925, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.4060, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.9900, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.4017, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.9869, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.3971, device='cuda:0', grad_fn=<MulBackward0>)
content loss:

 40%|████      | 4/10 [00:07<00:11,  1.84s/it]

content loss:  tensor(0.9628, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.3558, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.9618, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.3536, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.9604, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.3513, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.9586, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.3483, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.9568, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.3456, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.9553, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.3428, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.9538, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.3411, device='cuda:0', grad_fn=<MulBackward0>)
content loss:

 50%|█████     | 5/10 [00:09<00:09,  1.86s/it]

content loss:  tensor(0.9382, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.3184, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.9370, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.3172, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.9360, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.3157, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.9347, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.3147, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.9338, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.3133, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.9326, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.3122, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.9319, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.3107, device='cuda:0', grad_fn=<MulBackward0>)
content loss:

 60%|██████    | 6/10 [00:11<00:07,  1.89s/it]

tensor(0.9211, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.2979, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.9203, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.2969, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.9198, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.2959, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.9191, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.2949, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.9185, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.2944, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.9179, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.2937, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.9174, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.2929, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.9165

 70%|███████   | 7/10 [00:13<00:05,  1.90s/it]

content loss:  tensor(0.9082, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.2831, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.9077, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.2826, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.9071, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.2821, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.9065, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.2816, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.9061, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.2811, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.9055, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.2806, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.9053, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.2799, device='cuda:0', grad_fn=<MulBackward0>)
content loss:

 80%|████████  | 8/10 [00:15<00:03,  1.91s/it]

content loss:  tensor(0.8993, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.2734, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.8990, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.2730, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.8986, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.2726, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.8983, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.2722, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.8978, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.2719, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.8976, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.2715, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.8971, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.2712, device='cuda:0', grad_fn=<MulBackward0>)
content loss:

 90%|█████████ | 9/10 [00:17<00:01,  1.92s/it]

content loss:  tensor(0.8928, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.2661, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.8924, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.2658, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.8922, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.2654, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.8920, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.2652, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.8917, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.2648, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.8915, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.2644, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.8912, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.2641, device='cuda:0', grad_fn=<MulBackward0>)
content loss:

100%|██████████| 10/10 [00:19<00:00,  1.90s/it]

tensor(0.8882, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.2604, device='cuda:0', grad_fn=<MulBackward0>)
content loss:  tensor(0.8880, device='cuda:0', grad_fn=<AddBackward0>)  style loss:  tensor(0.2600, device='cuda:0', grad_fn=<MulBackward0>)



