In [None]:
# TODO: the style loss may not be correct; the MSE might need to be divided (normalised)
# TODO: add a function for displaying the result

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.models as models
import torchvision.transforms as tf
from torch.autograd import Variable


In [None]:
# Collect intermediate activations of a model (used in this notebook with VGG)
def get_features(image, model, layers):
    """Feed ``image`` through the children of ``model`` and keep selected outputs.

    Args:
        image: Input passed to the first child module of ``model``.
        model: Module whose direct children (``model._modules``) are applied
            one after another, in the order they are stored.
        layers: Mapping from child-module name to the label under which that
            child's output should be stored.

    Returns:
        dict mapping every label whose module name occurs in ``model`` to the
        output produced by that child module.
    """
    collected = {}
    activation = image

    for child_name, child in model._modules.items():
        # Each child consumes the output of the previous one.
        activation = child(activation)

        if child_name not in layers:
            continue
        collected[layers[child_name]] = activation

    return collected


In [None]:
# Image preprocessing pipeline
# source: https://github.com/Shashi456/Neural-Style/blob/master/Neural%20Style%20Transfer/train_Pytorch.py
# NOTE(review): this is a Caffe-style pipeline (channels reversed, values scaled
# by 255). torchvision's pretrained VGG is documented as expecting RGB input in
# [0, 1] normalised with the ImageNet mean/std -- confirm this mismatch is intended.
def _reverse_channels(tensor):
    """Reorder the first axis from (0, 1, 2) to (2, 1, 0)."""
    return tensor[torch.LongTensor([2, 1, 0])]


def _scale_by_255(tensor):
    """Multiply the tensor by 255 in place and return it."""
    return tensor.mul_(255)


_preprocessing_steps = [
    tf.Resize(512),
    tf.ToTensor(),
    tf.Lambda(_reverse_channels),
    # mean/std are listed in the reversed channel order produced by the step above
    tf.Normalize(mean=[0.40760392, 0.45795686, 0.48501961], std=[0.225, 0.224, 0.229]),
    tf.Lambda(_scale_by_255),
]
transform = tf.Compose(_preprocessing_steps)

In [None]:
# function for image loading and transforming
def img_load(path):
    """Load an image file and turn it into a batched, preprocessed tensor.

    Args:
        path: Location of the image file, as accepted by ``PIL.Image.open``.

    Returns:
        The result of the module-level ``transform`` applied to the image,
        with a leading batch dimension of size 1 added.
    """
    # The context manager closes the underlying file once the pixels are read.
    with Image.open(path) as img:
        # Force exactly three colour channels so greyscale, palette and RGBA
        # files all fit the 3-channel reorder/normalise steps of ``transform``.
        tensor = transform(img.convert('RGB'))

    # No ``Variable`` wrapper: plain tensors carry autograd state themselves.
    return tensor.unsqueeze(0)

In [None]:
# function for calculating gram matrix of feature map
def gram_matrix(input):
    """Compute the normalised Gram matrix of a batch of feature maps.

    Each sample of shape (K, a, b) -- K feature maps of size a x b -- is
    flattened to a K x N matrix F with N = a * b. Its Gram matrix is the
    K x K matrix F @ F^T, divided here by K * a * b.

    Args:
        input: Tensor of shape (batch_size, K, a, b).

    Returns:
        Tensor of shape (K, K) when batch_size is 1 (the shape single-image
        callers have always received), otherwise (batch_size, K, K) with one
        Gram matrix per sample.
    """
    batch_size, K, a, b = input.size()

    # reshape (unlike view) also accepts non-contiguous tensors, and keeping
    # the batch axis stops samples from being mixed into one matrix.
    vecs = input.reshape(batch_size, K, a * b)

    # definition of Gram matrix, computed independently for every sample
    gram = vecs @ vecs.transpose(1, 2)

    # returning normalized matrix
    gram = gram / (K * a * b)

    # Drop the batch axis for a single image to stay backward compatible.
    return gram[0] if batch_size == 1 else gram

In [None]:
# Pretrained VGG-19: only the `.features` sub-module is kept
vgg = models.vgg19(pretrained=True).features

# The network serves as a fixed feature extractor, so no weight needs gradients
for weight in vgg.parameters():
    weight.requires_grad = False

# Prefer the GPU when CUDA is usable, otherwise stay on the CPU
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(use_cuda)
vgg.to(device)

In [None]:
# Read both input images and place the preprocessed tensors on the compute device
content, style = (
    img_load(image_path).to(device)
    for image_path in ('content.png', 'style.png')
)

In [None]:
# The image being optimised starts as a copy of the content image.
# The device move happens BEFORE gradients are enabled: calling .to(device)
# on a tensor that already requires grad returns a non-leaf copy whenever the
# move really takes place, and the optimizer refuses non-leaf tensors.
x = content.detach().clone().to(device).requires_grad_(True)

# using Adam to optimize our image (the pixels of x are the only parameters)
# NOTE(review): the preprocessing scales pixel values by 255, so steps of 1e-4
# look very small relative to that range -- confirm the learning rate.
optimizer = optim.Adam([x], lr=1e-4)

In [None]:
# Layers used to calculate the style and content losses.
# Keys are child-module names looked up by get_features; values are the
# labels under which the corresponding activations are stored.
layers = dict([
    ('0', 'conv1_1'),
    ('5', 'conv2_1'),
    ('10', 'conv3_1'),
    ('19', 'conv4_1'),
    ('21', 'conv4_2'),
    ('28', 'conv5_1'),
])

In [None]:
# target content feature map (from the content image)
content_features = get_features(content, vgg, layers)

# feature maps of the style image; the style targets must come from these,
# not from the content image, otherwise the style image is never used and
# the style loss starts at zero
style_features = get_features(style, vgg, layers)

style_layers = ['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1']

# calculating gram matrices of each style layer to use as targets
style_grams = {layer: gram_matrix(style_features[layer]) for layer in style_layers}

In [None]:
# Hyper-parameters of the optimisation loop
EPOCHS = 100          # number of optimisation steps applied to the image
style_weight = 1e7    # factor applied to the summed style loss
content_weight = 1    # factor applied to the content loss

In [None]:
# Optimisation loop: every iteration re-extracts the features of the current
# image, scores them against the targets and takes one optimizer step.
for epoch in range(EPOCHS):

    x_features = get_features(x, vgg, layers)

    # Content term: MSE between the conv4_2 activations of x and of the target
    content_loss = F.mse_loss(x_features["conv4_2"], content_features["conv4_2"])

    # Style term: per-layer MSE between Gram matrices, added up over all style layers
    style_loss = sum(
        F.mse_loss(gram_matrix(x_features[layer_name]), style_grams[layer_name])
        for layer_name in style_layers
    )

    # Weighted sum of both terms, as in the paper
    total_loss = content_weight * content_loss + style_weight * style_loss

    optimizer.zero_grad()
    total_loss.backward()
    optimizer.step()

    # Report progress on every 10th epoch
    if (epoch + 1) % 10 == 0:
        print('Epoch: %d | Total Loss: %.5f' % (epoch + 1, total_loss.item()))

print('-----------------------------')
print('Finished Training')