In [10]:
from __future__ import print_function

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from PIL import Image
import matplotlib.pyplot as plt

import torchvision.transforms as transforms
import torchvision.models as models
from torchvision.utils import save_image
import torchvision.transforms as T

import copy
import os
import random
import io

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
class ContentLoss(nn.Module):

    def __init__(self, target,):
        super(ContentLoss, self).__init__()
        # we 'detach' the target content from the tree used
        # to dynamically compute the gradient: this is a stated value,
        # not a variable. Otherwise the forward method of the criterion
        # will throw an error.
        self.target = target.detach()

    def forward(self, input):
        self.loss = F.mse_loss(input, self.target)
        return input

######################################################################
# .. Note::
#    **Important detail**: although this module is named ``ContentLoss``, it
#    is not a true PyTorch Loss function. If you want to define your content
#    loss as a PyTorch Loss function, you have to create a PyTorch autograd function 
#    to recompute/implement the gradient manually in the ``backward``
#    method.

######################################################################
# Style Loss
# ~~~~~~~~~~
# 
# The style loss module is implemented similarly to the content loss
# module. It will act as a transparent layer in a
# network that computes the style loss of that layer. In order to
# calculate the style loss, we need to compute the gram matrix :math:`G_{XL}`. A gram
# matrix is the result of multiplying a given matrix by its transposed
# matrix. In this application the given matrix is a reshaped version of
# the feature maps :math:`F_{XL}` of a layer :math:`L`. :math:`F_{XL}` is reshaped to form :math:`\hat{F}_{XL}`, a :math:`K`\ x\ :math:`N`
# matrix, where :math:`K` is the number of feature maps at layer :math:`L` and :math:`N` is the
# length of any vectorized feature map :math:`F_{XL}^k`. For example, the first line
# of :math:`\hat{F}_{XL}` corresponds to the first vectorized feature map :math:`F_{XL}^1`.
# 
# Finally, the gram matrix must be normalized by dividing each element by
# the total number of elements in the matrix. This normalization is to
# counteract the fact that :math:`\hat{F}_{XL}` matrices with a large :math:`N` dimension yield
# larger values in the Gram matrix. These larger values will cause the
# first layers (before pooling layers) to have a larger impact during the
# gradient descent. Style features tend to be in the deeper layers of the
# network so this normalization step is crucial.
# 

def gram_matrix(input):
    a, b, c, d = input.size()  # a=batch size(=1)
    # b=number of feature maps
    # (c,d)=dimensions of a f. map (N=c*d)

    features = input.view(a * b, c * d)  # resize F_XL into \hat F_XL

    G = torch.mm(features, features.t())  # compute the gram product

    # we 'normalize' the values of the gram matrix
    # by dividing by the number of element in each feature maps.
    return G.div(a * b * c * d)


######################################################################
# Now the style loss module looks almost exactly like the content loss
# module. The style distance is also computed using the mean square
# error between :math:`G_{XL}` and :math:`G_{SL}`.
# 

class StyleLoss(nn.Module):

    def __init__(self, target_feature):
        super(StyleLoss, self).__init__()
        self.target = gram_matrix(target_feature).detach()

    def forward(self, input):
        G = gram_matrix(input)
        self.loss = F.mse_loss(G, self.target)
        return input


######################################################################
# Importing the Model
# -------------------
# 
# Now we need to import a pretrained neural network. We will use a 19
# layer VGG network like the one used in the paper.
# 
# PyTorch’s implementation of VGG is a module divided into two child
# ``Sequential`` modules: ``features`` (containing convolution and pooling layers),
# and ``classifier`` (containing fully connected layers). We will use the
# ``features`` module because we need the output of the individual
# convolution layers to measure content and style loss. Some layers have
# different behavior during training than evaluation, so we must set the
# network to evaluation mode using ``.eval()``.
# 

cnn = models.vgg19(pretrained=True).features.to(device).eval()



######################################################################
# Additionally, VGG networks are trained on images with each channel
# normalized by mean=[0.485, 0.456, 0.406] and std=[0.229, 0.224, 0.225].
# We will use them to normalize the image before sending it into the network.
# 

cnn_normalization_mean = torch.tensor([0.485, 0.456, 0.406]).to(device)
cnn_normalization_std = torch.tensor([0.229, 0.224, 0.225]).to(device)

# create a module to normalize input image so we can easily put it in a
# ``nn.Sequential``
class Normalization(nn.Module):
    def __init__(self, mean, std):
        super(Normalization, self).__init__()
        # .view the mean and std to make them [C x 1 x 1] so that they can
        # directly work with image Tensor of shape [B x C x H x W].
        # B is batch size. C is number of channels. H is height and W is width.
        self.mean = torch.tensor(mean).view(-1, 1, 1)
        self.std = torch.tensor(std).view(-1, 1, 1)

    def forward(self, img):
        # normalize ``img``
        return (img - self.mean) / self.std


######################################################################
# A ``Sequential`` module contains an ordered list of child modules. For
# instance, ``vgg19.features`` contains a sequence (``Conv2d``, ``ReLU``, ``MaxPool2d``,
# ``Conv2d``, ``ReLU``…) aligned in the right order of depth. We need to add our
# content loss and style loss layers immediately after the convolution
# layer they are detecting. To do this we must create a new ``Sequential``
# module that has content loss and style loss modules correctly inserted.
# 

# desired depth layers to compute style/content losses :
content_layers_default = ['conv_4']
style_layers_default = ['conv_1', 'conv_2', 'conv_3', 'conv_4', 'conv_5']

def get_style_model_and_losses(cnn, normalization_mean, normalization_std,
                               style_img, content_img,
                               content_layers=content_layers_default,
                               style_layers=style_layers_default):
    # normalization module
    normalization = Normalization(normalization_mean, normalization_std).to(device)

    # just in order to have an iterable access to or list of content/style
    # losses
    content_losses = []
    style_losses = []

    # assuming that ``cnn`` is a ``nn.Sequential``, so we make a new ``nn.Sequential``
    # to put in modules that are supposed to be activated sequentially
    model = nn.Sequential(normalization)

    i = 0  # increment every time we see a conv
    for layer in cnn.children():
        if isinstance(layer, nn.Conv2d):
            i += 1
            name = 'conv_{}'.format(i)
        elif isinstance(layer, nn.ReLU):
            name = 'relu_{}'.format(i)
            # The in-place version doesn't play very nicely with the ``ContentLoss``
            # and ``StyleLoss`` we insert below. So we replace with out-of-place
            # ones here.
            layer = nn.ReLU(inplace=False)
        elif isinstance(layer, nn.MaxPool2d):
            name = 'pool_{}'.format(i)
        elif isinstance(layer, nn.BatchNorm2d):
            name = 'bn_{}'.format(i)
        else:
            raise RuntimeError('Unrecognized layer: {}'.format(layer.__class__.__name__))

        model.add_module(name, layer)

        if name in content_layers:
            # add content loss:
            target = model(content_img).detach()
            content_loss = ContentLoss(target)
            model.add_module("content_loss_{}".format(i), content_loss)
            content_losses.append(content_loss)

        if name in style_layers:
            # add style loss:
            target_feature = model(style_img).detach()
            style_loss = StyleLoss(target_feature)
            model.add_module("style_loss_{}".format(i), style_loss)
            style_losses.append(style_loss)

    # now we trim off the layers after the last content and style losses
    for i in range(len(model) - 1, -1, -1):
        if isinstance(model[i], ContentLoss) or isinstance(model[i], StyleLoss):
            break

    model = model[:(i + 1)]

    return model, style_losses, content_losses


######################################################################
# Gradient Descent
# ----------------
# 
# As Leon Gatys, the author of the algorithm, suggested `here <https://discuss.pytorch.org/t/pytorch-tutorial-for-neural-transfert-of-artistic-style/336/20?u=alexis-jacq>`__, we will use
# L-BFGS algorithm to run our gradient descent. Unlike training a network,
# we want to train the input image in order to minimize the content/style
# losses. We will create a PyTorch L-BFGS optimizer ``optim.LBFGS`` and pass
# our image to it as the tensor to optimize.
# 

def get_input_optimizer(input_img):
    # this line to show that input is a parameter that requires a gradient
    optimizer = optim.LBFGS([input_img])
    return optimizer


######################################################################
# Finally, we must define a function that performs the neural transfer. For
# each iteration of the networks, it is fed an updated input and computes
# new losses. We will run the ``backward`` methods of each loss module to
# dynamically compute their gradients. The optimizer requires a “closure”
# function, which reevaluates the module and returns the loss.
# 
# We still have one final constraint to address. The network may try to
# optimize the input with values that exceed the 0 to 1 tensor range for
# the image. We can address this by correcting the input values to be
# between 0 to 1 each time the network is run.
# 

def run_style_transfer(cnn, normalization_mean, normalization_std,
                       content_img, style_img, input_img, num_steps=300,
                       style_weight=1000000, content_weight=1000):
    """Run the style transfer."""
    print('Building the style transfer model..')
    model, style_losses, content_losses = get_style_model_and_losses(cnn,
        normalization_mean, normalization_std, style_img, content_img)

    # We want to optimize the input and not the model parameters so we
    # update all the requires_grad fields accordingly

    # we also only want to style_transfer if the style_loss is higher than 1000
    # if style_loss < 1000:
    #     return input_img
    

    input_img.requires_grad_(True)
    model.requires_grad_(False)

    optimizer = get_input_optimizer(input_img)

    print('Optimizing..')
    run = [0]
    while run[0] <= num_steps:

        def closure():
            # correct the values of updated input image
            with torch.no_grad():
                input_img.clamp_(0, 1)

            optimizer.zero_grad()
            model(input_img)
            style_score = 0
            content_score = 0

            for sl in style_losses:
                style_score += sl.loss
            for cl in content_losses:
                content_score += cl.loss

            style_score *= style_weight
            content_score *= content_weight

            loss = style_score + content_score
            loss.backward()

            run[0] += 1
            if run[0] % 50 == 0:
                print("run {}:".format(run))
                print('Style Loss : {:4f} Content Loss: {:4f}'.format(
                    style_score.item(), content_score.item()))
                print()
      
            return style_score + content_score
        
        optimizer.step(closure)

    # a last correction...
    with torch.no_grad():
        input_img.clamp_(0, 1)

    return input_img

# function that returns the style_score and content_score for a given style_img
def get_style_score(style_img, content_img, cnn, normalization_mean, normalization_std,
                    style_weight=1000000, content_weight=1000):
    # compare the style img and content img and get the initial loss and return the style score, to see how much the images differ
    
    model, style_losses, content_losses = get_style_model_and_losses(cnn,
        normalization_mean, normalization_std, style_img, content_img)
    input_img = content_img.clone()
    input_img.requires_grad_(True)
    model.requires_grad_(False)


    with torch.no_grad():
        input_img.clamp_(0, 1)

    optimizer = get_input_optimizer(input_img) 
    optimizer.zero_grad()
    model(input_img)
    style_score = 0
    print(style_losses)
    for sl in style_losses:
        style_score += sl.loss

    style_score *= style_weight

    loss = style_score
    loss.backward()

    return style_score.item()




In [4]:
# desired size of the output image
imsize = 512 if torch.cuda.is_available() else 128  # use small size if no GPU

loader = transforms.Compose([
    transforms.Resize(imsize),  # scale imported image
    transforms.ToTensor()])  # transform it into a torch tensor


def image_loader(image_name):
    image = Image.open(image_name)
    # fake batch dimension required to fit network's input dimensions
    image = loader(image).unsqueeze(0)
    return image.to(device, torch.float)

# save_image function
def save_image(tensor, filename):
    # unnormalize
    tensor = tensor.cpu().clone()
    tensor = tensor.squeeze(0)
    tensor = unloader(tensor)
    # save image
    im = Image.fromarray(tensor)
    im.save(filename)

# load images
def unloader(tensor):
    image = tensor.cpu().clone()
    image = image.squeeze(0)
    image = transforms.ToPILImage()(image)
    return image

In [5]:
# style_img = image_loader("../339039a4-ce3be003.jpg")
# content_img = image_loader("../aachen_000001_000019_leftImg8bit.png")

# # resize the larger image to the smaller one
# if style_img.size() < content_img.size():
#     content_img = transforms.Resize(style_img.size()[2:])(content_img)
# elif style_img.size() > content_img.size():
#     style_img = transforms.Resize(content_img.size()[2:])(style_img)

# assert style_img.size() == content_img.size(), \
#     "we need to import style and content images of the same size"

# get_style_score(style_img, content_img, cnn, cnn_normalization_mean, cnn_normalization_std,
#                     style_weight=1000000, content_weight=1000)

In [11]:
style_dir = "data"
content_dir = "data/leftImg8bit/train"

# # run style transfer for all images in the content directory and save the results in folder "results"	
# for content_img_name in os.listdir(content_dir):
#     content_img = image_loader(os.path.join(content_dir, content_img_name))
#     for style_img_name in os.listdir(style_dir):
#         style_img = image_loader(os.path.join(style_dir, style_img_name))
#         output = run_style_transfer(cnn, cnn_normalization_mean, cnn_normalization_std,
#                                     content_img, style_img, content_img)
#         output_name = os.path.join("results", content_img_name[:-4] + "_" + style_img_name[:-4] + ".jpg")
#         save_image(output, output_name)

# # run style transfer for all images in the content dir and style dir
# for content_subdir in os.listdir(content_dir):
#     for content_img_name in os.listdir(os.path.join(content_dir, content_subdir)):
#         content_img = image_loader(os.path.join(content_dir, content_subdir, content_img_name))
#         for style_img_name in os.listdir(style_dir):
#             style_img = image_loader(os.path.join(style_dir, style_img_name))

#             # resize the larger image to the smaller one
#             if style_img.size() < content_img.size():
#                 content_img = transforms.Resize(style_img.size()[2:])(content_img)
#             elif style_img.size() > content_img.size():
#                 style_img = transforms.Resize(content_img.size()[2:])(style_img)

#             output = run_style_transfer(cnn, cnn_normalization_mean, cnn_normalization_std,
#                                         content_img, style_img, content_img)
#             output_name = os.path.join("results", content_subdir, content_img_name[:-4] + "_" + style_img_name[:-4] + ".jpg")

#             # create the results subfolder if it does not exist
#             if not os.path.exists(os.path.join("results", content_subdir)):
#                 os.makedirs(os.path.join("results", content_subdir))
#             save_image(output, output_name)

for content_subdir in os.listdir(content_dir):
    for content_img_name in os.listdir(os.path.join(content_dir, content_subdir)):
        content_img = image_loader(os.path.join(content_dir, content_subdir, content_img_name))
        style_img_name = random.choice(os.listdir(style_dir))
        style_img = image_loader(os.path.join(style_dir, style_img_name))
        
        # resize the larger image to the smaller one
        if style_img.size() < content_img.size():
            content_img = transforms.Resize(style_img.size()[2:])(content_img)
        elif style_img.size() > content_img.size():
            style_img = transforms.Resize(content_img.size()[2:])(style_img)
            
        # if style score is lower than 1000, then skip this pair of images and choose new style image
        style_score = get_style_score(style_img, content_img, cnn, cnn_normalization_mean, cnn_normalization_std,
                    style_weight=1000000, content_weight=1000)
        if style_score < 1500:
            continue

        output = run_style_transfer(cnn, cnn_normalization_mean, cnn_normalization_std,
                                    content_img, style_img, content_img)
        output_name = os.path.join("results", content_img_name[:-4] + "_" + style_img_name[:-4] + ".jpg")

        transform = T.ToPILImage()
        output = transform(output.squeeze(0))
        output.save(output_name)

  self.mean = torch.tensor(mean).view(-1, 1, 1)
  self.std = torch.tensor(std).view(-1, 1, 1)


[StyleLoss(), StyleLoss(), StyleLoss(), StyleLoss(), StyleLoss()]
[StyleLoss(), StyleLoss(), StyleLoss(), StyleLoss(), StyleLoss()]
Building the style transfer model..
Optimizing..
run [50]:
Style Loss : 402.894287 Content Loss: 388.491821

run [100]:
Style Loss : 322.455017 Content Loss: 351.689728

run [150]:
Style Loss : 292.315369 Content Loss: 336.862366

run [200]:
Style Loss : 329.276672 Content Loss: 551.741760

run [250]:
Style Loss : 281.530670 Content Loss: 344.138214

run [300]:
Style Loss : 386.537933 Content Loss: 787.857544

[StyleLoss(), StyleLoss(), StyleLoss(), StyleLoss(), StyleLoss()]
[StyleLoss(), StyleLoss(), StyleLoss(), StyleLoss(), StyleLoss()]
[StyleLoss(), StyleLoss(), StyleLoss(), StyleLoss(), StyleLoss()]
Building the style transfer model..
Optimizing..
run [50]:
Style Loss : 762.186157 Content Loss: 987.491272

run [100]:
Style Loss : 662.913452 Content Loss: 815.800903

run [150]:
Style Loss : 621.253296 Content Loss: 779.747070

run [200]:
Style Loss : 6