# Image Combination

Importing the libraries

In [1]:
import torch
import cv2
import numpy as np
import torch.optim as optim
import torchvision.models as models
import torch.nn as nn
import scipy.misc

Some utility function to read images and turn them into a Pytorch tensor

In [4]:
def loadim(path):
    im = cv2.imread(path, cv2.IMREAD_COLOR)
    im = np.array([im[:, :, 2], im[:, :, 1], im[:, :, 0]])
    im = torch.from_numpy(im)
    im = im.type('torch.FloatTensor')
    im = im/128 - 2

    return im

Defining the model we will use to extract features, in this case, VGG11

In [5]:
vgg11 = models.vgg11(pretrained=True)

In the following block, we define a series of architectures derived from VGG11, that we will use to extract different level features. It is known that higher layers extract higher level features. We only used the convoutional layers of the VGG for this experiment. We didn't use the dense layers output features.

In [6]:
class VGG16_conv7(nn.Module):
    def __init__(self):
        super(VGG16_conv7, self).__init__()
        self.features = nn.Sequential(
            # stop at conv7
            *list(vgg11.features.children())[:-3]
        )

    def forward(self, x):
        x = self.features(x)
        return x

class VGG16_conv6(nn.Module):
    def __init__(self):
        super(VGG16_conv6, self).__init__()
        self.features = nn.Sequential(
            # stop at conv6
            *list(vgg11.features.children())[:-5]
        )

    def forward(self, x):
        x = self.features(x)
        return x

class VGG16_conv5(nn.Module):
    def __init__(self):
        super(VGG16_conv5, self).__init__()
        self.features = nn.Sequential(
            # stop at conv5
            *list(vgg11.features.children())[:-8]
        )

    def forward(self, x):
        x = self.features(x)
        return x

class VGG16_conv4(nn.Module):
    def __init__(self):
        super(VGG16_conv4, self).__init__()
        self.features = nn.Sequential(
            # stop at conv4
            *list(vgg11.features.children())[:-10]
        )

    def forward(self, x):
        x = self.features(x)
        return x

class VGG16_conv3(nn.Module):
    def __init__(self):
        super(VGG16_conv3, self).__init__()
        self.features = nn.Sequential(
            # stop at conv3
            *list(vgg11.features.children())[:-13]
        )

    def forward(self, x):
        x = self.features(x)
        return x

class VGG16_conv2(nn.Module):
    def __init__(self):
        super(VGG16_conv2, self).__init__()
        self.features = nn.Sequential(
            # stop at conv2
            *list(vgg11.features.children())[:-15]
        )

    def forward(self, x):
        x = self.features(x)
        return x

class VGG16_conv1(nn.Module):
    def __init__(self):
        super(VGG16_conv1, self).__init__()
        self.features = nn.Sequential(
            # stop at conv1
            *list(vgg11.features.children())[:-18]
        )

    def forward(self, x):
        x = self.features(x)
        return x
    
# Just realized I forgot the 8th layer

First of all, we compute the features we want our output image to have. For instance, low level features similar to the style image and higher level features similar to the content image. The features we're going to use are, respectively, the lower layers activations and higher layers activations.

After that, we will optimize the pixels of a new image in order for them to have similar activations to the ones desired. 

To do that, the first thing we need to do is initialize the new image. Some options would be the style image, the content image or even a random initialization of the pixels. To do our first experiments, we chose to have as an initialization the content image.

To find the opitmal parameters, we define a loss function which is the MSE between the desired features and the ones we actually have, for all the layers.

Then, we will use the SGD technique to find the parameters (in this case, input image pixel values) that minimize this function.

To calculate the gradient of the loss with respect to the pixel values, we backpropagate the error from each layer down to the pixel values, then we update the pixel values. We will repeat this for a few iterations, saving the result in each step

In [7]:
def main():
    vgg1 = VGG16_conv1()
    vgg2 = VGG16_conv2()
    vgg3 = VGG16_conv3()
    vgg4 = VGG16_conv4()
    vgg5 = VGG16_conv5()
    vgg6 = VGG16_conv6()
    vgg7 = VGG16_conv7()

    cont_im = loadim('landscape-small.png')

    cont_im = cont_im.unsqueeze(0)

    cont_im.requires_grad = True

    style_im = loadim('van-gogh-small.png')
    style_im = style_im.unsqueeze(0)

    opt = optim.SGD([cont_im], lr=0.0001)

    y1_targ = vgg1(style_im)
    y2_targ = vgg2(style_im)
    y3_targ = vgg3(style_im)
    y4_targ = vgg4(style_im)
    y5_targ = vgg5(style_im)
    y6_targ = vgg6(style_im)
    y7_targ = vgg7(style_im)
    
    input_im = cont_im

    for i in range(20):

        print('Iteration', i)

        opt.zero_grad()

        y1_ = vgg1(input_im)
        y2_ = vgg2(input_im)
        y3_ = vgg3(input_im)
        y4_ = vgg4(input_im)
        y5_ = vgg5(input_im)
        y6_ = vgg6(input_im)
        y7_ = vgg7(input_im)

        y1_d = y1_targ - y1_
        y2_d = y2_targ - y2_
        y3_d = y3_targ - y3_
        y4_d = y4_targ - y4_
        y5_d = y5_targ - y5_
        y6_d = y6_targ - y6_
        y7_d = y7_targ - y7_

        y1_d = y1_d * y1_d
        y2_d = y2_d * y2_d
        y3_d = y3_d * y3_d
        y4_d = y4_d * y4_d
        y5_d = y5_d * y5_d
        y6_d = y6_d * y6_d
        y7_d = y7_d * y7_d

        loss = torch.tensor(0, dtype=torch.float)
        loss.requires_grad = True
        for dif in [y1_d, y2_d, y3_d, y4_d, y5_d, y6_d, y7_d]:
            l = torch.sum(dif)
            loss = loss + l

        loss.backward(retain_graph=True)
        opt.step()

        b = cont_im[0].detach().numpy()

        b = np.rollaxis(b, 0, 3)

        scipy.misc.imsave('content' + str(i) + '.jpg', b)

In [8]:
if __name__ == '__main__':
    main()

Iteration 0


`imsave` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imwrite`` instead.


Iteration 1
Iteration 2
Iteration 3
Iteration 4
Iteration 5
Iteration 6
Iteration 7
Iteration 8
Iteration 9
Iteration 10
Iteration 11
Iteration 12
Iteration 13
Iteration 14
Iteration 15
Iteration 16
Iteration 17
Iteration 18
Iteration 19


One of the resulting images is:
![title](1234/content7.jpg)