In [5]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torchvision.models as models
from torchvision.transforms import v2
from tqdm import tqdm
from PIL import Image
import os
import matplotlib.pyplot as plt

##### Modules in `vgg19.features` (index: layer)
- **0**: conv1_1 - Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
- **1**: ReLU(inplace=True)
- **2**: conv1_2 - Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
- **3**: ReLU(inplace=True)
- **4**: pool1 - MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
- **5**: conv2_1 - Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
- **6**: ReLU(inplace=True)
- **7**: conv2_2 - Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
- **8**: ReLU(inplace=True)
- **9**: pool2 - MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
- **10**: conv3_1 - Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
- **11**: ReLU(inplace=True)
- **12**: conv3_2 - Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
- **13**: ReLU(inplace=True)
- **14**: conv3_3 - Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
- **15**: ReLU(inplace=True)
- **16**: conv3_4 - Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
- **17**: ReLU(inplace=True)
- **18**: pool3 - MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
- **19**: conv4_1 - Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
- **20**: ReLU(inplace=True)
- **21**: conv4_2 - Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
- **22**: ReLU(inplace=True)
- **23**: conv4_3 - Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
- **24**: ReLU(inplace=True)
- **25**: conv4_4 - Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
- **26**: ReLU(inplace=True)
- **27**: pool4 - MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
- **28**: conv5_1 - Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
- **29**: ReLU(inplace=True)
- **30**: conv5_2 - Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
- **31**: ReLU(inplace=True)
- **32**: conv5_3 - Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
- **33**: ReLU(inplace=True)
- **34**: conv5_4 - Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
- **35**: ReLU(inplace=True)
- **36**: pool5 - MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)


In [6]:
class VGG19(nn.Module):
    """Frozen, ImageNet-pretrained VGG-19 feature extractor.

    Wraps the ``features`` stack of torchvision's ``vgg19`` and returns the
    activations of selected sub-modules. Sub-modules are addressed by their
    position in the stack, given as a string (``"0"``, ``"1"``, ...).

    Args:
        pool_type: ``"avg"`` replaces every ``MaxPool2d`` with an
            ``AvgPool2d`` (kernel 2, stride 2); any other value keeps the
            original max pooling.
    """

    def __init__(self, pool_type: str):
        super().__init__()
        # Both pooling variants must start from the pretrained weights: a
        # randomly initialised network yields meaningless feature targets.
        features = models.vgg19(weights=models.VGG19_Weights.DEFAULT).features
        use_avg_pool = pool_type == "avg"

        layers = []
        for layer in features:
            if use_avg_pool and isinstance(layer, nn.MaxPool2d):
                layers.append(
                    nn.AvgPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=False)
                )
            elif isinstance(layer, nn.ReLU):
                # torchvision's ReLUs are in-place, which would overwrite the
                # preceding conv activation that forward() may hand back.
                layers.append(nn.ReLU(inplace=False))
            else:
                layers.append(layer)
        # Indices are preserved one-to-one, so layer keys keep their meaning.
        self.final_model = nn.Sequential(*layers)

        # The network is only a fixed feature extractor; never train it.
        for param in self.final_model.parameters():
            param.requires_grad = False

    def forward(self, input, layer_keys):
        """Run ``input`` through the stack and collect selected activations.

        Args:
            input: Tensor fed to the first layer of the stack.
            layer_keys: Layer indices as strings (a single string is treated
                as one key). Duplicates are allowed.

        Returns:
            A list of activations, in the same order as ``layer_keys``.

        Raises:
            ValueError: If ``layer_keys`` is empty or a key is not an integer
                string.
            KeyError: If a key does not name a layer of the stack.
        """
        if isinstance(layer_keys, str):
            # Iterating a bare string would split "11" into "1", "1".
            layer_keys = [layer_keys]
        layer_keys = list(layer_keys)
        if not layer_keys:
            raise ValueError("layer_keys must contain at least one layer index")

        wanted = set(layer_keys)
        # Nothing past the deepest requested layer needs to be evaluated.
        last_layer_key = str(max(int(key) for key in layer_keys))

        out = {}
        for name, layer in self.final_model.named_children():
            input = layer(input)
            if name in wanted:
                out[name] = input
            if name == last_layer_key:
                break

        missing = [key for key in layer_keys if key not in out]
        if missing:
            raise KeyError(f"Unknown layer key(s): {missing}")
        return [out[key] for key in layer_keys]

In [7]:
class NeuralStyleTransfer:
    """Image-optimisation style transfer on top of the frozen ``VGG19`` extractor.

    A white-noise image is optimised so that its VGG-19 activations match the
    content image (feature MSE), the style image (Gram-matrix MSE), or a
    weighted sum of both.

    Args:
        content_image_path: Path of the content image.
        style_image_path: Path of the style image.
        content_layer_name: Layer key(s) used for the content loss; a list
            of index strings (a single string is also accepted). Only the
            first entry is used as the content target.
        style_layers_list: Layer keys (index strings) used for the style loss.
        style_layer_weights: Per-layer style weights, indexed in step with
            ``style_layers_list``.
        optimizer: ``"LBFGS"`` or ``"Adam"``.
        alpha: Weight of the content loss in ``style_transfer``.
        beta: Weight of the style loss in ``style_transfer``.
        lr: Learning rate handed to the optimizer.
        device: Device used for the model and all tensors.
        num_steps: Number of outer optimisation steps.
        resize_value: Size passed to ``v2.Resize`` for both images.

    Raises:
        ValueError: If ``optimizer`` is not one of the supported names.
    """

    _SUPPORTED_OPTIMIZERS = ("LBFGS", "Adam")
    # Normalisation statistics applied on load and undone in tensor_to_image.
    _NORM_MEAN = (0.485, 0.456, 0.406)
    _NORM_STD = (0.229, 0.224, 0.225)

    def __init__(
        self,
        content_image_path,
        style_image_path,
        content_layer_name,
        style_layers_list,
        style_layer_weights,
        optimizer="LBFGS",
        alpha=1,
        beta=1000,
        lr=1,
        device="cuda" if torch.cuda.is_available() else "cpu",
        num_steps=500,
        resize_value=256,
    ):
        # Fail fast: an unknown optimizer used to skip the optimisation loop
        # silently and hand back the untouched noise image.
        if optimizer not in self._SUPPORTED_OPTIMIZERS:
            raise ValueError(
                f"Unsupported optimizer {optimizer!r}; "
                f"expected one of {self._SUPPORTED_OPTIMIZERS}"
            )

        self.device = device
        self.lr = lr
        self.alpha = alpha
        self.beta = beta
        self._optim = optimizer
        # Always keep lists: the model iterates its keys, so a bare "11"
        # would otherwise be read as the two keys "1" and "1".
        self.content_layer = self._as_key_list(content_layer_name)
        self.style_layers_list = self._as_key_list(style_layers_list)
        self.style_weights = style_layer_weights
        self.num_steps = num_steps
        self.content_image = self.load_and_transform(
            image_path=content_image_path, resize_value=resize_value
        )
        self.style_image = self.load_and_transform(
            image_path=style_image_path, resize_value=resize_value
        )
        self.model = VGG19(pool_type="avg").to(device).eval()

    @staticmethod
    def _as_key_list(keys):
        """Return ``keys`` as a list, wrapping a single string key."""
        return [keys] if isinstance(keys, str) else list(keys)

    def load_and_transform(self, image_path, resize_value):
        """Load an image as a normalised ``(1, 3, H, W)`` float tensor on ``self.device``."""
        # Force three channels so grayscale / RGBA / palette files work with
        # the 3-channel normalisation; the context manager closes the file.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")

        transformations = v2.Compose(
            [
                v2.Resize(resize_value),
                v2.ToImage(),
                v2.ToDtype(torch.float32, scale=True),
                v2.Normalize(mean=list(self._NORM_MEAN), std=list(self._NORM_STD)),
            ]
        )

        return transformations(image).unsqueeze(0).to(self.device)

    @staticmethod
    def generate_white_noise(image, device):
        """Return a standard-normal tensor shaped like ``image`` that requires grad."""
        batch, channels, h, w = image.shape
        random_image = torch.randn(
            size=[batch, channels, h, w], requires_grad=True, device=device
        )
        return random_image

    @staticmethod
    def tensor_to_image(tensor):
        """Undo the input normalisation, clamp to [0, 1] and convert to a PIL image."""
        tensor = tensor.clone().detach().squeeze(0).cpu()

        std = torch.tensor(NeuralStyleTransfer._NORM_STD).view(3, 1, 1)
        mean = torch.tensor(NeuralStyleTransfer._NORM_MEAN).view(3, 1, 1)
        tensor = torch.clamp(tensor * std + mean, 0, 1)

        return v2.ToPILImage()(tensor)

    @staticmethod
    def get_gram_matrix(input):
        """Return the per-sample Gram matrix of ``input``, divided by ``height * width``.

        Args:
            input: Tensor of shape ``(batch, channel, height, width)``.

        Returns:
            Tensor of shape ``(batch, channel, channel)``.
        """
        batch, channel, height, width = input.shape
        # reshape (rather than view) also accepts non-contiguous activations.
        flat = input.reshape(batch, channel, height * width)
        gram_matrix = torch.bmm(flat, flat.transpose(1, 2))
        return torch.div(gram_matrix, height * width)

    def _style_targets(self):
        """Gram matrices of the style image at every style layer (no autograd graph)."""
        with torch.no_grad():
            style_features = self.model(self.style_image, self.style_layers_list)
            return [self.get_gram_matrix(feat) for feat in style_features]

    def _content_target(self):
        """Activation of the content image at the content layer (no autograd graph)."""
        with torch.no_grad():
            return self.model(self.content_image, self.content_layer)[0]

    def _style_loss(self, style_features, target_grams):
        """Weighted sum over layers of the MSE between Gram matrices."""
        style_loss = torch.zeros([], device=self.device)
        for idx, feat in enumerate(style_features):
            gram = self.get_gram_matrix(feat)
            style_loss = style_loss + (
                torch.mean((gram - target_grams[idx]) ** 2) * self.style_weights[idx]
            )
        return style_loss

    def _optimize(self, image, loss_fn):
        """Minimise ``loss_fn()`` with respect to ``image`` and return the result as a PIL image.

        ``loss_fn`` takes no arguments and returns a scalar loss that depends
        on ``image``; it is re-evaluated on every optimizer evaluation.
        """
        progress_bar = tqdm(range(self.num_steps), desc="Optimizing", unit="step")

        if self._optim == "LBFGS":
            optimizer = optim.LBFGS([image], lr=self.lr)
            current_loss = [0.0]

            # LBFGS re-evaluates the objective several times per step, so it
            # needs a closure; the list cell exposes the latest loss value.
            def closure():
                optimizer.zero_grad()
                loss = loss_fn()
                loss.backward()
                current_loss[0] = loss.item()
                return loss

            for _ in progress_bar:
                optimizer.step(closure)
                progress_bar.set_postfix(loss=current_loss[0])

        elif self._optim == "Adam":
            optimizer = optim.Adam([image], lr=self.lr)
            for _ in progress_bar:
                optimizer.zero_grad()
                loss = loss_fn()
                loss.backward()
                optimizer.step()
                progress_bar.set_postfix(loss=loss.item())

        else:
            # Reachable only if ``_optim`` was changed after construction.
            raise ValueError(f"Unsupported optimizer {self._optim!r}")

        return self.tensor_to_image(image)

    def visualize_content_rep(self):
        """Reconstruct the content image from its content-layer activation alone.

        Returns:
            The optimised image as a PIL image.
        """
        print(f"Using Device: {self.device}")
        random_image = self.generate_white_noise(self.content_image, self.device)
        target_features = self._content_target()

        def content_loss():
            features = self.model(random_image, self.content_layer)[0]
            return torch.mean((target_features - features) ** 2)

        return self._optimize(random_image, content_loss)

    def visualize_style_rep(self):
        """Synthesise a texture whose Gram matrices match those of the style image.

        Returns:
            The optimised image as a PIL image.
        """
        print(f"Using Device: {self.device}")
        random_image = self.generate_white_noise(self.style_image, self.device)
        target_grams = self._style_targets()

        def style_loss():
            style_outputs = self.model(random_image, self.style_layers_list)
            return self._style_loss(style_outputs, target_grams)

        return self._optimize(random_image, style_loss)

    def style_transfer(self):
        """Optimise noise towards ``alpha * content_loss + beta * style_loss``.

        Returns:
            The stylised image as a PIL image.
        """
        print(f"Using Device: {self.device}")
        random_image = self.generate_white_noise(self.content_image, self.device)
        target_content = self._content_target()
        target_grams = self._style_targets()

        def total_loss():
            # One forward pass serves both losses: the first output belongs
            # to the content layer, the remainder to the style layers.
            features = self.model(
                random_image, self.content_layer[:1] + self.style_layers_list
            )
            content_loss = torch.mean((target_content - features[0]) ** 2)
            style_loss = self._style_loss(features[1:], target_grams)
            return self.alpha * content_loss + self.beta * style_loss

        return self._optimize(random_image, total_loss)

# Experiments

### Visualizing Content representations from different layers:

In [8]:
def save_images(images_list, image_names):
    """Save each image under the matching file name.

    Args:
        images_list: Objects exposing ``save(target)`` (e.g. PIL images).
        image_names: Target paths, one per image, in the same order.

    Raises:
        ValueError: If the number of images and names differ (``zip`` would
            otherwise drop the surplus silently).
    """
    images = list(images_list)
    names = list(image_names)
    if len(images) != len(names):
        raise ValueError(
            f"Got {len(images)} image(s) but {len(names)} file name(s)"
        )

    for image, name in zip(images, names):
        # Create the output folder on demand so a fresh checkout does not
        # fail on a missing directory.
        if isinstance(name, (str, os.PathLike)):
            parent = os.path.dirname(name)
            if parent:
                os.makedirs(parent, exist_ok=True)
        image.save(name)

In [None]:
def visualize_contents(
    layers=("1", "6", "11", "20", "29"),
    content_image_path=r"E:\Resources\My Projects\A Neural Algorithm of Artistic Style - Paper Implementation\Neural-Style-Transfer-and-Fast-Neural-Style-Transfer\assets\Content Images\Tuebingen_Neckarfront.jpg",
    style_image_path=r"E:\Resources\My Projects\A Neural Algorithm of Artistic Style - Paper Implementation\Neural-Style-Transfer-and-Fast-Neural-Style-Transfer\assets\Style Images\the-starry-night.jpg",
):
    """Reconstruct the content image from one VGG-19 layer at a time.

    Args:
        layers: Layer keys (index strings); one reconstruction is run per key.
        content_image_path: Image to reconstruct. The default is the author's
            local path; pass your own when running elsewhere.
        style_image_path: Style image required by the ``NeuralStyleTransfer``
            constructor; it does not enter the content loss.

    Returns:
        A list with one generated image per entry of ``layers``.
    """
    generated_images = []
    for layer_i in tqdm(layers, total=len(layers)):
        nst = NeuralStyleTransfer(
            content_image_path=content_image_path,
            style_image_path=style_image_path,
            content_layer_name=[layer_i],
            style_layers_list=[str(i) for i in [1, 6, 11, 20, 29]],
            style_layer_weights=[1e3 / n**2 for n in [64, 128, 256, 512, 512]],
            optimizer="LBFGS",
            num_steps=75,
            lr=1,
            resize_value=256,
            alpha=1,
            beta=1000,
        )
        generated_images.append(nst.visualize_content_rep())
    return generated_images

In [6]:
# Run one content reconstruction per default layer and keep the resulting
# images (one per layer) for saving in the next cell.
generated_images = visualize_contents()

  0%|          | 0/5 [00:00<?, ?it/s]

Using Device: cuda


Optimizing: 100%|██████████| 75/75 [00:01<00:00, 44.34step/s, loss=1.73e-5]
 20%|██        | 1/5 [00:03<00:13,  3.26s/it]

Using Device: cuda


Optimizing: 100%|██████████| 75/75 [01:25<00:00,  1.14s/step, loss=0.00132]
 40%|████      | 2/5 [01:29<02:36, 52.17s/it]

Using Device: cuda


Optimizing: 100%|██████████| 75/75 [01:41<00:00,  1.35s/step, loss=0.000431]
 60%|██████    | 3/5 [03:12<02:30, 75.41s/it]

Using Device: cuda


Optimizing: 100%|██████████| 75/75 [02:30<00:00,  2.01s/step, loss=0.00111]
 80%|████████  | 4/5 [05:45<01:45, 105.93s/it]

Using Device: cuda


Optimizing: 100%|██████████| 75/75 [01:18<00:00,  1.05s/step, loss=0.000155]
100%|██████████| 5/5 [07:07<00:00, 85.51s/it] 


In [7]:
# One output file per reconstructed layer: relu1_1 ... relu5_1.
names = [f"./Content_Visualization/relu{block}_1.png" for block in range(1, 6)]

save_images(generated_images, names)

### Visualizing Style representations from different layers/combination of layers:

- [relu1_1]

- [relu1_1 + relu2_1]

- [relu1_1 + relu2_1 + relu3_1]

- [relu1_1 + relu2_1 + relu3_1 + relu4_1]

- [relu1_1 + relu2_1 + relu3_1 + relu4_1 + relu5_1]

In [10]:
def visualize_style(
    layers_list=((1,), (1, 6), (1, 6, 11), (1, 6, 11, 20), (1, 6, 11, 20, 29)),
    content_image_path=r"E:\Resources\My Projects\A Neural Algorithm of Artistic Style - Paper Implementation\Neural-Style-Transfer-and-Fast-Neural-Style-Transfer\assets\Content Images\Tuebingen_Neckarfront.jpg",
    style_image_path=r"E:\Resources\My Projects\A Neural Algorithm of Artistic Style - Paper Implementation\Neural-Style-Transfer-and-Fast-Neural-Style-Transfer\assets\Style Images\the-starry-night.jpg",
):
    """Synthesise the style texture from growing subsets of VGG-19 layers.

    Args:
        layers_list: Sequence of layer-index groups; one texture is generated
            per group.
        content_image_path: Content image required by the
            ``NeuralStyleTransfer`` constructor; it does not enter the style
            loss. The default is the author's local path.
        style_image_path: Image whose style is reproduced. The default is the
            author's local path; pass your own when running elsewhere.

    Returns:
        A list with one generated image per entry of ``layers_list``.
    """
    generated_images = []
    for layers in tqdm(layers_list, total=len(layers_list)):
        layer_keys = [str(i) for i in layers]
        nst = NeuralStyleTransfer(
            content_image_path=content_image_path,
            style_image_path=style_image_path,
            content_layer_name=["11"],
            style_layers_list=layer_keys,
            # Weights are looked up by position, so groups shorter than five
            # layers simply use the leading entries.
            style_layer_weights=[1e3 / n**2 for n in [64, 128, 256, 512, 512]],
            optimizer="Adam",
            num_steps=500,
            lr=1,
            resize_value=512,
            alpha=1,
            beta=1000,
        )
        generated_images.append(nst.visualize_style_rep())
    return generated_images

In [None]:
# Run one style reconstruction per default layer group and keep the
# resulting images for saving in the next cell.
generated_images = visualize_style()

  0%|          | 0/4 [00:00<?, ?it/s]

Using Device: cuda


Optimizing: 100%|██████████| 500/500 [00:28<00:00, 17.76step/s, loss=1.6e-5]
 25%|██▌       | 1/4 [00:29<01:28, 29.40s/it]

Using Device: cuda


Optimizing: 100%|██████████| 500/500 [00:52<00:00,  9.43step/s, loss=7.22e-5]
 50%|█████     | 2/4 [01:24<01:28, 44.41s/it]

Using Device: cuda


Optimizing: 100%|██████████| 500/500 [01:36<00:00,  5.16step/s, loss=7.79e-5]
 75%|███████▌  | 3/4 [03:03<01:09, 69.51s/it]

Using Device: cuda


Optimizing: 100%|██████████| 500/500 [02:12<00:00,  3.77step/s, loss=7.74e-5]
100%|██████████| 4/4 [05:20<00:00, 80.03s/it]


In [None]:
# File k is named after the first k style layers joined by "+":
# relu1_1, relu1_1+relu2_1, ..., relu1_1+...+relu5_1.
names = [
    "./Style_Visualization/"
    + "+".join(f"relu{block}_1" for block in range(1, depth + 1))
    + ".png"
    for depth in range(1, 6)
]

save_images(generated_images, names)

## Style Transfer:

In [11]:
# NOTE(review): machine-specific absolute location of the repository's
# ``assets`` folder -- adjust when running on another machine.
ASSETS_DIR = r"E:\Resources\My Projects\A Neural Algorithm of Artistic Style - Paper Implementation\Neural-Style-Transfer-and-Fast-Neural-Style-Transfer\assets"

content_path = ASSETS_DIR + r"\Content Images\Tuebingen_Neckarfront.jpg"

# One style transfer is run per entry.
style_paths = [
    ASSETS_DIR + r"\Style Images" + "\\" + file_name
    for file_name in (
        "the-starry-night.jpg",
        "photo-1596120717372-f31ed4704450.jpg",
        "868625.jpg",
    )
]

In [17]:
def NST(content_path, style_paths, content_layer="11"):
    """Run a full style transfer of one content image against several styles.

    Args:
        content_path: Path of the content image.
        style_paths: Iterable of style image paths; one transfer is run per
            path.
        content_layer: Layer key used for the content loss, either a single
            index string (``"11"``) or a list of index strings (``["11"]``).

    Returns:
        A list with one generated image per style path, in input order.
    """
    # Normalise to a flat list of keys. Wrapping an already-wrapped list
    # produced [["11"]], which the model cannot convert to a layer index.
    if isinstance(content_layer, str):
        content_layers = [content_layer]
    else:
        content_layers = list(content_layer)

    generated_images = []
    for style in tqdm(style_paths):
        nst = NeuralStyleTransfer(
            content_image_path=content_path,
            style_image_path=style,
            content_layer_name=content_layers,
            style_layers_list=[str(i) for i in [1, 6, 11, 20, 29]],
            style_layer_weights=[1e3 / n**2 for n in [64, 128, 256, 512, 512]],
            optimizer="LBFGS",
            num_steps=50,
            lr=1,
            resize_value=512,
            alpha=1,
            beta=1000,
        )
        generated_images.append(nst.style_transfer())
    return generated_images

In [None]:
# Stylise the content image once per style image, using NST's default
# content layer.
generated_images = NST(content_path, style_paths)

  0%|          | 0/3 [00:00<?, ?it/s]

Using Device: cuda


Optimizing: 100%|██████████| 50/50 [04:35<00:00,  5.52s/step, loss=0.412]
 33%|███▎      | 1/3 [04:37<09:14, 277.18s/it]

Using Device: cuda


Optimizing: 100%|██████████| 50/50 [05:45<00:00,  6.91s/step, loss=0.9]
 67%|██████▋   | 2/3 [10:26<05:19, 319.41s/it]

Using Device: cuda


Optimizing: 100%|██████████| 50/50 [07:09<00:00,  8.58s/step, loss=0.442]
100%|██████████| 3/3 [17:43<00:00, 354.47s/it]


In [66]:
# One output file per style image, numbered in the order of ``style_paths``.
names = [f"./Style Transfer/with_style_{index}.png" for index in range(1, 4)]

save_images(generated_images, names)

In [18]:
# Repeat the transfer with layer 20 as the content layer (index 20 is the
# ReLU after conv4_1 in the module list at the top of the notebook).
generated_images = NST(content_path, style_paths, "20")

  0%|          | 0/3 [00:00<?, ?it/s]

Using Device: cuda


Optimizing: 100%|██████████| 50/50 [04:57<00:00,  5.94s/step, loss=0.165]
 33%|███▎      | 1/3 [04:58<09:57, 298.55s/it]

Using Device: cuda


Optimizing: 100%|██████████| 50/50 [05:59<00:00,  7.19s/step, loss=0.362]
 67%|██████▋   | 2/3 [11:01<05:36, 336.29s/it]

Using Device: cuda


Optimizing: 100%|██████████| 50/50 [05:49<00:00,  6.98s/step, loss=0.199]
100%|██████████| 3/3 [16:53<00:00, 338.00s/it]


In [19]:
# One output file per style image for the relu4_1 content-layer run.
names = [
    f"./Style Transfer/Content_layer_relu4_1_style_{index}.png"
    for index in range(1, 4)
]

save_images(generated_images, names)