<a href="https://colab.research.google.com/github/martinpius/Computer-Vission/blob/main/Neural_style_transfer_NST_Pytorch_implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount("/content/drive", force_remount = True)
try:
  COLAB = True
  import torch
  print(f">>>> You are on CoLaB with torch version {torch.__version__}")
except Exception as e:
  print(f">>>> {type(e)}: {e}\n>>>> please correct {type(e)} and re-load your drive")
def time_fmt(t: float = 132.19)->float:
  h = int(t / (60 * 60))
  m = int(t % (60 * 60) / 60)
  s = int(t % 60)
  return f"hrs: {h} min: {m:>02} sec: {s:>05.2f}"
if torch.cuda.is_available():
  device = torch.device("cuda")
else:
  device = torch.device("cpu")
print(f">>>> testing time formating function....\n>>>> time elapsed\t{time_fmt()}")


Mounted at /content/drive
>>>> You are on CoLaB with torch version 1.9.0+cu102
>>>> testing time formating function....
>>>> time elapsed	hrs: 0 min: 02 sec: 12.00


In [2]:
# In this notebook we are going to implement a neural style transfer network from scratch in pytorch.
# The general idea behind the neural style transfer technique is to create a new image by combine the
# contents of two separate images. Training is done by minimizing the two types of losses due to the 
# difference between the target/generated image and the original image, the style image and the generated image.
# so basically we will have three images (the original image, the style image and the generated image== noisy at the start)
# this network make the use of ussual computer vision models such as vggnet where specific layers are identified and trained
# key note: the weights of the pre-trained network stays frozen  and in training we only want the noisy-image to resemble the 
# original image with the style of the style-image ie, we are modifying inputs rather than the weights!!!!!!!!
# In this implementantion we consider vgg19 architecture(pre-trained net). We discard the classifier and select every convolutional
# layer which comes after the maxpooling.
# THE LOSS FUNCTION COMBINE TWO COMPONENTS - THE STYLE LOSS AND THE CONTENT LOSS connected by two hyper-parameters
# Loss = J_content[alpha*L(C, G)] + J_style[beta*L(C,S)] where C== loss contribution due to content(original image)
# S and G due to stlye and generated images. The content loss is simply the norm of every layer between the content and generated 
# images. For the style loss we need to compute Grahm matrices for the style and generated images.

In [3]:
import torch, torchvision, os
import torchvision.transforms as transforms
import random, time, datetime
import numpy as np
import torch.nn as nn
import torch.optim as optim
from tensorflow import summary
from torchvision.utils import save_image
from PIL import Image
%load_ext tensorboard


In [4]:
os.chdir("/content/drive/MyDrive/NLT")

In [5]:
# set the seed values for reproducability:
seed = 1234
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True

In [6]:
# We start by loading and extracting the layers of vgg19 pretrained network
model = torchvision.models.vgg19(pretrained = True).features
print(model)

Downloading: "https://download.pytorch.org/models/vgg19-dcbb9e9d.pth" to /root/.cache/torch/hub/checkpoints/vgg19-dcbb9e9d.pth


HBox(children=(FloatProgress(value=0.0, max=574673361.0), HTML(value='')))


Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (17): ReLU(inplace=True)
  (18): MaxPo

In [7]:
# we select layer after every maxpooling start with  layer 0:
class VGG_Edited(nn.Module):
  def __init__(self):
    super(VGG_Edited, self).__init__()
    self.selected_layers = ["0", "5", "10", "19","28"]
    # so basically we need the layers up to 28th as seen from the print above
    self.model = torchvision.models.vgg19(pretrained = True).features[:29]

  def forward(self, input_tensor):
    features = [] # container for features related to the selected layers
    for layer_num, layer in enumerate(self.model):
      # pass the input (image) and place to the container 
      # the features corresponds to the selected layers
      input_tensor = layer(input_tensor) # output after apssing through the selected conv-layer
      if str(layer_num) in self.selected_layers:
        features.append(input_tensor)
    return features


In [8]:
# We now load an image using the following function:
def load_image(img_name):
  image = Image.open(img_name)
  # transform and add batch dimension
  image = mytransform(image).unsqueeze(0)
  return image.to(device = device)
  

In [9]:
mytransform = transforms.Compose([
                                  transforms.Resize((356, 356)),
                                  transforms.ToTensor(),
                                  
])

In [12]:
# Load in the data (only 3 images)
orig_img = load_image("che.jpeg")
style_img = load_image("orig.jpg")
# we may create a noisy image or copy the original image as generate image to start with
generated = orig_img.clone().requires_grad_(True)
# instantiate the model class
model = VGG_Edited().to(device = device).eval()
print(orig_img.shape)
print(style_img.shape)
print(generated.shape)

torch.Size([1, 3, 356, 356])
torch.Size([1, 3, 356, 356])
torch.Size([1, 3, 356, 356])


In [13]:

# Model's Hyperparameters
alpha = 1 # for the content loss
beta = 0.01 # for the style loss
EPOCHS = 5000
learning_rate = 1e-3
optimizer = optim.Adam(params = [generated], lr = learning_rate)

tic = time.time()
for epoch in range(EPOCHS):
  print(f"\n>>>> train start for epoch {epoch + 1}\n>>>> please wait while the model is training")
  generated_features = model(generated)
  org_features = model(orig_img)
  style_features = model(style_img)
  # computing the loss at every conv-layer (5 of them)
  style_loss = 0
  content_loss = 0
  for g_feature, s_feature, o_feature in zip(generated_features, style_features, org_features):
    batch, c, h, w = g_feature.shape
    content_loss+= torch.mean((g_feature - o_feature)**2)
    # we now compute the Grahm matrices to get style loss
    G = g_feature.view(c, h*w).mm(g_feature.view(c, h*w).t()) # for the generated img
    A = s_feature.view(c, h*w).mm(s_feature.view(c, h*w).t()) # for the style img
    style_loss+= torch.mean((G-A)**2)
  # the total loss will be
  total_loss = alpha * content_loss + beta * style_loss
  optimizer.zero_grad()
  total_loss.backward()
  optimizer.step()

  if epoch % 200 == 0:
    print(f"\n>>>> total loss is {total_loss:.4f}")
    save_image(generated, 'marto_che.png')
toc = time.time()
print(f"\n>>>> total time elapsed: {time_fmt(toc - tic)}")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
>>>> train start for epoch 3340
>>>> please wait while the model is training

>>>> train start for epoch 3341
>>>> please wait while the model is training

>>>> train start for epoch 3342
>>>> please wait while the model is training

>>>> train start for epoch 3343
>>>> please wait while the model is training

>>>> train start for epoch 3344
>>>> please wait while the model is training

>>>> train start for epoch 3345
>>>> please wait while the model is training

>>>> train start for epoch 3346
>>>> please wait while the model is training

>>>> train start for epoch 3347
>>>> please wait while the model is training

>>>> train start for epoch 3348
>>>> please wait while the model is training

>>>> train start for epoch 3349
>>>> please wait while the model is training

>>>> train start for epoch 3350
>>>> please wait while the model is training

>>>> train start for epoch 3351
>>>> please wait while the model is training
