In [7]:
import torch
import torchvision.transforms as transforms
import torchvision.models as models
from PIL import Image
import requests
from io import BytesIO

import torch.nn.functional as F
import torch.nn as nn

class VGG(nn.Module):
    
    # Note: layers = list of layers we want to get the features of
    def __init__(self, layers):
        super().__init__()
        
        # Sort just in case
        layers = sorted(set(layers))
        
        self.layers = layers
        
        # ImageNet normalization
        self.normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                              std=[0.229, 0.224, 0.225])

        # Pretrained model- we only want the features and only those which include the layers we want 
        self.model = models.vgg19(pretrained=True).features[:layers[-1]+1]
        self.model.eval()
        self.model.requires_grad_(False)
        
        
    def forward(self, input, layers=None):
        # Sort or get default layer (for image)
        layers = self.layers if layers is None else sorted(set(layers))
        features = {}
        
        index = 0
        
        for l in layers:
            # Efficient! Only get features from the layers we currently need
            input = self.model[index:l+1](input)
            index = l+1
            features[l]=input
        return features
         
        
class ContentLoss(nn.Module):
    def __init__(self, target):
        super().__init__()
        self.register_buffer('target', target)

    def forward(self, input):
        # Get the number of feature maps (channels) and the dimensions of each map
        N = input.size(1)  # Number of feature maps
        M = input.size(2) * input.size(3)  # Height times width of the feature map
        
        # Calculate the content loss
        loss = F.mse_loss(input,self.target, reduction='sum')
        # Normalize the loss by the number of elements in the feature maps
        normalized_loss = loss / (2 * N * M)
        return normalized_loss

    
    
# 4d Tensor -> Gram Matrix
# class GramMatrix(nn.Module):
#     def forward(self, v):
#         # Flatten
#         v_f = v.flatten(-2)
#         # Transpose (switch last two layers)
#         v_f_t = v_f.transpose(-2, -1)
#         # Matrix multiplication
#         v_mul = v_f @ v_f_t
#         # Normalize
#         gram = v_mul / (v_mul.shape[0] * v_mul.shape[1])
#         return gram
class GramMatrix(nn.Module):
    def forward(self, v):
        # Get batch size, number of feature maps (channels), height, and width
        b, c, h, w = v.size()
        # Flatten the feature maps
        v_f = v.view(b, c, h*w)
        # Transpose the feature maps
        v_f_t = v_f.transpose(1, 2)
        # Compute the gram product
        v_mul = torch.bmm(v_f, v_f_t)
        # Normalize the gram matrix by dividing by the number of elements in each feature map
        gram = v_mul / (c * h * w)
        return gram


# class StyleLoss(nn.Module):
#     # Register target gram matrix for reuse
#     def __init__(self, target_gram):
#         super().__init__()
#         self.register_buffer('target', target_gram)

#     # Forward pass- Gram Matrix distance
#     def forward(self, input):
#         return nn.MSELoss()(GramMatrix()(input), self.target)
class StyleLoss(nn.Module):
    def __init__(self, target_gram):
        super(StyleLoss, self).__init__()
        self.target = target_gram

    def forward(self, G, input):

        self.loss = nn.functional.mse_loss(G, self.target, reduction='sum')
        N = input.size(0)
        M = input.size(1) * input.size(2)  # Height times width of the feature map.
        self.loss /= (4 * N * M)
        return self.loss
    
    
    
class TVLoss(nn.Module):
    def forward(self, input):
        x_diff = input[..., :-1, :-1] - input[..., :-1, 1:]
        y_diff = input[..., :-1, :-1] - input[..., 1:, :-1]
        diff = x_diff**2 + y_diff**2
        return torch.sum(diff / (input.shape[-2] * input.shape[-1]))

    
    
import matplotlib.pyplot as plt
def display_image(tensor):
    plt.imshow(tensor.squeeze().permute(1, 2, 0)  )

def size_to_fit(size, max_dim, scale_up=False):
    w, h = size
    if not scale_up and max(h, w) <= max_dim:
        return w, h
    new_w, new_h = max_dim, max_dim
    if h > w:
        new_w = round(max_dim * w / h)
    else:
        new_h = round(max_dim * h / w)
    return new_w, new_h


# Function to load and preprocess the image
def load_image(url, size=224):
    response = requests.get(url)
    image = Image.open(BytesIO(response.content))

    # Transformation: Resize and center crop
    transform = transforms.Compose([
        transforms.Resize(size),  # Resize so the smaller side is size
        transforms.CenterCrop(size),  # Center crop to the desired size
        transforms.ToTensor(),  # Convert the PIL Image to a tensor
    ])

    image = transform(image).unsqueeze(0)  # Add a batch dimension
    return image

content_url = "https://static1.smartbear.co/smartbearbrand/media/images/home/sb-hero-bg-img.jpg"
content_image = load_image(content_url)
style_url = "https://collectionapi.metmuseum.org/api/collection/v1/iiif/436535/796067/main-image"
style_image = load_image(style_url)


content_layers = [22]
content_weights = [350] 

style_layers = [1, 6, 11, 20, 29]
style_weights = [256, 64, 16, 4, 1]
layers = style_layers + content_layers

vgg = VGG(layers=style_layers + content_layers)


style_features = vgg(style_image, style_layers)
style_targets = []
for i in style_features.keys():
    print(i)
    style_targets.append(GramMatrix()(style_features[i]).detach())

content_features = vgg(content_image, content_layers)
content_targets = []
for i in content_features.keys():
    content_targets.append(content_features[i].detach())

loss_fns = []
for style_target in style_targets:
    loss_fns.append(StyleLoss(style_target))
for content_target in content_targets:
    loss_fns.append(ContentLoss(content_target))


from tqdm import tqdm
output_image = content_image.detach().clone()
output_image.requires_grad_(True)
style_weights = [1e3/n**2 for n in [64,128,256,512,512]]
content_weights = [1e0]
weights = style_weights  + content_weights
from torch import optim
losses = []
images = []
opt = optim.Adam([output_image], lr=5/255)
for i in tqdm(range(500)):
#     print(i)
    if (i%100==0):
        images.append(output_image.clone().detach())
#         display_image(output_image.clone().detach()
    features = vgg(output_image)

    loss = torch.tensor(0.)

    x = 0
    style_loss_tot = 0
    for layer in style_layers:
        current_features = GramMatrix()(features[layer])
        style_lossnow = weights[x] * loss_fns[x](current_features, features[layer])
        style_loss_tot += style_lossnow
        loss = loss +style_lossnow
        x += 1
        
    print(style_loss_tot.item())
#     x = len(style_layers)
    content_loss_tot = 0
    for layer in content_layers:
        current_features = features[layer]
        content_lossnow = weights[x] * loss_fns[x](current_features)
        print(content_lossnow.item())
        print(content_lossnow)
        loss = loss +  content_lossnow 
        content_loss_tot = content_lossnow
    
#         x += 1
#     print(content_loss_tot.item())
        
#     tv = TVLoss()(output_image)
# #     print(tv.item())
#     loss = loss + tv
#     print(f"x {loss.item()}")
    
        
    opt.zero_grad()
#     losses.append(loss.item())

    loss.backward()

    opt.step()
#     print(f"Loss {loss.item()}")
    with torch.no_grad():
        output_image.clamp_(0, 1)


1
6
11
20
29


  0%|                                           | 1/500 [00:00<02:05,  3.98it/s]

2.0554602997435723e-07
0.0
tensor(0., grad_fn=<MulBackward0>)


  0%|▏                                          | 2/500 [00:00<02:02,  4.06it/s]

1.95248887280286e-07
0.0018849928164854646
tensor(0.0019, grad_fn=<MulBackward0>)


  1%|▎                                          | 3/500 [00:00<02:01,  4.10it/s]

3.5962227684649406e-07
0.49015718698501587
tensor(0.4902, grad_fn=<MulBackward0>)


  1%|▎                                          | 4/500 [00:00<01:59,  4.14it/s]

1.607347144272353e-07
0.25256726145744324
tensor(0.2526, grad_fn=<MulBackward0>)


  1%|▍                                          | 5/500 [00:01<01:59,  4.16it/s]

1.4668920300664468e-07
0.20890489220619202
tensor(0.2089, grad_fn=<MulBackward0>)


  1%|▌                                          | 6/500 [00:01<01:58,  4.15it/s]

1.6538868408133567e-07
0.15655061602592468
tensor(0.1566, grad_fn=<MulBackward0>)


  1%|▌                                          | 7/500 [00:01<01:58,  4.15it/s]

1.7623489156903815e-07
0.1275736540555954
tensor(0.1276, grad_fn=<MulBackward0>)


  2%|▋                                          | 8/500 [00:01<01:58,  4.16it/s]

1.7726749490520888e-07
0.10911277681589127
tensor(0.1091, grad_fn=<MulBackward0>)


  2%|▊                                          | 9/500 [00:02<01:58,  4.15it/s]

1.7717744071887864e-07
0.09765422344207764
tensor(0.0977, grad_fn=<MulBackward0>)


  2%|▊                                          | 9/500 [00:02<02:04,  3.95it/s]


KeyboardInterrupt: 

In [8]:
# Import necessary libraries
import torch
import torchvision.transforms as transforms
import torchvision.models as models
from PIL import Image
import requests
from io import BytesIO
import torch.nn as nn
import matplotlib.pyplot as plt

# Define the VGG class
class VGG(nn.Module):
    def __init__(self, layers):
        super(VGG, self).__init__()
        self.layers = sorted(set(layers))
        self.normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        self.model = models.vgg19(pretrained=True).features[:max(layers)+1]
        self.model.eval()
        for param in self.model.parameters():
            param.requires_grad = False

    def forward(self, x):
        x = self.normalize(x)
        features = []
        for i, layer in enumerate(self.model):
            x = layer(x)
            if i in self.layers:
                features.append(x)
        return features

# Define the ContentLoss class
class ContentLoss(nn.Module):
    def __init__(self, target):
        super(ContentLoss, self).__init__()
        self.target = target.detach()

    def forward(self, input):
        return nn.functional.mse_loss(input, self.target)

# Define the StyleLoss class
class StyleLoss(nn.Module):
    def __init__(self, target_feature):
        super(StyleLoss, self).__init__()
        self.target = self.gram_matrix(target_feature).detach()

    def gram_matrix(self, input):
        a, b, c, d = input.size()  
        features = input.view(a * b, c * d)  
        G = torch.mm(features, features.t())  
        return G.div(a * b * c * d)

    def forward(self, input):
        G = self.gram_matrix(input)
        return nn.functional.mse_loss(G, self.target)

# Define the function to load and preprocess the image
def load_image(url, size=None, max_size=None):
    response = requests.get(url)
    image = Image.open(BytesIO(response.content)).convert("RGB")

    if max_size is not None:
        scale = max_size / max(image.size)
        size = np.array(image.size) * scale
        image = image.resize(size.astype(int), Image.ANTIALIAS)

    if size is not None:
        image = transforms.Resize(size)(image)

    image = transforms.ToTensor()(image).unsqueeze(0)
    return image

# Define content and style images
content_url = "https://static1.smartbear.co/smartbearbrand/media/images/home/sb-hero-bg-img.jpg"
content_image = load_image(content_url, size=(224, 224))
style_url = "https://collectionapi.metmuseum.org/api/collection/v1/iiif/436535/796067/main-image"
style_image = load_image(style_url, size=(224, 224))

# Define VGG layers
content_layers = [22]
style_layers = [1, 6, 11, 20, 29]
all_layers = style_layers + content_layers
vgg = VGG(all_layers)

# Get features
content_features = vgg(content_image)
style_features = vgg(style_image)

# Initialize loss functions
content_loss_fn = ContentLoss(content_features[0])
style_loss_fns = [StyleLoss(style_feature) for style_feature in style_features]

# Initialize output image and optimizer
output_image = content_image.clone()
optimizer = torch.optim.Adam([output_image.requires_grad_()], lr=0.01)

# Run the style transfer
num_steps = 300
style_weight = 1e6
content_weight = 1e0

for step in range(num_steps):
    output_features = vgg(output_image)
    content_loss = content_loss_fn(output_features[0])
    style_loss = 0

    for fn, feature in zip(style_loss_fns, output_features[1:]):
        style_loss += fn(feature)

    total_loss = content_weight * content_loss + style_weight * style_loss

    optimizer.zero_grad()
    total_loss.backward()
    optimizer.step()

    if step % 50 == 0:
        print(f"Step {step}, Total loss: {total_loss.item()}")

# Display the final image
plt.imshow(output_image.squeeze().permute(1, 2, 0).detach().numpy())
plt.show()


  return nn.functional.mse_loss(G, self.target)


RuntimeError: The size of tensor a (128) must match the size of tensor b (64) at non-singleton dimension 1