In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
from torchvision import transforms, models
from PIL import Image
import os
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt

import sys

In [2]:
# Let cuDNN benchmark its convolution algorithms and reuse the fastest one.
torch.backends.cudnn.benchmark = True

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [3]:
# Load the convolutional part (`.features`) of an ImageNet-pretrained VGG-19.
# `pretrained=True` is deprecated since torchvision 0.13 in favour of the
# `weights=` argument, so use the new API when this torchvision provides it
# and fall back to the legacy flag on older installations.
if hasattr(models, "VGG19_Weights"):
    vgg = models.vgg19(weights=models.VGG19_Weights.IMAGENET1K_V1).features
else:
    vgg = models.vgg19(pretrained=True).features

# The network is used only as a fixed feature extractor: freeze every weight
# so that no gradient is ever computed for the VGG parameters.
for param in vgg.parameters():
    param.requires_grad_(False)

# eval() makes the inference-only use explicit. Both `.to` and `.eval` return
# the module, so the cell still displays the architecture as its output.
vgg.to(device).eval()



Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (17): ReLU(inplace=True)
  (18): MaxPoo

In [4]:
def load_dataset(folder, transform, extensions=('.jpg', '.jpeg', '.png')):
    """Load every image found directly in ``folder`` into one in-memory dataset.

    Args:
        folder: Directory that contains the image files (not searched recursively).
        transform: Callable applied to each RGB ``PIL.Image``. Every call must
            return a tensor of the same shape so the results can be stacked.
        extensions: File suffixes treated as images, matched case-insensitively.

    Returns:
        A ``TensorDataset`` wrapping a single stacked tensor whose first
        dimension indexes the images; each item is therefore a 1-tuple.

    Raises:
        ValueError: If ``folder`` contains no file with a matching extension.
    """
    suffixes = tuple(ext.lower() for ext in extensions)

    # os.listdir returns entries in arbitrary order; sorting keeps the dataset
    # index -> file mapping reproducible between runs and machines.
    paths = sorted(
        os.path.join(folder, name)
        for name in os.listdir(folder)
        if name.lower().endswith(suffixes)
    )
    if not paths:
        raise ValueError(
            f"No image files with extensions {suffixes} found in '{folder}'"
        )

    tensors = []
    for path in paths:
        # The context manager closes the underlying file as soon as the image
        # has been decoded, instead of leaking one handle per image.
        with Image.open(path) as img:
            tensors.append(transform(img.convert('RGB')))
    return TensorDataset(torch.stack(tensors))

In [5]:
# Preprocessing applied to every image before it is fed to VGG:
# resize to a fixed 256x256, convert to a float tensor, then normalize each
# channel with the mean / std values that im_convert later reverses.
transform = transforms.Compose(
    [
        transforms.Resize(size=(256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
    ]
)

In [6]:
# Load the two style collections and the content collection, all with the
# same preprocessing pipeline. Order of loading: Van Gogh, Monet, content.
vangogh_dataset, monet_dataset, content_dataset = [
    load_dataset(image_folder, transform)
    for image_folder in (
        "cleandata/augmented_vangogh",
        "cleandata/augmented_monet",
        "cleandata/augmented_content",
    )
]

In [7]:
# Wrap each dataset in a shuffling DataLoader (32 images per batch, 4 workers).
vangogh_loader, monet_loader, content_loader = (
    DataLoader(image_set, batch_size=32, shuffle=True, num_workers=4)
    for image_set in (vangogh_dataset, monet_dataset, content_dataset)
)

## CONVERTING TENSOR TO IMAGES

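- Moves the tensor to the CPU, then clones and detaches it, so the conversion works on a copy outside the autograd graph and never modifies the original tensor.  
- Converts the PyTorch tensor into a NumPy array. The batch dimension is kept: a batched input is **not** squeezed, so index the result (e.g. `[0]`) to get a single image.  
- Transposes the array from PyTorch's `(C, H, W)` format to `(H, W, C)` (or from `(B, C, H, W)` to `(B, H, W, C)` for a batch) for compatibility with image-processing libraries.  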
- Denormalizes the image by reversing ImageNet normalization using mean `(0.485, 0.456, 0.406)` and standard deviation `(0.229, 0.224, 0.225)`.  
- Clips pixel values to the range `[0,1]` to ensure proper visualization.  
- Useful for displaying or saving model-generated images in a human-readable format. 🚀

In [8]:
def im_convert(tensor):
    """Turn a normalized image tensor into a displayable NumPy array.

    Accepts either a single image ``(C, H, W)`` or a batch ``(B, C, H, W)``
    and returns ``(H, W, C)`` or ``(B, H, W, C)`` respectively. The batch
    dimension is preserved, never squeezed. The per-channel normalization is
    undone and values are clipped to ``[0, 1]``.
    """
    channel_std = np.array((0.229, 0.224, 0.225))
    channel_mean = np.array((0.485, 0.456, 0.406))

    # Work on a CPU copy that is detached from the autograd graph.
    pixels = tensor.detach().to("cpu").clone().numpy()

    # Move the channel axis last; a 4-D input keeps its leading batch axis.
    axis_order = (0, 2, 3, 1) if len(pixels.shape) == 4 else (1, 2, 0)
    pixels = pixels.transpose(*axis_order)

    # Reverse the normalization (x * std + mean), then clamp to valid range.
    return (pixels * channel_std + channel_mean).clip(0, 1)

In [9]:
def get_features(image, model, layers=None):
    """Run ``image`` through ``model`` and collect selected activations.

    Args:
        image: Input tensor passed to the first child module of ``model``.
        model: Module whose direct children (``model._modules``) are applied
            one after another in registration order.
        layers: Mapping from child-module name to the key under which that
            module's output is stored. Defaults to the layer indices used
            here for conv1_1 ... conv5_1 plus conv4_2 (the content layer).

    Returns:
        Dict mapping each requested key to the corresponding activation.

    Note:
        The whole model is always executed, and the stored tensors are not
        copied. If a later module operates in place on its input (e.g. a
        ``ReLU(inplace=True)``), the stored activation reflects that update.
    """
    if layers is None:
        layers = {
            '0': 'conv1_1', '5': 'conv2_1', '10': 'conv3_1',
            '19': 'conv4_1',
            '21': 'conv4_2',  # content layer
            '28': 'conv5_1',
        }

    collected = {}
    activation = image
    for module_name, module in model._modules.items():
        activation = module(activation)
        if module_name in layers:
            collected[layers[module_name]] = activation
    return collected
    

In [10]:
def gram_matrix(tensor):
    """Compute the (unnormalized) Gram matrix of each feature map in a batch.

    Args:
        tensor: 4-D tensor of shape ``(batch, depth, height, width)``.

    Returns:
        Tensor of shape ``(batch, depth, depth)`` where entry ``[b, i, j]`` is
        the dot product of channels ``i`` and ``j`` of sample ``b`` over all
        spatial positions. No normalization is applied here.
    """
    b, d, h, w = tensor.size()  # batch_size, depth, height, width
    # reshape (unlike view) also handles non-contiguous inputs such as
    # permuted or channels_last tensors, copying only when it has to.
    flat = tensor.reshape(b, d, h * w)
    return torch.bmm(flat, flat.transpose(1, 2))

In [11]:
class NSTModel:
    """Optimization-based neural style transfer driven by the frozen ``vgg``.

    No network weights are trained: for every call to ``process_batch`` a copy
    of the content images is optimized directly so that its VGG features match
    the content images at ``conv4_2`` and the style images' Gram matrices at
    the layers listed in ``style_weights``.
    """

    def __init__(self):
        # Shared, frozen feature extractor defined at notebook level.
        self.vgg = vgg
        # Relative contribution of each style layer to the style loss.
        self.style_weights = {
            'conv1_1': 1.0,
            'conv2_1': 0.75,
            'conv3_1': 0.2,
            'conv4_1': 0.2,
            'conv5_1': 0.2
        }
        # Global balance between content preservation and stylization.
        self.content_weight = 1
        self.style_weight = 1e3

    def process_batch(self, content_img, style_img, steps=1000, lr=0.003):
        """Stylize a batch of content images.

        Args:
            content_img: Content batch of shape ``(B, C, H, W)``.
            style_img: Style batch of shape ``(S, C, H, W)``. If ``S`` differs
                from ``B`` and neither is 1, the style batch is cycled or
                truncated to ``B`` images. If either is 1, the Gram matrices
                broadcast as before.
            steps: Number of Adam updates applied to the target images.
            lr: Adam learning rate.

        Returns:
            Detached tensor with the shape of ``content_img``, on ``device``,
            holding the stylized (still normalized) images.
        """
        content_img = content_img.to(device)
        style_img = style_img.to(device)

        # The Gram difference below needs matching (or broadcastable) batch
        # sizes; a short last batch from either loader would otherwise crash.
        n_content, n_style = content_img.size(0), style_img.size(0)
        if n_style != n_content and n_style != 1 and n_content != 1:
            index = torch.arange(n_content, device=style_img.device) % n_style
            style_img = style_img[index]

        # Content and style statistics are constants of the optimization, so
        # compute them once and without building an autograd graph.
        with torch.no_grad():
            content_target = get_features(content_img, self.vgg)['conv4_2']
            style_features = get_features(style_img, self.vgg)
            style_grams = {
                layer: gram_matrix(style_features[layer])
                for layer in self.style_weights
            }

        # Start from the content images. detach().clone() guarantees a leaf
        # tensor that already lives on `device`, as the optimizer requires.
        target = content_img.detach().clone().requires_grad_(True)
        optimizer = optim.Adam([target], lr=lr)

        for _ in range(steps):
            target_features = get_features(target, self.vgg)
            content_loss = torch.mean(
                (target_features['conv4_2'] - content_target) ** 2
            )

            style_loss = 0
            for layer, weight in self.style_weights.items():
                target_feature = target_features[layer]
                _, d, h, w = target_feature.shape
                gram_diff = gram_matrix(target_feature) - style_grams[layer]
                layer_style_loss = weight * torch.mean(gram_diff ** 2)
                # Normalize by the feature-map size so layers are comparable.
                style_loss = style_loss + layer_style_loss / (d * h * w)

            total_loss = (
                self.content_weight * content_loss
                + self.style_weight * style_loss
            )

            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

        # Return a detached view so callers that keep the result do not also
        # keep the leaf's .grad buffer alive.
        return target.detach()

In [12]:
def _next_style_batch(loader, iterator, batch_size):
    """Fetch the next style batch, restarting ``loader`` when it is exhausted.

    Returns ``(images, iterator)`` where ``images`` is the unwrapped image
    tensor with exactly ``batch_size`` images (cycled or truncated if the
    loader produced a different amount) and ``iterator`` is the iterator to
    use for the following call.
    """
    batch = next(iterator, None)
    if batch is None:
        iterator = iter(loader)
        batch = next(iterator)
    images = batch[0]  # Unwrap from TensorDataset (exactly once)
    if images.size(0) != batch_size:
        index = torch.arange(batch_size) % images.size(0)
        images = images[index]
    return images, iterator


def train_nst(content_loader, vangogh_loader, monet_loader, epochs=1):
    """Stylize every content batch in both styles and save the results.

    For each epoch, every batch of ``content_loader`` is paired with the next
    batch of each style loader (style loaders restart independently when they
    run out), stylized with ``NSTModel.process_batch`` and written to
    ``output/vangogh`` and ``output/monet`` as
    ``stylized_epoch{E}_{index}.png``.

    Images are saved right after each batch is processed, so stylized tensors
    are not held in memory for the whole epoch and finished work survives an
    interruption.

    Args:
        content_loader: DataLoader yielding 1-tuples of content image batches.
        vangogh_loader: DataLoader yielding 1-tuples of Van Gogh style batches.
        monet_loader: DataLoader yielding 1-tuples of Monet style batches.
        epochs: Number of passes over ``content_loader``.
    """
    model = NSTModel()
    os.makedirs("output/vangogh", exist_ok=True)
    os.makedirs("output/monet", exist_ok=True)

    vangogh_iter = iter(vangogh_loader)
    monet_iter = iter(monet_loader)

    for epoch in range(epochs):
        print(f"Epoch {epoch+1}/{epochs}")
        # Running image counter: unique file index regardless of batch size
        # (a fixed `batch_idx * 16` collides when batches hold 32 images).
        saved = 0

        # The context manager closes the bar even if processing is interrupted.
        with tqdm(total=len(content_loader), desc="Processing", unit="batch",
                  dynamic_ncols=True, file=sys.stdout) as pbar:
            for i, content_batch in enumerate(content_loader):
                content_img = content_batch[0]  # Unwrap from TensorDataset
                batch_size = content_img.size(0)

                # Get style images, sized to match the content batch.
                vangogh_img, vangogh_iter = _next_style_batch(
                    vangogh_loader, vangogh_iter, batch_size)
                monet_img, monet_iter = _next_style_batch(
                    monet_loader, monet_iter, batch_size)

                # Stylize, then convert to [B, H, W, C] arrays on the CPU.
                vangogh_outputs = im_convert(
                    model.process_batch(content_img, vangogh_img))
                monet_outputs = im_convert(
                    model.process_batch(content_img, monet_img))

                for vangogh_out, monet_out in zip(vangogh_outputs, monet_outputs):
                    plt.imsave(f"output/vangogh/stylized_epoch{epoch+1}_{saved}.png", vangogh_out)
                    plt.imsave(f"output/monet/stylized_epoch{epoch+1}_{saved}.png", monet_out)
                    saved += 1

                # Update progress bar with batch count
                pbar.set_postfix({'Batches': f'{i+1}/{len(content_loader)}'})
                pbar.update(1)

        print(f"Saved {saved} images per style for epoch {epoch+1}.")

    print("🎯 Style transfer complete!")

In [13]:
# Run the full stylization. Every epoch restarts the optimization from the
# content images, so this call is long-running.
train_nst(
    content_loader=content_loader,
    vangogh_loader=vangogh_loader,
    monet_loader=monet_loader,
    epochs=10,
)

Epoch 1/10
Processing:   0%|          | 0/52 [00:00<?, ?batch/s]

KeyboardInterrupt: 

In [None]:
def stylize_image(image_path, style):
    """Stylize one image file with a randomly chosen style image and show it.

    Args:
        image_path: Path of the content image to stylize.
        style: ``"vangogh"`` or ``"monet"`` (case-insensitive); selects the
            dataset from which one style image is drawn at random.

    Raises:
        ValueError: If ``style`` is not one of the supported names.

    Side effects:
        Writes ``output/stylized_{style}.png`` and displays the original and
        stylized images side by side.
    """
    # Validate the cheap argument first, before any image or model work.
    if style.lower() == "vangogh":
        style_dataset = vangogh_loader.dataset
    elif style.lower() == "monet":
        style_dataset = monet_loader.dataset
    else:
        raise ValueError("Style must be 'vangogh' or 'monet'")

    model = NSTModel()
    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # Decode once and close the file; the RGB copy is reused for display.
    with Image.open(image_path) as img:
        original = img.convert("RGB")
    content_img = transform(original).unsqueeze(0)

    # Use a single random style image from the dataset. Indexing the dataset
    # directly avoids starting DataLoader workers for one batch and avoids
    # optimizing against a whole batch of styles at once.
    style_index = torch.randint(len(style_dataset), (1,)).item()
    style_img = style_dataset[style_index][0].unsqueeze(0)

    result = model.process_batch(content_img, style_img)
    # im_convert keeps the batch dimension; imsave/imshow need (H, W, C).
    output_image = im_convert(result)[0]

    os.makedirs("output", exist_ok=True)
    output_path = f"output/stylized_{style}.png"
    plt.imsave(output_path, output_image)

    plt.figure(figsize=(8, 4))
    plt.subplot(1, 2, 1)
    plt.imshow(original)
    plt.title("Original Image")
    plt.axis("off")

    plt.subplot(1, 2, 2)
    plt.imshow(output_image)
    plt.title(f"Stylized ({style.capitalize()})")
    plt.axis("off")
    plt.show()

    print(f"🎨 Stylized image saved at: {output_path}")