# **`1.`** Imports and Dependencies

In [1]:
# Import custom utility functions: for image processing, Gram matrix, etc.
import utils.image_utils as utils

# Import function to create a video from saved image frames (optional)
from utils.video_utils import create_video_from_intermediate_results

# PyTorch imports for building and training the neural network
import torch
from torch.optim import Adam, LBFGS         # Optimizers: Adam (gradient-based), LBFGS (quasi-Newton)
from torch.autograd import Variable         # Allows tracking gradients on tensors
import numpy as np                          # For numerical operations and random noise initialization
import os                                   # For filesystem operations like paths and directories

# **`2.`** Loss Function Builder

In [2]:
def build_loss(neural_net, optimizing_img, target_representations, content_feature_maps_index, style_feature_maps_indices, config):
    """
    Compute the weighted style-transfer loss for the image being optimized.

    Parameters:
    - neural_net: Callable feature extractor; it is called on the image and its
      result is indexed / iterated as a sequence of per-layer feature maps.
    - optimizing_img: The image tensor currently being optimized.
    - target_representations: Two-element sequence:
        [target content representation, list of target style Gram matrices]
    - content_feature_maps_index: Index of the feature map used for the content loss.
    - style_feature_maps_indices: Collection of feature-map indices used for the style loss.
    - config: Mapping providing 'content_weight', 'style_weight' and 'tv_weight'.

    Returns:
    - (total_loss, content_loss, style_loss, tv_loss). The three components are
      returned unweighted; total_loss is their weighted sum.
    """
    target_content = target_representations[0]
    target_style_grams = target_representations[1]

    # One forward pass yields every feature map needed below.
    feature_maps = neural_net(optimizing_img)

    # Content term: mean squared error at the content layer, after squeezing
    # dimension 0 out of the current feature map.
    current_content = feature_maps[content_feature_maps_index].squeeze(axis=0)
    content_criterion = torch.nn.MSELoss(reduction='mean')
    content_loss = content_criterion(target_content, current_content)

    # Style term: Gram matrices of the selected layers of the current image ...
    current_style_grams = [
        utils.gram_matrix(feature_map)
        for layer_idx, feature_map in enumerate(feature_maps)
        if layer_idx in style_feature_maps_indices
    ]

    # ... compared pairwise (summed squared error on element 0 of each Gram
    # matrix) with the targets, then averaged over the number of target layers.
    style_criterion = torch.nn.MSELoss(reduction='sum')
    style_loss = sum(
        (
            style_criterion(target_gram[0], current_gram[0])
            for target_gram, current_gram in zip(target_style_grams, current_style_grams)
        ),
        0.0,
    )
    style_loss = style_loss / len(target_style_grams)

    # Total-variation term, computed by the project helper on the image itself.
    tv_loss = utils.total_variation(optimizing_img)

    weighted_content = config['content_weight'] * content_loss
    weighted_style = config['style_weight'] * style_loss
    weighted_tv = config['tv_weight'] * tv_loss
    total_loss = weighted_content + weighted_style + weighted_tv

    return total_loss, content_loss, style_loss, tv_loss

# **`3.`** Optimization Step Builder

In [3]:
def make_tuning_step(neural_net, optimizer, target_representations, content_feature_maps_index, style_feature_maps_indices, config):
    """
    Build a closure that runs a single optimization step on the image.

    Parameters:
    - neural_net: Feature extractor forwarded to build_loss.
    - optimizer: Optimizer whose step() / zero_grad() are called on every step.
    - target_representations: Target content / style representations for build_loss.
    - content_feature_maps_index: Index of the feature map used for the content loss.
    - style_feature_maps_indices: Indices of the feature maps used for the style loss.
    - config: Mapping holding the loss weights read by build_loss.

    Returns:
    - tuning_step(optimizing_img): performs one update and returns the tuple
      (total_loss, content_loss, style_loss, tv_loss) produced by build_loss.
    """

    def tuning_step(optimizing_img):
        # Forward pass: all four loss values for the current image.
        losses = build_loss(
            neural_net,
            optimizing_img,
            target_representations,
            content_feature_maps_index,
            style_feature_maps_indices,
            config,
        )

        # Element 0 is the total loss; differentiate it, apply the update,
        # then reset the gradients so the next step starts from zero.
        losses[0].backward()
        optimizer.step()
        optimizer.zero_grad()

        return losses

    return tuning_step

---

# **`4.`** Main Neural Style Transfer Function

---

In [4]:
def neural_style_transfer(config):
    """
    Run neural style transfer end to end and save the intermediate results.

    The function is split into numbered blocks: input setup, image loading,
    initialization of the optimized image, target feature extraction and the
    optimization loop itself.

    Parameters:
    - config: Dictionary containing all hyperparameters and paths. Keys read here:
      'content_images_dir', 'content_img_name', 'style_images_dir', 'style_img_name',
      'output_img_dir', 'height', 'init_method', 'model', 'optimizer',
      'content_weight', 'style_weight', 'tv_weight'. The whole dictionary is also
      forwarded to utils.save_and_maybe_display.

    Returns:
    - Path of the directory the output images are written to.

    Raises:
    - ValueError: if config['optimizer'] is neither 'adam' nor 'lbfgs'.
    """

    # Iteration budget per optimizer.
    num_of_iterations = {
        "lbfgs": 1000,
        "adam": 1000
    }

    # Reject an unsupported optimizer before doing any work; previously the
    # function ran all the setup, skipped the optimization silently and still
    # returned an output path.
    optimizer_name = config['optimizer']
    if optimizer_name not in num_of_iterations:
        raise ValueError(
            f"Unsupported optimizer {optimizer_name!r}; expected 'adam' or 'lbfgs'."
        )
    max_iterations = num_of_iterations[optimizer_name]

    # ------------------------ Block 1: Input Setup ------------------------ #

    # Construct full paths to the content and style images
    content_img_path = os.path.join(config['content_images_dir'], config['content_img_name'])
    style_img_path = os.path.join(config['style_images_dir'], config['style_img_name'])

    # Output folder name: 'combined_<content stem>_<style stem>'
    out_dir_name = 'combined_' + os.path.splitext(os.path.basename(content_img_path))[0] + '_' + os.path.splitext(os.path.basename(style_img_path))[0]

    # Define where to save the output images (created if missing)
    dump_path = os.path.join(config['output_img_dir'], out_dir_name)
    os.makedirs(dump_path, exist_ok=True)

    # ---------------- Block 2: Load and Preprocess Images ---------------- #

    # Choose the device: GPU if available, else CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Both images are prepared with the same target height
    content_img = utils.prepare_img(content_img_path, config['height'], device)
    style_img = utils.prepare_img(style_img_path, config['height'], device)

    # ---------------- Block 3: Initialize Optimization Image ---------------- #

    init_method = config['init_method']
    if init_method == 'random':
        # Gaussian noise (mean=0, std=90) with the same shape as the content image
        gaussian_noise_img = np.random.normal(loc=0, scale=90., size=content_img.shape).astype(np.float32)
        init_img = torch.from_numpy(gaussian_noise_img).float().to(device)
    elif init_method == 'content':
        init_img = content_img
    else:
        # Any other value is treated as 'style' (kept for backward compatibility):
        # the style image is re-prepared at the content image's spatial size.
        init_img = utils.prepare_img(style_img_path, np.asarray(content_img.shape[2:]), device)

    # Leaf tensor that the optimizer updates. clone() gives it its own storage,
    # so in-place optimizer updates can never write into content_img / style_img
    # (the deprecated Variable wrapper used before shared storage with init_img).
    optimizing_img = init_img.detach().clone().requires_grad_(True)

    # ---------------- Block 4: Model and Feature Extraction ---------------- #

    # Load the model together with the layer indices used for content/style loss
    neural_net, content_feature_maps_index_name, style_feature_maps_indices_names = utils.prepare_model(config['model'], device)
    print(f'Using {config["model"]} in the optimization procedure.')

    # The targets are constants of the optimization, so no autograd graph is
    # recorded while computing them.
    with torch.no_grad():
        content_img_set_of_feature_maps = neural_net(content_img)
        style_img_set_of_feature_maps = neural_net(style_img)

        # Content target: feature map of the designated layer, dimension 0 squeezed
        target_content_representation = content_img_set_of_feature_maps[content_feature_maps_index_name[0]].squeeze(0)

        # Style targets: Gram matrices of the designated layers
        target_style_representation = [
            utils.gram_matrix(x) for cnt, x in enumerate(style_img_set_of_feature_maps)
            if cnt in style_feature_maps_indices_names[0]
        ]

    target_representations = [target_content_representation, target_style_representation]

    # ---------------- Block 5: Optimization Loop ---------------- #

    def report_progress(optimizer_label, iteration, total_loss, content_loss, style_loss, tv_loss):
        # Print the weighted loss components and hand the current image to the
        # project helper that saves (and optionally displays) it.
        with torch.no_grad():
            print(f'{optimizer_label} | iteration: {iteration:03}, total loss={total_loss.item():12.4f}, content_loss={config["content_weight"] * content_loss.item():12.4f}, style loss={config["style_weight"] * style_loss.item():12.4f}, tv loss={config["tv_weight"] * tv_loss.item():12.4f}')
            utils.save_and_maybe_display(optimizing_img, dump_path, config, iteration, max_iterations, should_display=False)

    # ---------- Block 5(a): Using Adam Optimizer ---------- #
    if optimizer_name == 'adam':
        optimizer = Adam((optimizing_img,), lr=1e1)  # High LR for faster updates
        tuning_step = make_tuning_step(
            neural_net, optimizer, target_representations,
            content_feature_maps_index_name[0], style_feature_maps_indices_names[0], config
        )
        for cnt in range(max_iterations):
            total_loss, content_loss, style_loss, tv_loss = tuning_step(optimizing_img)
            report_progress('Adam', cnt, total_loss, content_loss, style_loss, tv_loss)

    # ---------- Block 5(b): Optimization using L-BFGS ---------- #
    else:  # 'lbfgs' -- the only other value that passes the validation above
        optimizer = LBFGS((optimizing_img,), max_iter=max_iterations, line_search_fn='strong_wolfe')
        cnt = 0

        def closure():
            # L-BFGS drives the iterations itself and re-evaluates the loss
            # through this closure; cnt counts the closure evaluations.
            nonlocal cnt
            if torch.is_grad_enabled():
                optimizer.zero_grad()
            total_loss, content_loss, style_loss, tv_loss = build_loss(
                neural_net, optimizing_img, target_representations,
                content_feature_maps_index_name[0], style_feature_maps_indices_names[0], config
            )
            if total_loss.requires_grad:
                total_loss.backward()
            report_progress('L-BFGS', cnt, total_loss, content_loss, style_loss, tv_loss)
            cnt += 1
            return total_loss

        optimizer.step(closure)

    return dump_path  # Path where output images are saved


# **`5.`** Configuration Setup and Execution

In [5]:
# Base directory: the current working directory of the notebook process.
notebook_dir = os.getcwd()

# Root folder holding all resources (only the path is built here; nothing is created).
default_resource_dir = os.path.join(notebook_dir, 'data')

# Sub-folders of the resource root for content images, style images and
# the stylized outputs, in that order.
content_images_dir, style_images_dir, output_img_dir = (
    os.path.join(default_resource_dir, sub_dir)
    for sub_dir in ('content-images', 'style-images', 'output-images')
)

# Image naming format as (zero-padding width, file extension), e.g. 0001.jpg.
img_format = (4, '.jpg')


# **`6.`** Configuration Dictionary (Hyperparameters)

In [7]:
optimization_config = {
    # Filename of the content image (joined with content_images_dir by neural_style_transfer)
    "content_img_name": "birds.jpg",

    # Filename of the style image (joined with style_images_dir by neural_style_transfer)
    "style_img_name": "Mona_Lisa.jpg",

    # Target height passed to utils.prepare_img for both the content and the style image
    "height": 500,  # Chosen by the author as a quality / efficiency trade-off

    # Loss weights: multipliers applied to each loss term in build_loss
    "content_weight": 1e5,  # Weight of the content (MSE on feature maps) term
    "style_weight": 1e-1,    # Weight of the style (Gram matrix) term
    "tv_weight": 1e2,         # Weight of the total-variation term

    # Optimizer selection: neural_style_transfer handles "adam" and "lbfgs"
    "optimizer": "adam",   # Adam is used for this run

    # Model name passed to utils.prepare_model for feature extraction
    "model": "vgg16",       # VGG-16 is used for this run

    # Initialization method for the optimizing image
    "init_method": "random",  # "random": Gaussian noise shaped like the content image
                               # "content": start from the content image
                               # any other value (e.g. "style"): start from the style image
                               # resized to the content image's spatial size

    # Saving frequency, read by utils.save_and_maybe_display via this config
    "saving_freq": 1,       # presumably 1 = save every iteration -- confirm in utils

    # Paths to required directories — must be correctly set to avoid file errors
    "content_images_dir": content_images_dir,
    "style_images_dir": style_images_dir,
    "output_img_dir": output_img_dir,

    # Image naming format: (zero-padding width, file extension)
    "img_format": img_format,
}


# **`7.`** Run the Style Transfer Process

In [8]:
# Run neural style transfer with the configuration dictionary.
# neural_style_transfer returns the directory the output images were written to.
results_path = neural_style_transfer(optimization_config)



Using vgg16 in the optimization procedure.
Adam | iteration: 000, total loss=27948097536.0000, content_loss=12610676562.5000, style loss=147080089.6000, tv loss=15190339200.0000
Adam | iteration: 001, total loss=23144196096.0000, content_loss=8701750000.0000, style loss=74992460.8000, tv loss=14367452800.0000
Adam | iteration: 002, total loss=19813142528.0000, content_loss=6249530078.1250, style loss=41806624.0000, tv loss=13521804800.0000
Adam | iteration: 003, total loss=17455468544.0000, content_loss=4767692968.7500, style loss=26336208.0000, tv loss=12661440000.0000
Adam | iteration: 004, total loss=15668339712.0000, content_loss=3848325390.6250, style loss=18653096.0000, tv loss=11801361600.0000
Adam | iteration: 005, total loss=14204331008.0000, content_loss=3232678320.3125, style loss=14592750.4000, tv loss=10957060000.0000
Adam | iteration: 006, total loss=12936210432.0000, content_loss=2783733593.7500, style loss=12376666.4000, tv loss=10140100800.0000
Adam | iteration: 007, t

In [None]:
# Combine the intermediate frames stored under results_path into a video;
# img_format is the (padding, extension) naming pattern of those frames.
create_video_from_intermediate_results(results_path, img_format)

| Parameter        | Value       | Why it's used                                   |
| ---------------- | ----------- | ----------------------------------------------- |
| `content_weight` | `1e5`       | Prioritizes preserving shapes and layout.       |
| `style_weight`   | `1e-1`      | Stylizes textures without overwhelming content. |
| `tv_weight`      | `1e2`       | Adds smoothness (regularization).               |
| `height`         | `500`       | Balances visual quality and GPU memory usage.   |
| `lr` (Adam)      | `1e1`       | A high learning rate to update pixels quickly.  |
| `iterations`     | `1000`      | Empirically chosen for quality convergence.     |


# **`A.`** Check pixel intensity before and after morphing

In [None]:
import cv2
import matplotlib.pyplot as plt

# Compare two saved frames side by side together with their per-channel
# pixel-intensity histograms.

# Paths of the two frames to compare (hard-coded to a local output folder).
# NOTE(review): 0000.jpg is presumably the first saved optimization frame rather
# than the untouched content image, and 1001.jpg must actually exist for the
# optimizer that was run -- confirm both before relying on the plot titles.
image_path_1 = "D:/NST/data/output-images/combined_birds_Mona_Lisa/0000.jpg"
image_path_2 = "D:/NST/data/output-images/combined_birds_Mona_Lisa/1001.jpg"

# cv2.imread returns None instead of raising when a file cannot be read,
# and it yields the channels in BGR order.
image1 = cv2.imread(image_path_1)
image2 = cv2.imread(image_path_2)

# Fail with a real exception that names the unreadable file(s). The previous
# exit() call is the interactive-only helper from `site` and disrupts a
# notebook kernel instead of reporting a clear error.
missing_paths = [
    path
    for path, image in ((image_path_1, image1), (image_path_2, image2))
    if image is None
]
if missing_paths:
    raise FileNotFoundError(
        "Error: One or both images not found: " + ", ".join(missing_paths)
    )

# Convert from OpenCV's BGR order to RGB, which matplotlib expects
image1_rgb = cv2.cvtColor(image1, cv2.COLOR_BGR2RGB)
image2_rgb = cv2.cvtColor(image2, cv2.COLOR_BGR2RGB)

# Line colors for the histogram curves, in RGB channel order
colors = ('r', 'g', 'b')  # Red, Green, Blue

# 2x2 grid: one row per image; column 0 shows the image, column 1 its histogram
fig, axs = plt.subplots(2, 2, figsize=(12, 12))

# (image, image title, histogram title) for each row of the grid
panels = (
    (image1_rgb, "Original Bird image", "Histogram of Original Image"),
    (image2_rgb, "Post Mona Lisa Style merging", "Histogram post NST using VGG-16"),
)

for row, (rgb_image, image_title, hist_title) in enumerate(panels):
    image_ax, hist_ax = axs[row, 0], axs[row, 1]

    # Left column: the image itself, without axes
    image_ax.imshow(rgb_image)
    image_ax.axis('off')
    image_ax.set_title(image_title)

    # Right column: one 256-bin histogram curve per color channel
    for i, col in enumerate(colors):
        hist = cv2.calcHist([rgb_image], [i], None, [256], [0, 256])
        hist_ax.plot(hist, color=col, linewidth=2)
    hist_ax.set_title(hist_title)
    hist_ax.set_xlim([0, 256])
    hist_ax.set_xlabel("Pixel Intensity")
    hist_ax.set_ylabel("Frequency")
    hist_ax.legend(["Red", "Green", "Blue"])

# Adjust layout to prevent overlap
plt.tight_layout()
plt.show()
