In [1]:
print('running')
import os
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.utils import save_image
from torch.utils.data import DataLoader, Dataset
import matplotlib.pyplot as plt
from torchvision import transforms
from itertools import zip_longest
from torch.utils.tensorboard import SummaryWriter



running


2025-02-13 01:16:08.715026: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:479] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-02-13 01:16:08.738470: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:10575] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-02-13 01:16:08.738547: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1442] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-13 01:16:08.753359: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Preferred GPU index for this experiment. If the host exposes fewer GPUs than
# that, fall back to GPU 0 instead of failing later with an invalid device
# ordinal; with no CUDA at all, run on the CPU.
PREFERRED_GPU_INDEX = 3
if torch.cuda.is_available():
    _gpu_index = PREFERRED_GPU_INDEX if PREFERRED_GPU_INDEX < torch.cuda.device_count() else 0
    device = torch.device(f'cuda:{_gpu_index}')
else:
    device = torch.device('cpu')


In [3]:
BATCH_SIZE = 2  # Samples per batch; read by load_images when it builds each DataLoader
EPOCHS = 200  # Number of training epochs; the training cell assigns this same value again

In [4]:
# Define the ImageDataset class
class ImageDataset(Dataset):
    """Dataset that yields every image file found directly inside one folder.

    Only regular files whose extension is in ``IMAGE_EXTENSIONS`` are kept, so
    stray entries (sub-directories such as ``.ipynb_checkpoints``, text files,
    ...) do not crash ``__getitem__``. Paths are sorted so that an index maps
    to the same file on every run and on every machine.

    Args:
        folder_path: Directory that is scanned (non-recursively) for images.
        transform: Optional callable applied to each RGB PIL image.
    """

    # File extensions (compared case-insensitively) that count as images.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff', '.webp')

    def __init__(self, folder_path, transform=None):
        self.file_paths = sorted(
            os.path.join(folder_path, file_name)
            for file_name in os.listdir(folder_path)
            if file_name.lower().endswith(self.IMAGE_EXTENSIONS)
            and os.path.isfile(os.path.join(folder_path, file_name))
        )
        self.transform = transform

    def __len__(self):
        """Return the number of image files that were found."""
        return len(self.file_paths)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and return it, transformed if a transform is set."""
        img_path = self.file_paths[idx]
        # Use a context manager so the underlying file handle is closed as
        # soon as the pixel data has been converted.
        with Image.open(img_path) as img:
            image = img.convert("RGB")
        if self.transform is not None:
            image = self.transform(image)
        return image

In [5]:
#  Define transformation pipeline
transform_pipeline = transforms.Compose(
    [
        # PIL image -> tensor
        transforms.ToTensor(),
        # Per channel (x - 0.5) / 0.5, i.e. rescale to [-1, 1]
        transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
    ]
)

In [6]:
def load_images(folder_path):
    """Build a shuffling DataLoader over the images stored in ``folder_path``.

    Images are wrapped in an ``ImageDataset`` that applies ``transform_pipeline``
    and are served in batches of ``BATCH_SIZE``.
    """
    loader_options = {
        "batch_size": BATCH_SIZE,
        "shuffle": True,
        "num_workers": 4,    # Adjust this based on your CPU cores
        "pin_memory": True,  # Speeds up data transfer to GPU if using CUDA
    }
    image_dataset = ImageDataset(folder_path, transform=transform_pipeline)
    return DataLoader(image_dataset, **loader_options)

In [7]:
# Image folders of the two domains (source = West Bengal, target = Haryana).
SOURCE_IMAGE_DIR = '/home/umang.shikarvar/distance_exp/west_bengal_same_class_count_10_120_1000/images'
TARGET_IMAGE_DIR = '/home/umang.shikarvar/distance_exp/haryana_same_class_count_10_120_1000/images'

source = load_images(SOURCE_IMAGE_DIR)
target = load_images(TARGET_IMAGE_DIR)

In [8]:
# Define models
class ConvolutionalBlock(nn.Module):
    """Convolution -> InstanceNorm2d -> optional ReLU.

    Args:
        in_channels: Channels of the input feature map.
        out_channels: Channels produced by the convolution.
        is_downsampling: If True use ``nn.Conv2d`` with reflect padding,
            otherwise use ``nn.ConvTranspose2d``.
        add_activation: If True the block ends with ReLU, otherwise with Identity.
        **kwargs: Forwarded unchanged to the convolution constructor
            (kernel_size, stride, padding, ...).
    """

    def __init__(self, in_channels, out_channels, is_downsampling=True, add_activation=True, **kwargs):
        super().__init__()
        if is_downsampling:
            conv_layer = nn.Conv2d(in_channels, out_channels, padding_mode="reflect", **kwargs)
        else:
            conv_layer = nn.ConvTranspose2d(in_channels, out_channels, **kwargs)

        norm_layer = nn.InstanceNorm2d(out_channels)
        activation = nn.ReLU(inplace=True) if add_activation else nn.Identity()

        # Keep the three sub-modules at indices 0/1/2 of `self.conv`.
        self.conv = nn.Sequential(conv_layer, norm_layer, activation)

    def forward(self, x):
        """Run ``x`` through the conv / norm / activation stack."""
        return self.conv(x)

class ResidualBlock(nn.Module):
    """Two 3x3 ``ConvolutionalBlock``s wrapped in an additive skip connection.

    The first block ends with ReLU, the second has no activation; the channel
    count is unchanged so the result can be added to the input.
    """

    def __init__(self, channels):
        super().__init__()
        conv_kwargs = dict(kernel_size=3, padding=1)
        activated = ConvolutionalBlock(channels, channels, add_activation=True, **conv_kwargs)
        plain = ConvolutionalBlock(channels, channels, add_activation=False, **conv_kwargs)
        self.block = nn.Sequential(activated, plain)

    def forward(self, x):
        """Return ``x`` plus the output of the two stacked blocks."""
        residual = self.block(x)
        return x + residual

In [9]:
# Self-Attention Module

class SelfAttention(nn.Module):
    """Spatial self-attention over all H*W positions, added back to the input.

    Queries and keys are 1x1-conv projections to ``in_channels // 8`` channels;
    values keep ``in_channels`` channels.

    Note: the attention matrix has shape (batch, H*W, H*W), so memory grows
    quadratically with the number of spatial positions.
    """

    def __init__(self, in_channels):
        super().__init__()
        reduced_channels = in_channels // 8
        self.query = nn.Conv2d(in_channels, reduced_channels, kernel_size=1)
        self.key = nn.Conv2d(in_channels, reduced_channels, kernel_size=1)
        self.value = nn.Conv2d(in_channels, in_channels, kernel_size=1)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        """Return ``x + attention(x)`` with the same (batch, C, H, W) shape as ``x``."""
        batch, C, H, W = x.shape

        q = self.query(x).flatten(2).transpose(1, 2)   # (batch, H*W, C // 8)
        k = self.key(x).flatten(2)                     # (batch, C // 8, H*W)
        v = self.value(x).flatten(2)                   # (batch, C, H*W)

        # Row i holds the softmax-normalised weights of position i over all positions.
        weights = self.softmax(torch.bmm(q, k))        # (batch, H*W, H*W)

        attended = torch.bmm(v, weights.transpose(1, 2)).view(batch, C, H, W)
        return x + attended  # Residual connection




In [10]:
# Adding SE Block (Channel Attention)
class SEBlock(nn.Module):
    """Squeeze-and-Excitation style channel attention.

    Each channel is globally average-pooled to one value, passed through a
    two-layer bottleneck MLP ending in a sigmoid, and the resulting per-channel
    gate rescales the input feature map.

    Args:
        in_channels: Number of channels of the input feature map.
        reduction: Bottleneck ratio; the hidden width is ``in_channels // reduction``.
    """

    def __init__(self, in_channels, reduction=16):
        super().__init__()
        hidden_channels = in_channels // reduction
        self.global_avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(in_channels, hidden_channels),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_channels, in_channels),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Scale every channel of ``x`` (batch, C, H, W) by its learned gate."""
        batch, C, _height, _width = x.shape
        squeezed = self.global_avg_pool(x).reshape(batch, C)
        gates = self.fc(squeezed).reshape(batch, C, 1, 1)
        # Broadcasting over H and W is equivalent to expanding the gates to x's shape.
        return x * gates


In [11]:
# Modify Convolution Layers to Use Spectral Normalization

import torch.nn.utils.spectral_norm as spectral_norm

class SpectralConv(nn.Module):
    """A reflect-padded ``nn.Conv2d`` wrapped in spectral normalization."""

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding):
        super().__init__()
        base_conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            padding_mode="reflect",
        )
        self.conv = spectral_norm(base_conv)

    def forward(self, x):
        """Apply the spectrally-normalized convolution to ``x``."""
        normalized_conv = self.conv
        return normalized_conv(x)


In [12]:
class Generator(nn.Module):
    """Encoder / residual / decoder image-to-image generator with attention.

    Pipeline: 7x7 stem -> two stride-2 downsampling blocks -> spatial
    self-attention -> ``num_residuals`` residual blocks -> SE channel attention
    -> two stride-2 transposed-conv upsampling blocks -> 7x7 conv -> tanh.

    Args:
        img_channels: Channels of the input and of the generated image.
        num_features: Width of the stem; the bottleneck uses 4x this width.
        num_residuals: Number of residual blocks at the bottleneck.
    """

    def __init__(self, img_channels, num_features=64, num_residuals=6):
        super().__init__()
        base, mid, deep = num_features, num_features * 2, num_features * 4

        stem_conv = nn.Conv2d(img_channels, base, kernel_size=7, stride=1, padding=3, padding_mode="reflect")
        self.initial_layer = nn.Sequential(stem_conv, nn.InstanceNorm2d(base), nn.ReLU(inplace=True))

        down_kwargs = dict(is_downsampling=True, kernel_size=3, stride=2, padding=1)
        self.downsampling_layers = nn.ModuleList(
            [ConvolutionalBlock(c_in, c_out, **down_kwargs) for c_in, c_out in ((base, mid), (mid, deep))]
        )

        # Self-attention at the bottleneck, i.e. after both stride-2 blocks.
        self.attention1 = SelfAttention(deep)

        residual_blocks = [ResidualBlock(deep) for _ in range(num_residuals)]
        self.residual_layers = nn.Sequential(*residual_blocks)

        # SE block applied to the output of the residual stack.
        self.channel_attention = SEBlock(deep)

        up_kwargs = dict(is_downsampling=False, kernel_size=3, stride=2, padding=1, output_padding=1)
        self.upsampling_layers = nn.ModuleList(
            [ConvolutionalBlock(c_in, c_out, **up_kwargs) for c_in, c_out in ((deep, mid), (mid, base))]
        )

        self.last_layer = nn.Conv2d(base, img_channels, kernel_size=7, stride=1, padding=3, padding_mode="reflect")

    def forward(self, x):
        """Translate the image batch ``x``; the output is squashed by tanh."""
        features = self.initial_layer(x)
        for down in self.downsampling_layers:
            features = down(features)

        features = self.attention1(features)         # spatial self-attention
        features = self.residual_layers(features)
        features = self.channel_attention(features)  # SE channel attention

        for up in self.upsampling_layers:
            features = up(features)

        logits = self.last_layer(features)
        return torch.tanh(logits)


In [13]:
import cv2
import numpy as np

def denoise_image(image_path):
    """Read an image from disk and return a bilateral-filtered copy.

    Args:
        image_path: Path of the image file to read with ``cv2.imread``.

    Returns:
        The image after ``cv2.bilateralFilter`` (d=9, sigmaColor=75, sigmaSpace=75).

    Raises:
        FileNotFoundError: If the file is missing or cannot be decoded.
    """
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising; fail
    # here with a clear message rather than inside bilateralFilter.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    return cv2.bilateralFilter(image, d=9, sigmaColor=75, sigmaSpace=75)


In [14]:
def enhance_edges(image_path):
    """Read an image as grayscale and sharpen it with its Laplacian.

    Computes ``1.5 * image - 0.5 * laplacian`` in float64, then clips the
    result to [0, 255] and converts it back to uint8.

    Args:
        image_path: Path of the image file to read with ``cv2.imread``.

    Returns:
        The edge-enhanced grayscale image as a uint8 array.

    Raises:
        FileNotFoundError: If the file is missing or cannot be decoded.
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread signals failure by returning None instead of raising.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    laplacian = cv2.Laplacian(image, cv2.CV_64F)
    # addWeighted needs both inputs to share a depth (or an explicit dtype);
    # the grayscale image is uint8 while the Laplacian is float64, so promote
    # the image before blending.
    blended = cv2.addWeighted(image.astype(np.float64), 1.5, laplacian, -0.5, 0)
    return np.clip(blended, 0, 255).astype(np.uint8)


In [15]:
class Discriminator(nn.Module):
    """Convolutional discriminator built from spectrally-normalized 4x4 convs.

    The network outputs a map of scores rather than a single value. Every
    convolution uses kernel 4, padding 1 and reflect padding. Layers whose
    width equals the last entry of ``features`` use stride 1, the others
    stride 2. A ``SelfAttention`` module follows the 256-channel stage.

    Args:
        in_channels: Channels of the input image.
        features: Output widths of the successive convolution stages.
    """

    def __init__(self, in_channels=3, features=[64, 128, 256, 512]):
        super().__init__()

        def sn_conv(c_in, c_out, stride):
            # 4x4 reflect-padded convolution wrapped in spectral normalization.
            conv = nn.Conv2d(c_in, c_out, kernel_size=4, stride=stride, padding=1, padding_mode="reflect")
            return spectral_norm(conv)

        self.initial_layer = nn.Sequential(
            sn_conv(in_channels, features[0], 2),
            nn.LeakyReLU(0.2, inplace=True),
        )

        body = []
        prev_channels = features[0]
        last_width = features[-1]

        for width in features[1:]:
            stride = 1 if width == last_width else 2
            body += [
                sn_conv(prev_channels, width, stride),
                nn.InstanceNorm2d(width),
                nn.LeakyReLU(0.2, inplace=True),
            ]

            # Add Self-Attention after 256 channels
            if width == 256:
                body.append(SelfAttention(width))

            prev_channels = width

        # Final projection to a single-channel score map.
        body.append(sn_conv(prev_channels, 1, 1))

        self.model = nn.Sequential(*body)

    def forward(self, x):
        """Return the score map for the image batch ``x``."""
        hidden = self.initial_layer(x)
        return self.model(hidden)


In [16]:

# Generators: generator_g is G: X → Y, generator_f is F: Y → X.
generator_g, generator_f = (Generator(img_channels=3).to(device) for _ in range(2))
# Discriminators: discriminator_x for domain X, discriminator_y for domain Y.
discriminator_x, discriminator_y = (Discriminator().to(device) for _ in range(2))

In [17]:
def count_parameters(model):
    """Return the total number of elements in ``model``'s trainable parameters.

    Parameters with ``requires_grad`` set to False are not counted.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

# Trainable parameter count of each of the four networks
g_g_params = count_parameters(generator_g)
g_f_params = count_parameters(generator_f)
d_x_params = count_parameters(discriminator_x)
d_y_params = count_parameters(discriminator_y)

# Report one line per network
print("Trainable Parameters:")
for _label, _n_params in (
    ("Generator G (X → Y)", g_g_params),
    ("Generator F (Y → X)", g_f_params),
    ("Discriminator X", d_x_params),
    ("Discriminator Y", d_y_params),
):
    print(f"{_label}: {_n_params:,}")

# Grand total over all four networks
total_params = sum((g_g_params, g_f_params, d_x_params, d_y_params))
print(f"Total Trainable Parameters: {total_params:,}")

Trainable Parameters:
Generator G (X → Y): 7,928,403
Generator F (Y → X): 7,928,403
Discriminator X: 2,846,977
Discriminator Y: 2,846,977
Total Trainable Parameters: 21,550,760


In [18]:


# ---------------------------------------------------------------------------
# CycleGAN training: optimizers, LR schedules, losses and the training loop.
# Uses generator_g / generator_f / discriminator_x / discriminator_y, the
# `source` / `target` DataLoaders and `device`, all defined in earlier cells.
# ---------------------------------------------------------------------------

# Optimizers
lr = 2e-4
betas = (0.5, 0.999)

generator_g_optimizer = optim.Adam(generator_g.parameters(), lr=lr, betas=betas)
generator_f_optimizer = optim.Adam(generator_f.parameters(), lr=lr, betas=betas)
discriminator_x_optimizer = optim.Adam(discriminator_x.parameters(), lr=lr, betas=betas)
discriminator_y_optimizer = optim.Adam(discriminator_y.parameters(), lr=lr, betas=betas)


# Learning rate schedulers
def lr_lambda(epoch):
    """Return the LR multiplier: 1.0 up to epoch 100, then linear decay."""
    return 1.0 - max(0, epoch - 100) / 100  # Linear decay after 100 epochs


scheduler_g = optim.lr_scheduler.LambdaLR(generator_g_optimizer, lr_lambda=lr_lambda)
scheduler_f = optim.lr_scheduler.LambdaLR(generator_f_optimizer, lr_lambda=lr_lambda)
scheduler_dx = optim.lr_scheduler.LambdaLR(discriminator_x_optimizer, lr_lambda=lr_lambda)
scheduler_dy = optim.lr_scheduler.LambdaLR(discriminator_y_optimizer, lr_lambda=lr_lambda)

# Loss functions
mse_loss = nn.MSELoss()
l1_loss = nn.L1Loss()


def cycle_consistency_loss(real, cycled):
    """L1 distance between an image and its round-trip reconstruction, weighted by 10."""
    return l1_loss(real, cycled) * 10.0  # Weight factor 10.0


def identity_loss(real, same):
    """L1 distance between an image and the generator output for it, weighted by 5."""
    return l1_loss(real, same) * 5.0  # Weight factor 5.0


# Initialize TensorBoard writer
writer = SummaryWriter(log_dir='tb_logs')

# Training loop
EPOCHS = 200  # Define total epochs
try:
    for epoch in range(EPOCHS):
        g_loss_total, f_loss_total, dx_loss_total, dy_loss_total = 0, 0, 0, 0
        cycle_loss_total, identity_loss_total = 0, 0

        # zip stops with the shorter loader, so every step gets one batch from
        # each domain and no batches are loaded only to be discarded.
        for real_x_batch, real_y_batch in zip(source, target):
            real_x, real_y = real_x_batch.to(device), real_y_batch.to(device)

            # ------------------------
            # Train Generators G and F
            # ------------------------

            # Identity loss (G(Y) ≈ Y and F(X) ≈ X)
            identity_x = generator_f(real_x)
            identity_y = generator_g(real_y)
            id_loss_x = identity_loss(real_x, identity_x)
            id_loss_y = identity_loss(real_y, identity_y)

            # Adversarial loss. Each discriminator is evaluated once and its
            # output reused for the all-ones target, instead of running a
            # second forward pass just to obtain a tensor of the right shape.
            fake_y = generator_g(real_x)  # G(X)
            fake_x = generator_f(real_y)  # F(Y)

            pred_fake_y = discriminator_y(fake_y)
            pred_fake_x = discriminator_x(fake_x)
            adv_loss_g = mse_loss(pred_fake_y, torch.ones_like(pred_fake_y))
            adv_loss_f = mse_loss(pred_fake_x, torch.ones_like(pred_fake_x))

            # Cycle-consistency loss
            cycle_x = generator_f(fake_y)  # F(G(X)) ≈ X
            cycle_y = generator_g(fake_x)  # G(F(Y)) ≈ Y
            cycle_loss_x = cycle_consistency_loss(real_x, cycle_x)
            cycle_loss_y = cycle_consistency_loss(real_y, cycle_y)

            # Total generator loss
            total_g_loss = adv_loss_g + cycle_loss_x + id_loss_y
            total_f_loss = adv_loss_f + cycle_loss_y + id_loss_x

            generator_g_optimizer.zero_grad()
            generator_f_optimizer.zero_grad()
            # One backward pass over the summed loss accumulates the same
            # gradients as two sequential backward calls, but lets autograd
            # free the graph immediately (no retain_graph needed).
            (total_g_loss + total_f_loss).backward()
            generator_g_optimizer.step()
            generator_f_optimizer.step()

            # -------------------------
            # Train Discriminators X, Y
            # -------------------------

            # Discriminator X loss (fakes are detached so no gradient reaches F).
            # zero_grad also clears gradients left by the generator backward pass.
            pred_real_x = discriminator_x(real_x)
            pred_fake_x_detached = discriminator_x(fake_x.detach())
            real_loss_x = mse_loss(pred_real_x, torch.ones_like(pred_real_x))
            fake_loss_x = mse_loss(pred_fake_x_detached, torch.zeros_like(pred_fake_x_detached))
            dx_loss = (real_loss_x + fake_loss_x) * 0.5

            discriminator_x_optimizer.zero_grad()
            dx_loss.backward()
            discriminator_x_optimizer.step()

            # Discriminator Y loss (fakes are detached so no gradient reaches G)
            pred_real_y = discriminator_y(real_y)
            pred_fake_y_detached = discriminator_y(fake_y.detach())
            real_loss_y = mse_loss(pred_real_y, torch.ones_like(pred_real_y))
            fake_loss_y = mse_loss(pred_fake_y_detached, torch.zeros_like(pred_fake_y_detached))
            dy_loss = (real_loss_y + fake_loss_y) * 0.5

            discriminator_y_optimizer.zero_grad()
            dy_loss.backward()
            discriminator_y_optimizer.step()

            # Accumulate losses (sums over the epoch, not means)
            g_loss_total += total_g_loss.item()
            f_loss_total += total_f_loss.item()
            dx_loss_total += dx_loss.item()
            dy_loss_total += dy_loss.item()
            cycle_loss_total += cycle_loss_x.item() + cycle_loss_y.item()
            identity_loss_total += id_loss_x.item() + id_loss_y.item()

        # Learning rate scheduling
        scheduler_g.step()
        scheduler_f.step()
        scheduler_dx.step()
        scheduler_dy.step()

        # Log losses to TensorBoard
        writer.add_scalar('Loss/Generator_G', g_loss_total, epoch + 1)
        writer.add_scalar('Loss/Generator_F', f_loss_total, epoch + 1)
        writer.add_scalar('Loss/Discriminator_X', dx_loss_total, epoch + 1)
        writer.add_scalar('Loss/Discriminator_Y', dy_loss_total, epoch + 1)
        writer.add_scalar('Loss/Cycle_Consistency', cycle_loss_total, epoch + 1)
        writer.add_scalar('Loss/Identity', identity_loss_total, epoch + 1)

        # Print epoch summary
        print(
            f"Epoch [{epoch + 1}/{EPOCHS}]: "
            f"G_loss: {g_loss_total:.4f}, F_loss: {f_loss_total:.4f}, "
            f"D_X_loss: {dx_loss_total:.4f}, D_Y_loss: {dy_loss_total:.4f}, "
            f"Cycle_loss: {cycle_loss_total:.4f}, Identity_loss: {identity_loss_total:.4f}"
        )

        # Save model checkpoints every 20 epochs
        if (epoch + 1) % 20 == 0:
            torch.save(generator_g.state_dict(), f'generator_WB_to_Haryana_{epoch+1}.pth')
            torch.save(generator_f.state_dict(), f'generator_Haryana_to_WB_{epoch+1}.pth')
finally:
    # Flush and close the TensorBoard event file even if training is
    # interrupted (e.g. by a CUDA out-of-memory error).
    writer.close()


OutOfMemoryError: CUDA out of memory. Tried to allocate 4.88 GiB. GPU 3 has a total capacity of 79.25 GiB of which 4.33 GiB is free. Including non-PyTorch memory, this process has 74.91 GiB memory in use. Of the allocated memory 72.12 GiB is allocated by PyTorch, and 2.30 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [1]:
import torch.distributed as dist

# Report whether this PyTorch build provides the NCCL distributed backend.
nccl_available = dist.is_nccl_available()
print(f"NCCL Backend Available: {nccl_available}")


NCCL Backend Available: True


In [3]:
import socket

def find_free_port():
    """
    Ask the OS for a currently unused TCP port.

    A temporary IPv4 TCP socket is bound to port 0, which lets the system pick
    a free port; the socket is closed again before returning.

    Returns:
        int: Available port number.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        probe.bind(("", 0))  # Port 0 = let the OS choose
        _host, port = probe.getsockname()
        return port
    finally:
        probe.close()

# Example usage: ask for a free port once and report it. The probing socket is
# already closed when find_free_port returns, so the port is not reserved.
free_port = find_free_port()
print(f"Using free port: {free_port}")


Using free port: 50175
