<a href="https://colab.research.google.com/github/modi2009/ComputerVision/blob/GANS/Pix2Pix.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.
import kagglehub
# Fetch the Pix2Pix dataset through kagglehub and keep the value it returns
# (presumably the local directory that holds the dataset files — confirm against the kagglehub docs).
vikramtiwari_pix2pix_dataset_path = kagglehub.dataset_download('vikramtiwari/pix2pix-dataset')

print('Data source import complete.')


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory



# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import torch
import torch.nn as nn
import torchvision.utils
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import os
from PIL import Image
import albumentations as A
from albumentations.pytorch import ToTensorV2


  check_for_updates()


# **Discriminator**

In [None]:
# Create Convolutional Block function
def Conv2D(num_channel, number_filter,kernel_size = 4, stride = 2, padding = 1, padding_mode = 'reflect', dropout = 0):
  """Build a Conv -> BatchNorm -> LeakyReLU(0.2) -> Dropout block.

  Args:
      num_channel: number of input channels.
      number_filter: number of output channels (also the BatchNorm width).
      kernel_size, stride, padding, padding_mode: forwarded to ``nn.Conv2d``.
      dropout: drop probability for the trailing ``nn.Dropout`` (0 keeps everything).

  Returns:
      An ``nn.Sequential`` holding the four modules in the order listed above.
  """
  # The convolution has no bias term; the BatchNorm that follows has its own shift.
  convolution = nn.Conv2d(
      num_channel,
      number_filter,
      kernel_size = kernel_size,
      stride = stride,
      padding = padding,
      padding_mode = padding_mode,
      bias = False,
  )
  stages = [
      convolution,
      nn.BatchNorm2d(number_filter),
      nn.LeakyReLU(0.2),
      nn.Dropout(dropout),
  ]
  return nn.Sequential(*stages)

In [None]:
# Create Discriminator Model
class Discriminator(nn.Module):
    """Patch-style conditional discriminator.

    The conditioning image ``x`` and the candidate image ``y`` are concatenated
    along the channel axis and passed through a stack of strided convolutions
    that ends in a single-channel map of logits (no sigmoid is applied here).

    Args:
        in_channels: channels of ONE image; the first conv sees ``in_channels * 2``.
        features: output width of each convolutional stage. The first entry is
            used by the un-normalised initial conv, the rest by ``Conv2D`` blocks.
    """
    def __init__(self, in_channels = 3, features = [64,128,256,512]):
        super().__init__()
        # First stage: plain conv + LeakyReLU (no BatchNorm).
        self.initial = nn.Sequential(
            nn.Conv2d(in_channels*2, features[0], kernel_size = 4, stride = 2, padding = 1, padding_mode = 'reflect'),
            nn.LeakyReLU(0.2),
        )

        layers = []
        in_channels = features[0]
        last_index = len(features) - 1
        for index, feature in enumerate(features[1:], start = 1):
            # Only the final feature stage uses stride 1. The decision is made by
            # POSITION, not by value, so repeated widths such as [64, 128, 128]
            # still downsample in every stage except the last.
            layer = Conv2D(in_channels, feature, stride = 1 if index == last_index else 2)
            layers.append(layer)
            in_channels = feature

        # Output head: one logit per spatial location.
        layers.append(
            nn.Conv2d(
                in_channels, 1, kernel_size = 4, stride = 1, padding = 1 , padding_mode = 'reflect'
            )
        )
        self.model = nn.Sequential(*layers)

    def forward(self, x, y):
        """Return the logit map for the pair ``(x, y)``; both are concatenated on dim 1."""
        x = torch.cat([x, y], dim = 1)
        x = self.initial(x)
        x = self.model(x)
        return x


In [None]:
def testDiscriminator():
    """Smoke test: run one random 256x256 RGB pair through a fresh Discriminator and return the output shape."""
    sample_shape = (1, 3, 256, 256)
    x, y = torch.randn(sample_shape), torch.randn(sample_shape)
    return Discriminator()(x, y).shape
testDiscriminator()

torch.Size([1, 1, 30, 30])

# **Generator**

In [None]:
# Create Convolutional Transpose Block function
def ConvTranspose2D(num_channel, number_filter,kernel_size = 4, stride = 2, padding = 1, dropout = 0):
  """Build a ConvTranspose -> BatchNorm -> ReLU -> Dropout upsampling block.

  Args:
      num_channel: number of input channels.
      number_filter: number of output channels (also the BatchNorm width).
      kernel_size, stride, padding: forwarded to ``nn.ConvTranspose2d``.
      dropout: drop probability for the trailing ``nn.Dropout`` (0 keeps everything).

  Returns:
      An ``nn.Sequential`` holding the four modules in the order listed above.
  """
  # The transposed convolution has no bias term; BatchNorm follows it.
  upsample = nn.ConvTranspose2d(
      num_channel,
      number_filter,
      kernel_size = kernel_size,
      stride = stride,
      padding = padding,
      bias = False,
  )
  stages = [
      upsample,
      nn.BatchNorm2d(number_filter),
      nn.ReLU(True),  # in-place activation
      nn.Dropout(dropout),
  ]
  return nn.Sequential(*stages)

In [None]:
# Create Generator Model
class Generator(nn.Module):
    """Encoder-decoder generator with skip connections.

    Layout (every stage uses kernel 4, stride 2, padding 1):
      * ``initial_down``: conv + LeakyReLU, no normalisation.
      * ``layers_down``: one ``Conv2D`` block per entry of ``features``; each
        output is kept as a skip connection.
      * ``bottelneck``: conv + ReLU.
      * ``layers_up``: ``ConvTranspose2D`` blocks; every block except the first
        receives the previous output concatenated with a skip connection.
      * ``last_up``: transposed conv back to ``in_channels`` followed by Tanh.

    Args:
        in_channels: channels of both the input and the generated image.
        features: channel widths of the encoder stages.
    """
    def __init__(self, in_channels = 3, features = [64,128,256,512,512,512]):
        super().__init__()

        # ---- encoder ----
        self.initial_down = nn.Sequential(
            nn.Conv2d(in_channels, features[0], kernel_size = 4, stride = 2, padding = 1, padding_mode = 'reflect'),
            nn.LeakyReLU(0.2),
        )

        widest = features[-1]
        # Stage i maps features[i] -> features[i+1]; the final stage keeps the widest width.
        encoder_outputs = list(features[1:]) + [widest]
        self.layers_down = nn.ModuleList(
            [Conv2D(c_in, c_out, padding = 1) for c_in, c_out in zip(features, encoder_outputs)]
        )

        # ---- bottleneck ----
        self.bottelneck = nn.Sequential(
            nn.Conv2d(widest, widest, 4, 2, 1, padding_mode = 'reflect'),
            nn.ReLU(True)
        )

        # ---- decoder ----
        # Widths walked by the decoder: two extra copies of the widest width,
        # then the encoder widths in reverse.
        features_up = [widest, widest, *reversed(features)]
        self.layers_up = nn.ModuleList()
        for depth, (c_in, c_out) in enumerate(zip(features_up, features_up[1:])):
            # Every block after the first sees its input concatenated with a skip
            # connection, which doubles the incoming channel count.
            if depth > 0:
                c_in = c_in * 2
            # Dropout of 0.5 on the first three decoder blocks only.
            self.layers_up.append(ConvTranspose2D(c_in, c_out, dropout = 0.5 if depth <= 2 else 0))

        self.last_up = nn.Sequential(
            nn.ConvTranspose2d(features_up[-1], in_channels, 4, 2, 1),
            nn.Tanh()
        )

    def forward(self, x):
        """Run the encoder, bottleneck and decoder; returns the Tanh-activated output."""
        x = self.initial_down(x)

        # Encoder pass, remembering each stage output for the decoder.
        skip_connections = []
        for down in self.layers_down:
            x = down(x)
            skip_connections.append(x)

        x = self.bottelneck(x)

        # The first decoder block takes the bottleneck output on its own.
        x = self.layers_up[0](x)

        # Remaining decoder blocks consume the skips from deepest to shallowest.
        deepest_first = skip_connections[::-1]
        for position, up in enumerate(self.layers_up[1:]):
            x = up(torch.cat((x, deepest_first[position]), dim=1))

        return self.last_up(x)



In [None]:
def testGenerator():
    """Smoke test: run one random 256x256 RGB image through a fresh Generator and return the output shape."""
    sample = torch.randn((1, 3, 256, 256))
    return Generator()(sample).shape
testGenerator()

torch.Size([1, 3, 256, 256])

# **Load and Augment Dataset**

In [None]:
# data augmentations
def _normalize_and_tensorize():
    """Return a fresh pipeline: Normalize(mean=0.5, std=0.5, max_pixel_value=255) then ToTensorV2."""
    return A.Compose([
        A.Normalize([.5,.5,.5], [.5,.5,.5], max_pixel_value = 255.0),
        ToTensorV2()
    ])

# Geometric step applied to the input/target pair together.
both_transform = A.Compose([A.Resize(256,256)])

# Per-image steps; two separate (but identically configured) pipeline objects.
input_only_transform = _normalize_and_tensorize()
target_only_transform = _normalize_and_tensorize()

In [None]:
class MapDataset(Dataset):
    """Paired-image dataset where each file holds input and target side by side.

    Every file in ``root_dir`` is loaded as one RGB image. Columns
    ``[0, split_col)`` form the input image and columns ``[split_col, width)``
    form the target image.

    Args:
        root_dir: directory whose entries are the combined images.
        both_transform: optional albumentations pipeline called with
            ``image=<input>`` and ``mask=<target>`` so both halves receive the
            same transform parameters.
        input_only_transform: optional pipeline applied to the input half only.
        target_only_transform: optional pipeline applied to the target half only.
        split_col: column at which the combined image is cut (default 600, the
            value that was previously hard-coded).
    """
    def __init__(self, root_dir, both_transform=None, input_only_transform=None, target_only_transform=None, split_col=600):
        self.root_dir = root_dir
        # os.listdir returns entries in arbitrary order; sorting makes the
        # index -> file mapping reproducible across runs and machines.
        self.list_files = sorted(os.listdir(self.root_dir))
        self.both_transform = both_transform
        self.input_only_transform = input_only_transform
        self.target_only_transform = target_only_transform
        self.split_col = split_col

    def __len__(self):
        """Number of files found in ``root_dir``."""
        return len(self.list_files)

    def __getitem__(self, idx):
        """Return ``(input_image, target_image)`` for file number ``idx`` after all transforms."""
        img_path = os.path.join(self.root_dir, self.list_files[idx])
        # Context manager closes the file handle promptly (matters with many
        # DataLoader workers); convert() guarantees a 3-channel array even for
        # grayscale / palette / RGBA files.
        with Image.open(img_path) as img:
            image = np.array(img.convert("RGB"))
        input_image = image[:, :self.split_col, :]
        target_image = image[:, self.split_col:, :]

        # Apply transformations
        if self.both_transform:
            # NOTE(review): the target is passed through the `mask` key. albumentations
            # may treat mask targets differently from images (e.g. a different
            # interpolation in Resize) — consider registering it as an extra image
            # target via `additional_targets` instead; confirm against the library docs.
            augmented = self.both_transform(image=input_image, mask=target_image)
            input_image = augmented["image"]
            target_image = augmented["mask"]

        if self.input_only_transform:
            input_image = self.input_only_transform(image=input_image)["image"]

        if self.target_only_transform:
            target_image = self.target_only_transform(image=target_image)["image"]

        return input_image, target_image


In [None]:
# Number of (input, target) pairs per batch; passed to both DataLoaders below.
BATCH_SIZE = 16
# Number of DataLoader worker processes; passed to both DataLoaders below.
NUM_WORKERS = 4

In [None]:
# Load data and create DataLoader
# Build the split directories from the path returned by kagglehub instead of a
# hard-coded '/kaggle/input/...' prefix, so the notebook also runs outside Kaggle
# (e.g. Colab), where the dataset lands in a different location.
MAPS_ROOT = os.path.join(vikramtiwari_pix2pix_dataset_path, 'maps', 'maps')

train_dataset = MapDataset(
    root_dir=os.path.join(MAPS_ROOT, 'train'),
    both_transform=both_transform,
    input_only_transform=input_only_transform,
    target_only_transform=target_only_transform,
)

val_dataset = MapDataset(
    root_dir=os.path.join(MAPS_ROOT, 'val'),
    both_transform=both_transform,
    input_only_transform=input_only_transform,
    target_only_transform=target_only_transform,
)

# Training batches are reshuffled every epoch.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    shuffle=True
)

# Validation batches keep a fixed order.
val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
    shuffle=False
)


# **Create Loss Functions**

In [None]:
# initialize criterion loss
criterion = nn.BCEWithLogitsLoss()
L1_LOSS = nn.L1Loss()


def discriminator_loss(real_preds, fake_preds):
  """Average of the BCE-with-logits losses on real pairs (label 1) and fake pairs (label 0).

  Args:
      real_preds: discriminator logits for (input, real target) pairs.
      fake_preds: discriminator logits for (input, generated) pairs.

  Returns:
      Scalar tensor ``(real_loss + fake_loss) / 2``.
  """
  loss_on_real = criterion(real_preds, torch.ones_like(real_preds))
  loss_on_fake = criterion(fake_preds, torch.zeros_like(fake_preds))
  return (loss_on_real + loss_on_fake)/2


def generator_loss(fake_preds, fake_image, target_image, lamda_l1):
  """Adversarial loss plus a weighted L1 reconstruction loss.

  Args:
      fake_preds: discriminator logits for the generated images.
      fake_image: generator output.
      target_image: ground-truth image.
      lamda_l1: weight multiplying the L1 term.

  Returns:
      Scalar tensor ``BCE(fake_preds, 1) + lamda_l1 * L1(fake_image, target_image)``.
  """
  # The generator is rewarded when the discriminator labels its output as real.
  adversarial_term = criterion(fake_preds, torch.ones_like(fake_preds))
  reconstruction_term = L1_LOSS(fake_image, target_image) * lamda_l1
  return adversarial_term + reconstruction_term

# **Train Model**

In [None]:
import torch.nn.functional as F
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor, ModelCheckpoint
from torchinfo import summary
import torch.optim as optim

In [None]:
class Pix2Pix(pl.LightningModule):
    """LightningModule that trains a generator/discriminator pair with manual optimization.

    The discriminator is updated on every batch. Generator gradients are
    accumulated over ``accumulate_grad_batches`` batches and applied once per
    accumulation window.

    Args:
        gen: generator network; called as ``gen(input_image)``.
        disc: discriminator network; called as ``disc(input_image, image)``.
        lr_gen: Adam learning rate for the generator.
        lr_disc: Adam learning rate for the discriminator.
        lambda_l1: weight of the L1 term passed to ``generator_loss``.
        image_save_dir: directory where sample images are written.
    """
    def __init__(self, gen, disc, lr_gen, lr_disc, lambda_l1,image_save_dir="/kaggle/working/image"):
        super().__init__()
        self.disc = disc
        self.gen = gen
        self.lr_gen = lr_gen
        self.lr_disc = lr_disc
        self.lambda_l1 = lambda_l1
        self.automatic_optimization = False  # Enable manual optimization
        self.accumulate_grad_batches = 4  # Manual gradient accumulation (generator only)
        self.accumulate_steps = 0  # Batches accumulated since the last generator step
        self.image_save_dir = image_save_dir

    def forward(self, x):
        """Generate an image from the input image ``x``."""
        return self.gen(x)

    def configure_optimizers(self):
        """Define optimizers for Generator and Discriminator."""
        opt_gen = optim.Adam(self.gen.parameters(), lr=self.lr_gen, betas=(0.5, 0.999))
        opt_disc = optim.Adam(self.disc.parameters(), lr=self.lr_disc, betas=(0.5, 0.999))
        return [opt_gen, opt_disc]

    def training_step(self, batch, batch_idx):
        """Run one discriminator update and one (accumulated) generator backward pass.

        Returns the un-scaled generator loss for this batch.
        """
        input_image, target_image = batch
        opt_gen, opt_disc = self.optimizers()

        y_fake = self(input_image)

        # ---- Discriminator: updated on every batch ----
        real_preds = self.disc(input_image, target_image)
        # detach() keeps this backward pass from reaching the generator.
        fake_preds = self.disc(input_image, y_fake.detach())
        disc_loss = discriminator_loss(real_preds, fake_preds)
        # zero_grad here also clears the discriminator gradients left behind by
        # the previous generator backward pass (which flows through the discriminator).
        opt_disc.zero_grad()
        self.manual_backward(disc_loss)
        opt_disc.step()

        # ---- Generator: gradients accumulated across batches ----
        fake_preds = self.disc(input_image, y_fake)
        gen_loss = generator_loss(fake_preds, y_fake, target_image, self.lambda_l1)

        # Scale so the summed gradient is the mean over the accumulation window.
        # Gradients are NOT zeroed before each backward — zeroing every batch
        # would throw away everything except the last batch of the window.
        self.manual_backward(gen_loss / self.accumulate_grad_batches)
        self.accumulate_steps += 1

        if self.accumulate_steps >= self.accumulate_grad_batches:
            opt_gen.step()
            opt_gen.zero_grad()  # start the next window from clean gradients
            self.accumulate_steps = 0  # Reset counter

        # Log both losses on every batch so the curves are not tied to the
        # accumulation schedule.
        self.log("gen_loss", gen_loss, prog_bar=True, on_epoch=True)
        self.log("loss_disc", disc_loss, prog_bar=True, logger=True)

        # Save sample images every 10th batch, independent of the accumulation
        # counter (when nested inside the step branch it fired only when the two
        # schedules happened to line up).
        if batch_idx % 10 == 0:
            self.save_generated_images(input_image, y_fake, target_image, batch_idx)

        return gen_loss

    def save_generated_images(self, input_image, generated_image, target_image, batch_idx):
        """Write the input, generated and target batches as PNG grids into ``image_save_dir``.

        Files are named ``input_<batch_idx>.png``, ``generated_<batch_idx>.png`` and
        ``target_<batch_idx>.png``; a later call with the same ``batch_idx`` overwrites them.
        """
        # Make sure the destination exists no matter what the working directory is.
        os.makedirs(self.image_save_dir, exist_ok=True)

        def _to_unit_range(tensor):
            # Map [-1, 1] to [0, 1]; detach from the graph and use float32 so
            # half-precision outputs are saved reliably.
            return (tensor.detach().float() + 1) / 2

        # Save images using torchvision's save_image
        named_batches = (
            ("input", input_image),
            ("generated", generated_image),
            ("target", target_image),
        )
        for prefix, images in named_batches:
            torchvision.utils.save_image(
                _to_unit_range(images),
                os.path.join(self.image_save_dir, f"{prefix}_{batch_idx}.png"),
            )


In [None]:
# Initialize Hyperparameters
# NOTE(review): DEVICE is not referenced anywhere else in the visible notebook.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
# Learning rate handed to Pix2Pix for both the generator and the discriminator.
LEARNING_RATE = 2e-4
# NOTE(review): IMAGE_SIZE is not referenced elsewhere; the resize transform uses the literal 256.
IMAGE_SIZE = 256
# Channel count passed to the Discriminator and Generator constructors.
CHANNELS_IMAGE = 3
# Weight of the L1 term, passed to Pix2Pix as lambda_l1.
L1_LAMDA = 100
# NOTE(review): NUM_EPOCH is not referenced elsewhere; the Trainer is created with max_epochs=100.
NUM_EPOCH = 500


In [None]:
# Build both networks for images with CHANNELS_IMAGE channels.
disc = Discriminator(in_channels=CHANNELS_IMAGE)
gen = Generator(in_channels=CHANNELS_IMAGE)

In [None]:
# Wrap both networks in the LightningModule; generator and discriminator share one learning rate.
model = Pix2Pix(
    gen=gen,
    disc=disc,
    lr_gen=LEARNING_RATE,
    lr_disc=LEARNING_RATE,
    lambda_l1=L1_LAMDA,
)

In [None]:
# Create the directory the model actually writes its sample images to. The relative
# 'image' path only matches the model's default when the working directory is /kaggle/working.
os.makedirs(model.image_save_dir, exist_ok = True)

In [None]:
# Single-device run with 16-bit mixed precision. No gradient-accumulation argument is
# passed to the Trainer; any accumulation is handled inside the LightningModule.
# NOTE(review): max_epochs is the literal 100 while NUM_EPOCH = 500 is defined in the
# hyperparameter cell and never used — confirm which value is intended.
trainer = pl.Trainer(
    devices=1,  # or 2 for multiple T4 GPUs
    max_epochs=100,
    precision='16-mixed',
)  # Adjust max_epochs as needed
# Only the training loader is passed; val_loader is built earlier but not used here.
trainer.fit(model, train_loader)

2025-05-03 07:11:24.432817: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746256284.626989      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746256284.685031      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Training: |          | 0/? [00:00<?, ?it/s]