In [None]:
import torch

In [8]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
torch.cuda.is_available()

True

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

## Question 4

In [None]:
import os
import itertools
import numpy as np
import matplotlib.pyplot as plt
import cv2
from PIL import Image
from pathlib import Path
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.utils import save_image

In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Using device: {device}')

# Training split: sketches and photos live in sibling sub-folders.
train_folder = 'person-face-sketches/train'
train_sketches_folder, train_photos_folder = (
    os.path.join(train_folder, sub) for sub in ('sketches', 'photos')
)

# Test split, same layout as the training split.
test_folder = 'person-face-sketches/test'
test_sketches_folder, test_photos_folder = (
    os.path.join(test_folder, sub) for sub in ('sketches', 'photos')
)

In [None]:
class ImageDataset(Dataset):
    """Sketch/photo dataset read from two folders.

    File names in each folder are sorted; item ``idx`` combines the sketch and
    the photo found at that position. The dataset is as long as the larger
    folder, and the shorter listing wraps around via the modulo in
    ``__getitem__``.

    Args:
        sketches_dir: folder containing sketch images.
        photos_dir: folder containing photo images.
        transform: optional callable applied to each PIL image.
    """

    def __init__(self, sketches_dir, photos_dir, transform=None):
        self.sketches_dir, self.photos_dir = Path(sketches_dir), Path(photos_dir)
        self.sketches = sorted(os.listdir(sketches_dir))
        self.photos = sorted(os.listdir(photos_dir))
        self.transform = transform

    def __len__(self):
        n_sketches, n_photos = len(self.sketches), len(self.photos)
        return n_sketches if n_sketches >= n_photos else n_photos

    def __getitem__(self, idx):
        # Wrap the index independently for each folder.
        sketch_name = self.sketches[idx % len(self.sketches)]
        photo_name = self.photos[idx % len(self.photos)]

        # Force three channels regardless of how the file was stored.
        sketch = Image.open(self.sketches_dir / sketch_name).convert('RGB')
        photo = Image.open(self.photos_dir / photo_name).convert('RGB')

        if not self.transform:
            return {'sketch': sketch, 'photo': photo}

        return {'sketch': self.transform(sketch), 'photo': self.transform(photo)}

In [None]:
# Resize to 256x256, convert to a [0, 1] tensor, then map every RGB channel to
# [-1, 1]. The per-channel tuples are numerically identical to the single-value
# form and match the transform used in the Question 3 cell.
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

batch_size = 1
# os.cpu_count() returns None when the count cannot be determined; DataLoader
# needs an integer, so fall back to loading in the main process.
num_workers = os.cpu_count() or 0

train_dataset = ImageDataset(train_sketches_folder, train_photos_folder, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

# The test loader keeps file order (no shuffling) and yields one pair at a time.
test_dataset = ImageDataset(test_sketches_folder, test_photos_folder, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=num_workers)

In [None]:
import torch
import torch.nn as nn

class ResidualBlock(nn.Module):
    """Conv -> BatchNorm -> ReLU -> Conv -> BatchNorm with an identity skip.

    The block keeps the channel count and spatial size of its input, so any
    number of them can be chained inside ``nn.Sequential``.

    Args:
        filters: number of input (and output) channels.
    """

    def __init__(self, filters):
        super().__init__()
        # NOTE(review): the rest of the generator uses InstanceNorm2d; the
        # BatchNorm2d layers are kept from the original helper - confirm intent.
        self.body = nn.Sequential(
            nn.Conv2d(filters, filters, kernel_size=3, padding=1),
            nn.BatchNorm2d(filters),
            nn.ReLU(inplace=True),
            nn.Conv2d(filters, filters, kernel_size=3, padding=1),
            nn.BatchNorm2d(filters),
        )

    def forward(self, x):
        return x + self.body(x)


def residual_block(x, filters):
    """Apply a freshly initialised residual block to ``x``.

    Kept for backward compatibility with the functional call style. The layers
    are created on every call and are not registered on any module, so they
    are never trained; use ``ResidualBlock`` inside a model instead.

    Args:
        x: input tensor with ``filters`` channels.
        filters: channel count of ``x``.

    Returns:
        Tensor with the same shape as ``x``.
    """
    block = ResidualBlock(filters).to(x.device)
    return block(x)


class Generator(nn.Module):
    """Encoder / residual / decoder image-to-image generator.

    Two stride-2 convolutions downsample by 4, ``n_blocks`` residual blocks run
    at that resolution, and two stride-2 transposed convolutions upsample by 4.
    The output therefore has the input's height/width only when both are
    multiples of 4. The final ``Tanh`` bounds the output to [-1, 1].

    Args:
        input_nc: channels of the input image.
        output_nc: channels of the generated image.
        ngf: channel width after the first convolution.
        n_blocks: number of residual blocks in the bottleneck.
    """

    def __init__(self, input_nc=3, output_nc=3, ngf=64, n_blocks=9):
        super(Generator, self).__init__()
        self.initial = nn.Sequential(
            nn.Conv2d(input_nc, ngf, kernel_size=7, padding=3),
            nn.InstanceNorm2d(ngf),
            nn.ReLU(inplace=True)
        )
        self.down1 = nn.Sequential(
            nn.Conv2d(ngf, ngf * 2, kernel_size=3, stride=2, padding=1),
            nn.InstanceNorm2d(ngf * 2),
            nn.ReLU(inplace=True)
        )
        self.down2 = nn.Sequential(
            nn.Conv2d(ngf * 2, ngf * 4, kernel_size=3, stride=2, padding=1),
            nn.InstanceNorm2d(ngf * 4),
            nn.ReLU(inplace=True)
        )
        # nn.Sequential needs nn.Module instances. The previous call
        # residual_block(ngf*4) passed a single argument to a two-argument
        # function and raised TypeError before the model could be built.
        self.res_blocks = nn.Sequential(*[ResidualBlock(ngf * 4) for _ in range(n_blocks)])
        self.up1 = nn.Sequential(
            nn.ConvTranspose2d(ngf * 4, ngf * 2, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.InstanceNorm2d(ngf * 2),
            nn.ReLU(inplace=True)
        )
        self.up2 = nn.Sequential(
            nn.ConvTranspose2d(ngf * 2, ngf, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.InstanceNorm2d(ngf),
            nn.ReLU(inplace=True)
        )
        self.final = nn.Sequential(
            nn.Conv2d(ngf, output_nc, kernel_size=7, padding=3),
            nn.Tanh()
        )

    def forward(self, x):
        x = self.initial(x)
        x = self.down1(x)
        x = self.down2(x)
        x = self.res_blocks(x)
        x = self.up1(x)
        x = self.up2(x)
        x = self.final(x)
        return x


class Discriminator(nn.Module):
    """Fully convolutional discriminator.

    Four stride-2 convolutions (channel width doubling from ``ndf`` to
    ``ndf * 8``) are followed by a single-channel convolution and a sigmoid,
    so the output is a spatial map of scores in [0, 1] rather than one scalar
    per image.

    Args:
        input_nc: channels of the input image.
        ndf: channel width after the first convolution.
    """

    def __init__(self, input_nc=3, ndf=64):
        super().__init__()
        # First stage has no normalisation layer.
        stages = [
            nn.Conv2d(input_nc, ndf, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2, inplace=True),
        ]
        # Three further downsampling stages, each doubling the channel count.
        width = ndf
        for _ in range(3):
            stages += [
                nn.Conv2d(width, width * 2, kernel_size=4, stride=2, padding=1),
                nn.BatchNorm2d(width * 2),
                nn.LeakyReLU(0.2, inplace=True),
            ]
            width *= 2
        # Score head: one output channel squashed to [0, 1].
        stages += [
            nn.Conv2d(width, 1, kernel_size=4, padding=1),
            nn.Sigmoid(),
        ]
        self.model = nn.Sequential(*stages)

    def forward(self, x):
        scores = self.model(x)
        return scores



In [None]:
# Two generators (one per translation direction) and one discriminator per domain.
G_A2B, G_B2A = Generator().to(device), Generator().to(device)
D_A, D_B = Discriminator().to(device), Discriminator().to(device)

# Squared-error adversarial loss; L1 for the cycle and identity terms.
criterion_GAN = nn.MSELoss()
criterion_cycle = nn.L1Loss()
criterion_identity = nn.L1Loss()

lr, beta1 = 0.0002, 0.5
# Both generators share a single optimizer; each discriminator has its own.
optimizer_G = optim.Adam(
    itertools.chain(G_A2B.parameters(), G_B2A.parameters()),
    lr=lr,
    betas=(beta1, 0.999),
)
optimizer_D_A, optimizer_D_B = (
    optim.Adam(net.parameters(), lr=lr, betas=(beta1, 0.999)) for net in (D_A, D_B)
)

n_epochs, decay_start_epoch = 10, 100
# The multiplier is constant 1.0, so the learning rate never changes;
# decay_start_epoch is not referenced by these schedules.
scheduler_G, scheduler_D_A, scheduler_D_B = (
    optim.lr_scheduler.LambdaLR(opt, lr_lambda=lambda epoch: 1.0)
    for opt in (optimizer_G, optimizer_D_A, optimizer_D_B)
)

In [None]:
class ReplayBuffer:
    """History pool of previously generated images.

    While the pool has room, every incoming image is stored and returned
    unchanged. Once it is full, each incoming image is either swapped with a
    randomly chosen stored image (the stored one is returned) or passed
    straight through, with equal probability.

    Stored and returned tensors are detached from the autograd graph so the
    pool never keeps a generator's computation graph alive.

    Args:
        max_size: maximum number of images kept; ``<= 0`` disables the pool.
    """

    def __init__(self, max_size=50):
        self.max_size = max_size
        self.data = []

    def push_and_pop(self, data):
        """Insert a batch and return a batch of the same size drawn from the pool.

        Args:
            data: tensor whose first dimension indexes images.

        Returns:
            Detached tensor with the same shape as ``data``.
        """
        data = data.detach()
        # A disabled pool or an empty batch is a pass-through. Previously these
        # raised in np.random.randint / torch.cat respectively.
        if self.max_size <= 0 or data.shape[0] == 0:
            return data

        to_return = []
        for element in data:
            element = torch.unsqueeze(element, 0)
            if len(self.data) < self.max_size:
                self.data.append(element)
                to_return.append(element)
            elif np.random.uniform(0, 1) > 0.5:
                # Hand back an older image and keep the new one in its slot.
                idx = np.random.randint(0, self.max_size)
                to_return.append(self.data[idx].clone())
                self.data[idx] = element
            else:
                to_return.append(element)
        return torch.cat(to_return)

In [None]:
# One history pool per generated domain.
fake_A_buffer, fake_B_buffer = ReplayBuffer(), ReplayBuffer()

# Scalar targets for "real" and "fake" predictions.
real_label, fake_label = 1.0, 0.0

In [None]:
def save_checkpoint(epoch):
    """Write the full training state to ``checkpoint_epoch_{epoch}.pth``.

    The file holds the state dicts of both generators, both discriminators,
    their optimizers and schedulers, plus the epoch number so a resumed run
    knows where to continue.

    Args:
        epoch: epoch number, stored in the file and used in the file name.
    """
    torch.save({
        'epoch': epoch,
        'G_A2B_state_dict': G_A2B.state_dict(),
        'G_B2A_state_dict': G_B2A.state_dict(),
        'D_A_state_dict': D_A.state_dict(),
        'D_B_state_dict': D_B.state_dict(),
        'optimizer_G_state_dict': optimizer_G.state_dict(),
        'optimizer_D_A_state_dict': optimizer_D_A.state_dict(),
        'optimizer_D_B_state_dict': optimizer_D_B.state_dict(),
        'scheduler_G_state_dict': scheduler_G.state_dict(),
        'scheduler_D_A_state_dict': scheduler_D_A.state_dict(),
        'scheduler_D_B_state_dict': scheduler_D_B.state_dict(),
    }, f'checkpoint_epoch_{epoch}.pth')

In [12]:
os.makedirs('samples', exist_ok=True)

for epoch in range(1, n_epochs + 1):
    for i, batch in enumerate(train_loader):
        real_A = batch['photo'].to(device)
        real_B = batch['sketch'].to(device)

        # ---- Generators ----
        fake_B = G_A2B(real_A)
        cycle_A = G_B2A(fake_B)
        fake_A = G_B2A(real_B)
        cycle_B = G_A2B(fake_A)

        # Identity loss: a generator fed an image from its target domain
        # should return it unchanged.
        loss_identity_B = criterion_identity(G_A2B(real_B), real_B) * 5.0
        loss_identity_A = criterion_identity(G_B2A(real_A), real_A) * 5.0

        # GAN loss: each discriminator is evaluated once and the target is
        # shaped from that prediction, instead of running a second forward
        # pass only to size the target tensor.
        pred_fake_B = D_B(fake_B)
        pred_fake_A = D_A(fake_A)
        loss_GAN_A2B = criterion_GAN(pred_fake_B, torch.ones_like(pred_fake_B))
        loss_GAN_B2A = criterion_GAN(pred_fake_A, torch.ones_like(pred_fake_A))

        # Cycle consistency loss
        loss_cycle_A = criterion_cycle(cycle_A, real_A) * 10.0
        loss_cycle_B = criterion_cycle(cycle_B, real_B) * 10.0

        # Total generators loss
        loss_G = loss_GAN_A2B + loss_GAN_B2A + loss_cycle_A + loss_cycle_B + loss_identity_A + loss_identity_B
        optimizer_G.zero_grad()
        loss_G.backward()
        optimizer_G.step()

        # ---- Discriminator A ----
        # Fakes are detached before entering the history pool so neither the
        # pool nor the discriminator update touches the generator graph.
        pred_real_A = D_A(real_A)
        pred_pool_A = D_A(fake_A_buffer.push_and_pop(fake_A.detach()))
        loss_D_A_total = (criterion_GAN(pred_real_A, torch.ones_like(pred_real_A)) +
                          criterion_GAN(pred_pool_A, torch.zeros_like(pred_pool_A))) * 0.5
        # zero_grad also clears the gradients the generator backward pass left
        # on the discriminator parameters.
        optimizer_D_A.zero_grad()
        loss_D_A_total.backward()
        optimizer_D_A.step()

        # ---- Discriminator B ----
        pred_real_B = D_B(real_B)
        pred_pool_B = D_B(fake_B_buffer.push_and_pop(fake_B.detach()))
        loss_D_B_total = (criterion_GAN(pred_real_B, torch.ones_like(pred_real_B)) +
                          criterion_GAN(pred_pool_B, torch.zeros_like(pred_pool_B))) * 0.5
        optimizer_D_B.zero_grad()
        loss_D_B_total.backward()
        optimizer_D_B.step()

        if i % 100 == 0:
            print(f"Epoch [{epoch}/{n_epochs}] Batch [{i}/{len(train_loader)}] "
                  f"Loss_D_A: {loss_D_A_total.item():.4f} Loss_D_B: {loss_D_B_total.item():.4f} "
                  f"Loss_G: {loss_G.item():.4f}")

    # Advance the schedules once per epoch so their saved state follows training.
    scheduler_G.step()
    scheduler_D_A.step()
    scheduler_D_B.step()

    save_checkpoint(epoch)

    with torch.no_grad():
        # One loader iterator per epoch; the original built two, each
        # starting its own set of worker processes.
        sample_batch = next(iter(train_loader))
        sample_real_A = sample_batch['photo'].to(device)
        sample_real_B = sample_batch['sketch'].to(device)
        fake_B = G_A2B(sample_real_A)
        save_image(fake_B, f'samples/fake_B_epoch_{epoch}.png', normalize=True)
        fake_A = G_B2A(sample_real_B)
        save_image(fake_A, f'samples/fake_A_epoch_{epoch}.png', normalize=True)

def test():
    """Translate every test pair in both directions and save the results.

    Both generators are switched to eval mode. For each batch ``i`` of
    ``test_loader`` the photo is passed through ``G_A2B`` and the sketch
    through ``G_B2A``; the outputs are written to
    ``test_results/fake_B_{i}.png`` and ``test_results/fake_A_{i}.png``.
    """
    for net in (G_A2B, G_B2A):
        net.eval()
    os.makedirs('test_results', exist_ok=True)

    with torch.no_grad():
        for i, batch in enumerate(test_loader):
            photo = batch['photo'].to(device)
            sketch = batch['sketch'].to(device)

            generated_sketch = G_A2B(photo)
            generated_photo = G_B2A(sketch)

            save_image(generated_sketch, f'test_results/fake_B_{i}.png', normalize=True)
            save_image(generated_photo, f'test_results/fake_A_{i}.png', normalize=True)
    print("Testing completed. Generated images are saved in 'test_results' folder.")

test()


Epoch [1/10] Batch [0/20655] Loss_D_A: 0.0 Loss_D_B: 0.0 Loss_G: 0.0
Epoch [1/10] Batch [0/20655] Loss_D_A: 0.756834089756012 Loss_D_B: 0.5830504894256592 Loss_G: 0.2680385112762451
Epoch [1/10] Batch [100/20655] Loss_D_A: 0.8872256278991699 Loss_D_B: 0.6495956182479858 Loss_G: 0.341248095035553
Epoch [1/10] Batch [200/20655] Loss_D_A: 0.3654525876045227 Loss_D_B: 0.8645493984222412 Loss_G: 0.15156441926956177
Epoch [1/10] Batch [300/20655] Loss_D_A: 0.6661231517791748 Loss_D_B: 0.5995445251464844 Loss_G: 0.3765715956687927
Epoch [1/10] Batch [400/20655] Loss_D_A: 0.3590942621231079 Loss_D_B: 0.8491266965866089 Loss_G: 0.583227813243866
Epoch [1/10] Batch [500/20655] Loss_D_A: 0.6020272970199585 Loss_D_B: 0.9163699746131897 Loss_G: 0.6042278409004211
Epoch [1/10] Batch [600/20655] Loss_D_A: 0.5524541139602661 Loss_D_B: 0.8228917121887207 Loss_G: 0.1271839737892151
Epoch [1/10] Batch [700/20655] Loss_D_A: 0.17287975549697876 Loss_D_B: 0.48835867643356323 Loss_G: 0.23472917079925537
Epoc

# residual block

In [None]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.1.0-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0 (from gradio)
  Downloading fastapi-0.115.2-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.4.0 (from gradio)
  Downloading gradio_client-1.4.0-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting huggingface-hub>=0.25.1 (from gradio)
  Downloading huggingface_hub-0.25.2-py3-none-any.whl.metadata (13 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata

In [None]:
import gradio as gr
import os
from google.colab import drive
from PIL import Image

# Mount Google Drive
drive.mount('/content/drive', force_remount=True)

# Path to save images
drive_folder = '/content/drive/MyDrive/person-face-sketches/test'

# Create the folder if it is missing; exist_ok avoids the check-then-create race.
os.makedirs(drive_folder, exist_ok=True)

# Function to save the uploaded image
# NOTE(review): this name shadows the `save_image` imported from
# torchvision.utils in earlier cells; re-run those imports before training again.
def save_image(image):
    """Save an uploaded image to ``drive_folder`` as ``uploaded_image.png``.

    Args:
        image: image array from the Gradio input, or ``None`` when nothing
            was uploaded.

    Returns:
        A status message for the Gradio text output.
    """
    # `if image:` raises ValueError for a multi-element NumPy array because its
    # truth value is ambiguous, so test for None explicitly.
    if image is None:
        return "No image received."
    img = Image.fromarray(image)
    file_path = os.path.join(drive_folder, 'uploaded_image.png')
    img.save(file_path)
    return f"Image saved to {file_path}"

# Gradio app: an image input wired to save_image, with the status message shown as text.
# live=True re-runs the function whenever the input changes.
interface = gr.Interface(
    live=True,
    outputs="text",
    inputs=gr.Image(label="Upload or Take Photo", type="numpy"),
    fn=save_image,
)

# Start serving the app
interface.launch()

Mounted at /content/drive
Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://bd87ae122d0e959083.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




## Question 2

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset

# Hyperparameters
batch_size, lr = 128, 0.0002
z_dim, img_size, channels_img = 100, 32, 3
num_epochs = 15

# Preprocessing: resize, convert to tensor, then scale to [-1, 1].
transform = transforms.Compose([
    transforms.Resize(img_size),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# CIFAR-10 training split, restricted to cats (class 3) and dogs (class 5).
cifar10 = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
indices = [pos for pos, cls in enumerate(cifar10.targets) if cls == 3 or cls == 5]
subset_cifar10 = Subset(cifar10, indices)

dataloader = DataLoader(subset_cifar10, shuffle=True, batch_size=batch_size)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:04<00:00, 42478080.78it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data


In [4]:

# Generator model
class Generator(nn.Module):
    """Transposed-convolution generator mapping latent vectors to images.

    A ``(N, z_dim, 1, 1)`` input is upsampled 1 -> 4 -> 8 -> 16 -> 32, and the
    final ``Tanh`` bounds pixel values to [-1, 1].

    Args:
        z_dim: number of latent channels.
        img_channels: channels of the generated image.
    """

    def __init__(self, z_dim, img_channels):
        super().__init__()
        # (in_channels, out_channels, stride, padding) for each hidden stage.
        hidden_specs = [(z_dim, 512, 1, 0), (512, 256, 2, 1), (256, 128, 2, 1)]
        layers = []
        for c_in, c_out, stride, pad in hidden_specs:
            layers.extend([
                nn.ConvTranspose2d(c_in, c_out, kernel_size=4, stride=stride, padding=pad),
                nn.BatchNorm2d(c_out),
                nn.ReLU(True),
            ])
        # Output stage: no normalisation, Tanh activation.
        layers.append(nn.ConvTranspose2d(128, img_channels, kernel_size=4, stride=2, padding=1))
        layers.append(nn.Tanh())
        self.net = nn.Sequential(*layers)

    def forward(self, z):
        image = self.net(z)
        return image

# Discriminator model
class Discriminator(nn.Module):
    """Pairwise discriminator scoring two images together.

    Both images go through the same convolutional feature extractor; the two
    flattened feature vectors are concatenated and mapped to one sigmoid
    score. The linear layer is sized for 32x32 inputs (512 * 2 * 2 features
    per image after four stride-2 convolutions).

    Args:
        img_channels: channels of each input image.
    """

    def __init__(self, img_channels):
        super().__init__()

        def down_stage(c_in, c_out, stride=2):
            # Conv -> BatchNorm -> LeakyReLU; halves the resolution at stride 2.
            return nn.Sequential(
                nn.Conv2d(c_in, c_out, kernel_size=4, stride=stride, padding=1),
                nn.BatchNorm2d(c_out),
                nn.LeakyReLU(0.2, inplace=True)
            )

        channel_plan = [img_channels, 64, 128, 256, 512]
        stages = [
            down_stage(c_in, c_out, stride=2)
            for c_in, c_out in zip(channel_plan[:-1], channel_plan[1:])
        ]
        self.feature_extractor = nn.Sequential(*stages, nn.Flatten())

        # Two feature vectors of 512 * 2 * 2 values each are concatenated.
        self.fc = nn.Sequential(
            nn.Linear(512 * 2 * 2 * 2, 1),
            nn.Sigmoid()
        )

    def forward(self, img1, img2):
        features = [self.feature_extractor(img) for img in (img1, img2)]
        return self.fc(torch.cat(features, dim=1))

# Pick the device, then build both networks on it.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
gen = Generator(z_dim, channels_img).to(device)
disc = Discriminator(channels_img).to(device)


Using device: cuda


In [6]:
# Binary cross-entropy on the discriminator's sigmoid output.
criterion = nn.BCELoss()

# One Adam optimizer per network, sharing the same hyperparameters.
optimizer_gen, optimizer_disc = (
    optim.Adam(net.parameters(), lr=lr, betas=(0.5, 0.999)) for net in (gen, disc)
)

In [7]:
# Training loop
def train():
    """Alternate discriminator and generator updates over ``dataloader``.

    The discriminator scores image pairs: (real, real) pairs are labelled 1
    and (fake, real) pairs are labelled 0. The generator is then updated so
    that (fake, real) pairs are scored as 1. Losses and mean scores are
    printed every 100 batches.
    """
    for epoch in range(num_epochs):
        for batch_idx, (real_imgs, _) in enumerate(dataloader):
            real_imgs = real_imgs.to(device)
            # Size of this batch (the last one may be smaller); a distinct
            # name avoids shadowing the global batch_size.
            n = real_imgs.size(0)

            # Labels for real and fake
            real_labels = torch.ones(n, 1, device=device)
            fake_labels = torch.zeros(n, 1, device=device)

            # Train Discriminator
            z = torch.randn(n, z_dim, 1, 1, device=device)
            fake_imgs = gen(z)

            real_similarity = disc(real_imgs, real_imgs)
            # detach(): the discriminator update must not back-propagate
            # through the generator.
            fake_similarity = disc(fake_imgs.detach(), real_imgs)

            loss_disc_real = criterion(real_similarity, real_labels)
            loss_disc_fake = criterion(fake_similarity, fake_labels)
            loss_disc = (loss_disc_real + loss_disc_fake) / 2

            optimizer_disc.zero_grad()
            loss_disc.backward()
            optimizer_disc.step()

            # Train Generator
            z = torch.randn(n, z_dim, 1, 1, device=device)
            fake_imgs = gen(z)
            fake_similarity = disc(fake_imgs, real_imgs)
            loss_gen = criterion(fake_similarity, real_labels)  # We want fake to be similar to real

            optimizer_gen.zero_grad()
            loss_gen.backward()
            optimizer_gen.step()

            # Print losses and similarity scores
            if batch_idx % 100 == 0:
                print(f"Epoch [{epoch+1}/{num_epochs}], Batch [{batch_idx}/{len(dataloader)}]")
                print(f"  Loss D: {loss_disc.item()}, Loss G: {loss_gen.item()}")
                print(f"  Real Similarity Score: {real_similarity.mean().item():.4f}, Fake Similarity Score: {fake_similarity.mean().item():.4f}")

train()


Files already downloaded and verified
Epoch [1/15], Batch [0/79]
  Loss D: 0.7192784547805786, Loss G: 2.7665815353393555
  Real Similarity Score: 0.5680, Fake Similarity Score: 0.0679
Epoch [2/15], Batch [0/79]
  Loss D: 0.007846756838262081, Loss G: 7.438920974731445
  Real Similarity Score: 0.9852, Fake Similarity Score: 0.0008
Epoch [3/15], Batch [0/79]
  Loss D: 0.1496402621269226, Loss G: 4.956753253936768
  Real Similarity Score: 0.9857, Fake Similarity Score: 0.0331
Epoch [4/15], Batch [0/79]
  Loss D: 0.21915686130523682, Loss G: 3.6877284049987793
  Real Similarity Score: 0.8542, Fake Similarity Score: 0.0481
Epoch [5/15], Batch [0/79]
  Loss D: 0.8029453754425049, Loss G: 4.265120983123779
  Real Similarity Score: 0.7195, Fake Similarity Score: 0.0620
Epoch [6/15], Batch [0/79]
  Loss D: 0.26160210371017456, Loss G: 4.040586948394775
  Real Similarity Score: 0.8261, Fake Similarity Score: 0.0679
Epoch [7/15], Batch [0/79]
  Loss D: 0.12147576361894608, Loss G: 4.604707717895

## Question 3

In [15]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torchvision.utils import save_image
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from pathlib import Path

# Use the GPU when available
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Using device: {device}')

# Training data: sketches and photos live in sibling sub-folders.
train_folder = 'drive/MyDrive/person-face-sketches/train'
train_sketches_folder, train_photos_folder = (
    os.path.join(train_folder, sub) for sub in ('sketches', 'photos')
)

# Folder that receives the per-epoch sample images.
sample_folder = 'samples'
os.makedirs(sample_folder, exist_ok=True)

# Custom Dataset
class ImageDataset(Dataset):
    """Sketch/photo dataset capped at ``max_images`` files per folder.

    File names in each folder are sorted and truncated to the first
    ``max_images`` entries; item ``idx`` combines the sketch and photo at that
    position. The dataset length is the size of the shorter listing.

    Args:
        sketches_dir: folder containing sketch images.
        photos_dir: folder containing photo images.
        transform: optional callable applied to each PIL image.
        max_images: maximum number of files used from each folder.
    """

    def __init__(self, sketches_dir, photos_dir, transform=None, max_images=500):
        self.sketches_dir, self.photos_dir = Path(sketches_dir), Path(photos_dir)
        sketch_names = sorted(os.listdir(sketches_dir))
        photo_names = sorted(os.listdir(photos_dir))
        self.sketches = sketch_names[:max_images]
        self.photos = photo_names[:max_images]
        self.transform = transform

    def __len__(self):
        n_sketches, n_photos = len(self.sketches), len(self.photos)
        return n_sketches if n_sketches <= n_photos else n_photos

    def __getitem__(self, idx):
        sketch_name = self.sketches[idx % len(self.sketches)]
        photo_name = self.photos[idx % len(self.photos)]

        # Force three channels regardless of how the file was stored.
        sketch = Image.open(self.sketches_dir / sketch_name).convert('RGB')
        photo = Image.open(self.photos_dir / photo_name).convert('RGB')

        if not self.transform:
            return {'sketch': sketch, 'photo': photo}

        return {'sketch': self.transform(sketch), 'photo': self.transform(photo)}

# Transforms: resize to 256x256, convert to tensor, map each channel to [-1, 1].
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Datasets and Dataloaders
batch_size = 8  # Increased batch size for faster training
# os.cpu_count() returns None when the count cannot be determined; DataLoader
# needs an integer, so fall back to loading in the main process.
num_workers = os.cpu_count() or 0

train_dataset = ImageDataset(train_sketches_folder, train_photos_folder, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

# Conditional generator
class Generator(nn.Module):
    """Small encoder-decoder generator with a conditioning image.

    The input and the condition (3 channels each) are concatenated along the
    channel axis, downsampled twice and upsampled twice, giving a 3-channel
    output in [-1, 1] at the input resolution.
    """

    def __init__(self):
        super().__init__()
        encoder = [
            nn.Conv2d(6, 64, kernel_size=4, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
        ]
        decoder = [
            nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(64, 3, kernel_size=4, stride=2, padding=1),
            nn.Tanh(),
        ]
        self.main = nn.Sequential(*encoder, *decoder)

    def forward(self, x, condition):
        # Stack input and condition along the channel axis.
        return self.main(torch.cat([x, condition], dim=1))

# Conditional discriminator
class Discriminator(nn.Module):
    """Convolutional discriminator over an (image, condition) pair.

    The two 3-channel inputs are concatenated along the channel axis. The
    output is a single-channel map of raw logits (no sigmoid), to be paired
    with a logits-based loss.
    """

    def __init__(self):
        super().__init__()
        layers = []
        layers.append(nn.Conv2d(6, 64, kernel_size=4, stride=2, padding=1))
        layers.append(nn.LeakyReLU(0.2, inplace=True))
        layers.append(nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1))
        layers.append(nn.BatchNorm2d(128))
        layers.append(nn.LeakyReLU(0.2, inplace=True))
        # Logit head: no activation here.
        layers.append(nn.Conv2d(128, 1, kernel_size=4, stride=1, padding=1))
        self.main = nn.Sequential(*layers)

    def forward(self, x, condition):
        pair = torch.cat([x, condition], dim=1)
        return self.main(pair)

# Build both networks on the selected device
G, D = Generator().to(device), Discriminator().to(device)

# The discriminator outputs raw logits, so the sigmoid is folded into the loss.
criterion = nn.BCEWithLogitsLoss()

# One Adam optimizer per network with shared hyperparameters
lr, beta1 = 0.0002, 0.5
optimizer_G, optimizer_D = (
    optim.Adam(net.parameters(), lr=lr, betas=(beta1, 0.999)) for net in (G, D)
)

# Training Loop
n_epochs = 5
for epoch in range(n_epochs):
    for i, batch in enumerate(train_loader):
        real_sketch = batch['sketch'].to(device)
        real_photo = batch['photo'].to(device)

        # Train Discriminator with real data
        D.zero_grad()
        output_real = D(real_photo, real_sketch)

        # Shape the labels from this prediction instead of running an extra
        # discriminator forward pass only to read its output size.
        real_labels = torch.ones_like(output_real)
        fake_labels = torch.zeros_like(output_real)
        loss_D_real = criterion(output_real, real_labels)

        # Train Discriminator with fake data; detach() keeps this update from
        # back-propagating into the generator.
        fake_photo = G(real_sketch, real_sketch)
        output_fake = D(fake_photo.detach(), real_sketch)
        loss_D_fake = criterion(output_fake, fake_labels)

        # Total Discriminator loss
        loss_D = (loss_D_real + loss_D_fake) / 2
        loss_D.backward()
        optimizer_D.step()

        # Train Generator
        G.zero_grad()
        output_fake = D(fake_photo, real_sketch)
        loss_G = criterion(output_fake, real_labels)  # Generator wants output to be classified as real
        loss_G.backward()
        optimizer_G.step()

        if i % 100 == 0:
            print(f"Epoch [{epoch+1}/{n_epochs}], Batch [{i}/{len(train_loader)}], "
                  f"Loss_D: {loss_D.item():.4f}, Loss_G: {loss_G.item():.4f}")

    # Save sample images after each epoch, generated from the last training batch
    with torch.no_grad():
        fake_sample = G(real_sketch, real_sketch)
        save_image(fake_sample, f'{sample_folder}/fake_epoch_{epoch+1}.png', normalize=True)

print("Training complete.")


Using device: cuda
Epoch [1/5], Batch [0/63], Loss_D: 0.7138, Loss_G: 0.7126
Epoch [2/5], Batch [0/63], Loss_D: 0.6436, Loss_G: 0.9134
Epoch [3/5], Batch [0/63], Loss_D: 0.5922, Loss_G: 0.9112
Epoch [4/5], Batch [0/63], Loss_D: 0.5229, Loss_G: 0.9925
Epoch [5/5], Batch [0/63], Loss_D: 0.5257, Loss_G: 0.9960
Training complete.


## Question 1

In [None]:
import os
import cv2
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator

data_dir = r"Signatures"
IMG_WIDTH, IMG_HEIGHT, IMG_CHANNELS = 64, 64, 1

def load_images(data_dir):
    """Load every readable image found one level below ``data_dir``.

    Each sub-folder of ``data_dir`` is scanned. Files are read as grayscale,
    resized to ``(IMG_WIDTH, IMG_HEIGHT)``, scaled to [0, 1] as float32 and
    given a trailing channel axis. Files OpenCV cannot decode are skipped.

    Args:
        data_dir: root folder whose sub-folders contain the images.

    Returns:
        NumPy array stacking all loaded images; empty if none were found.
    """
    images = []
    # Sorted so the order of the result does not depend on the file system.
    for folder in sorted(os.listdir(data_dir)):
        folder_path = os.path.join(data_dir, folder)
        if not os.path.isdir(folder_path):
            continue
        for img_file in sorted(os.listdir(folder_path)):
            img_path = os.path.join(folder_path, img_file)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            # imread returns None instead of raising when a file cannot be
            # decoded; resizing None would crash the whole load.
            if img is None:
                continue
            img = cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT)).astype("float32") / 255.0
            images.append(np.expand_dims(img, axis=-1))
    return np.array(images)

# Load the signature images once; later steps in this cell reuse this array.
images = load_images(data_dir)

# Augmentation settings: small rotations, shifts, zoom and shear.
datagen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    shear_range=0.1,
    fill_mode='nearest',
)
datagen.fit(images)

import matplotlib.pyplot as plt
def show_sample_images(images, num_samples=8):
    """Show the first ``num_samples`` images in a grid four columns wide.

    Fewer panels are drawn when ``images`` holds fewer than ``num_samples``
    entries, and rows are added as needed for larger counts. Nothing is drawn
    for an empty input.

    Args:
        images: indexable collection of arrays that reshape to
            ``(IMG_HEIGHT, IMG_WIDTH)``.
        num_samples: maximum number of images to display.
    """
    count = min(num_samples, len(images))
    if count <= 0:
        return
    cols = 4
    rows = -(-count // cols)  # ceiling division
    # Two inches per row reproduces the original 10x4 figure for eight images.
    plt.figure(figsize=(10, 2 * rows))
    for i in range(count):
        plt.subplot(rows, cols, i + 1)
        # Image arrays are laid out rows first, i.e. (height, width).
        plt.imshow(images[i].reshape(IMG_HEIGHT, IMG_WIDTH), cmap="gray")
        plt.axis("off")
    plt.show()

show_sample_images(images)

import tensorflow as tf
from tensorflow.keras import layers, Model, backend as K

LATENT_DIM = 16

def build_encoder(input_shape=(IMG_WIDTH, IMG_HEIGHT, IMG_CHANNELS), latent_dim=LATENT_DIM):
    """Build the VAE encoder.

    Two stride-2 convolutions and a dense layer feed two linear heads
    (``z_mean`` and ``z_log_var``); a Lambda layer draws ``z`` from them with
    the reparameterisation ``z_mean + exp(0.5 * z_log_var) * epsilon``.

    Args:
        input_shape: shape of one input image.
        latent_dim: size of the latent vector.

    Returns:
        Keras ``Model`` mapping an image to ``[z_mean, z_log_var, z]``.
    """
    def sampling(args):
        mu, log_var = args
        batch = K.shape(mu)[0]
        dim = K.int_shape(mu)[1]
        epsilon = K.random_normal(shape=(batch, dim))
        return mu + K.exp(0.5 * log_var) * epsilon

    inputs = layers.Input(shape=input_shape)
    features = inputs
    for n_filters in (32, 64):
        features = layers.Conv2D(n_filters, (3, 3), activation="relu", strides=2, padding="same")(features)
    features = layers.Flatten()(features)
    features = layers.Dense(128, activation="relu")(features)

    z_mean = layers.Dense(latent_dim)(features)
    z_log_var = layers.Dense(latent_dim)(features)
    z = layers.Lambda(sampling)([z_mean, z_log_var])
    return Model(inputs, [z_mean, z_log_var, z])

def build_decoder(latent_dim=LATENT_DIM):
    """Build the VAE decoder.

    A dense layer expands the latent vector to a 16x16x64 tensor, two
    stride-2 transposed convolutions upsample it, and a final sigmoid layer
    produces a one-channel image.

    Args:
        latent_dim: size of the latent vector.

    Returns:
        Keras ``Model`` mapping a latent vector to an image.
    """
    latent_inputs = layers.Input(shape=(latent_dim,))
    hidden = layers.Dense(16 * 16 * 64, activation="relu")(latent_inputs)
    hidden = layers.Reshape((16, 16, 64))(hidden)
    for n_filters in (64, 32):
        hidden = layers.Conv2DTranspose(n_filters, (3, 3), activation="relu", strides=2, padding="same")(hidden)
    outputs = layers.Conv2DTranspose(1, (3, 3), activation="sigmoid", padding="same")(hidden)
    return Model(latent_inputs, outputs)

class VAE(Model):
    """Variational autoencoder wrapping an encoder and a decoder.

    Args:
        encoder: model returning ``[z_mean, z_log_var, z]``.
        decoder: model mapping ``z`` to a reconstruction.
        beta: weight of the KL term in ``vae_loss``.
    """

    def __init__(self, encoder, decoder, beta=0.001):
        super().__init__()
        self.encoder, self.decoder = encoder, decoder
        self.beta = beta

    def call(self, inputs):
        # Only the sampled latent is decoded.
        _, _, z = self.encoder(inputs)
        return self.decoder(z)

    def vae_loss(self, inputs, outputs):
        """Mean binary cross-entropy plus ``beta`` times the KL divergence.

        The encoder is run again on ``inputs`` to obtain ``z_mean`` and
        ``z_log_var`` for the KL term.
        """
        bce = tf.keras.losses.binary_crossentropy(inputs, outputs)
        reconstruction_loss = tf.reduce_mean(bce)
        z_mean, z_log_var, _ = self.encoder(inputs)
        kl_terms = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
        kl_loss = -0.5 * K.sum(kl_terms, axis=-1)
        return tf.reduce_mean(reconstruction_loss + self.beta * kl_loss)

encoder = build_encoder()
decoder = build_decoder()
vae = VAE(encoder, decoder)

BATCH_SIZE, EPOCHS = 32, 10

def create_tf_dataset(images, batch_size):
    """Return an endlessly repeating, shuffled, batched (image, image) dataset.

    Each element pairs a batch with itself because the autoencoder's target
    is its own input.

    Args:
        images: array of training images.
        batch_size: number of images per batch.
    """
    dataset = tf.data.Dataset.from_tensor_slices((images, images)).shuffle(len(images)).batch(batch_size).prefetch(buffer_size=tf.data.experimental.AUTOTUNE).repeat()
    return dataset

train_dataset = create_tf_dataset(images, BATCH_SIZE)
# At least one step per epoch, even when there are fewer images than one batch.
steps_per_epoch = max(1, len(images) // BATCH_SIZE)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
# Compile once with the intended optimizer; an earlier compile with the default
# 'adam' string was immediately overridden by this one.
vae.compile(optimizer=optimizer, loss=vae.vae_loss)

vae.fit(train_dataset, epochs=EPOCHS, steps_per_epoch=steps_per_epoch)

encoder.save("vae_encoder.h5")
decoder.save("vae_decoder.h5")
# VAE is a subclassed Model, which whole-model HDF5 saving does not support,
# so only its weights are written. The architecture is rebuilt from
# build_encoder / build_decoder when loading.
vae.save_weights("vae_model.weights.h5")

def train_gan(generator, discriminator, gan, dataset, latent_dim, epochs=10000, batch_size=128):
    """Train a GAN with alternating discriminator and generator updates.

    Each iteration draws Gaussian noise, generates a fake batch, samples a
    real batch from ``dataset`` with replacement, trains the discriminator on
    both, then trains the generator through ``gan`` with "real" targets.

    Args:
        generator: model with ``predict(noise)``.
        discriminator: model with ``train_on_batch(x, y)``.
        gan: stacked generator+discriminator model with ``train_on_batch``.
        dataset: array of real images, indexed along axis 0.
        latent_dim: size of the noise vector.
        epochs: number of training iterations (one batch each).
        batch_size: images per batch.

    Returns:
        List with one ``(d_loss, g_loss)`` tuple per iteration.
    """
    history = []
    # Report roughly ten times over the run instead of staying silent.
    log_every = max(1, epochs // 10)
    for epoch in range(epochs):
        noise = np.random.normal(0, 1, size=[batch_size, latent_dim])
        # verbose=0: predict() otherwise prints a progress bar every iteration.
        generated_images = generator.predict(noise, verbose=0)
        idx = np.random.randint(0, dataset.shape[0], batch_size)
        real_images = dataset[idx]
        real_labels = np.ones((batch_size, 1))
        fake_labels = np.zeros((batch_size, 1))
        d_loss_real = discriminator.train_on_batch(real_images, real_labels)
        d_loss_fake = discriminator.train_on_batch(generated_images, fake_labels)
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
        g_loss = gan.train_on_batch(noise, real_labels)
        history.append((d_loss, g_loss))
        if epoch % log_every == 0 or epoch == epochs - 1:
            # train_on_batch may return [loss, metric, ...]; the loss comes first.
            print(f"Iteration [{epoch + 1}/{epochs}] "
                  f"D loss: {np.ravel(d_loss)[0]:.4f} G loss: {np.ravel(g_loss)[0]:.4f}")
    return history

# load_images already returns float32 pixels scaled to [0, 1]; dividing by 255
# a second time squashed the data into [0, 1/255]. Reuse the array loaded above.
signatures_dataset = images
# The train_gan(...) call that used to follow was removed from this cell:
# generator, discriminator and gan are only created in the next cell, which
# makes the same call once they exist.


In [None]:
import tensorflow as tf
from tensorflow.keras import layers, Model

def build_generator(latent_dim):
    """Build the GAN generator.

    A dense layer expands the noise vector to 16x16x64, two stride-2
    transposed convolutions upsample it, and a sigmoid layer emits a
    one-channel image.

    Args:
        latent_dim: size of the input noise vector.

    Returns:
        A Keras ``Sequential`` model.
    """
    model = tf.keras.Sequential()
    model.add(layers.Dense(16 * 16 * 64, activation="relu", input_dim=latent_dim))
    model.add(layers.Reshape((16, 16, 64)))
    for n_filters in (64, 32):
        model.add(layers.Conv2DTranspose(n_filters, (3, 3), activation="relu", strides=2, padding="same"))
    model.add(layers.Conv2DTranspose(1, (3, 3), activation="sigmoid", padding="same"))
    return model

def build_discriminator():
    """Build the GAN discriminator.

    Two stride-2 convolutions are followed by a dense layer and a single
    sigmoid unit giving the probability that the input image is real.

    Returns:
        A Keras ``Sequential`` model taking
        ``(IMG_WIDTH, IMG_HEIGHT, IMG_CHANNELS)`` images.
    """
    image_shape = (IMG_WIDTH, IMG_HEIGHT, IMG_CHANNELS)
    stack = [
        layers.Conv2D(32, (3, 3), activation="relu", strides=2, padding="same", input_shape=image_shape),
        layers.Conv2D(64, (3, 3), activation="relu", strides=2, padding="same"),
        layers.Flatten(),
        layers.Dense(128, activation="relu"),
        layers.Dense(1, activation="sigmoid"),
    ]
    return tf.keras.Sequential(stack)

def build_gan(generator, discriminator, latent_dim=100):
    """Stack generator and discriminator into one model for generator updates.

    The discriminator is marked non-trainable first, so training the stacked
    model only updates the generator.

    Args:
        generator: model mapping noise to images.
        discriminator: model mapping images to a real/fake score.
        latent_dim: size of the generator's noise input. The default of 100
            matches the value that used to be hard-coded here.

    Returns:
        Keras ``Model`` mapping noise to the discriminator's score.
    """
    discriminator.trainable = False
    gan_input = layers.Input(shape=(latent_dim,))
    generated_image = generator(gan_input)
    gan_output = discriminator(generated_image)
    gan = Model(gan_input, gan_output)
    return gan

# Networks
latent_dim = 100
generator, discriminator = build_generator(latent_dim), build_discriminator()

# The discriminator is compiled on its own before build_gan marks it non-trainable.
discriminator.compile(
    loss='binary_crossentropy',
    metrics=['accuracy'],
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5),
)

# Stacked model used for the generator updates
gan = build_gan(generator, discriminator)
gan.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0002, beta_1=0.5),
)

# Run the adversarial training
train_gan(generator, discriminator, gan, signatures_dataset, latent_dim=latent_dim, epochs=150, batch_size=32)
