In [2]:
# ==============================================================
# Task-04: Image-to-Image Translation with cGAN (pix2pix)
# Dataset: Facades (edges ↔ photos)
# ==============================================================
# Clean fixed version (Colab-ready)
# ==============================================================

import os, time
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.utils import make_grid, save_image
from torchvision.datasets.utils import download_and_extract_archive

# ---------------------------
# Config
# ---------------------------
ROOT = Path("/content/pix2pix_facades")
DATA_DIR = ROOT / "data"
SAMPLES_DIR = ROOT / "samples"
CKPT_DIR = ROOT / "checkpoints"
for d in [ROOT, DATA_DIR, SAMPLES_DIR, CKPT_DIR]:
    d.mkdir(parents=True, exist_ok=True)

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", DEVICE)

IMG_SIZE = 256
BATCH_SIZE = 4
LR = 2e-4
BETA1, BETA2 = 0.5, 0.999
EPOCHS = 20   # increase for better results

# ---------------------------
# Download facades dataset
# ---------------------------
url = "http://efrosgans.eecs.berkeley.edu/pix2pix/datasets/facades.tar.gz"
download_and_extract_archive(url, download_root=str(DATA_DIR))
print("Facades dataset downloaded!")

# ---------------------------
# Custom Dataset
# ---------------------------
class FacadesDataset(torch.utils.data.Dataset):
    def __init__(self, root, split="train"):
        self.root = Path(root)/"facades"/split
        self.files = list(self.root.glob("*.jpg"))

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        img = plt.imread(self.files[idx])
        if img.dtype == np.uint8:
            img = img.astype(np.float32) / 255.0
        w = img.shape[1]//2
        A = img[:, :w, :]
        B = img[:, w:, :]
        A = torch.from_numpy(A).permute(2,0,1)*2 - 1
        B = torch.from_numpy(B).permute(2,0,1)*2 - 1
        return A, B

train_ds = FacadesDataset(DATA_DIR, "train")
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)

# ---------------------------
# Generator (U-Net)
# ---------------------------
class UNetGenerator(nn.Module):
    def __init__(self, in_ch=3, out_ch=3):
        super().__init__()
        def down_block(in_f, out_f, bn=True):
            layers = [nn.Conv2d(in_f, out_f, 4, 2, 1, bias=False)]
            if bn: layers.append(nn.BatchNorm2d(out_f))
            layers.append(nn.LeakyReLU(0.2, inplace=True))
            return nn.Sequential(*layers)

        def up_block(in_f, out_f, dropout=False):
            layers = [nn.ConvTranspose2d(in_f, out_f, 4, 2, 1, bias=False),
                      nn.BatchNorm2d(out_f),
                      nn.ReLU(inplace=True)]
            if dropout: layers.append(nn.Dropout(0.5))
            return nn.Sequential(*layers)

        self.down1 = down_block(in_ch,64,bn=False)
        self.down2 = down_block(64,128)
        self.down3 = down_block(128,256)
        self.down4 = down_block(256,512)
        self.down5 = down_block(512,512)
        self.down6 = down_block(512,512)
        self.down7 = down_block(512,512)
        self.down8 = down_block(512,512,bn=False)

        self.up1 = up_block(512,512,dropout=True)
        self.up2 = up_block(1024,512,dropout=True)
        self.up3 = up_block(1024,512,dropout=True)
        self.up4 = up_block(1024,512)
        self.up5 = up_block(1024,256)
        self.up6 = up_block(512,128)
        self.up7 = up_block(256,64)
        self.up8 = nn.Sequential(
            nn.ConvTranspose2d(128,out_ch,4,2,1),
            nn.Tanh()
        )

    def forward(self,x):
        d1 = self.down1(x)
        d2 = self.down2(d1)
        d3 = self.down3(d2)
        d4 = self.down4(d3)
        d5 = self.down5(d4)
        d6 = self.down6(d5)
        d7 = self.down7(d6)
        d8 = self.down8(d7)

        u1 = self.up1(d8)
        u2 = self.up2(torch.cat([u1,d7],1))
        u3 = self.up3(torch.cat([u2,d6],1))
        u4 = self.up4(torch.cat([u3,d5],1))
        u5 = self.up5(torch.cat([u4,d4],1))
        u6 = self.up6(torch.cat([u5,d3],1))
        u7 = self.up7(torch.cat([u6,d2],1))
        out = self.up8(torch.cat([u7,d1],1))
        return out

# ---------------------------
# Discriminator (PatchGAN)
# ---------------------------
class PatchDiscriminator(nn.Module):
    def __init__(self, in_ch=3, out_ch=3):
        super().__init__()
        def block(in_f,out_f,stride=2,bn=True):
            layers=[nn.Conv2d(in_f,out_f,4,stride,1,bias=False)]
            if bn: layers.append(nn.BatchNorm2d(out_f))
            layers.append(nn.LeakyReLU(0.2,inplace=True))
            return layers

        self.model=nn.Sequential(
            *block(in_ch+out_ch,64,bn=False),
            *block(64,128),
            *block(128,256),
            *block(256,512,stride=1),
            nn.Conv2d(512,1,4,1,1)
        )

    def forward(self,x,y):
        return self.model(torch.cat([x,y],1))

# ---------------------------
# Training
# ---------------------------
G=UNetGenerator().to(DEVICE)
D=PatchDiscriminator().to(DEVICE)

criterion_GAN = nn.BCEWithLogitsLoss()
criterion_L1 = nn.L1Loss()

opt_G = optim.Adam(G.parameters(), lr=LR, betas=(BETA1,BETA2))
opt_D = optim.Adam(D.parameters(), lr=LR, betas=(BETA1,BETA2))

for epoch in range(1,EPOCHS+1):
    for i,(A,B) in enumerate(train_loader):
        A,B = A.to(DEVICE), B.to(DEVICE)
        valid = torch.ones((A.size(0),1,30,30), device=DEVICE)
        fake  = torch.zeros((A.size(0),1,30,30), device=DEVICE)

        # --- Train G ---
        opt_G.zero_grad()
        fake_B = G(A)
        pred_fake = D(A,fake_B)
        loss_GAN = criterion_GAN(pred_fake, valid)
        loss_L1 = criterion_L1(fake_B,B)*100
        loss_G = loss_GAN+loss_L1
        loss_G.backward()
        opt_G.step()

        # --- Train D ---
        opt_D.zero_grad()
        pred_real = D(A,B)
        loss_real = criterion_GAN(pred_real, valid)
        pred_fake = D(A,fake_B.detach())
        loss_fake = criterion_GAN(pred_fake,fake)
        loss_D = 0.5*(loss_real+loss_fake)
        loss_D.backward()
        opt_D.step()

        if i%50==0:
            print(f"Epoch[{epoch}/{EPOCHS}] Batch[{i}/{len(train_loader)}] "
                  f"Loss_D:{loss_D.item():.4f} Loss_G:{loss_G.item():.4f} "
                  f"L1:{loss_L1.item():.4f}")

    # save sample
    with torch.no_grad():
        fake_B = G(A[:4])
        grid = make_grid(torch.cat([A[:4],B[:4],fake_B],0),nrow=4,normalize=True,scale_each=True)
        save_image(grid, SAMPLES_DIR/f"epoch_{epoch}.png")
        print("Saved sample at", SAMPLES_DIR/f"epoch_{epoch}.png")

    torch.save(G.state_dict(), CKPT_DIR/f"G_epoch{epoch}.pt")
    torch.save(D.state_dict(), CKPT_DIR/f"D_epoch{epoch}.pt")

print("Training finished. Check samples folder.")


Using device: cuda


100%|██████████| 30.2M/30.2M [00:04<00:00, 6.33MB/s]


Facades dataset downloaded!
Epoch[1/20] Batch[0/100] Loss_D:0.7179 Loss_G:85.8035 L1:84.9664
Epoch[1/20] Batch[50/100] Loss_D:0.0373 Loss_G:41.8899 L1:38.2575
Saved sample at /content/pix2pix_facades/samples/epoch_1.png
Epoch[2/20] Batch[0/100] Loss_D:0.4793 Loss_G:33.4892 L1:32.5566
Epoch[2/20] Batch[50/100] Loss_D:0.2161 Loss_G:41.1568 L1:39.1271
Saved sample at /content/pix2pix_facades/samples/epoch_2.png
Epoch[3/20] Batch[0/100] Loss_D:0.3443 Loss_G:39.1353 L1:37.7586
Epoch[3/20] Batch[50/100] Loss_D:1.0354 Loss_G:44.2981 L1:40.3535
Saved sample at /content/pix2pix_facades/samples/epoch_3.png
Epoch[4/20] Batch[0/100] Loss_D:0.0994 Loss_G:40.3026 L1:38.3140
Epoch[4/20] Batch[50/100] Loss_D:0.1155 Loss_G:40.5650 L1:37.8857
Saved sample at /content/pix2pix_facades/samples/epoch_4.png
Epoch[5/20] Batch[0/100] Loss_D:0.8724 Loss_G:28.9808 L1:25.9353
Epoch[5/20] Batch[50/100] Loss_D:0.4171 Loss_G:39.8930 L1:36.4657
Saved sample at /content/pix2pix_facades/samples/epoch_5.png
Epoch[6/20] 

In [4]:
# 1. Download and unzip the Facades dataset
!wget http://efrosgans.eecs.berkeley.edu/pix2pix/datasets/facades.tar.gz
!mkdir -p /content/pix2pix_facades/data
!tar -xvzf facades.tar.gz -C /content/pix2pix_facades/data

# 2. Check extracted files
!ls /content/pix2pix_facades/data/facades/train | head


--2025-08-21 03:21:47--  http://efrosgans.eecs.berkeley.edu/pix2pix/datasets/facades.tar.gz
Resolving efrosgans.eecs.berkeley.edu (efrosgans.eecs.berkeley.edu)... 128.32.244.190
Connecting to efrosgans.eecs.berkeley.edu (efrosgans.eecs.berkeley.edu)|128.32.244.190|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 30168306 (29M) [application/x-gzip]
Saving to: ‘facades.tar.gz’


2025-08-21 03:21:55 (3.52 MB/s) - ‘facades.tar.gz’ saved [30168306/30168306]

facades/
facades/test/
facades/test/27.jpg
facades/test/5.jpg
facades/test/72.jpg
facades/test/1.jpg
facades/test/10.jpg
facades/test/100.jpg
facades/test/101.jpg
facades/test/102.jpg
facades/test/103.jpg
facades/test/104.jpg
facades/test/105.jpg
facades/test/106.jpg
facades/test/11.jpg
facades/test/12.jpg
facades/test/13.jpg
facades/test/14.jpg
facades/test/15.jpg
facades/test/16.jpg
facades/test/17.jpg
facades/test/18.jpg
facades/test/19.jpg
facades/test/2.jpg
facades/test/20.jpg
facades/test/21.jpg
facades/tes

In [8]:
import torchvision.datasets as dset
import torchvision.transforms as transforms

DATA_DIR = "/content/pix2pix_facades/data/facades"

transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

dataset = dset.ImageFolder(
    root=DATA_DIR,
    transform=transform
)
