In [None]:
import os
import sys
import glob
from PIL import Image
import numpy as np
import torch
from torchvision import transforms, models
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn, maskrcnn_resnet50_fpn
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import pandas as pd
from tqdm import tqdm
import time

# Confirm that the import cell ran and report the PyTorch runtime in use.
print("‚úÖ All libraries imported successfully!")
for _label, _value in (
    ("PyTorch version", torch.__version__),
    ("CUDA available", torch.cuda.is_available()),
):
    print(f"{_label}: {_value}")

In [None]:
# ============ KAGGLE PATHS SETUP ============
# Runs either on Kaggle or on a local machine.

# The presence of the Kaggle dataset mount decides which environment we are in.
KAGGLE_INPUT = "/kaggle/input/penfudanped"
KAGGLE_OUTPUT = "/kaggle/working"

if not os.path.exists(KAGGLE_INPUT):
    # Local fallback: outputs are written next to the dataset itself.
    root = r"./PennFudanPed"
    output_dir = root
    print("\nüíª RUNNING LOCAL")
    print(f"Dataset: {root}")
else:
    root, output_dir = KAGGLE_INPUT, KAGGLE_OUTPUT
    print("\nüöÄ RUNNING ON KAGGLE")
    print(f"Input dataset: {root}")
    print(f"Output directory: {output_dir}")

# Dataset inputs (images and their instance masks).
img_dir, mask_dir = (os.path.join(root, sub) for sub in ("PNGImages", "PedMasks"))

# Generated crops live under the writable output location, not the input folder.
crop_dir, pos_dir, neg_dir = (
    os.path.join(output_dir, sub) for sub in ("crops64", "crops64_pos", "crops64_neg")
)

# Create every output directory; existing ones are left untouched.
for _out_dir in (crop_dir, pos_dir, neg_dir, output_dir):
    os.makedirs(_out_dir, exist_ok=True)

print("\n‚úÖ Directories created:")
for _label, _path in (("PNGImages", img_dir), ("PedMasks", mask_dir), ("crops64", crop_dir)):
    print(f"   - {_label}: {_path}")

In [None]:
# ============ GPU SETUP ============
_rule = "=" * 80
print("\n" + _rule)
print("üöÄ GPU SETUP - KAGGLE OPTIMIZATION")
print(_rule)

# Report CUDA availability and the PyTorch build.
print(f"\n1. ‚úÖ CUDA Available: {torch.cuda.is_available()}")
print(f"2. ‚úÖ PyTorch Version: {torch.__version__}")

if not torch.cuda.is_available():
    # No GPU: every later cell runs on the CPU through the same `device` handle.
    print("\n‚ùå GPU NOT FOUND - Using CPU (SLOW)")
    device = torch.device("cpu")
else:
    # Describe GPU 0 (count, name, total memory in units of 1e9 bytes).
    _gpu_name = torch.cuda.get_device_name(0)
    print(f"\n3. GPU Count: {torch.cuda.device_count()}")
    print(f"4. GPU Name: {_gpu_name}")
    gpu_mem = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"5. GPU Memory: {gpu_mem:.2f} GB")

    # Select GPU 0 as the device used by the rest of the notebook.
    device = torch.device("cuda")
    torch.cuda.set_device(0)

    # Release unused cached GPU memory before training starts.
    torch.cuda.empty_cache()
    print(f"\n‚úÖ‚úÖ‚úÖ TRAIN B·∫∞NG GPU: {_gpu_name}")

print(_rule + "\n")

In [None]:
# ============ LOAD TARGET FUNCTION ============
def load_target(mask_p):
    """Build per-instance boxes, labels and binary masks from an annotation mask.

    Pixel value 0 is treated as background; every other distinct value in the
    mask is treated as one object instance.

    Args:
        mask_p: Path of the mask image to read.
            NOTE(review): assumes a single-channel (H, W) integer mask - confirm.

    Returns:
        A tuple ``(boxes, labels, masks)``:
          * boxes  - float32 tensor of shape (N, 4) holding [x1, y1, x2, y2],
            the min/max pixel coordinates of each instance. The shape is
            (0, 4) when the mask contains no instance.
          * labels - int64 tensor of shape (N,), all ones (class 1 = person).
          * masks  - uint8 tensor of shape (N, H, W), one binary mask per instance.
    """
    # Read inside a context manager so the file handle is released promptly.
    with Image.open(mask_p) as mask_img:
        mask = np.array(mask_img)

    # Drop the background id explicitly; slicing off the first unique value would
    # discard a real instance whenever the mask has no background pixel.
    ids = np.unique(mask)
    obj_ids = ids[ids != 0]

    # Broadcast-compare to get one binary layer per instance, then move the
    # instance axis to the front: (H, W, N) -> (N, H, W).
    masks = (mask[..., None] == obj_ids).astype(np.uint8).transpose(2, 0, 1)

    boxes = []
    for m in masks:
        pos = np.argwhere(m)  # (row, col) coordinates of the instance pixels
        y1, x1 = pos.min(0)
        y2, x2 = pos.max(0)
        boxes.append([x1, y1, x2, y2])

    # reshape keeps the (N, 4) layout even when N == 0.
    boxes = torch.as_tensor(np.asarray(boxes, dtype=np.float32).reshape(-1, 4))
    labels = torch.ones((len(boxes),), dtype=torch.int64)  # class=1 (person)
    masks = torch.as_tensor(masks, dtype=torch.uint8)
    return boxes, labels, masks

print("‚úÖ load_target() function defined")

In [None]:
# ============ CREATE 64x64 CROPS ============
print("\n" + "="*80)
print("üì∏ CREATING 64x64 CROPS FROM DATASET")
print("="*80)

to_tensor = transforms.ToTensor()
resize64 = transforms.Resize((64,64), interpolation=transforms.InterpolationMode.BILINEAR)

# Walk over every image; images without a matching "<name>_mask.png" are skipped.
for img_p in glob.glob(os.path.join(img_dir, "*.png")):
    base = os.path.basename(img_p).replace(".png", "")
    mask_p = os.path.join(mask_dir, base + "_mask.png")
    if os.path.exists(mask_p):
        img = Image.open(img_p).convert("RGB")
        boxes = load_target(mask_p)[0]
        # One 64x64 crop per annotated box, saved as "<name>_<index>.png".
        for i, b in enumerate(boxes):
            x1, y1, x2, y2 = (int(v) for v in b.tolist())
            crop = resize64(img.crop((x1, y1, x2, y2)))
            crop.save(os.path.join(crop_dir, f"{base}_{i}.png"))

_n_crops = len(glob.glob(os.path.join(crop_dir, '*.png')))
print(f"‚úÖ Created crops64: {_n_crops} images")

In [None]:
# ============ CREATE POSITIVE/NEGATIVE SAMPLES ============
print("\n" + "="*80)
print("üîß CREATING BINARY CLASSIFICATION DATASET")
print("="*80)

to_tensor = transforms.ToTensor()
resize64 = transforms.Resize((64, 64), interpolation=transforms.InterpolationMode.BILINEAR)

# glob order depends on the filesystem; sorting makes the seeded negative
# sampling below produce the same crops on every machine.
image_paths = sorted(glob.glob(os.path.join(img_dir, "*.png")))

# ========== POSITIVE SAMPLES (People) ==========
# One 64x64 crop per annotated box, saved as "<name>_<index>.png".
pos_count = 0
for img_p in image_paths:
    base = os.path.basename(img_p).replace(".png", "")
    mask_p = os.path.join(mask_dir, base + "_mask.png")
    if not os.path.exists(mask_p):
        continue
    img = Image.open(img_p).convert("RGB")
    boxes, _, _ = load_target(mask_p)
    for i, b in enumerate(boxes):
        x1, y1, x2, y2 = map(int, b.tolist())
        crop = img.crop((x1, y1, x2, y2))
        crop = resize64(crop)
        crop.save(os.path.join(pos_dir, f"{base}_{i}.png"))
        pos_count += 1

# ========== NEGATIVE SAMPLES (Background) ==========
# Up to three random 80x80 windows per image; a window is kept only when it
# overlaps none of the annotated boxes.
neg_count = 0
np.random.seed(42)
w_crop, h_crop = 80, 80
for img_p in image_paths:
    base = os.path.basename(img_p).replace(".png", "")
    mask_p = os.path.join(mask_dir, base + "_mask.png")
    if not os.path.exists(mask_p):
        continue

    img = Image.open(img_p).convert("RGB")
    # PIL reports size as (width, height). Unpacking it the other way round
    # would let windows be sampled beyond the image edge.
    img_w, img_h = img.size
    boxes, _, _ = load_target(mask_p)

    for attempt in range(3):
        x_rand = np.random.randint(0, max(img_w - w_crop, 1))
        y_rand = np.random.randint(0, max(img_h - h_crop, 1))

        # Axis-aligned overlap test against every person box.
        has_person = False
        for box in boxes:
            x1, y1, x2, y2 = map(int, box.tolist())
            if not (x_rand + w_crop < x1 or x_rand > x2 or
                    y_rand + h_crop < y1 or y_rand > y2):
                has_person = True
                break

        if not has_person:
            crop = img.crop((x_rand, y_rand, x_rand + w_crop, y_rand + h_crop))
            # Same resize transform as the positives, so the two classes do not
            # differ by interpolation filter.
            crop = resize64(crop)
            crop.save(os.path.join(neg_dir, f"{base}_neg_{attempt}.png"))
            neg_count += 1

print(f"‚úÖ Positive samples: {pos_count} ·∫£nh ‚Üí {pos_dir}")
print(f"‚úÖ Negative samples: {neg_count} ·∫£nh ‚Üí {neg_dir}")
total_count = pos_count + neg_count
if total_count:
    print(f"üìä Ratio: {pos_count}/{total_count} positive ({100*pos_count/total_count:.1f}%)")
else:
    # Avoid a ZeroDivisionError when no image/mask pair was found.
    print(f"No samples were created - check that images exist in {img_dir} and masks in {mask_dir}")

In [None]:
# ============ CNN CLASSIFIER (ResNet18) ============
print("\n" + "="*80)
print("üéØ CNN (RESNET18) - BINARY CLASSIFICATION")
print("="*80)

class PedCropDataset(Dataset):
    """Binary crop dataset: PNGs from `pos_folder` get label 1, PNGs from `neg_folder` label 0.

    Attributes:
        pos_paths / neg_paths: sorted PNG paths found in each folder.
        paths: all positives followed by all negatives.
        labels: integer labels aligned with `paths`.
        tf: transform applied to each loaded image (ToTensor).
    """

    def __init__(self, pos_folder, neg_folder):
        def _sorted_pngs(folder):
            return sorted(glob.glob(os.path.join(folder, "*.png")))

        self.pos_paths = _sorted_pngs(pos_folder)
        self.neg_paths = _sorted_pngs(neg_folder)
        self.paths = [*self.pos_paths, *self.neg_paths]
        self.labels = [1 for _ in self.pos_paths] + [0 for _ in self.neg_paths]
        self.tf = transforms.Compose([transforms.ToTensor()])

    def __len__(self):
        """Number of crops (positives plus negatives)."""
        return len(self.paths)

    def __getitem__(self, i):
        """Return `(image_tensor, label)` for sample `i`; the image is loaded as RGB."""
        image = Image.open(self.paths[i]).convert("RGB")
        return self.tf(image), self.labels[i]

# Create dataset and dataloaders
ds_cnn = PedCropDataset(pos_dir, neg_dir)
n_cnn = len(ds_cnn)
n_train_cnn = int(0.8 * n_cnn)
# A seeded generator keeps the 80/20 split identical across reruns, so the
# validation accuracy of different runs is comparable.
train_ds_cnn, val_ds_cnn = torch.utils.data.random_split(
    ds_cnn,
    [n_train_cnn, n_cnn - n_train_cnn],
    generator=torch.Generator().manual_seed(42),
)
train_dl_cnn = DataLoader(train_ds_cnn, batch_size=32, shuffle=True)
val_dl_cnn   = DataLoader(val_ds_cnn, batch_size=32)

print(f"üìä Dataset: {len(train_ds_cnn)} train + {len(val_ds_cnn)} val")
# Reuse the dataset built above instead of re-scanning both folders per count.
print(f"   Positive: {len(ds_cnn.pos_paths)}")
print(f"   Negative: {len(ds_cnn.neg_paths)}")

# Build model: ResNet18 trained from scratch with a 2-way output.
model = models.resnet18(weights=None, num_classes=2).to(device)
opt = torch.optim.Adam(model.parameters(), lr=1e-3)

# Training
print(f"üñ•Ô∏è  Device: {device}\n")
for epoch in range(10):
    # ---- one pass over the training split ----
    model.train()
    train_loss = 0
    for xb, yb in train_dl_cnn:
        xb, yb = xb.to(device), yb.to(device)
        logits = model(xb)
        loss = F.cross_entropy(logits, yb)
        opt.zero_grad()
        loss.backward()
        opt.step()
        train_loss += loss.item()
    # Report the mean per-batch loss so the number does not scale with the
    # number of batches.
    train_loss /= max(len(train_dl_cnn), 1)

    # ---- accuracy on the validation split ----
    model.eval()
    with torch.no_grad():
        tot, correct = 0, 0
        for xb, yb in val_dl_cnn:
            xb, yb = xb.to(device), yb.to(device)
            pred = model(xb).argmax(1)
            tot += yb.numel()
            correct += (pred == yb).sum().item()

    # Guard against an empty validation split.
    val_acc = correct / tot if tot else float("nan")
    print(f"Epoch {epoch+1:2d}/10: val_acc={val_acc:.3f} | train_loss={train_loss:.4f}")

torch.cuda.empty_cache()
print("\n‚úÖ CNN training completed!")

In [None]:
# ============ FASTER R-CNN DETECTOR ============
print("\n" + "="*80)
print("üì¶ FASTER R-CNN - OBJECT DETECTION")
print("="*80)

class PennFudanDet(Dataset):
    """Detection dataset: each item is an image tensor plus a dict with "boxes" and "labels".

    Images are the sorted PNGs of `img_dir`; the annotation of "<name>.png" is
    read from "<name>_mask.png" in `mask_dir` via `load_target`.
    """

    def __init__(self, img_dir, mask_dir):
        pattern = os.path.join(img_dir, "*.png")
        self.imgs = sorted(glob.glob(pattern))
        self.mask_dir = mask_dir
        self.tf = transforms.ToTensor()

    def __len__(self):
        """Number of images found in the image directory."""
        return len(self.imgs)

    def __getitem__(self, i):
        """Return `(image_tensor, {"boxes": ..., "labels": ...})` for image `i`."""
        img_p = self.imgs[i]
        stem = os.path.basename(img_p).replace(".png", "")
        mask_p = os.path.join(self.mask_dir, stem + "_mask.png")
        image = Image.open(img_p).convert("RGB")
        # Instance masks are not needed for box-only detection.
        boxes, labels, _ = load_target(mask_p)
        target = {"boxes": boxes, "labels": labels}
        return self.tf(image), target

def collate(batch):
    """Turn a batch of `(image, target)` pairs into `(list_of_images, list_of_targets)`.

    Items are kept as plain lists rather than stacked into one tensor.
    """
    imgs, targets = map(list, zip(*batch))
    return imgs, targets

full_det = PennFudanDet(img_dir, mask_dir)
n_det = len(full_det)
n_train_det = int(0.8 * n_det)
# A seeded generator keeps the 80/20 split identical across reruns.
train_ds_det, val_ds_det = torch.utils.data.random_split(
    full_det,
    [n_train_det, n_det - n_train_det],
    generator=torch.Generator().manual_seed(42),
)
train_dl_det = DataLoader(train_ds_det, batch_size=2, shuffle=True, collate_fn=collate)
val_dl_det = DataLoader(val_ds_det, batch_size=2, collate_fn=collate)

print(f"üìä Detection dataset: {n_train_det} train + {n_det - n_train_det} val")

# Build model: start from the default pretrained weights and replace the box
# head with a 2-class predictor (background + person).
det_model = fasterrcnn_resnet50_fpn(weights="DEFAULT")
in_features = det_model.roi_heads.box_predictor.cls_score.in_features
det_model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, 2)
det_model = det_model.to(device)
opt = torch.optim.SGD([p for p in det_model.parameters() if p.requires_grad],
                      lr=0.005, momentum=0.9, weight_decay=1e-4)

# Training
print(f"üñ•Ô∏è  Device: {device}\n")
for epoch in range(6):
    det_model.train()
    train_loss = 0
    start_time = time.time()

    pbar = tqdm(train_dl_det, desc=f"Epoch {epoch+1}/6", leave=True)
    for imgs, targets in pbar:
        imgs = [im.to(device) for im in imgs]
        targets = [{k:v.to(device) for k,v in t.items()} for t in targets]
        # In train mode the model returns a dict of losses; optimise their sum.
        loss_dict = det_model(imgs, targets)
        loss = sum(loss_dict.values())
        opt.zero_grad()
        loss.backward()
        opt.step()
        train_loss += loss.item()
        pbar.set_postfix({'loss': f'{loss.item():.4f}'})

    elapsed = time.time() - start_time
    # Divide by the number of batches so the printed "Avg Loss" really is an
    # average rather than the running sum.
    avg_loss = train_loss / max(len(train_dl_det), 1)
    print(f"‚úÖ Epoch {epoch+1}/6 completed in {elapsed:.1f}s | Avg Loss: {avg_loss:.4f}\n")

torch.cuda.empty_cache()
print("‚úÖ Faster R-CNN training completed!")

In [None]:
# ============ MASK R-CNN SEGMENTATION ============
print("\n" + "="*80)
print("üé≠ MASK R-CNN - INSTANCE SEGMENTATION")
print("="*80)

class PennFudanSeg(PennFudanDet):
    """Segmentation variant of `PennFudanDet`: the target dict also carries "masks"."""

    def __getitem__(self, i):
        """Return `(image_tensor, {"boxes", "labels", "masks"})` for image `i`."""
        img_p = self.imgs[i]
        stem = os.path.basename(img_p).replace(".png", "")
        mask_p = os.path.join(self.mask_dir, stem + "_mask.png")
        image = Image.open(img_p).convert("RGB")
        target = dict(zip(("boxes", "labels", "masks"), load_target(mask_p)))
        return self.tf(image), target

full_seg = PennFudanSeg(img_dir, mask_dir)
n_seg = len(full_seg)
n_train_seg = int(0.8 * n_seg)
# A seeded generator keeps the 80/20 split identical across reruns.
train_ds_seg, val_ds_seg = torch.utils.data.random_split(
    full_seg,
    [n_train_seg, n_seg - n_train_seg],
    generator=torch.Generator().manual_seed(42),
)
train_dl_seg = DataLoader(train_ds_seg, batch_size=2, shuffle=True, collate_fn=collate)
val_dl_seg = DataLoader(val_ds_seg, batch_size=2, collate_fn=collate)

print(f"üìä Segmentation dataset: {n_train_seg} train + {n_seg - n_train_seg} val")

# Build model: start from the default pretrained weights and replace both the
# mask head and the box head with 2-class predictors (background + person).
seg_model = maskrcnn_resnet50_fpn(weights="DEFAULT")
in_features_mask = seg_model.roi_heads.mask_predictor.conv5_mask.in_channels
hidden = 256
seg_model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask, hidden, 2)
in_features = seg_model.roi_heads.box_predictor.cls_score.in_features
seg_model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, 2)
seg_model = seg_model.to(device)
opt = torch.optim.SGD([p for p in seg_model.parameters() if p.requires_grad],
                      lr=0.005, momentum=0.9, weight_decay=1e-4)

# Training
print(f"üñ•Ô∏è  Device: {device}\n")
for epoch in range(6):
    seg_model.train()
    train_loss = 0
    start_time = time.time()

    pbar = tqdm(train_dl_seg, desc=f"Epoch {epoch+1}/6", leave=True)
    for imgs, targets in pbar:
        imgs = [im.to(device) for im in imgs]
        targets = [{k:v.to(device) for k,v in t.items()} for t in targets]
        # In train mode the model returns a dict of losses; optimise their sum.
        loss_dict = seg_model(imgs, targets)
        loss = sum(loss_dict.values())
        opt.zero_grad()
        loss.backward()
        opt.step()
        train_loss += loss.item()
        pbar.set_postfix({'loss': f'{loss.item():.4f}'})

    elapsed = time.time() - start_time
    # Divide by the number of batches so the printed "Avg Loss" really is an
    # average rather than the running sum.
    avg_loss = train_loss / max(len(train_dl_seg), 1)
    print(f"‚úÖ Epoch {epoch+1}/6 completed in {elapsed:.1f}s | Avg Loss: {avg_loss:.4f}\n")

torch.cuda.empty_cache()
print("‚úÖ Mask R-CNN training completed!")

In [None]:
# ============ AUTOENCODER ============
print("\n" + "="*80)
print("üîÑ AUTOENCODER - UNSUPERVISED LEARNING")
print("="*80)

class CropOnly(Dataset):
    """Label-free image dataset over the sorted PNG files of a single folder."""

    def __init__(self, folder):
        pattern = os.path.join(folder, "*.png")
        self.paths = sorted(glob.glob(pattern))
        self.tf = transforms.Compose([transforms.ToTensor()])

    def __len__(self):
        """Number of PNG files found in the folder."""
        return len(self.paths)

    def __getitem__(self, i):
        """Load image `i` as RGB and return it as a tensor (no label)."""
        image = Image.open(self.paths[i]).convert("RGB")
        return self.tf(image)

class SmallAE(nn.Module):
    """Small convolutional autoencoder.

    The encoder applies three stride-2 convolutions (3 -> 32 -> 64 -> 128
    channels), each halving the spatial size; the decoder mirrors them with
    three stride-2 transposed convolutions and ends in a Sigmoid, so the
    reconstruction has the input's shape and values in [0, 1].
    """

    def __init__(self):
        super().__init__()
        # Shared geometry of every (transposed) convolution in the network.
        geom = dict(kernel_size=4, stride=2, padding=1)
        self.enc = nn.Sequential(
            nn.Conv2d(3, 32, **geom),
            nn.ReLU(),
            nn.Conv2d(32, 64, **geom),
            nn.ReLU(),
            nn.Conv2d(64, 128, **geom),
            nn.ReLU(),
        )
        self.dec = nn.Sequential(
            nn.ConvTranspose2d(128, 64, **geom),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 32, **geom),
            nn.ReLU(),
            nn.ConvTranspose2d(32, 3, **geom),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Encode `x` and return its reconstruction."""
        latent = self.enc(x)
        return self.dec(latent)

# Data: the 64x64 person crops, unlabeled.
ae_ds = CropOnly(crop_dir)
ae_dl = DataLoader(ae_ds, batch_size=64, shuffle=True)

print(f"üìä AutoEncoder dataset: {len(ae_ds)} crops")

ae = SmallAE().to(device)
opt = torch.optim.Adam(ae.parameters(), lr=1e-3)

print(f"üñ•Ô∏è  Device: {device}\n")
for epoch in range(10):
    ae.train()
    tot = 0
    for xb in ae_dl:
        xb = xb.to(device)
        # Pixel-wise mean squared reconstruction error for this batch.
        recon = ae(xb)
        loss = (recon - xb).pow(2).mean()
        opt.zero_grad()
        loss.backward()
        opt.step()
        # Weight by batch size so the epoch figure is a per-sample mean.
        tot += loss.item() * xb.size(0)
    epoch_mse = tot / len(ae_ds)
    print(f"Epoch {epoch+1:2d}/10: MSE={epoch_mse:.4f}")

torch.cuda.empty_cache()
print("\n‚úÖ AutoEncoder training completed!")

In [None]:
# ============ GAN (DCGAN) ============
print("\n" + "="*80)
print("üëª GAN (DCGAN) - GENERATIVE MODEL")
print("="*80)

# nz: latent channels; ngf / ndf: base feature widths of generator / discriminator.
nz, ngf, ndf = 64, 64, 64

class G(nn.Module):
    """DCGAN-style generator mapping a latent (B, nz, 1, 1) to an image (B, 3, 64, 64).

    The output goes through Tanh, so pixel values lie in [-1, 1]. Five
    transposed convolutions grow the spatial size 1 -> 4 -> 8 -> 16 -> 32 -> 64,
    which matches the 64x64 training crops. With only four stages the
    generator would stop at 32x32.
    """

    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.ConvTranspose2d(nz, ngf*8, 4, 1, 0), nn.ReLU(True),       # 1x1   -> 4x4
            nn.ConvTranspose2d(ngf*8, ngf*4, 4, 2, 1), nn.ReLU(True),    # 4x4   -> 8x8
            nn.ConvTranspose2d(ngf*4, ngf*2, 4, 2, 1), nn.ReLU(True),    # 8x8   -> 16x16
            nn.ConvTranspose2d(ngf*2, ngf, 4, 2, 1), nn.ReLU(True),      # 16x16 -> 32x32
            nn.ConvTranspose2d(ngf, 3, 4, 2, 1), nn.Tanh(),              # 32x32 -> 64x64
        )

    def forward(self, z):
        """Generate images from latent codes `z` of shape (B, nz, 1, 1)."""
        return self.net(z)

class D(nn.Module):
    """Convolutional discriminator that returns raw (pre-sigmoid) logits.

    Three stride-2 convolutions each halve the spatial size, then a 4x4
    convolution without padding produces single-channel logits. The result is
    flattened to 1-D, so the number of logits per image depends on the input
    size (for example a 64x64 input gives an 8x8 map and therefore 5x5 logits).
    """

    def __init__(self):
        super().__init__()
        layers = [
            nn.Conv2d(3, ndf, kernel_size=4, stride=2, padding=1),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(ndf, ndf*2, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(ndf*2),
            nn.LeakyReLU(0.2, True),
            nn.Conv2d(ndf*2, ndf*4, kernel_size=4, stride=2, padding=1),
            nn.BatchNorm2d(ndf*4),
            nn.LeakyReLU(0.2, True),
            nn.Conv2d(ndf*4, 1, kernel_size=4, stride=1, padding=0),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the flattened logits for the image batch `x`."""
        logits = self.net(x)
        return logits.view(-1)

gen, disc = G().to(device), D().to(device)
optG = torch.optim.Adam(gen.parameters(), lr=2e-4, betas=(0.5, 0.999))
optD = torch.optim.Adam(disc.parameters(), lr=2e-4, betas=(0.5, 0.999))
# The discriminator outputs raw logits, hence the with-logits loss.
bce = nn.BCEWithLogitsLoss()

gan_dl = DataLoader(ae_ds, batch_size=64, shuffle=True)

print(f"üñ•Ô∏è  Device: {device}\n")
for epoch in range(10):
    for real in gan_dl:
        # The crops come out of ToTensor() in [0, 1], but the generator ends in
        # Tanh and the demo cell maps samples back with (img + 1) / 2. Rescale
        # the real images to [-1, 1] so both inputs share one value range.
        real = real.to(device) * 2 - 1

        # Train D: real -> 1, generated -> 0. The fakes are detached so this
        # step does not backpropagate into the generator.
        z = torch.randn(real.size(0), nz, 1, 1, device=device)
        fake = gen(z).detach()
        d_real = disc(real)
        d_fake = disc(fake)
        lossD = bce(d_real, torch.ones_like(d_real)) + bce(d_fake, torch.zeros_like(d_fake))
        optD.zero_grad()
        lossD.backward()
        optD.step()

        # Train G: fresh fakes should be scored as real by the updated D.
        z = torch.randn(real.size(0), nz, 1, 1, device=device)
        fake = gen(z)
        g = disc(fake)
        lossG = bce(g, torch.ones_like(g))
        optG.zero_grad()
        lossG.backward()
        optG.step()
    # Losses shown are those of the last batch of the epoch.
    print(f"Epoch {epoch+1:2d}/10: D Loss={lossD.item():.4f} | G Loss={lossG.item():.4f}")

torch.cuda.empty_cache()
print("\n‚úÖ GAN training completed!")

In [None]:
# ============ VISUALIZATION: CNN RESULTS ============
print("\n" + "="*80)
print("üé® DEMO: CNN Classification Results")
print("="*80)

# Classify the first validation batch.
model.eval()
with torch.no_grad():
    sample_batch, sample_labels = next(iter(val_dl_cnn))
    sample_batch = sample_batch.to(device)
    predictions = model(sample_batch)
    predicted_classes = predictions.argmax(1)

# 2x4 grid showing the first eight samples; titles are green when the
# prediction matches the label and red otherwise.
fig, axes = plt.subplots(2, 4, figsize=(12, 6))
fig.suptitle('CNN Classification Results (ResNet18)', fontsize=14, fontweight='bold')
for idx, ax in enumerate(axes.flat):
    # CHW tensor -> HWC array for imshow.
    img = np.clip(sample_batch[idx].cpu().permute(1, 2, 0).numpy(), 0, 1)
    ax.imshow(img)
    pred = predicted_classes[idx].item()
    label = sample_labels[idx].item()
    color = 'red' if pred != label else 'green'
    ax.set_title(f'Pred: {pred}, True: {label}', color=color, fontweight='bold')
    ax.axis('off')
plt.tight_layout()
output_path = os.path.join(output_dir, 'CNN_Results.png')
plt.savefig(output_path, dpi=150, bbox_inches='tight')
print(f"‚úÖ Saved: {output_path}")
plt.close()

In [None]:
# ============ VISUALIZATION: FASTER R-CNN DETECTION ============
print("\n" + "="*80)
print("üì¶ DEMO: Faster R-CNN Detection")
print("="*80)

# Run the detector on the first validation batch.
det_model.eval()
sample_imgs, sample_targets = next(iter(val_dl_det))
sample_imgs_device = [im.to(device) for im in sample_imgs]

with torch.no_grad():
    predictions = det_model(sample_imgs_device)

fig, axes = plt.subplots(1, 2, figsize=(14, 6))
fig.suptitle('Faster R-CNN Detection Results', fontsize=14, fontweight='bold')


def _draw_boxes(ax, boxes, **style):
    """Outline each [x1, y1, x2, y2] box on `ax` with an unfilled rectangle."""
    for x1, y1, x2, y2 in boxes:
        ax.add_patch(patches.Rectangle((x1, y1), x2-x1, y2-y1, linewidth=2, facecolor='none', **style))


for idx, ax in enumerate(axes):
    # CHW tensor -> HWC array for imshow.
    img = np.clip(sample_imgs[idx].permute(1, 2, 0).numpy(), 0, 1)
    ax.imshow(img)

    # Ground truth: solid green.
    _draw_boxes(ax, sample_targets[idx]['boxes'].cpu().numpy(), edgecolor='green')

    # Predictions scoring above 0.5: dashed red.
    pred = predictions[idx]
    confident = pred['scores'].cpu().numpy() > 0.5
    _draw_boxes(ax, pred['boxes'].cpu().numpy()[confident], edgecolor='red', linestyle='--')

    ax.set_title(f'Image {idx+1}', fontweight='bold')
    ax.axis('off')

plt.tight_layout()
output_path = os.path.join(output_dir, 'RCNN_Detection.png')
plt.savefig(output_path, dpi=150, bbox_inches='tight')
print(f"‚úÖ Saved: {output_path}")
plt.close()

In [None]:
# ============ VISUALIZATION: MASK R-CNN SEGMENTATION ============
print("\n" + "="*80)
print("üé≠ DEMO: Mask R-CNN Segmentation")
print("="*80)

# Run the segmentation model on the first validation batch.
seg_model.eval()
seg_sample_imgs, seg_sample_targets = next(iter(val_dl_seg))
seg_sample_imgs_device = [im.to(device) for im in seg_sample_imgs]

with torch.no_grad():
    seg_predictions = seg_model(seg_sample_imgs_device)

# Top row: ground truth; bottom row: predictions.
fig, axes = plt.subplots(2, 2, figsize=(14, 12))
fig.suptitle('Mask R-CNN Segmentation Results', fontsize=14, fontweight='bold')

# Blank every panel first so a column stays empty (instead of raising an
# IndexError) when the batch holds fewer than two images.
for ax in axes.flat:
    ax.axis('off')

n_show = min(2, len(seg_sample_imgs))
for idx in range(n_show):
    # CHW tensor -> HWC array for imshow; shared by both rows.
    img = np.clip(seg_sample_imgs[idx].permute(1, 2, 0).numpy(), 0, 1)

    # Ground truth
    ax = axes[0, idx]
    ax.imshow(img)
    ax.set_title(f'Ground Truth - Image {idx+1}', fontweight='bold')
    gt_masks = seg_sample_targets[idx]['masks'].cpu().numpy()
    for mask in gt_masks:
        # One contour at 0.5 traces the boundary of the 0/1 mask; without
        # `levels` matplotlib picks several automatic iso-levels.
        ax.contour(mask, levels=[0.5], colors='green', linewidths=2)
    ax.axis('off')

    # Predictions
    ax = axes[1, idx]
    ax.imshow(img)
    ax.set_title(f'Predictions - Image {idx+1}', fontweight='bold')
    pred = seg_predictions[idx]
    for mask, score in zip(pred['masks'].cpu().numpy(), pred['scores'].cpu().numpy()):
        if score > 0.5:
            # Draw the outline where the predicted mask value crosses 0.5,
            # rather than a stack of nested iso-lines over the soft mask.
            ax.contour(mask.squeeze(), levels=[0.5], colors='red', linewidths=2, linestyles='--')
    ax.axis('off')

plt.tight_layout()
output_path = os.path.join(output_dir, 'MaskRCNN_Segmentation.png')
plt.savefig(output_path, dpi=150, bbox_inches='tight')
print(f"‚úÖ Saved: {output_path}")
plt.close()

In [None]:
# ============ VISUALIZATION: AUTOENCODER ============
print("\n" + "="*80)
print("üîÑ DEMO: AutoEncoder Reconstruction")
print("="*80)

# Reconstruct the first eight crops of one shuffled batch.
ae.eval()
sample_imgs_ae = next(iter(ae_dl))[:8].to(device)

with torch.no_grad():
    reconstructed = ae(sample_imgs_ae)

# Row 0: originals; row 1: reconstructions (same column = same crop).
fig, axes = plt.subplots(2, 8, figsize=(16, 4))
fig.suptitle('AutoEncoder: Original vs Reconstructed', fontsize=14, fontweight='bold')

_rows = (('Original', sample_imgs_ae), ('Reconstructed', reconstructed))
for i in range(8):
    for _row, (_title, _batch) in enumerate(_rows):
        ax = axes[_row, i]
        # CHW tensor -> HWC array for imshow.
        ax.imshow(np.clip(_batch[i].cpu().permute(1, 2, 0).numpy(), 0, 1))
        ax.set_title(_title, fontsize=9)
        ax.axis('off')

plt.tight_layout()
output_path = os.path.join(output_dir, 'AE_Reconstruction.png')
plt.savefig(output_path, dpi=150, bbox_inches='tight')
print(f"‚úÖ Saved: {output_path}")
plt.close()

# Per-image MSE over channels and pixels, then the mean over the eight samples.
with torch.no_grad():
    _sq_err = (reconstructed - sample_imgs_ae)**2
    mse_errors = _sq_err.mean(dim=[1,2,3]).cpu().numpy()
    avg_mse = mse_errors.mean()
print(f"   Average MSE: {avg_mse:.4f}")

In [None]:
# ============ VISUALIZATION: GAN ============
print("\n" + "="*80)
print("üëª DEMO: GAN Generated Images")
print("="*80)

# Sample 16 latent vectors and generate one image from each.
gen.eval()
z_samples = torch.randn(16, nz, 1, 1, device=device)

with torch.no_grad():
    generated_images = gen(z_samples)

fig, axes = plt.subplots(2, 8, figsize=(16, 4))
fig.suptitle('DCGAN: Generated Synthetic Pedestrian Images', fontsize=14, fontweight='bold')

for idx, ax in enumerate(axes.flat):
    img = generated_images[idx].cpu().permute(1, 2, 0).numpy()
    # Map the Tanh range [-1, 1] to [0, 1] for display.
    img = np.clip((img + 1) / 2, 0, 1)
    ax.imshow(img)
    ax.set_title(f'Generated {idx+1}', fontsize=9)
    ax.axis('off')

plt.tight_layout()
output_path = os.path.join(output_dir, 'GAN_Generated.png')
plt.savefig(output_path, dpi=150, bbox_inches='tight')
print(f"‚úÖ Saved: {output_path}")
plt.close()

In [None]:
# ============ SAVE MODELS ============
print("\n" + "="*80)
print("üíæ SAVING MODELS")
print("="*80)

# Checkpoint file name -> trained network; each state_dict is written to output_dir.
_checkpoints = {
    'model_cnn.pth': model,
    'model_faster_rcnn.pth': det_model,
    'model_mask_rcnn.pth': seg_model,
    'model_autoencoder.pth': ae,
    'model_generator.pth': gen,
    'model_discriminator.pth': disc,
}
for _fname, _net in _checkpoints.items():
    torch.save(_net.state_dict(), os.path.join(output_dir, _fname))

print(f"‚úÖ Models saved to: {output_dir}")
for _fname in _checkpoints:
    print(f"   - {_fname}")

print("\n" + "="*80)
print("‚úÖ TRAINING COMPLETED SUCCESSFULLY!")
print("="*80)
print(f"\nAll outputs saved to: {output_dir}")
print("\nüìä Generated files:")
# List the figures first, then the checkpoints, found in output_dir.
for _pattern in ('*.png', '*.pth'):
    for f in glob.glob(os.path.join(output_dir, _pattern)):
        print(f"   - {os.path.basename(f)}")