<a href="https://colab.research.google.com/github/mohamedshouaib/iti/blob/main/Computer_Vision/Day02/Faster_final_f.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!git clone https://github.com/mohamedshouaib/iti.git
!cd iti/Computer_Vision/Day02

fatal: destination path 'iti' already exists and is not an empty directory.


In [None]:
import torch
import torchvision.transforms as T
import os
import cv2
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.models import resnet50
import torch.nn as nn
from torchvision.ops import MultiScaleRoIAlign, box_iou
import torch.optim as optim
from tqdm import tqdm
import torchvision

In [None]:
# Training configuration shared by the cells below.
BATCH_SIZE = 4          # images per batch for both the train and validation DataLoaders
LEARNING_RATE = 0.005   # SGD learning rate
NUM_EPOCHS = 30         # upper bound on training epochs (early stopping may end sooner)
PATIENCE = 3            # epochs without validation-loss improvement before stopping
MOMENTUM = 0.9          # SGD momentum
WEIGHT_DECAY = 0.0005   # SGD weight decay

In [None]:
class Load_Dataset(Dataset):
    """Detection dataset pairing each image with a text file of bounding boxes.

    For an image ``name.ext`` in ``image_dir`` the annotations are read from
    ``name.txt`` in ``label_dir``. Every annotation line holds four numbers,
    ``x1 y1 x2 y2``, separated by whitespace and/or commas. Lines that do not
    have exactly four fields, or that describe an empty box (``x2 <= x1`` or
    ``y2 <= y1``), are ignored. Images without a label file, or without at
    least one valid box, are left out of the dataset.

    Args:
        image_dir: Directory containing the images.
        label_dir: Directory containing the matching ``.txt`` label files.
        transforms: Optional callable applied to the PIL image only. Because
            the boxes are not passed through it, it must not change the image
            geometry (flip, crop, resize), or the boxes stop matching.

    Each item is ``(img, target)`` where ``target`` is a dict with ``'boxes'``
    (float32 tensor of shape ``(N, 4)``) and ``'labels'`` (int64 tensor of
    ones, i.e. a single foreground class).
    """

    # File extensions (compared case-insensitively) treated as images.
    IMAGE_EXTENSIONS = ('.jpg', '.png', '.jpeg', '.tif')

    def __init__(self, image_dir, label_dir, transforms=None):
        self.image_dir = image_dir
        self.label_dir = label_dir
        self.transforms = transforms
        self.valid_samples = []

        for img_name in sorted(os.listdir(image_dir)):
            if not img_name.lower().endswith(self.IMAGE_EXTENSIONS):
                continue
            label_path = self._label_path(img_name)
            if not os.path.exists(label_path):
                continue
            # Keep only images that have at least one usable box.
            if self._read_boxes(label_path):
                self.valid_samples.append(img_name)

    def _label_path(self, img_name):
        """Return the label-file path that corresponds to ``img_name``."""
        return os.path.join(self.label_dir, os.path.splitext(img_name)[0] + ".txt")

    @staticmethod
    def _read_boxes(label_path):
        """Parse ``label_path`` into a list of ``[x1, y1, x2, y2]`` boxes.

        Coordinates are parsed as floats so that both integer and decimal
        annotations are accepted. A four-field line that is not numeric raises
        ``ValueError`` naming the offending file and line.
        """
        boxes = []
        with open(label_path, 'r') as f:
            for line_no, line in enumerate(f, start=1):
                parts = line.strip().replace(",", " ").split()
                if len(parts) != 4:
                    continue
                try:
                    x1, y1, x2, y2 = map(float, parts)
                except ValueError as exc:
                    raise ValueError(
                        f"{label_path}:{line_no}: non-numeric box coordinates: {line.strip()!r}"
                    ) from exc
                if x2 > x1 and y2 > y1:
                    boxes.append([x1, y1, x2, y2])
        return boxes

    def __len__(self):
        return len(self.valid_samples)

    def __getitem__(self, idx):
        img_name = self.valid_samples[idx]
        img_path = os.path.join(self.image_dir, img_name)
        img = Image.open(img_path).convert("RGB")

        boxes = self._read_boxes(self._label_path(img_name))

        if self.transforms:
            img = self.transforms(img)

        # reshape keeps the (N, 4) layout even if the label file lost all of
        # its boxes after the dataset was indexed.
        boxes = torch.tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.ones((len(boxes),), dtype=torch.int64)
        target = {'boxes': boxes, 'labels': labels}

        return img, target


In [None]:
# Image-only preprocessing: convert the PIL image to a float tensor.
# No geometric augmentation (e.g. RandomHorizontalFlip) belongs here: the
# dataset applies this transform to the image alone, so flipping it would
# leave the ground-truth boxes pointing at the wrong pixels. The same
# transform is also used for validation and inference, which must be
# deterministic.
transform = T.Compose([
    T.ToTensor(),
])

train_dataset = Load_Dataset("iti/Computer_Vision/Day02/TrainImages",
                           "iti/Computer_Vision/Day02/TrainGroundTruth",
                           transforms=transform)
val_dataset = Load_Dataset("iti/Computer_Vision/Day02/TestImages",
                         "iti/Computer_Vision/Day02/TestGroundTruth",
                         transforms=transform)

print(f"Train samples: {len(train_dataset)}")
print(f"Validation samples: {len(val_dataset)}")

def collate_fn(batch):
    """Turn a list of (image, target) pairs into (images, targets) tuples.

    Images and targets are kept as tuples rather than stacked, so samples in
    one batch may differ in image size and number of boxes.
    """
    return tuple(zip(*batch))

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn)

Train samples: 660
Validation samples: 115


In [None]:
class MyFasterRCNN(nn.Module):
    """Faster R-CNN detector on top of a randomly initialised ResNet-50 trunk.

    Args:
        num_classes: Number of output classes passed to ``FasterRCNN``
            (default 2).
    """

    def __init__(self, num_classes=2):
        super().__init__()

        # Use every ResNet-50 child except the last two as the feature
        # extractor; no pretrained weights are loaded (weights=None).
        resnet = resnet50(weights=None)
        feature_layers = list(resnet.children())[:-2]
        self.backbone = nn.Sequential(*feature_layers)
        # Channel count of the trunk output, advertised to the detector.
        self.backbone.out_channels = 2048

        self.model = FasterRCNN(
            backbone=self.backbone,
            num_classes=num_classes,
            # One feature map -> one tuple of anchor sizes / aspect ratios.
            rpn_anchor_generator=AnchorGenerator(
                sizes=((32, 64, 128, 256, 512),),
                aspect_ratios=((0.5, 1.0, 2.0),),
            ),
            # NOTE(review): '0' is presumably the key torchvision gives a
            # single-tensor backbone output - confirm against the docs.
            box_roi_pool=MultiScaleRoIAlign(
                featmap_names=['0'],
                output_size=7,
                sampling_ratio=2,
            ),
        )

    def forward(self, images, targets=None):
        """Delegate to the wrapped ``FasterRCNN`` and return its result."""
        return self.model(images, targets)

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Build the detector with its default class count and move it to the device.
model = MyFasterRCNN()
model = model.to(device)

# Plain SGD over all model parameters, configured from the constants above.
optimizer = optim.SGD(
    model.parameters(),
    lr=LEARNING_RATE,
    momentum=MOMENTUM,
    weight_decay=WEIGHT_DECAY,
)

In [None]:
def train_one_epoch(model, optimizer, data_loader, device):
    """Run one optimisation pass over ``data_loader``.

    Args:
        model: Detection model that returns a dict of losses when called with
            ``(images, targets)`` in training mode.
        optimizer: Optimizer updating ``model``'s parameters.
        data_loader: Iterable yielding ``(images, targets)`` batches, where
            ``targets`` is a sequence of dicts of tensors.
        device: Device the batch tensors are moved to.

    Returns:
        Mean summed loss over the batches that were actually processed, or
        ``0.0`` when no batch was processed.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0
    for images, targets in tqdm(data_loader, desc="Training"):
        if not images or not targets:
            continue

        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        optimizer.zero_grad()
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        losses.backward()
        optimizer.step()
        total_loss += losses.item()
        num_batches += 1

    # Average over processed batches only: skipped (empty) batches must not
    # dilute the mean, and an empty loader must not divide by zero.
    return total_loss / num_batches if num_batches else 0.0

In [None]:
def validate(model, data_loader, device):
    """Compute the mean detection loss over ``data_loader`` without training.

    torchvision detection models return the loss dict only in training mode;
    in eval mode they return a list of detections, which has no ``.values()``.
    The model is therefore put in training mode for the forward passes, with
    gradients disabled and batch-norm layers kept in eval mode so validation
    data does not alter their running statistics.

    Args:
        model: Detection model that returns a dict of losses when called with
            ``(images, targets)`` in training mode.
        data_loader: Iterable yielding ``(images, targets)`` batches.
        device: Device the batch tensors are moved to.

    Returns:
        Mean summed loss over the batches that were actually processed, or
        ``0.0`` when no batch was processed. The model is left in eval mode.
    """
    model.train()
    for module in model.modules():
        if isinstance(module, torch.nn.modules.batchnorm._BatchNorm):
            module.eval()

    val_loss = 0.0
    num_batches = 0
    try:
        with torch.no_grad():
            for images, targets in tqdm(data_loader, desc="Validating"):
                if not images or not targets:
                    continue
                images = [img.to(device) for img in images]
                targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
                loss_dict = model(images, targets)
                losses = sum(loss for loss in loss_dict.values())
                val_loss += losses.item()
                num_batches += 1
    finally:
        # Leave the model in eval mode, as callers of the old version expect.
        model.eval()

    # Average over processed batches only; avoid dividing by zero.
    return val_loss / num_batches if num_batches else 0.0

In [None]:
def evaluate_model(model, data_loader, device, iou_threshold=0.5):
    """Average, over images, the fraction of ground-truth boxes that were hit.

    A ground-truth box counts as hit when it is the best-overlapping box for
    some prediction, that overlap exceeds ``iou_threshold``, and no earlier
    prediction has already claimed it.

    Args:
        model: Detection model; called with a list of image tensors and
            expected to return one dict with a ``"boxes"`` entry per image.
        data_loader: Iterable yielding ``(images, targets)`` batches.
        device: Device the images are moved to.
        iou_threshold: Minimum IoU (exclusive) for a match.

    Returns:
        Mean per-image score, or ``0`` when no image was evaluated.
    """
    model.eval()

    def compute_iou(pred_boxes, gt_boxes):
        """Score one image: matched ground-truth boxes / all ground-truth boxes."""
        num_gt = len(gt_boxes)
        num_pred = len(pred_boxes)
        # With nothing to find, predicting nothing is perfect and anything else is wrong.
        if num_gt == 0:
            return 0.0 if num_pred else 1.0
        if num_pred == 0:
            return 0.0

        claimed = set()
        # One row of IoUs per prediction, in the order the model returned them.
        for row in box_iou(pred_boxes, gt_boxes):
            best_iou, best_gt = row.max(0)
            gt_index = best_gt.item()
            if best_iou.item() > iou_threshold and gt_index not in claimed:
                claimed.add(gt_index)
        return len(claimed) / num_gt

    per_image_scores = []
    with torch.no_grad():
        for images, targets in tqdm(data_loader, desc="Evaluating"):
            batch = [img.to(device) for img in images]
            for output, target in zip(model(batch), targets):
                score = compute_iou(output["boxes"].cpu(), target["boxes"].cpu())
                per_image_scores.append(score)

    if not per_image_scores:
        return 0
    return sum(per_image_scores) / len(per_image_scores)


In [None]:
# Per-epoch loss history, consumed by the plotting cell further down.
train_losses = []
val_losses = []

# Early-stopping state: best validation loss so far and the number of
# consecutive epochs that failed to beat it.
best_loss = float('inf')
counter = 0

for epoch in range(NUM_EPOCHS):
    train_loss = train_one_epoch(model, optimizer, train_loader, device)
    val_loss = validate(model, val_loader, device)
    train_losses.append(train_loss)
    val_losses.append(val_loss)

    print(f"Epoch {epoch+1}/{NUM_EPOCHS} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, LR: {optimizer.param_groups[0]['lr']:.6f}")

    if val_loss < best_loss:
        # New best: checkpoint the weights and reset the patience counter.
        best_loss = val_loss
        counter = 0
        torch.save(model.state_dict(), 'iti/Computer_Vision/Day02/best_model.pth')
        print("Saved new best model")
        continue

    counter += 1
    print(f"No improvement for {counter} epoch(s)")
    if counter >= PATIENCE:
        print(f"Early stopping triggered after {epoch+1} epochs.")
        break

Training: 100%|██████████| 330/330 [03:34<00:00,  1.54it/s]
Validating: 100%|██████████| 58/58 [00:12<00:00,  4.57it/s]


Epoch 1/25 - Train Loss: 0.5265, Val Loss: 0.1179, LR: 0.001000
Saved new best model


Training: 100%|██████████| 330/330 [03:38<00:00,  1.51it/s]
Validating: 100%|██████████| 58/58 [00:12<00:00,  4.52it/s]


Epoch 2/25 - Train Loss: 0.1483, Val Loss: 0.0900, LR: 0.001000
Saved new best model


Training: 100%|██████████| 330/330 [03:37<00:00,  1.51it/s]
Validating: 100%|██████████| 58/58 [00:12<00:00,  4.52it/s]


Epoch 3/25 - Train Loss: 0.0731, Val Loss: 0.0650, LR: 0.001000
Saved new best model


Training:  34%|███▎      | 111/330 [01:13<02:22,  1.54it/s]

In [None]:
def plot_losses(train_losses, val_losses):
    """Plot the training and validation loss histories on one figure.

    Args:
        train_losses: Sequence of per-epoch training losses.
        val_losses: Sequence of per-epoch validation losses.
    """
    plt.figure(figsize=(10, 5))

    curves = (
        (train_losses, 'Training Loss'),
        (val_losses, 'Validation Loss'),
    )
    for history, curve_label in curves:
        plt.plot(history, label=curve_label)

    plt.title('Training and Validation Loss Curves')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.grid(True)
    plt.legend()
    plt.show()

# Show the loss curves recorded by the training loop.
plot_losses(train_losses, val_losses)

In [None]:
# Restore the best checkpoint written by the training loop. map_location
# remaps the stored tensors onto the current device, so a checkpoint saved on
# a GPU runtime can still be loaded on a CPU-only one.
model.load_state_dict(
    torch.load("iti/Computer_Vision/Day02/best_model.pth", map_location=device)
)
accuracy = evaluate_model(model, val_loader, device, iou_threshold=0.5)
print(f"Model Accuracy (IoU > 0.5): {accuracy:.4f}")

In [None]:
def visualize_predictions(model, image_dir, num_images=5):
    """Draw the model's predicted boxes on the first images of a directory.

    Args:
        model: Detection model; called with a batch of one image tensor and
            expected to return a list whose first element has a ``'boxes'``
            entry.
        image_dir: Directory to read images from.
        num_images: Maximum number of images (in sorted filename order) to show.

    Relies on the notebook-level ``device`` for tensor placement.
    """
    model.eval()

    # Same extension filter as the dataset: case-insensitive, '.jpeg' included.
    image_extensions = ('.jpg', '.png', '.jpeg', '.tif')
    image_files = sorted(
        f for f in os.listdir(image_dir) if f.lower().endswith(image_extensions)
    )[:num_images]

    # Deterministic preprocessing only. The boxes are drawn on the original
    # image, so the tensor fed to the model must not be randomly augmented
    # (a flipped input would yield boxes mirrored relative to the picture).
    to_tensor = T.ToTensor()

    for img_name in image_files:
        img_path = os.path.join(image_dir, img_name)
        img = Image.open(img_path).convert("RGB")
        img_tensor = to_tensor(img).unsqueeze(0).to(device)

        with torch.no_grad():
            prediction = model(img_tensor)

        img_np = np.array(img)
        for box in prediction[0]['boxes'].cpu().numpy():
            x1, y1, x2, y2 = map(int, box)
            cv2.rectangle(img_np, (x1, y1), (x2, y2), (0, 255, 0), 2)

        plt.figure(figsize=(12, 8))
        plt.imshow(img_np)
        plt.axis('off')
        plt.title(f'Predictions: {img_name}')
        plt.show()

# Show predictions for the first few test images (num_images defaults to 5).
visualize_predictions(model, "iti/Computer_Vision/Day02/TestImages")