<a href="https://colab.research.google.com/github/mohamedshouaib/iti/blob/main/Computer_Vision%20/Day02/Faster_final2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!git clone https://github.com/mohamedshouaib/iti.git
!cd iti/Computer_Vision/Day02

Cloning into 'iti'...
remote: Enumerating objects: 2436, done.[K
remote: Counting objects: 100% (1745/1745), done.[K
^C
/bin/bash: line 1: cd: iti/Computer_Vision/Day02: No such file or directory


In [2]:
import torch
import torchvision.transforms as T
import os
import cv2
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.models import resnet50
import torch.nn as nn
from torchvision.ops import MultiScaleRoIAlign, box_iou
import torch.optim as optim
from tqdm import tqdm

In [3]:
# --- Run configuration: every tunable value of the notebook lives here. ---

# Number of samples per batch; passed as `batch_size` to both DataLoaders.
BATCH_SIZE = 2
# Initial learning rate handed to the optimizer.
LEARNING_RATE = 0.001
# Weight-decay coefficient meant for the optimizer.
# NOTE(review): confirm the optimizer cell reads this constant and not a literal.
WEIGHT_DECAY = 0.0005
# Upper bound on the number of training epochs.
NUM_EPOCHS = 10
# Presumably the early-stopping patience in epochs.
# NOTE(review): confirm the training loop refers to this exact upper-case name.
PATIENCE = 3
# Anchor sizes and aspect ratios passed to the RPN AnchorGenerator
# (`sizes` / `aspect_ratios`); each holds a single inner tuple.
ANCHOR_SIZES = ((32, 64, 128),)
ASPECT_RATIOS = ((0.5, 1.0, 2.0),)
# IoU a prediction must exceed to count as a match during evaluation.
IOU_THRESHOLD = 0.5

In [4]:
class Load_Dataset(Dataset):
    """Detection dataset pairing each image with a text file of boxes.

    An image ``<stem>.<ext>`` in ``image_dir`` is matched with ``<stem>.txt``
    in ``label_dir``. Every label line describes one box as four numbers
    ``x1 y1 x2 y2`` separated by whitespace and/or commas. Images without a
    label file, or whose label file holds no box with positive width and
    height, are excluded from the dataset.

    Args:
        image_dir: Directory containing the images.
        label_dir: Directory containing the ``.txt`` box files.
        transforms: Optional callable applied to the PIL image only. The boxes
            are returned unchanged, so geometric transforms (flip, resize,
            crop) would desynchronise image and boxes.
    """

    # File extensions (lower-case) recognised as images.
    IMAGE_EXTENSIONS = ('.jpg', '.png', '.jpeg', '.tif')

    def __init__(self, image_dir, label_dir, transforms=None):
        self.image_dir = image_dir
        self.label_dir = label_dir
        self.transforms = transforms

        # Names of the images that have at least one usable box, in sorted order.
        self.valid_samples = []
        for img_name in sorted(os.listdir(image_dir)):
            if not img_name.lower().endswith(self.IMAGE_EXTENSIONS):
                continue

            label_path = self._label_path(img_name)
            if not os.path.exists(label_path):
                continue

            if self._read_boxes(label_path):
                self.valid_samples.append(img_name)

    def _label_path(self, img_name):
        """Return the path of the label file that belongs to ``img_name``."""
        return os.path.join(self.label_dir, os.path.splitext(img_name)[0] + ".txt")

    @staticmethod
    def _read_boxes(label_path):
        """Parse ``label_path`` into a list of ``[x1, y1, x2, y2]`` boxes.

        Lines that do not hold exactly four numeric fields (headers, blank
        lines, malformed rows) and degenerate boxes are skipped instead of
        raising, so one bad line cannot break dataset construction or a
        later ``__getitem__`` call.
        """
        boxes = []
        with open(label_path, 'r') as f:
            for line in f:
                parts = line.strip().replace(",", " ").split()
                if len(parts) != 4:
                    continue
                try:
                    # float() also accepts integer text, unlike int() with "12.5".
                    x1, y1, x2, y2 = (float(p) for p in parts)
                except ValueError:
                    continue
                if x2 > x1 and y2 > y1:
                    boxes.append([x1, y1, x2, y2])
        return boxes

    def __len__(self):
        """Number of images that have at least one valid box."""
        return len(self.valid_samples)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for sample ``idx``.

        ``target`` is a dict with ``'boxes'`` (float32 tensor of shape
        ``(num_boxes, 4)``) and ``'labels'`` (int64 tensor of ones, one per box).
        """
        img_name = self.valid_samples[idx]
        img_path = os.path.join(self.image_dir, img_name)

        img = Image.open(img_path).convert("RGB")
        if self.transforms:
            img = self.transforms(img)

        # reshape keeps the (N, 4) layout even if the file lost its boxes
        # between construction and access.
        boxes = torch.tensor(
            self._read_boxes(self._label_path(img_name)), dtype=torch.float32
        ).reshape(-1, 4)
        labels = torch.ones((len(boxes),), dtype=torch.int64)
        target = {'boxes': boxes, 'labels': labels}

        return img, target

In [5]:
# Channel statistics used to standardise the image tensors.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Training-time preprocessing. Only photometric augmentation is used: the
# dataset applies the transform to the image alone, so a geometric op such as
# RandomHorizontalFlip would mirror the objects while leaving their boxes in
# place, corrupting the labels of roughly half the training samples.
train_transform = T.Compose([
    T.ToTensor(),
    T.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
])

# Deterministic preprocessing for validation and inference. It keeps the name
# `transform` because later cells use that name to prepare images for prediction.
transform = T.Compose([
    T.ToTensor(),
    T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
])

train_dataset = Load_Dataset("iti/Computer_Vision/Day02/TrainImages", "iti/Computer_Vision/Day02/TrainGroundTruth", transforms=train_transform)
# Validation must be repeatable from epoch to epoch, so no random augmentation here.
val_dataset = Load_Dataset("iti/Computer_Vision/Day02/TestImages", "iti/Computer_Vision/Day02/TestGroundTruth", transforms=transform)

print(f"Train samples: {len(train_dataset)}")
print(f"Validation samples: {len(val_dataset)}")

# Fail early with a readable message instead of an obscure DataLoader error.
if len(train_dataset) == 0 or len(val_dataset) == 0:
    raise ValueError("No valid training samples found. Check your data paths and label files.")

def collate_fn(batch):
    """Regroup a list of samples into one tuple per sample field.

    ``[(img0, tgt0), (img1, tgt1)]`` becomes ``((img0, img1), (tgt0, tgt1))``.
    Nothing is stacked; an empty batch yields an empty tuple.
    """
    fields = zip(*batch)
    return tuple(fields)

# Both loaders use the tuple-based collate_fn, which keeps every sample's image
# and target as separate entries instead of stacking them into one tensor.
# Only the training loader shuffles.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    collate_fn=collate_fn,
    num_workers=2,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    collate_fn=collate_fn,
    num_workers=2,
)

FileNotFoundError: [Errno 2] No such file or directory: 'iti/Computer_Vision/Day02/TrainImages'

In [None]:
class MyFasterRCNN(nn.Module):
    """Faster R-CNN detector on a randomly initialised ResNet-50 trunk.

    The trunk is ResNet-50 without its last two children (global pooling and
    classification layer), followed by dropout, and exposes a single
    2048-channel feature map to the RPN and the RoI heads.

    Args:
        num_classes: Number of output classes passed to ``FasterRCNN``
            (background included, per the torchvision convention).
    """

    def __init__(self, num_classes=2):
        super().__init__()
        # Build ResNet-50 once; the previous code instantiated it twice and
        # threw the first instance away.
        trunk = resnet50(weights=None)
        self.backbone = nn.Sequential(*list(trunk.children())[:-2], nn.Dropout(0.2))
        # FasterRCNN reads this attribute to size the RPN and box heads.
        self.backbone.out_channels = 2048

        anchor_generator = AnchorGenerator(
            sizes=ANCHOR_SIZES,
            aspect_ratios=ASPECT_RATIOS
        )

        # The Sequential backbone returns a single tensor, which torchvision
        # exposes under the feature-map name '0'.
        roi_pooler = MultiScaleRoIAlign(
            featmap_names=['0'],
            output_size=7,
            sampling_ratio=2
        )

        self.model = FasterRCNN(
            backbone=self.backbone,
            num_classes=num_classes,
            rpn_anchor_generator=anchor_generator,
            box_roi_pool=roi_pooler,
            # The notebook's image transforms already apply T.Normalize.
            # FasterRCNN's built-in transform would otherwise standardise the
            # input a second time with its default ImageNet statistics, so it
            # is given identity statistics here.
            image_mean=[0.0, 0.0, 0.0],
            image_std=[1.0, 1.0, 1.0],
        )

    def forward(self, images, targets=None):
        """Delegate to the wrapped ``FasterRCNN``.

        In training mode with ``targets`` the wrapped model returns its loss
        dictionary; in eval mode it returns one detection dict per image.
        """
        return self.model(images, targets)


# Use the GPU when the runtime offers one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MyFasterRCNN().to(device)
# Hyper-parameters come from the configuration cell. The weight decay used to be
# a hard-coded 0.001 that silently ignored WEIGHT_DECAY.
optimizer = optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
# Cut the learning rate tenfold once the monitored validation loss has stopped
# improving for more than `patience` epochs.
# NOTE(review): this patience equals the early-stopping patience in the config
# cell, so training will usually stop before a reduction happens. Consider a
# smaller value here.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3)

In [None]:
def train_one_epoch(model, optimizer, data_loader, device):
    """Run one optimisation pass over ``data_loader``.

    Args:
        model: Detection model that returns a dict of losses when called with
            images and targets in training mode.
        optimizer: Optimizer updating ``model``'s parameters.
        data_loader: Iterable yielding ``(images, targets)`` batches.
        device: Device the batch tensors are moved to.

    Returns:
        Sum of the per-batch total losses divided by ``len(data_loader)``.
    """
    model.train()
    running_loss = 0
    for batch_images, batch_targets in tqdm(data_loader, desc="Training"):
        batch_images = [image.to(device) for image in batch_images]
        batch_targets = [
            {key: value.to(device) for key, value in target.items()}
            for target in batch_targets
        ]

        optimizer.zero_grad()
        loss_terms = model(batch_images, batch_targets)
        # The optimised objective is the plain sum of every loss component.
        batch_loss = sum(loss_terms.values())
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    return running_loss / len(data_loader)


In [None]:
def validate(model, data_loader, device):
    """Compute the mean total loss of ``model`` over ``data_loader``.

    Args:
        model: Detection model that returns a dict of losses when called with
            images and targets in training mode.
        data_loader: Iterable yielding ``(images, targets)`` batches.
        device: Device the batch tensors are moved to.

    Returns:
        Average of the per-batch total losses over the batches actually
        evaluated, or ``0`` when no batch was evaluated.

    Note:
        The model is left in eval mode on return.
    """
    # The model is called with targets to obtain its loss dict, which requires
    # training mode. BatchNorm and Dropout layers are switched back to eval so
    # validation data cannot update the running statistics (which happens even
    # under no_grad) and the loss is not perturbed by dropout noise.
    model.train()
    for module in model.modules():
        if isinstance(module, (nn.modules.batchnorm._BatchNorm, nn.modules.dropout._DropoutNd)):
            module.eval()

    val_loss = 0
    num_batches = 0
    with torch.no_grad():
        for images, targets in tqdm(data_loader, desc="Validating"):
            if not images or not targets:
                continue

            images = [img.to(device) for img in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            loss_dict = model(images, targets)

            losses = sum(loss for loss in loss_dict.values())
            val_loss += losses.item()
            num_batches += 1

    model.eval()
    # Average over evaluated batches only, so skipped batches do not deflate the mean.
    return val_loss / num_batches if num_batches > 0 else 0


In [None]:
def evaluate_model(model, data_loader, device, iou_threshold=0.5):
    """Average, over images, the fraction of ground-truth boxes that get matched.

    A prediction matches the ground-truth box it overlaps most when that IoU
    is strictly above ``iou_threshold`` and the box has not been taken by an
    earlier prediction. An image with no ground truth scores 1.0 when there
    are no predictions and 0.0 otherwise.

    Args:
        model: Detection model returning one dict with a ``"boxes"`` entry per image.
        data_loader: Iterable yielding ``(images, targets)`` batches.
        device: Device the images are moved to.
        iou_threshold: Minimum (exclusive) IoU for a match.

    Returns:
        Mean per-image score, or ``0`` when the loader yields no images.
    """

    def compute_iou(pred_boxes, gt_boxes):
        """Score one image: matched ground-truth boxes / ground-truth boxes."""
        num_gt = len(gt_boxes)
        num_pred = len(pred_boxes)
        if num_gt == 0:
            return 0.0 if num_pred else 1.0
        if num_pred == 0:
            return 0.0

        claimed = set()
        for overlaps in box_iou(pred_boxes, gt_boxes):
            # Best-overlapping ground-truth box for this prediction.
            best_iou, best_gt = overlaps.max(0)
            gt_index = best_gt.item()
            if best_iou.item() > iou_threshold and gt_index not in claimed:
                claimed.add(gt_index)
        # Every accepted prediction claims a distinct box, so the set size is
        # the number of correct detections.
        return len(claimed) / num_gt

    model.eval()
    score_sum = 0
    image_count = 0
    with torch.no_grad():
        for images, targets in tqdm(data_loader, desc="Evaluating"):
            outputs = model([img.to(device) for img in images])
            for output, target in zip(outputs, targets):
                score_sum += compute_iou(output["boxes"].cpu(), target["boxes"].cpu())
                image_count += 1

    if image_count > 0:
        return score_sum / image_count
    return 0

In [None]:
# Per-epoch loss history, consumed by the plotting cell.
train_losses = []
val_losses = []
best_loss = float('inf')
counter = 0  # consecutive epochs without a new best validation loss

# Make sure the checkpoint directory exists before the first save.
checkpoint_path = 'iti/Computer_Vision/Day02/best_model.pth'
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

for epoch in range(NUM_EPOCHS):
    model.train()
    train_loss = train_one_epoch(model, optimizer, train_loader, device)
    train_losses.append(train_loss)

    val_loss = validate(model, val_loader, device)
    val_losses.append(val_loss)

    # ReduceLROnPlateau is driven by the validation loss.
    scheduler.step(val_loss)

    print(f"Epoch {epoch+1}/{NUM_EPOCHS} - "
          f"Train Loss: {train_loss:.4f}, "
          f"Val Loss: {val_loss:.4f}, "
          f"LR: {optimizer.param_groups[0]['lr']:.6f}")

    # Keep only the weights of the best epoch so far.
    if val_loss < best_loss:
        best_loss = val_loss
        counter = 0
        torch.save(model.state_dict(), checkpoint_path)
        print("Saved new best model")
    else:
        counter += 1
        print(f"No improvement for {counter} epoch(s)")

    # Early stopping. The constant is PATIENCE; the former lower-case
    # `patience` was never defined and raised NameError on the first epoch.
    if counter >= PATIENCE:
        print(f"Early stopping triggered after {epoch+1} epochs.")
        break


In [None]:
def plot_losses(train_losses, val_losses):
    """Draw the training and validation loss histories on one set of axes.

    Args:
        train_losses: Sequence of per-epoch training losses.
        val_losses: Sequence of per-epoch validation losses.
    """
    _, ax = plt.subplots(figsize=(10, 5))
    curves = (
        (train_losses, 'Training Loss'),
        (val_losses, 'Validation Loss'),
    )
    for history, legend_label in curves:
        ax.plot(history, label=legend_label)
    ax.set_xlabel('Epochs')
    ax.set_ylabel('Loss')
    ax.set_title('Training and Validation Loss Curves')
    ax.legend()
    ax.grid(True)
    plt.show()

plot_losses(train_losses, val_losses)

In [None]:
# Restore the best checkpoint written by the training loop. map_location remaps
# the stored tensors onto the current device, so a checkpoint saved on a GPU
# runtime still loads on a CPU-only one.
best_state = torch.load("iti/Computer_Vision/Day02/best_model.pth", map_location=device)
model.load_state_dict(best_state)
accuracy = evaluate_model(model, val_loader, device, IOU_THRESHOLD)
print(f"Model Accuracy (IoU > {IOU_THRESHOLD}): {accuracy:.4f}")

In [None]:
def visualize_ground_truth(image_dir, label_dir, num_samples=5):
    """Show the first samples next to a copy with their ground-truth boxes drawn.

    Args:
        image_dir: Directory containing the images.
        label_dir: Directory containing the matching ``.txt`` box files.
        num_samples: Maximum number of samples to display.
    """
    dataset = Load_Dataset(image_dir, label_dir, transforms=T.ToTensor())
    for i in range(min(num_samples, len(dataset))):
        img, target = dataset[i]
        # CHW tensor -> HWC array. OpenCV needs contiguous memory to draw in place.
        img = np.ascontiguousarray(img.permute(1, 2, 0).numpy())

        # Draw ground truth boxes
        img_gt = img.copy()
        for box in target['boxes']:
            x1, y1, x2, y2 = map(int, box)
            # ToTensor yields floats in [0, 1], so full green is 1.0. Using 255
            # made matplotlib clip the image and emit a warning.
            cv2.rectangle(img_gt, (x1, y1), (x2, y2), (0.0, 1.0, 0.0), 2)

        plt.figure(figsize=(12, 6))
        plt.subplot(1, 2, 1)
        plt.imshow(img)
        plt.title("Original Image")
        plt.axis('off')

        plt.subplot(1, 2, 2)
        plt.imshow(img_gt)
        plt.title("Ground Truth Boxes")
        plt.axis('off')
        plt.show()

# Use the same repository-relative paths as the training cells: the kernel's
# working directory is the parent of the clone, not the Day02 folder.
visualize_ground_truth("iti/Computer_Vision/Day02/TestImages", "iti/Computer_Vision/Day02/TestGroundTruth")

In [None]:
def visualize_predictions(model, image_dir, label_dir, num_images=5):
    """Show ground-truth boxes and predicted boxes side by side.

    NOTE: a later cell defines another function with this same name and a
    different signature; once that cell runs it replaces this one.

    Args:
        model: Detection model returning one dict with a ``'boxes'`` entry per image.
        image_dir: Directory containing the images.
        label_dir: Directory containing the matching ``.txt`` box files.
        num_images: Maximum number of samples to display.
    """
    model.eval()
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    # Deterministic preprocessing: a random flip here would mirror the image
    # while the ground-truth boxes stayed where they were.
    eval_transform = T.Compose([
        T.ToTensor(),
        T.Normalize(mean=mean.tolist(), std=std.tolist()),
    ])
    dataset = Load_Dataset(image_dir, label_dir, transforms=eval_transform)

    for i in range(min(num_images, len(dataset))):
        img, target = dataset[i]
        img_tensor = img.unsqueeze(0).to(device)

        with torch.no_grad():
            predictions = model(img_tensor)[0]

        # Convert image for display: CHW -> HWC, undo the normalisation, clamp.
        # OpenCV needs a contiguous buffer to draw in place.
        img_np = img.permute(1, 2, 0).cpu().numpy() * std + mean
        img_np = np.ascontiguousarray(np.clip(img_np, 0, 1), dtype=np.float32)

        # Each panel gets its own copy, drawn BEFORE imshow. Drawing on one
        # shared array after imshow left the first panel without boxes and
        # leaked the ground truth into the prediction panel.
        gt_view = img_np.copy()
        for box in target['boxes']:
            x1, y1, x2, y2 = map(int, box)
            cv2.rectangle(gt_view, (x1, y1), (x2, y2), (0.0, 1.0, 0.0), 2)

        pred_view = img_np.copy()
        for box in predictions['boxes'].cpu().numpy():
            x1, y1, x2, y2 = map(int, box)
            # RGB blue on a [0, 1] float image, matching the panel title.
            cv2.rectangle(pred_view, (x1, y1), (x2, y2), (0.0, 0.0, 1.0), 2)

        # Create plot
        plt.figure(figsize=(15, 6))

        # Ground Truth
        plt.subplot(1, 2, 1)
        plt.imshow(gt_view)
        plt.title("Ground Truth")
        plt.axis('off')

        # Predictions
        plt.subplot(1, 2, 2)
        plt.imshow(pred_view)
        plt.title("Predictions (Blue)")
        plt.axis('off')

        plt.show()

# Use the same repository-relative paths as the training cells: the kernel's
# working directory is the parent of the clone, not the Day02 folder.
visualize_predictions(model, "iti/Computer_Vision/Day02/TestImages", "iti/Computer_Vision/Day02/TestGroundTruth")

In [None]:
def visualize_predictions(model, image_dir, num_images=5):
    """Draw the model's predicted boxes on images that need no label files.

    The sorted image list is sliced to ``[num_images:2*num_images]``, i.e. the
    first ``num_images`` files are skipped and the next ``num_images`` are shown.

    NOTE: this definition reuses the name of a function defined in an earlier
    cell (which also takes a ``label_dir``) and replaces it once this cell runs.

    Args:
        model: Detection model returning one dict with a ``'boxes'`` entry per image.
        image_dir: Directory containing the images.
        num_images: Number of images to skip and, after that, to display.
    """
    model.eval()
    # Same case-insensitive extension rule as Load_Dataset.
    image_files = sorted(
        f for f in os.listdir(image_dir)
        if f.lower().endswith(('.jpg', '.png', '.jpeg', '.tif'))
    )[num_images:2*num_images]

    # Deterministic preprocessing: with a random flip in the pipeline the
    # predicted boxes would refer to a mirrored image while being drawn on the
    # original one.
    preprocess = T.Compose([
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    for img_name in image_files:
        img_path = os.path.join(image_dir, img_name)
        img = Image.open(img_path).convert("RGB")
        img_tensor = preprocess(img).unsqueeze(0).to(device)

        with torch.no_grad():
            prediction = model(img_tensor)

        # Boxes are drawn on the untouched uint8 RGB image, so 255 is full green.
        img_np = np.array(img)
        for box in prediction[0]['boxes'].cpu().numpy():
            x1, y1, x2, y2 = map(int, box)
            cv2.rectangle(img_np, (x1, y1), (x2, y2), (0, 255, 0), 2)

        plt.figure(figsize=(12, 8))
        plt.imshow(img_np)
        plt.axis('off')
        plt.title(f'Predictions: {img_name}')
        plt.show()

visualize_predictions(model, "iti/Computer_Vision/Day02/TestImages")