### Albumentations
### The transform pipeline uses Albumentations

In [None]:
!pip install albumentations
!pip install torch torchvision
!pip install albumentations torch torchvision





In [None]:
import torch
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.utils.data import Dataset, DataLoader
import cv2
import numpy as np
import os
import json

  check_for_updates()


In [None]:
# Location of the COCO-format annotation JSON on the mounted Google Drive.
# NOTE(review): the path has a directory literally named "instances_val2017.json"
# that contains the file of the same name — confirm this matches the Drive layout.
json_path = '/content/drive/MyDrive/Object Recognition Dataset/instances_val2017.json/instances_val2017.json'
# Directory holding the image files referenced by the annotation JSON.
images_dir = '/content/drive/MyDrive/Object Recognition Dataset/val2017'

In [None]:
# Define the CocoDataset class
class CocoDataset(Dataset):
    """Object-detection dataset built from COCO-style image/annotation records.

    Args:
        images: Sequence of image records; each must provide ``'id'`` and
            ``'file_name'``.
        annotations: Sequence of annotation records; each must provide
            ``'image_id'``, ``'bbox'`` (``[x, y, width, height]``) and
            ``'category_id'``.
        category_mapping: Mapping stored on the instance for callers; it is
            not consulted by the dataset itself.
        img_dir: Directory that ``file_name`` is resolved against.
        transform: Optional callable invoked as
            ``transform(image=..., bboxes=..., labels=...)`` that returns a
            mapping with the keys ``'image'``, ``'bboxes'`` and ``'labels'``.
    """

    def __init__(self, images, annotations, category_mapping, img_dir, transform=None):
        self.images = images
        self.annotations = annotations
        self.category_mapping = category_mapping
        self.img_dir = img_dir
        self.transform = transform
        # Index annotations once so __getitem__ is a dictionary lookup rather
        # than a scan over every annotation.
        self.image_id_to_annotations = self._group_annotations_by_image()

    def _group_annotations_by_image(self):
        """Return a dict mapping each ``image_id`` to its list of annotations."""
        grouped = {}
        for ann in self.annotations:
            grouped.setdefault(ann['image_id'], []).append(ann)
        return grouped

    def __len__(self):
        """Return the number of image records."""
        return len(self.images)

    @staticmethod
    def _to_target(boxes, labels):
        """Pack boxes and labels into the target dict returned by ``__getitem__``.

        ``boxes`` is always reshaped to ``(N, 4)`` so that an image without
        any annotation yields a ``(0, 4)`` tensor instead of a ``(0,)`` one.

        Returns:
            ``{"boxes": float32 tensor (N, 4), "labels": int64 tensor (N,)}``.
        """
        box_array = np.asarray(boxes, dtype=np.float32).reshape(-1, 4)
        label_array = np.asarray(labels, dtype=np.int64).reshape(-1)
        return {"boxes": torch.tensor(box_array), "labels": torch.tensor(label_array)}

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            ``(image, target)`` where ``image`` is the RGB image (or whatever
            ``transform`` returns under ``'image'``) and ``target`` is the
            dict produced by ``_to_target`` with boxes in
            ``[x_min, y_min, x_max, y_max]`` order.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        image_info = self.images[idx]
        img_path = os.path.join(self.img_dir, image_info['file_name'])
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert to RGB

        # Get annotations
        annotations = self.image_id_to_annotations.get(image_info['id'], [])

        boxes = []
        labels = []
        for ann in annotations:
            x, y, width, height = ann['bbox']
            if width <= 0 or height <= 0:
                # A box without positive extent cannot be expressed as a valid
                # [x_min, y_min, x_max, y_max] box, so skip it.
                continue
            boxes.append([x, y, x + width, y + height])
            labels.append(ann['category_id'])

        # Fixed shapes/dtypes so that the "no annotations" case is still a
        # well-formed (0, 4) box array with integer labels.
        boxes = np.array(boxes, dtype=np.float64).reshape(-1, 4)
        labels = np.array(labels, dtype=np.int64)

        # Apply transformations
        if self.transform is not None:
            transformed = self.transform(image=image, bboxes=boxes, labels=labels)
            image = transformed['image']
            boxes = transformed['bboxes']
            labels = transformed['labels']

        return image, self._to_target(boxes, labels)

In [None]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN

In [None]:
# Augmentation and preprocessing pipeline.
# Boxes are supplied in pascal_voc format ([x_min, y_min, x_max, y_max]) and the
# per-box class ids travel in the 'labels' field.
bbox_settings = A.BboxParams(format='pascal_voc', label_fields=['labels'])

augmentation_steps = [
    # Resize every image to 416x416.
    A.Resize(416, 416),
    # Photometric changes, each applied with its own probability.
    A.RandomBrightnessContrast(p=0.2),
    A.GaussianBlur(p=0.2),
    # Geometric changes.
    A.HorizontalFlip(p=0.5),
    A.Rotate(limit=20, p=0.5),
    # Colour changes.
    A.ColorJitter(p=0.3),
    A.ToGray(p=0.1),
    # NOTE(review): torchvision detection models normalise their inputs
    # internally, so this may normalise twice — confirm this is intended.
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    # Convert the image to a PyTorch tensor.
    ToTensorV2(),
]

transform = A.Compose(augmentation_steps, bbox_params=bbox_settings)

# Where the COCO-style annotation JSON and the image files live.
annotations_file = '/content/drive/MyDrive/Object Recognition Dataset/instances_val2017.json/instances_val2017.json'
img_dir = '/content/drive/MyDrive/Object Recognition Dataset/val2017'

# Parse the annotation file once and pull out the three sections used below.
with open(annotations_file, 'r') as f:
    coco_data = json.load(f)

images, annotations, categories = (
    coco_data[section] for section in ('images', 'annotations', 'categories')
)

# Category id -> category name lookup (kept for later display of labels).
category_mapping = dict((entry['id'], entry['name']) for entry in categories)

# Build the dataset on top of the parsed records and the augmentation pipeline.
dataset = CocoDataset(
    images=images,
    annotations=annotations,
    category_mapping=category_mapping,
    img_dir=img_dir,
    transform=transform,
)

# Collate function that zero-pads images of differing sizes into one batch
def collate_fn(batch):
    """Merge ``(image, target)`` samples into a padded image batch.

    Each image is indexed as ``(channels, height, width)``. Images are copied
    into the top-left corner of a zero-filled float32 canvas with 3 channels,
    as tall as the tallest image and as wide as the widest one.

    Returns:
        ``(images, targets)`` — a float32 tensor of shape
        ``(len(batch), 3, max_height, max_width)`` and the tuple of targets
        in their original order.
    """
    images, targets = zip(*batch)

    # Canvas size is driven by the largest image along each spatial axis.
    tallest = max(picture.shape[1] for picture in images)
    widest = max(picture.shape[2] for picture in images)

    # Allocate the whole batch at once and fill each slot in place; the area
    # not covered by an image stays zero (the padding).
    canvas = torch.zeros((len(images), 3, tallest, widest), dtype=torch.float32)
    for slot, picture in zip(canvas, images):
        slot[:, :picture.shape[1], :picture.shape[2]] = picture

    return canvas, targets


# DataLoader
def _unzip_batch(samples):
    """Turn a list of (image, target) samples into (images, targets) tuples."""
    return tuple(zip(*samples))


# Images are kept as a tuple of individual tensors rather than being padded
# and stacked; the padding alternative would be:
#data_loader = DataLoader(dataset, batch_size=2, shuffle=True, collate_fn=collate_fn)
data_loader = DataLoader(dataset, batch_size=6, shuffle=True, collate_fn=_unzip_batch)

In [None]:
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FasterRCNN_ResNet50_FPN_Weights

def create_custom_faster_rcnn(num_classes):
    """Build a pretrained Faster R-CNN (ResNet-50 FPN) with a new box head.

    Args:
        num_classes: Number of outputs passed to the replacement
            ``FastRCNNPredictor``.

    Returns:
        The model with ``roi_heads.box_predictor`` replaced.
    """
    # Start from the default pretrained weights for this architecture.
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(
        weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT
    )

    # The new predictor must accept the same feature width as the one it
    # replaces, so read that from the existing classification layer.
    roi_heads = detector.roi_heads
    feature_width = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = FastRCNNPredictor(feature_width, num_classes)

    return detector

# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Number of outputs for the replacement box predictor.
num_classes = 91

# Build the model and place it on the selected device.
model = create_custom_faster_rcnn(num_classes).to(device)

Using device: cuda:0


Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:01<00:00, 102MB/s]


In [None]:
import torch
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import time

# Optimise only the parameters that require gradients, using Adam.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.Adam(params, lr=1e-4)

# Multiply the learning rate by 0.1 every 3 scheduler steps (one step per epoch below).
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

# No separate loss function: in training mode the model call below returns a
# dict of loss terms, which are summed into the value that is back-propagated.

# Training loop
num_epochs = 10  # Number of epochs to train
for epoch in range(num_epochs):
    model.train()  # Training mode
    start_time = time.time()
    running_loss = 0.0

    for images, targets in data_loader:
        # Clear gradients left over from the previous batch.
        optimizer.zero_grad()

        # Put every image and every target tensor on the training device.
        images = [image.to(device) for image in images]
        targets = [
            {key: value.to(device) for key, value in target.items()}
            for target in targets
        ]

        # Forward pass, then add up the individual loss terms.
        loss_dict = model(images, targets)
        losses = sum(loss_dict.values())

        # Backward pass and parameter update.
        losses.backward()
        optimizer.step()

        running_loss += losses.item()

    # Advance the learning-rate schedule once per epoch.
    lr_scheduler.step()

    # Report the mean batch loss and the wall-clock time of this epoch.
    epoch_loss = running_loss / len(data_loader)
    epoch_time = time.time() - start_time
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Time: {epoch_time:.2f}s")

    # Checkpoint the weights after every epoch.
    torch.save(model.state_dict(), f"faster_rcnn_epoch_{epoch+1}.pth")

In [None]:
# Evaluation Mode
# `plt` is used for display below but was not imported anywhere in the notebook.
import matplotlib.pyplot as plt

# Detections scoring below this value are not drawn.
SCORE_THRESHOLD = 0.5
# Mean/std applied by A.Normalize in the transform pipeline; needed to undo the
# normalisation before the image can be displayed.
NORMALIZE_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
NORMALIZE_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32)

model.eval()
test_image, test_target = dataset[0]  # Get one sample
test_image = test_image.to(device).unsqueeze(0)

# Prediction
with torch.no_grad():
    predictions = model(test_image)

# Visualization: back to height x width x channel, undo the normalisation,
# and scale to 0-255. The clip guards against values that fall slightly
# outside [0, 1] after de-normalisation.
pred_image = test_image[0].permute(1, 2, 0).cpu().numpy()
pred_image = (pred_image * NORMALIZE_STD + NORMALIZE_MEAN) * 255.0
# OpenCV drawing functions need a C-contiguous uint8 array; the permuted view is not.
pred_image = np.ascontiguousarray(np.clip(pred_image, 0, 255).astype(np.uint8))

# Faster R-CNN returns 'boxes', 'labels' and 'scores' per image; there is no
# 'masks' entry (that belongs to Mask R-CNN), so only boxes and labels are drawn.
detections = predictions[0]
color = (0, 255, 0)
for box, label, score in zip(detections['boxes'], detections['labels'], detections['scores']):
    if score.item() < SCORE_THRESHOLD:
        continue
    x1, y1, x2, y2 = (int(coord) for coord in box.tolist())
    cv2.rectangle(pred_image, (x1, y1), (x2, y2), color, 2)
    text = category_mapping.get(label.item(), "Unknown")
    # Keep the caption inside the image when the box touches the top edge.
    cv2.putText(pred_image, text, (x1, max(y1 - 10, 10)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)

plt.figure(figsize=(12, 8))
plt.imshow(pred_image)
plt.axis("off")
plt.show()
