In [None]:
#!pip install albumentations
#!pip install torch torchvision

In [None]:
# Mount Google Drive at /content/drive so the dataset files referenced by the
# later cells are reachable from this runtime.
# NOTE(review): `google.colab` is presumably only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import albumentations as A
from albumentations.pytorch import ToTensorV2
import torch
from torch.utils.data import Dataset, DataLoader
import cv2
import os
import numpy as np
import json

  check_for_updates()


In [None]:
# Locations of the COCO val2017 annotation file and image folder on the mounted Drive.
# NOTE(review): a later cell exposes the same two locations as `ann_file` / `img_dir`.
json_path = '/content/drive/My Drive/instances_val2017/instances_val2017.json'
images_dir = '/content/drive/My Drive/val2017/'

In [None]:
# Define the CocoDataset class
class CocoDataset(Dataset):
    """COCO-style detection dataset yielding ``(image, target)`` pairs.

    Args:
        images: List of COCO image records; each must provide ``'id'`` and
            ``'file_name'``.
        annotations: List of COCO annotation records; each must provide
            ``'image_id'``, ``'bbox'`` (``[x, y, width, height]``) and
            ``'category_id'``.
        category_mapping: Stored on the instance; not used by the dataset itself.
        img_dir: Directory that each ``file_name`` is joined onto.
        transform: Optional callable invoked as
            ``transform(image=..., bboxes=..., labels=...)`` and expected to
            return a mapping with the keys ``'image'``, ``'bboxes'`` and
            ``'labels'``.
    """

    def __init__(self, images, annotations, category_mapping, img_dir, transform=None):
        self.images = images
        self.annotations = annotations
        self.category_mapping = category_mapping
        self.img_dir = img_dir
        self.transform = transform
        self.image_id_to_annotations = self._group_annotations_by_image()

    def _group_annotations_by_image(self):
        """Return a dict mapping each ``image_id`` to the list of its annotations."""
        image_id_to_annotations = {}
        for ann in self.annotations:
            image_id_to_annotations.setdefault(ann['image_id'], []).append(ann)
        return image_id_to_annotations

    def __len__(self):
        """Number of image records in the dataset."""
        return len(self.images)

    @staticmethod
    def _collect_boxes_and_labels(annotations, image_size=None):
        """Convert COCO ``[x, y, w, h]`` annotations into corner boxes and labels.

        Args:
            annotations: Iterable of annotation dicts with ``'bbox'`` and
                ``'category_id'``.
            image_size: Optional ``(height, width)``; when given, boxes are
                clipped to the image so they never extend past its borders.

        Returns:
            ``(boxes, labels)`` where ``boxes`` is a float array of shape
            ``(N, 4)`` in ``[x_min, y_min, x_max, y_max]`` order (``(0, 4)``
            when nothing is left) and ``labels`` is an int64 array of shape
            ``(N,)``. Boxes with no positive width or height are dropped.
        """
        boxes = []
        labels = []
        for ann in annotations:
            x, y, width, height = ann['bbox']
            x_min, y_min, x_max, y_max = x, y, x + width, y + height
            if image_size is not None:
                img_h, img_w = image_size
                x_min, x_max = max(x_min, 0), min(x_max, img_w)
                y_min, y_max = max(y_min, 0), min(y_max, img_h)
            # Degenerate boxes are rejected by bbox-aware transforms and by
            # detection models, so they are skipped here.
            if x_max <= x_min or y_max <= y_min:
                continue
            boxes.append([x_min, y_min, x_max, y_max])
            labels.append(ann['category_id'])

        # reshape keeps the (N, 4) layout even when the list is empty.
        boxes = np.array(boxes, dtype=np.float64).reshape(-1, 4)
        labels = np.array(labels, dtype=np.int64)
        return boxes, labels

    @staticmethod
    def _build_target(boxes, labels):
        """Pack boxes and labels into the target dict of tensors.

        Returns:
            ``{"boxes": float32 tensor (N, 4), "labels": int64 tensor (N,)}``.
            Boxes that are empty (``x_max <= x_min`` or ``y_max <= y_min``),
            e.g. after a transform, are removed together with their labels.
        """
        boxes = torch.as_tensor(np.asarray(boxes, dtype=np.float32).reshape(-1, 4))
        labels = torch.as_tensor(np.asarray(labels).reshape(-1), dtype=torch.int64)
        keep = (boxes[:, 2] > boxes[:, 0]) & (boxes[:, 3] > boxes[:, 1])
        return {"boxes": boxes[keep], "labels": labels[keep]}

    def __getitem__(self, idx):
        """Load the image at ``idx`` and return ``(image, target)``.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        image_info = self.images[idx]
        img_path = os.path.join(self.img_dir, image_info['file_name'])  # Use file_name for path
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert to RGB

        # Get annotations (images without any annotation yield empty arrays)
        annotations = self.image_id_to_annotations.get(image_info['id'], [])
        boxes, labels = self._collect_boxes_and_labels(annotations, image_size=image.shape[:2])

        # Apply transformations
        if self.transform:
            transformed = self.transform(image=image, bboxes=boxes, labels=labels)
            image = transformed['image']
            boxes = transformed['bboxes']
            labels = transformed['labels']

        # Convert to PyTorch tensors
        target = self._build_target(boxes, labels)

        return image, target


In [None]:
# Preprocessing + augmentation pipeline.
# Boxes are passed in pascal_voc form and their classes travel in the `labels` field.
transform = A.Compose(
    [
        # Geometry: fixed 416x416 input size
        A.Resize(416, 416),
        # Photometric jitter, applied to 20% of the samples each
        A.RandomBrightnessContrast(p=0.2),
        A.GaussianBlur(p=0.2),
        # Geometric augmentation: mirror and rotate by up to 20 degrees
        A.HorizontalFlip(p=0.5),
        A.Rotate(limit=20, p=0.5),
        # Colour augmentation, including an occasional grayscale conversion
        A.ColorJitter(p=0.3),
        A.ToGray(p=0.1),
        # Per-channel normalization followed by conversion to a PyTorch tensor
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
    ],
    bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels']),
)


In [None]:
# Load Dataset and file
# Reuse the locations configured in the path cell instead of repeating the
# literals, so the dataset only has to be relocated in one place.
ann_file = json_path
img_dir = images_dir

# Load Annotation
# JSON is UTF-8 encoded; pass the encoding explicitly rather than relying on
# the runtime's locale default.
with open(ann_file, 'r', encoding='utf-8') as f:
    coco_data = json.load(f)

In [None]:
def detection_collate_fn(batch):
    """Regroup a list of ``(image, target)`` samples into ``(images, targets)``.

    Each target holds a per-image number of boxes, so the samples are kept as
    tuples instead of being stacked into single tensors. A named module-level
    function (unlike a lambda) can be pickled, which DataLoader needs when
    worker processes are started with the spawn method.
    """
    return tuple(zip(*batch))


# List of images
images = coco_data['images']
# List of annotations
annotations = coco_data['annotations']
# List of categories
categories = coco_data['categories']

# Create category mapping (category id -> category name)
category_mapping = {category['id']: category['name'] for category in categories}

# Create dataset and data loader
dataset = CocoDataset(images, annotations, category_mapping, img_dir, transform=transform)
data_loader = DataLoader(dataset, batch_size=16, shuffle=True, collate_fn=detection_collate_fn)

In [None]:
# Test data loading: inspect the first batch only. Iterating the whole loader
# would decode every image and flood the cell output.
# Dedicated loop names keep the global `images` list of COCO records intact.
for batch_images, batch_targets in data_loader:
    print(f"Images batch size: {len(batch_images)}")
    print(f"Target batch size: {len(batch_targets)}")
    print("Sample target:", batch_targets[0])  # Bounding boxes and labels of the first sample
    break

Images batch size: 16
Target batch size: 16
Sample target: {'boxes': tensor([[160.9790,   0.4593, 222.6770,  83.6593],
        [393.8740, 248.0573, 415.8570, 298.5753]]), 'labels': tensor([13,  8])}
Images batch size: 16
Target batch size: 16
Sample target: {'boxes': tensor([[308.7039,   0.0000, 416.0000,  25.5048],
        [  0.0000,   0.0000,  24.2009,  83.6058],
        [  0.0000,  27.6993,  16.0818, 333.6871],
        [150.6022,   8.6241, 290.4800, 188.4184],
        [140.9520,  14.9981, 316.1199, 144.4031],
        [ 43.6092,   0.0000, 183.1267, 162.0476],
        [ 86.7861,   2.7616, 140.2574,  54.8010],
        [260.4970,  10.3381, 416.0000, 416.0000],
        [281.8682,  83.9569, 364.3212, 198.9024],
        [ 13.1020,   0.0000,  62.9382,  88.1252],
        [  0.0000,  58.0650, 276.3564, 364.0528],
        [200.3977, 122.9210, 316.1078, 189.3670],
        [188.1974, 164.5737, 396.5707, 416.0000],
        [133.8071, 159.5280, 277.9674, 416.0000],
        [255.9981, 249.5741, 362

In [None]:
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FasterRCNN_ResNet50_FPN_Weights

In [None]:
# Check whether a GPU is available and pick the device accordingly

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Download the pretrained Faster R-CNN model
weights = FasterRCNN_ResNet50_FPN_Weights.DEFAULT
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=weights)

# Move the model to the available device
model = model.to(device)

# Save the model weights to a file (optional)
# File name corrected from "faster_rcon_..." to "faster_rcnn_..." so it matches
# the naming of the modified-model checkpoint written later in the notebook.
torch.save(model.state_dict(), "faster_rcnn_resnet50_fpn.pth")
print("Model downloaded and saved successfully!")

Using device: cpu


Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:01<00:00, 149MB/s]


Model downloaded and saved successfully!


In [None]:
# Number of classes the new head predicts (background included)
num_classes = 91

# Width of the feature vector feeding the existing classification layer
in_features = model.roi_heads.box_predictor.cls_score.in_features
print(f"Number of input features for the classifier: {in_features}")

# Swap the pre-trained box predictor for a fresh one sized for `num_classes`
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
print(f"Classifier head replaced to accommodate {num_classes} classes.")

# The new head is created separately from the model, so move everything
# to the selected device again
model = model.to(device)

# Persist the modified weights (optional)
torch.save(model.state_dict(), "faster_rcnn_resnet50_fpn_modified.pth")
print("Model modified successfully!")

Number of input features for the classifier: 1024
Classifier head replaced to accommodate 91 classes.
Model modified successfully!


In [None]:
# Training loop
num_epochs = 10  # Set the number of epochs
model.train()  # Set the model to training mode

# Define an optimizer over the trainable parameters only
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

for epoch in range(num_epochs):
    # Accumulate the loss over the epoch so the reported value reflects every
    # batch rather than only the last one.
    epoch_loss = 0.0
    num_batches = 0

    for images, targets in data_loader:
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass: the model is called with targets and returns a dict of losses
        loss_dict = model(images, targets)

        # Compute the total loss
        losses = sum(loss for loss in loss_dict.values())

        # Backward pass
        losses.backward()
        optimizer.step()

        epoch_loss += losses.item()
        num_batches += 1

    if num_batches == 0:
        # Without this guard the summary below would fail on an undefined
        # (or stale) loss value.
        raise RuntimeError("data_loader produced no batches; nothing to train on")

    mean_loss = epoch_loss / num_batches
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {mean_loss:.4f}")



In [None]:
from torchvision.transforms import functional as F
from PIL import Image  # Import PIL for image handling
import matplotlib.pyplot as plt

In [None]:
# Class names indexed by COCO category id (index 0 is the background class).
# The notebook trains on raw `category_id` values with a 91-output head, and
# COCO ids run from 1 to 90 with gaps. The unused ids are filled with "N/A"
# so that `COCO_INSTANCE_CATEGORY_NAMES[label]` returns the right name.
COCO_INSTANCE_CATEGORY_NAMES = [
    "__background__", "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck",  # 0-8
    "boat", "traffic light", "fire hydrant", "N/A", "stop sign", "parking meter", "bench", "bird",  # 9-16
    "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "N/A",  # 17-26
    "backpack", "umbrella", "N/A", "N/A", "handbag", "tie", "suitcase", "frisbee", "skis",  # 27-35
    "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",  # 36-42
    "tennis racket", "bottle", "N/A", "wine glass", "cup", "fork", "knife", "spoon", "bowl",  # 43-51
    "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut",  # 52-60
    "cake", "chair", "couch", "potted plant", "bed", "N/A", "dining table", "N/A", "N/A",  # 61-69
    "toilet", "N/A", "TV", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave",  # 70-78
    "oven", "toaster", "sink", "refrigerator", "N/A", "book", "clock", "vase", "scissors",  # 79-87
    "teddy bear", "hair drier", "toothbrush"  # 88-90
]

In [None]:
# Set the model to evaluation mode
model.eval()

# Function to evaluate the model on a single image
def evaluate_model(image_path):
    """Run the detector on one image file and print detections scoring above 0.5.

    NOTE: a later cell defines ``evaluate_model`` again (adding visualization);
    once that cell has run, it replaces this definition.

    Args:
        image_path: Path of the image file to open.
    """
    # Open inside a context manager so the file handle is released;
    # convert() returns a separate, fully loaded RGB image.
    with Image.open(image_path) as img:
        image = img.convert("RGB")
    image_tensor = F.to_tensor(image).unsqueeze(0).to(device)  # Add batch dimension

    # Re-assert eval mode here: if the training cell was run after the
    # cell-level model.eval(), the model would still be in training mode.
    model.eval()
    with torch.no_grad():
        predictions = model(image_tensor)

    # Print the predictions
    print("Predictions:")
    # Get boxes, labels and scores of the single image in the batch
    boxes = predictions[0]['boxes']
    labels = predictions[0]['labels']
    scores = predictions[0]['scores']
    # Loop through all predictions, keeping the original detection index
    for i, (box, label, score) in enumerate(zip(boxes, labels, scores)):
        # Check if prediction score is higher than threshold
        if score.item() > 0.5:
            print(f"Object {i + 1}:")
            print(f"  Bounding Box: {box.cpu().numpy()}")
            print(f"  Label: {label.cpu().numpy()}")
            print(f"  Score: {score.cpu().numpy()}")



In [None]:
def visualize_image(image, boxes, labels, scores):
    """Display `image` with every detection scoring above 0.5 outlined in red.

    Each drawn box gets a caption with its numeric label and its score.
    `boxes`, `labels` and `scores` are iterated in parallel; each box is
    converted with ``.cpu().numpy()`` and unpacked as ``x1, y1, x2, y2``.
    """
    plt.figure(figsize=(12, 8))
    plt.imshow(image)
    axes = plt.gca()

    for box, label, score in zip(boxes, labels, scores):
        # Skip low-confidence detections
        if not score > 0.5:
            continue

        left, top, right, bottom = box.cpu().numpy()
        outline = plt.Rectangle(
            (left, top),
            right - left,
            bottom - top,
            fill=False,
            color='red',
            linewidth=2,
        )
        axes.add_patch(outline)

        caption = f'Label: {label.item()}, Score: {score.item():.2f}'
        axes.text(
            left,
            top,
            caption,
            bbox=dict(facecolor='yellow', alpha=0.5),
            fontsize=12,
            color='black',
        )

    plt.axis('off')
    plt.show()

  # Function to visualize the image and predictions
def visualize_image1(image, boxes, labels):
    """Display `image` with every given box outlined in red and captioned by class name.

    The caption is looked up in COCO_INSTANCE_CATEGORY_NAMES by the label
    value; labels outside the table's index range are shown as "Unknown".
    """
    plt.figure(figsize=(12, 8))
    plt.imshow(image)
    axes = plt.gca()

    for box, label in zip(boxes, labels):
        left, top, right, bottom = box.cpu().numpy()
        outline = plt.Rectangle(
            (left, top),
            right - left,
            bottom - top,
            fill=False,
            color='red',
            linewidth=2,
        )
        axes.add_patch(outline)

        # Resolve the label index to a class name, guarding the table bounds
        class_id = label.item()
        in_range = 0 <= class_id < len(COCO_INSTANCE_CATEGORY_NAMES)
        class_name = COCO_INSTANCE_CATEGORY_NAMES[class_id] if in_range else "Unknown"

        axes.text(
            left,
            top,
            f'{class_name}',
            bbox=dict(facecolor='yellow', alpha=0.5),
            fontsize=12,
            color='black',
        )

    plt.axis('off')
    plt.show()


In [None]:
# Set the model to evaluation mode
model.eval()

# Function to evaluate the model on a single image
def evaluate_model(image_path):
    """Run the detector on one image file, print and draw detections scoring above 0.5.

    Args:
        image_path: Path of the image file to open.
    """
    # Open inside a context manager so the file handle is released;
    # convert() returns a separate, fully loaded RGB image.
    with Image.open(image_path) as img:
        image = img.convert("RGB")
    image_tensor = F.to_tensor(image).unsqueeze(0).to(device)  # Add batch dimension

    # Re-assert eval mode here: if the training cell was run after the
    # cell-level model.eval(), the model would still be in training mode.
    model.eval()
    with torch.no_grad():
        predictions = model(image_tensor)

    # Print the predictions
    print("Predictions:")
    # Get boxes, labels and scores of the single image in the batch
    boxes = predictions[0]['boxes']
    labels = predictions[0]['labels']
    scores = predictions[0]['scores']
    # Loop through all predictions, keeping the original detection index
    for i, (box, label, score) in enumerate(zip(boxes, labels, scores)):
        # Check if prediction score is higher than threshold
        if score.item() > 0.5:
            print(f"Object {i + 1}:")
            print(f"  Bounding Box: {box.cpu().numpy()}")
            print(f"  Label: {label.cpu().numpy()}")
            print(f"  Score: {score.cpu().numpy()}")

    # Visualize the image with bounding boxes

    visualize_image(image, boxes, labels, scores)
    # visualize_image1 takes no scores and draws every box it receives, so
    # hand it only the detections that pass the same 0.5 threshold.
    confident = scores > 0.5
    visualize_image1(image, boxes[confident], labels[confident])


In [None]:
# Example usage: Evaluate the model on a test image.
# Replace the path to try another image, e.g.
# /content/drive/My Drive/val2017/000000581781.jpg
# NOTE(review): this path is spelled "MyDrive" while the dataset cells use
# "My Drive" — confirm both resolve on the mounted drive.
test_image_path = "/content/drive/MyDrive/val2017/000000000139.jpg"
evaluate_model(test_image_path)