# Bounding boxes

In [None]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms

from torchvision.utils import draw_bounding_boxes
from torchvision.ops import nms

from PIL import Image
import matplotlib.pyplot as plt

import torch.nn as nn
from torchvision.models import vgg16, VGG16_Weights

## Image tensors

In [None]:
# Load the example picture and define a box as four pixel coordinates
# (presumably [x_min, y_min, x_max, y_max] -- confirm against the consumer).
image = Image.open("images/cat.jpeg")
bbox = [10, 10, 200, 200]

# Box as a tensor with a leading batch dimension: shape (4,) -> (1, 4)
bbox_tensor = torch.tensor(bbox).unsqueeze(0)

# Preprocessing pipeline: Resize(224) with a single int scales the shorter
# edge to 224 px (aspect ratio kept); ToTensor yields a float tensor in [0, 1].
transform = transforms.Compose([transforms.Resize(224), transforms.ToTensor()])

# Run the pipeline on the picture and inspect the result
image_tensor = transform(image)
print(image_tensor)

## Drawing a bounding box

In [None]:
# Box to draw, as a (1, 4) tensor: one box, four corner coordinates
bbox = [50, 25, 160, 160]
bbox_tensor = torch.tensor(bbox).unsqueeze(0)

# draw_bounding_boxes is given a uint8 image here, so rescale the [0, 1]
# floats to [0, 255] first.
# NOTE: `image_tensor` is rebound in place; re-running this cell without
# re-running the previous one would scale the already-converted tensor again.
image_tensor = image_tensor.mul(255).type(torch.uint8)
img_bbox = draw_bounding_boxes(
    image_tensor,
    bbox_tensor,
    width=3,
    colors="red",
)

# Transform the annotated tensor back into a PIL image for plotting
transform = transforms.Compose([transforms.ToPILImage()])

plt.imshow(transform(img_bbox))
plt.show()

# Evaluating object recognition models

## Bounding boxes prediction

In [None]:
# Run inference with `model` on `test_image` (both are defined outside this
# cell) and pull out the predicted boxes and scores for the first image.

# Switch to inference mode before predicting.
# NOTE(review): this assumes `model` is a torchvision-style detection
# nn.Module, which only returns per-image prediction dicts in eval mode --
# confirm against where `model` is created. The call is idempotent.
model.eval()

# Get model's prediction; gradients are not needed for inference
with torch.no_grad():
    output = model(test_image)

# Extract boxes from the first image's prediction
boxes = output[0]["boxes"]

# Extract the matching scores from the same prediction
scores = output[0]["scores"]

print(boxes, scores)

## Calculate NMS

In [None]:
# Non-maximum suppression over the predicted `boxes` / `scores` (both come
# from the prediction cell above).

# Set the IoU threshold: a box overlapping a higher-scoring box by more than
# this is discarded
iou_threshold = 0.5

# Apply non-max suppression. Pass the variable rather than a repeated literal
# so that changing `iou_threshold` actually changes the result.
# nms returns the indices of the boxes that are kept.
box_indices = nms(boxes=boxes, scores=scores, iou_threshold=iou_threshold)

# Filter boxes down to the kept indices
filtered_boxes = boxes[box_indices]

print("Filtered Boxes:", filtered_boxes)

# Object detection using R-CNN

## Pre-trained model backbone

In [None]:
# VGG16 initialised with the default pretrained weights
vgg_model = vgg16(weights=VGG16_Weights.DEFAULT)

# Number of input features expected by the first layer of VGG's classifier;
# the heads defined later use this as their input size
input_dim = vgg_model.classifier[0].in_features

# Backbone: a new Sequential over the modules of `vgg_model.features`
# (the layer objects are shared with `vgg_model`, not copied)
backbone = nn.Sequential(*vgg_model.features.children())

# Show the backbone architecture
print(backbone)

## Classifier block

In [None]:
# Number of output classes for the classification head
num_classes = 2

# Classification head: input_dim -> 512 -> num_classes, with a ReLU between
# the two linear layers. `input_dim` comes from the backbone cell.
classifier = nn.Sequential(
    nn.Linear(input_dim, 512),
    nn.ReLU(),
    nn.Linear(512, num_classes),
)

## Box regressor block

In [None]:
# A bounding box is regressed as four coordinate values
num_coordinates = 4

# Box regression head: input_dim -> 32 -> num_coordinates, with a ReLU between
# the two linear layers. `input_dim` comes from the backbone cell.
bb = nn.Sequential(
    nn.Linear(input_dim, 32),
    nn.ReLU(),
    nn.Linear(32, num_coordinates),
)