# Introduction to Object Detection

## Setup

In [None]:
import cv2
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import os
import re

import numpy as np

## Sliding Windows

In [None]:
image_path = "../images/pexels-frank-cone-2330502-cropped_e07fcbdc-984c-4f0a-92ab-8fd298a6101d.webp"

# cv2.imread reports failure by returning None instead of raising, so check
# explicitly; otherwise the problem only surfaces later inside cvtColor.
np_image = cv2.imread(image_path)
if np_image is None:
    raise OSError(f"cv2.imread could not read image: {image_path}")
# OpenCV loads images in BGR channel order; convert to RGB for matplotlib.
np_image = cv2.cvtColor(np_image, cv2.COLOR_BGR2RGB)
# Fixed 1000x1000 size, so the 200-pixel windows used below tile it evenly.
np_image = cv2.resize(np_image, dsize=(1000, 1000))
plt.imshow(np_image)

In [None]:
def sliding_window(image, step, window_size):
    """Generate fixed-size crops of ``image``, row by row.

    Args:
        image: Array-like exposing ``shape`` (rows first, then columns) and
            2-D slicing.
        step: Stride between consecutive window origins, on both axes.
        window_size: ``(width, height)`` of each window.

    Yields:
        ``(x, y, crop)`` tuples, where ``(x, y)`` is the top-left corner of
        the window and ``crop`` is ``image[y:y + height, x:x + width]``.
        Only windows that fit entirely inside the image are produced.
    """
    for top in range(0, image.shape[0] - window_size[1] + 1, step):
        bottom = top + window_size[1]
        # Sweep one full row of windows, left to right, before moving down.
        yield from (
            (left, top, image[top:bottom, left : left + window_size[0]])
            for left in range(0, image.shape[1] - window_size[0] + 1, step)
        )

In [None]:
# Step equals the window size, so the 200x200 windows do not overlap.
windows = sliding_window(image=np_image, step=200, window_size=(200, 200))

In [None]:
# Output folder for the rendered frames; exist_ok keeps re-runs from failing.
os.makedirs(name="../images/sliding_windows", exist_ok=True)

In [None]:
# Render one frame per window: the full image with the current window outlined
# on the left, and the cropped window contents on the right.
# NOTE: `windows` is a generator, so it is exhausted after this loop; recreate
# it before re-running this cell.
for frame_index, (left, top, crop) in enumerate(windows):
    fig, axes = plt.subplots(1, 2, figsize=(12, 12))
    axes[0].imshow(np_image)
    # Size the outline from the crop itself rather than a hard-coded 200 so it
    # stays correct if the window size changes.
    crop_height, crop_width = crop.shape[:2]
    rect = patches.Rectangle(
        (left, top),
        crop_width,
        crop_height,
        linewidth=2,
        edgecolor="g",
        facecolor="none",
    )
    axes[0].add_patch(rect)
    axes[1].imshow(crop)
    fig.savefig(f"../images/sliding_windows/image-{frame_index}.png", dpi=fig.dpi)
    # Close each figure once it is saved; otherwise one figure per window
    # stays open and memory grows with the number of frames.
    # NOTE(review): closed figures are presumably no longer shown inline by
    # the notebook; the saved PNGs are the intended output.
    plt.close(fig)

## Create a video with the images

In [None]:
# Folder the per-window frames were saved to, and the output video file name.
image_folder = "../images/sliding_windows/"
video_name = "sliding_window.mp4"

# Keep only the PNG frames; any other file in the folder is ignored.
images = list(filter(lambda name: name.endswith(".png"), os.listdir(image_folder)))

In [None]:
# Order the frames by the number embedded in the file name; a plain string
# sort would place "image-10.png" before "image-2.png".
pattern = r"[0-9]+"
images = sorted(images, key=lambda x: int(re.search(pattern, x).group(0)))
if not images:
    raise FileNotFoundError(f"No .png frames found in {image_folder}")

# Get the shape of the images
frame = cv2.imread(os.path.join(image_folder, images[0]))
height, width, channels = frame.shape

# cv2.VideoWriter takes frameSize as (width, height), the reverse of the
# (height, width, channels) order of an image array's shape.
# NOTE(review): fourcc is passed as 0; confirm the backend picks a codec that
# is compatible with the .mp4 container.
video = cv2.VideoWriter(video_name, 0, 1, (width, height))
try:
    for image in images:
        video.write(cv2.imread(os.path.join(image_folder, image)))
finally:
    # Release even if a frame fails, so the writer is always finalized.
    video.release()

In [None]:
# Print the frame file names currently held in `images`.
print(images)

## Object Detection metrics

#### IoU = Intersection over Union

IoU measures how much a predicted bounding box overlaps the ground-truth box. It is calculated as:

$$IoU = \frac{\text{Area of Intersection}}{\text{Ground Truth Area} + \text{Predicted Box Area} - \text{Area of Intersection}}$$

In [None]:
def intersection_over_union(ground_truth_bbox, predicted_bbox):
    """Return the Intersection over Union (IoU) of two bounding boxes.

    Args:
        ground_truth_bbox: Box as ``[x_min, y_min, x_max, y_max]``.
        predicted_bbox: Box in the same ``[x_min, y_min, x_max, y_max]`` form.

    Returns:
        The intersection area divided by the union area, as a float. Widths
        and heights are computed as ``max - min + 1``, i.e. both corner
        coordinates are counted as part of the box.
    """

    def _area(box):
        # Area of a single box, using the same inclusive +1 convention.
        return (box[2] - box[0] + 1) * (box[3] - box[1] + 1)

    # Corners of the rectangle shared by both boxes.
    left = max(ground_truth_bbox[0], predicted_bbox[0])
    top = max(ground_truth_bbox[1], predicted_bbox[1])
    right = min(ground_truth_bbox[2], predicted_bbox[2])
    bottom = min(ground_truth_bbox[3], predicted_bbox[3])

    # Clamp at zero so boxes that do not overlap contribute no area.
    overlap_width = max(0, right - left + 1)
    overlap_height = max(0, bottom - top + 1)
    overlap = overlap_width * overlap_height

    union = _area(ground_truth_bbox) + _area(predicted_bbox) - overlap
    return overlap / float(union)

In [None]:
example_image_path = (
    "../images/pexels-frank-cone-2330502-cropped_d19e5e67-9b2e-4134-a2b2-fccd10209453.jpg"
)
# cv2.imread reports failure by returning None instead of raising, so check
# explicitly; otherwise the problem only surfaces later inside cvtColor.
image = cv2.imread(example_image_path)
if image is None:
    raise OSError(f"cv2.imread could not read image: {example_image_path}")
# OpenCV loads images in BGR channel order; convert to RGB for matplotlib.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

In [None]:
# (ground-truth box, predicted box), each as [x_min, y_min, x_max, y_max].
example = ([700, 425, 825, 550], [710, 435, 810, 560])
iou = intersection_over_union(*example)
print(iou)

In [None]:
def _bbox_patch(bbox, color):
    """Build an unfilled rectangle outline for an [x_min, y_min, x_max, y_max] box."""
    return patches.Rectangle(
        tuple(bbox[:2]),
        bbox[2] - bbox[0],
        bbox[3] - bbox[1],
        linewidth=3,
        edgecolor=color,
        facecolor="none",
    )


# Draw both boxes of `example` over the image: the first in green, the second
# in red.
fig, ax = plt.subplots(figsize=(15, 15))
ax.imshow(image)

rect = _bbox_patch(example[0], "g")
ax.add_patch(rect)

# Previously written as `rect_2 = rect = ...`, which also rebound `rect` to
# this second rectangle by accident.
rect_2 = _bbox_patch(example[1], "r")
ax.add_patch(rect_2)
plt.show()