In [None]:
# Importing the Segment Anything Model (SAM)
from segment_anything import SamAutomaticMaskGenerator, sam_model_registry
# Selecting the SAM variant and the checkpoint file that matches it
MODEL_TYPE = "vit_b"
CHECKPOINT_PATH = "sam_vit_b_01ec64.pth"

# Looking up the model builder in the registry, then loading the weights
build_sam = sam_model_registry[MODEL_TYPE]
sam = build_sam(checkpoint=CHECKPOINT_PATH)

# Wrapping the model in the automatic mask generator used by the later cells
mask_generator = SamAutomaticMaskGenerator(sam)


In [None]:
# Image wrangling 
import cv2

# Plotting the image
import matplotlib.pyplot as plt

In [None]:
# Defining the path to image
image_path = "input/DJI_0952-2023-11-30-11-10-42.jpg"

# Opening the image (OpenCV decodes into BGR channel order)
image = cv2.imread(image_path)

# cv2.imread signals a missing/unreadable file by returning None instead of raising
if image is None:
    raise FileNotFoundError(f"Could not read image at {image_path}")

# Saving the original w and h so the bboxes can be mapped back later
original_h, original_w = image.shape[:2]

# Resizing the image
image = cv2.resize(image, (512, 512))

# Getting the masks.
# SAM expects RGB input, so the BGR array is converted for inference only;
# `image` itself stays in BGR for the OpenCV calls in later cells.
masks = mask_generator.generate(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

In [None]:
# `image` comes from cv2.imread (BGR) while matplotlib interprets arrays as RGB,
# so the channels are swapped for display; the trailing `;` hides the AxesImage repr
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB));

In [None]:
# Extracting the bboxes.
# Each bbox is copied so the in-place conversions in the following cells
# do not overwrite the "bbox" entries stored inside `masks`.
bboxes = [list(mask["bbox"]) for mask in masks]

In [None]:
# The mask bboxes are in xywh format (top left point and the width and height).
# We convert them to normalized xyxy (the top left point and the bottom right point).
#
# The list is rebuilt from `masks` instead of being edited in place, so re-running
# this cell always gives the same result and the entries inside `masks` stay untouched.

# Side length of the resized image the masks were generated on
RESIZED_SIDE = 512

bboxes = [
    [
        x / RESIZED_SIDE,             # left
        y / RESIZED_SIDE,             # top
        (x + width) / RESIZED_SIDE,   # right
        (y + height) / RESIZED_SIDE,  # bottom
    ]
    for x, y, width, height in (mask["bbox"] for mask in masks)
]

In [None]:
# Scaling the normalized bboxes to the original image size
for box in bboxes:
    # x coordinates scale with the width, y coordinates with the height;
    # slice assignment updates the existing list object in place
    box[:] = [
        box[0] * original_w,
        box[1] * original_h,
        box[2] * original_w,
        box[3] * original_h,
    ]

In [None]:
# Loading the image again from disk, this time without resizing
image = cv2.imread(image_path)

# Outlining every bbox in green
for box in bboxes:
    # Truncating the coordinates to integers before drawing
    x1, y1, x2, y2 = (int(value) for value in box)
    image = cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

In [None]:
# Inferring which bbox has the highest overlap with the center 50 x 50 pixels patch
from shapely.geometry import Polygon


def _rect_polygon(x1, y1, x2, y2):
    """Return the axis-aligned rectangle (x1, y1)-(x2, y2) as a shapely Polygon."""
    return Polygon([(x1, y1), (x2, y1), (x2, y2), (x1, y2)])


# Failing early with a clear message; max() below would otherwise raise
# an opaque "empty sequence" error when there are no bboxes
if not bboxes:
    raise ValueError("No bboxes available: cannot select the bbox with the highest overlap")

# Saving the center point x and y
center_x = int(original_w // 2)
center_y = int(original_h // 2)

# Saving the center patch
# (top left x, top left y, bottom right x, bottom right y)
center_patch = [center_x - 25, center_y - 25, center_x + 25, center_y + 25]

# Converting to shapely polygon
center_patch_poly = _rect_polygon(*center_patch)

# Saving the bboxes as polygons
bboxes_poly = [_rect_polygon(*box[:4]) for box in bboxes]

# Saving the overlaps (intersection area with the center patch)
overlaps = [poly.intersection(center_patch_poly).area for poly in bboxes_poly]

# Getting the index of the bbox with the highest overlap.
# Ties resolve to the first bbox, so if no bbox touches the patch at all
# (every overlap is 0) the first bbox in the list is selected.
index = overlaps.index(max(overlaps))

# Saving the bbox with the highest overlap, converted to int
bbox_highest = [int(value) for value in bboxes[index]]

In [None]:
# Corner points of the bbox with the highest overlap
top_left = (bbox_highest[0], bbox_highest[1])
bottom_right = (bbox_highest[2], bbox_highest[3])

# Outlining that bbox in red
image = cv2.rectangle(image, top_left, bottom_right, (0, 0, 255), 2)

# Writing the coordinates next to both corners and next to the center point
for point in (top_left, bottom_right, (center_x, center_y)):
    image = cv2.putText(image, f"({point[0]}, {point[1]})", point, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)



In [None]:
# Corner points of the center patch
patch_top_left = (center_patch[0], center_patch[1])
patch_bottom_right = (center_patch[2], center_patch[3])

# Outlining the center patch in blue
image = cv2.rectangle(
    image,
    (int(patch_top_left[0]), int(patch_top_left[1])),
    (int(patch_bottom_right[0]), int(patch_bottom_right[1])),
    (255, 0, 0),
    2,
)

# Writing the coordinates next to the top left and bottom right corners
for corner in (patch_top_left, patch_bottom_right):
    image = cv2.putText(image, f"({int(corner[0])}, {int(corner[1])})", corner, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

In [None]:
# Saving the image.
# cv2.imwrite reports a failed write by returning False rather than raising,
# so the result is checked explicitly.
if not cv2.imwrite("output.jpg", image):
    raise OSError("Failed to write output.jpg")

In [None]:
# Building the polygon of the highest overlapping bbox and printing its area
x1, y1, x2, y2 = bbox_highest
bbox_highest_poly = Polygon([(x1, y1), (x2, y1), (x2, y2), (x1, y2)])
print(bbox_highest_poly.area)

# Printing the total area of the image for comparison
print(original_w * original_h)

# Reporting the bbox area as a share of the whole image
print(f"The percentage of the image covered by the bbox with the highest overlap is {bbox_highest_poly.area / (original_w * original_h) * 100}%")

In [None]:
# Recomputing the center point of the original image
center_x, center_y = int(original_w // 2), int(original_h // 2)

# Rebuilding the center patch as [x1, y1, x2, y2]: 25 pixels on each side of the center
half_side = 25
center_patch = [
    center_x - half_side,
    center_y - half_side,
    center_x + half_side,
    center_y + half_side,
]

In [None]:
# Displaying the selected bbox as integer [x1, y1, x2, y2] coordinates
bbox_highest

In [None]:
# Displaying the center patch as [x1, y1, x2, y2] coordinates
center_patch

In [None]:
# If the bbox with the highest overlap covers less than 1% of the image, the center
# patch is moved so that it is centered on the nearest edge of that bbox.
# NOTE(review): the check uses the bbox's own area relative to the image, not the
# overlap area with the patch - confirm this is the intended criterion.
if bbox_highest_poly.area / (original_w * original_h) < 0.01:
    # Unpacking the bbox edges (xyxy format)
    bbox_left, bbox_top, bbox_right, bbox_bottom = bbox_highest

    # Distance from each side of the center patch to the matching bbox edge
    distance_left = abs(center_patch[0] - bbox_left)
    distance_right = abs(center_patch[2] - bbox_right)
    distance_top = abs(center_patch[1] - bbox_top)
    distance_bottom = abs(center_patch[3] - bbox_bottom)

    # Inferring which edge is the closest (order: left, right, top, bottom)
    distances = [distance_left, distance_right, distance_top, distance_bottom]

    # Getting the index of the closest edge
    index = distances.index(min(distances))

    # Moving the center patch so it is centered on the closest edge.
    # Left/right moves keep the vertical position; top/bottom moves keep the horizontal one.
    if index == 0:
        # Moving to the left edge
        center_patch = [bbox_left - 25, center_y - 25, bbox_left + 25, center_y + 25]
    elif index == 1:
        # Moving to the right edge
        center_patch = [bbox_right - 25, center_y - 25, bbox_right + 25, center_y + 25]
    elif index == 2:
        # Moving to the top edge, using the same centering rule as the left/right branches
        center_patch = [center_x - 25, bbox_top - 25, center_x + 25, bbox_top + 25]
    else:
        # Moving to the bottom edge: the bottom y is the fourth bbox coordinate
        center_patch = [center_x - 25, bbox_bottom - 25, center_x + 25, bbox_bottom + 25]

    # Drawing the new center patch
    image = cv2.rectangle(image, (center_patch[0], center_patch[1]), (center_patch[2], center_patch[3]), (255, 0, 0), 2)

In [None]:
# Saving the image.
# cv2.imwrite reports a failed write by returning False rather than raising,
# so the result is checked explicitly.
if not cv2.imwrite("output.jpg", image):
    raise OSError("Failed to write output.jpg")