In [13]:
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import numpy as np
import cv2

In [14]:
# Path to the TFLite model file; this notebook uses EfficientDet-Lite0 for object detection.
model_path = "efficientdet_lite0.tflite"

# Base options tell the MediaPipe task which model asset to load.
base_options = python.BaseOptions(model_asset_path=model_path)

# score_threshold is the minimum confidence a detection needs to be reported:
# anything scoring below 0.5 (50%) is left out of the output.
options = vision.ObjectDetectorOptions(
    base_options=base_options,
    score_threshold=0.5,
)

# Build the object detector from the options above.
detector = vision.ObjectDetector.create_from_options(options)

# Load the input picture from disk as a MediaPipe image.
image = mp.Image.create_from_file("cat and dog.jpg")

In [15]:
# Run the detector on the loaded image and print what it returns.
# As the printed output shows, the result holds one entry per detection, each with
# bounding-box coordinates and a scored category name.
# The visualization function further down consumes this result.
detection_result = detector.detect(image)
print(detection_result)

DetectionResult(detections=[Detection(bounding_box=BoundingBox(origin_x=148, origin_y=403, width=363, height=603), categories=[Category(index=None, score=0.84375, display_name=None, category_name='cat')], keypoints=[]), Detection(bounding_box=BoundingBox(origin_x=463, origin_y=165, width=423, height=851), categories=[Category(index=None, score=0.81640625, display_name=None, category_name='dog')], keypoints=[])])




In [16]:
MARGIN = 10  # pixels
ROW_SIZE = 10  # pixels
FONT_SIZE = 1
FONT_THICKNESS = 1
TEXT_COLOR = (255, 0, 0)  # red, for an image in RGB channel order

# visualization function
def visualize(image, detection_result) -> np.ndarray:
  """Draw bounding boxes and "<name> (<score>)" labels for every detection.

  Args:
    image: Image array to draw on. It is modified in place, so pass a copy if
      the original pixels must be preserved.
    detection_result: Object exposing a `detections` sequence. Each detection
      needs a `bounding_box` (with `origin_x`, `origin_y`, `width`, `height`)
      and a `categories` sequence whose items have `category_name` and `score`.

  Returns:
    The same `image` object that was passed in, now annotated. The
    `-> np.ndarray` annotation is only a type hint; nothing is converted.
  """
  for detection in detection_result.detections:
    # Draw the bounding box from its top-left corner to its bottom-right corner.
    bbox = detection.bounding_box
    start_point = bbox.origin_x, bbox.origin_y
    end_point = bbox.origin_x + bbox.width, bbox.origin_y + bbox.height
    cv2.rectangle(image, start_point, end_point, TEXT_COLOR, 3)

    # A detection without any category still gets its box, just no label
    # (indexing [0] on an empty list would abort the whole annotation pass).
    if not detection.categories:
      continue

    # Label with the first listed category and its score.
    category = detection.categories[0]
    category_name = category.category_name
    if category_name is None:
      # The name is optional in the result; fall back to a placeholder so the
      # string concatenation below cannot raise TypeError.
      category_name = "unknown"
    # Round the score to two decimal places for display.
    score = round(category.score, 2)
    result_text = category_name + ' (' + str(score) + ')'
    # Place the text just inside the top-left corner of the box.
    text_location = (MARGIN + bbox.origin_x, MARGIN + ROW_SIZE + bbox.origin_y)
    cv2.putText(image, result_text, text_location, cv2.FONT_HERSHEY_PLAIN, FONT_SIZE, TEXT_COLOR, FONT_THICKNESS)
  return image

In [17]:
# image.numpy_view() exposes the MediaPipe image's pixels as a NumPy array.
# np.copy gives us an independent array to draw on, so the original image stays untouched.
image_copy = np.copy(image.numpy_view())

# Draw the detection boxes and labels onto the copy.
annotated_image = visualize(image_copy, detection_result)

# The MediaPipe image is in RGB order, while cv2.imwrite expects BGR, so swap the channels
# before saving. (COLOR_RGB2BGR performs the same swap as COLOR_BGR2RGB; it just names
# the direction correctly.)
annotated_image = cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR)

# Write the annotated image to disk. cv2.imwrite signals failure by returning False
# rather than raising, so check it explicitly instead of failing silently.
write_succeeded = cv2.imwrite("obj detector output.png", annotated_image)
if not write_succeeded:
  raise OSError("cv2.imwrite could not write 'obj detector output.png'")
write_succeeded

True