# Object detection in Python using the MediaPipe API
Before beginning
1. [Download a pre-trained model](https://developers.google.com/mediapipe/solutions/vision/object_detector#efficientdet-lite0_model_recommended) and store it in the root folder.
2. Modify the `model_asset_path` passed to `BaseOptions` in the `ObjectDetectorOptions` initialization so that it points to this location.
3. Modify the `max_results` kwarg of `ObjectDetectorOptions` to change the number of returned detections. The color list `TEXT_COLOR` defines 8 distinct colors, so keep this at 8 or fewer.

In [1]:
import cv2
import mediapipe as mp
from mediapipe.tasks.python.components.containers.detections import DetectionResult
from mediapipe.tasks.python.vision import ObjectDetector, ObjectDetectorOptions, RunningMode
import numpy as np

In [2]:
# Short alias for the MediaPipe Tasks base options class.
BaseOptions = mp.tasks.BaseOptions

# Detection results are delivered asynchronously through a callback. They are
# buffered in this list, which is used as a FIFO queue: new results are
# appended at the end and consumed from the front.
detect_output = []

# append results to the queue
def append_result(
    result: DetectionResult,
    output_image: mp.Image,
    timestamp_ms: int,
):
    """Store a detection result at the end of the ``detect_output`` list.

    Args:
        result: The detection result to store.
        output_image: Part of the callback signature; not used here.
        timestamp_ms: Part of the callback signature; not used here.
    """
    detect_output.append(result)


# Detector configuration: live-stream running mode, at most two detections
# per result, every result handed to append_result, and the model read from
# the relative path 'efficientdet.tflite'.
options = ObjectDetectorOptions(
    result_callback=append_result,
    max_results=2,
    running_mode=RunningMode.LIVE_STREAM,
    base_options=BaseOptions(model_asset_path='efficientdet.tflite'),
)

In [3]:
# Label placement, in pixels: the text is offset from the bounding-box origin
# by MARGIN horizontally and by MARGIN + ROW_SIZE vertically.
MARGIN = 10
ROW_SIZE = 10

# Font scale and stroke thickness used for the label text.
FONT_SIZE = 1
FONT_THICKNESS = 1

# Palette indexed by detection position; the same entry colors both the box
# and its label. The color names assume RGB channel order.
TEXT_COLOR = [
    (255, 0, 0),      # red
    (0, 255, 0),      # green
    (0, 0, 255),      # blue
    (0, 255, 255),    # cyan
    (255, 0, 255),    # magenta
    (255, 255, 0),    # yellow
    (0, 0, 0),        # black
    (255, 255, 255),  # white
]

def visualize(image, detection_result) -> np.ndarray:
    """Draw a labelled bounding box for every detection onto ``image``.

    The drawing is done directly on ``image``, which is then returned.

    Args:
        image: The input RGB image to draw on.
        detection_result: An object whose ``detections`` attribute is the
            sequence of detections to draw. Each detection must provide a
            ``bounding_box`` (with ``origin_x``, ``origin_y``, ``width`` and
            ``height``) and a non-empty ``categories`` sequence whose first
            entry has ``category_name`` and ``score``.

    Returns:
        The ``image`` that was passed in, with boxes and labels drawn on it.
    """
    # enumerate() gives every detection its own position. Looking the
    # position up with list.index() rescans the list on each iteration and
    # returns the first match when two detections compare equal, so both
    # would share a color.
    for index, detection in enumerate(detection_result.detections):
        # Cycle through the palette so that having more detections than
        # colors does not raise IndexError.
        color = TEXT_COLOR[index % len(TEXT_COLOR)]

        # Draw the bounding box.
        bbox = detection.bounding_box
        start_point = bbox.origin_x, bbox.origin_y
        end_point = bbox.origin_x + bbox.width, bbox.origin_y + bbox.height
        cv2.rectangle(image, start_point, end_point, color, 3)

        # Draw the label and score of the first category just inside the
        # top-left corner of the box. The f-string also tolerates a category
        # without a name instead of failing on string concatenation.
        category = detection.categories[0]
        probability = round(category.score, 2)
        result_text = f"{category.category_name} ({probability})"
        text_location = (
            MARGIN + bbox.origin_x,
            MARGIN + ROW_SIZE + bbox.origin_y,
        )
        cv2.putText(
            image,
            result_text,
            text_location,
            cv2.FONT_HERSHEY_PLAIN,
            FONT_SIZE,
            color,
            FONT_THICKNESS,
        )

    return image

In [4]:
# Capture frames from camera 0, run asynchronous object detection on each
# frame, and display the frame annotated with the most recent result.
# Press 'q' in the window to stop.
cap = cv2.VideoCapture(0)
# Per-frame counter passed to detect_async as the frame's timestamp;
# incrementing it once per frame keeps it strictly increasing.
start = 0
try:
    with ObjectDetector.create_from_options(options) as detector:
        while cap.isOpened():
            success, image = cap.read()
            if not success:
                print("Ignoring empty camera frame.")
                # If loading a video, use 'break' instead of 'continue'.
                continue

            # Frames are converted from BGR to RGB for detection and drawing,
            # and back to BGR for display below.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            start += 1
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=image)
            # The result is not returned here; it is delivered to the
            # result callback configured in `options`, which fills
            # `detect_output`.
            detector.detect_async(mp_image, start)

            # Drop stale results so only the newest one is kept, then draw it
            # on the current frame. Until the first result arrives the frame
            # is shown without annotations.
            while len(detect_output) > 1:
                detect_output.pop(0)
            if detect_output:
                image = visualize(image, detect_output[0])

            return_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
            cv2.imshow('Mediapipe Object Detection', return_image)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the window, even if the detector
    # could not be created or the loop was interrupted by an exception.
    cap.release()
    cv2.destroyAllWindows()

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
qt.qpa.plugin: Could not find the Qt platform plugin "wayland" in "/home/robert/miniconda3/envs/mpipe/lib/python3.9/site-packages/cv2/qt/plugins"
