# Face Detection with MediaPipe Tasks

In [1]:
# Install the pinned MediaPipe release (IPython shell escape; -q keeps pip quiet).
!pip install -q mediapipe==0.10.0

## Visualization utilities

In [1]:
from typing import Tuple, Union
import math
import cv2
import numpy as np

# Drawing parameters used by visualize() below.
MARGIN = 10  # pixels; label inset from the bounding-box origin (x and y)
ROW_SIZE = 10  # pixels; extra vertical offset added to the label position
FONT_SIZE = 1  # font scale passed to cv2.putText
FONT_THICKNESS = 1  # text stroke thickness passed to cv2.putText
TEXT_COLOR = (255, 0, 0)  # red on an RGB image; used for both the box and the label


# def _normalized_to_pixel_coordinates(
#     normalized_x: float, normalized_y: float, image_width: int,
#     image_height: int) -> Union[None, Tuple[int, int]]:
#   """Converts normalized value pair to pixel coordinates."""

#   # Checks if the float value is between 0 and 1.
#   def is_valid_normalized_value(value: float) -> bool:
#     return (value > 0 or math.isclose(0, value)) and (value < 1 or
#                                                       math.isclose(1, value))

#   if not (is_valid_normalized_value(normalized_x) and
#           is_valid_normalized_value(normalized_y)):
#     # TODO: Draw coordinates even if it's outside of the image bounds.
#     return None
#   x_px = min(math.floor(normalized_x * image_width), image_width - 1)
#   y_px = min(math.floor(normalized_y * image_height), image_height - 1)
#   return x_px, y_px


def visualize(
    image,
    detection_result
) -> np.ndarray:
  """Draws a bounding box and a label for each detection and returns the result.

  The input image is left untouched; all drawing happens on a copy.

  Args:
    image: The input RGB image as a numpy array.
    detection_result: Object whose `detections` attribute lists the
      "Detection" entities to be visualized.

  Returns:
    A copy of `image` with one rectangle per detection and, when the
    detection carries at least one category, a "name (score)" label.
  """
  annotated_image = image.copy()

  for detection in detection_result.detections:
    # Draw bounding_box
    bbox = detection.bounding_box
    start_point = bbox.origin_x, bbox.origin_y
    end_point = bbox.origin_x + bbox.width, bbox.origin_y + bbox.height
    cv2.rectangle(annotated_image, start_point, end_point, TEXT_COLOR, 3)

    # NOTE: keypoint drawing is disabled because it relies on the
    # `_normalized_to_pixel_coordinates` helper, which is commented out in
    # this file. Re-enabling it also requires the image size, e.g.
    # `height, width = image.shape[:2]`.
    # # Draw keypoints
    # for keypoint in detection.keypoints:
    #   keypoint_px = _normalized_to_pixel_coordinates(keypoint.x, keypoint.y,
    #                                                  width, height)
    #   color, thickness, radius = (0, 255, 0), 2, 2
    #   cv2.circle(annotated_image, keypoint_px, thickness, color, radius)

    # Draw label and score. A detection without categories still gets its
    # box; only the label is skipped instead of raising IndexError.
    if not detection.categories:
      continue
    category = detection.categories[0]
    category_name = category.category_name or ''
    probability = round(category.score, 2)
    result_text = f'{category_name} ({probability})'
    text_location = (MARGIN + bbox.origin_x,
                     MARGIN + ROW_SIZE + bbox.origin_y)
    cv2.putText(annotated_image, result_text, text_location,
                cv2.FONT_HERSHEY_PLAIN, FONT_SIZE, TEXT_COLOR, FONT_THICKNESS)

  return annotated_image

In [2]:
# Video file to play
VIDEO_FILE_PATH = '../../0612/spicy_karina.mp4'

# Open the video file
cap = cv2.VideoCapture(VIDEO_FILE_PATH)

# Make sure the file was opened successfully
if not cap.isOpened():
    # The path is a string, so it must be formatted with %s; the previous %d
    # raised TypeError before the message could be printed.
    print('Can\'t open the video (%s)' % VIDEO_FILE_PATH)
    # Stop with a non-zero status without relying on the site-provided exit().
    raise SystemExit(1)

titles = ['orig']
# Create the display windows
for t in titles:
    cv2.namedWindow(t)

# Frame width of the input video
width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
# Frame height of the input video
height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
# Frame rate of the input video
fps = cap.get(cv2.CAP_PROP_FPS)

print(f'width {width}, height {height}, fps {fps}')

# Codec for the saved video.
# XVID is said to work best.
# Linux: DIVX, XVID, MJPG, X264, WMV1, WMV2.
# Windows: DIVX
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
# Output file name
filename = './spicy_karina_mosaic.mp4'

# Create the output stream; cap.get() returns floats, so the frame size is
# converted to ints for VideoWriter.
out = cv2.VideoWriter(filename, fourcc, fps, (int(width), int(height)))

width 720.0, height 1280.0, fps 30.0


In [3]:
# STEP 1: Import the necessary modules.
import numpy as np
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

# STEP 2: Create a FaceDetector object.
base_options = python.BaseOptions(model_asset_path='detector.tflite')
options = vision.FaceDetectorOptions(base_options=base_options)
detector = vision.FaceDetector.create_from_options(options)

try:
    while True:
        ret, frame = cap.read()

        # Stop at the end of the stream or on a read failure.
        if not ret or frame is None:
            break

        # STEP 3: Wrap the frame for MediaPipe. OpenCV decodes frames as BGR,
        # while ImageFormat.SRGB expects RGB channel order, so convert first.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

        # STEP 4: Detect faces in the input image.
        detection_result = detector.detect(mp_image)

        # STEP 5: Process the detection result. In this case, visualize it.
        # visualize() draws on its own copy, so no extra copy is needed here.
        annotated_image = visualize(rgb_frame, detection_result)
        # Back to BGR, which is what cv2.imshow and cv2.VideoWriter expect.
        bgr_annotated_image = cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR)

        # Show the frame in the 'orig' window created when the video was
        # opened, rather than opening a second window.
        cv2.imshow('orig', bgr_annotated_image)
        out.write(bgr_annotated_image)

        # Esc key stops playback.
        if cv2.waitKey(1) == 27:
            break
finally:
    # Release resources even if detection or drawing raised.
    # Close the input video
    cap.release()
    # Close the output video
    out.release()
    # Close the windows
    cv2.destroyAllWindows()