In [1]:
import os

import cv2
import mediapipe as mp
import numpy as np

In [9]:
# --- Input / output locations ---------------------------------------------
# Source clip that the faces are cropped from.
video_path = r"E:\Lip_Wise_GFPGAN\_testData\Inputs\test_trim.mp4"
# Folder the cropped face images are written into.
direct = r"E:\Lip_Wise\Result_Analytics\Cropped_face"

# --- MediaPipe model files -------------------------------------------------
# Model file handed to the face landmarker.
landmarker_model_path = r"E:\Lip_Wise\weights\mp\face_landmarker.task"
# Model file handed to the face detector.
detector_model_path = r"E:\Lip_Wise\weights\mp\blaze_face_short_range.tflite"

In [8]:
# Sampling configuration for the crop export below.
MAX_FRAMES = 160       # stop after this many frames have been read
SAVE_EVERY = 16        # write one face crop per this many frames
CROP_SIZE = (96, 96)   # (width, height) every saved crop is resized to


def _landmark_bbox(landmarks_np, frame_width, frame_height):
    """Return the pixel box ``(x0, y0, x1, y1)`` that encloses the landmarks.

    Parameters
    ----------
    landmarks_np : np.ndarray
        ``(N, 2)`` array of normalized landmark coordinates; column 0 holds
        x and column 1 holds y.
    frame_width, frame_height : int
        Size of the frame in pixels, used to scale the normalized values.

    Returns
    -------
    tuple[int, int, int, int]
        Integer pixel coordinates, clamped to the frame. The box can be empty
        (``x1 <= x0`` or ``y1 <= y0``); callers must check before cropping.
    """
    scale = np.array([frame_width, frame_height], dtype=np.float64)
    # Clamp to the frame: a negative value would be read by NumPy slicing as
    # "count from the end" and silently crop the wrong region.
    top_left = np.clip(landmarks_np.min(axis=0) * scale, 0, scale)
    bottom_right = np.clip(landmarks_np.max(axis=0) * scale, 0, scale)
    x0, y0 = (int(v) for v in top_left)
    x1, y1 = (int(v) for v in bottom_right)
    return x0, y0, x1, y1


video = cv2.VideoCapture(video_path)

# Get video properties
fps = video.get(cv2.CAP_PROP_FPS)
frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))

# Initialize mediapipe
BaseOptions = mp.tasks.BaseOptions
VisionRunningMode = mp.tasks.vision.RunningMode

FaceLandmarker = mp.tasks.vision.FaceLandmarker
FaceLandmarkerOptions = mp.tasks.vision.FaceLandmarkerOptions

FaceDetector = mp.tasks.vision.FaceDetector
FaceDetectorOptions = mp.tasks.vision.FaceDetectorOptions

# Create a face detector instance with the image mode:
options_det = FaceDetectorOptions(
    base_options=BaseOptions(model_asset_path=detector_model_path),
    min_detection_confidence=0.5,
    running_mode=VisionRunningMode.IMAGE)

# Create a face landmarker instance with the image mode:
options_lan = FaceLandmarkerOptions(
    base_options=BaseOptions(model_asset_path=landmarker_model_path),
    min_face_detection_confidence=0.5,
    running_mode=VisionRunningMode.IMAGE)

frame_no = 0
# Frame numbers for which the detector or the landmarker found no face.
no_face_index = []

try:
    # Fail loudly instead of silently producing no crops.
    if not video.isOpened():
        raise IOError(f"Could not open video: {video_path}")

    # cv2.imwrite does not create missing directories; it just reports failure.
    os.makedirs(direct, exist_ok=True)

    with FaceLandmarker.create_from_options(options_lan) as landmarker, \
            FaceDetector.create_from_options(options_det) as detector:
        while video.isOpened() and frame_no < MAX_FRAMES:

            ret, frame = video.read()

            if not ret:
                break

            # Convert frame to RGB and convert to MediaPipe image
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            mp_frame = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame)

            # Run face detector and face landmark models in IMAGE mode
            result_landmarker = landmarker.detect(mp_frame)
            result_detection = detector.detect(mp_frame)

            face_found = (len(result_detection.detections) > 0
                          and len(result_landmarker.face_landmarks) > 0)

            if not face_found:
                no_face_index.append(frame_no)
            elif frame_no % SAVE_EVERY == 0:
                # Landmarks of the first face as an (N, 2) array:
                # column 0 is x, column 1 is y (both normalized).
                landmarks_np = np.array(
                    [[i.x, i.y] for i in result_landmarker.face_landmarks[0]]
                ).astype(np.float64)

                x0, y0, x1, y1 = _landmark_bbox(
                    landmarks_np, frame.shape[1], frame.shape[0])

                # An empty crop would make cv2.resize raise, so skip it.
                if x1 > x0 and y1 > y0:
                    # Crop face, restore OpenCV's BGR order and normalize size
                    face = frame[y0:y1, x0:x1]
                    face = cv2.cvtColor(face, cv2.COLOR_RGB2BGR)
                    face = cv2.resize(face, CROP_SIZE)

                    out_path = os.path.join(direct, f"{frame_no // SAVE_EVERY}.jpg")
                    if not cv2.imwrite(out_path, face):
                        raise IOError(f"Could not write face crop: {out_path}")

            # Increment frame number
            frame_no += 1
finally:
    # Release video even when a frame fails to process
    video.release()