##### Copyright 2023 The MediaPipe Authors. All Rights Reserved.

# 1. Rosha Week 2 code example (annotating images for facial components recognition)


In [1]:
import cv2
import mediapipe as mp
import os

# Initialize the face detection and drawing utilities
mp_face_detection = mp.solutions.face_detection
mp_drawing = mp.solutions.drawing_utils

# List of image files to process
IMAGE_FILES = [r'img1.jpg']

# Define the output directory
output_dir = r'tmp'

# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Set up the face detection model; the context manager releases it when done.
with mp_face_detection.FaceDetection(model_selection=1, min_detection_confidence=0.5) as face_detection:
    for idx, file in enumerate(IMAGE_FILES):
        # cv2.imread returns None (it does not raise) when the file cannot be read.
        image = cv2.imread(file)
        if image is None:
            print(f"Error: Could not read image file {file}")
            continue

        # OpenCV loads images as BGR; the detector is fed an RGB copy.
        results = face_detection.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

        # Nothing to annotate for this image: say so instead of skipping silently.
        if not results.detections:
            print(f"No faces detected in {file}")
            continue

        # Draw on a copy so the original pixels stay untouched.
        annotated_image = image.copy()

        for detection in results.detections:
            # Print the nose tip coordinates
            print('Nose tip:')
            print(mp_face_detection.get_key_point(
                detection, mp_face_detection.FaceKeyPoint.NOSE_TIP))
            # Draw the detection annotations on the image
            mp_drawing.draw_detection(annotated_image, detection)

        # cv2.imwrite reports failure through its boolean return value, so only
        # announce success when the file was really written.
        output_file = os.path.join(output_dir, 'annotated_image' + str(idx) + '.png')
        if cv2.imwrite(output_file, annotated_image):
            print(f"Annotated image saved to {output_file}")
        else:
            print(f"Error: Could not write annotated image to {output_file}")

Matplotlib created a temporary config/cache directory at /var/folders/zf/49prmn_s7s5861rytkmr3_080000gn/T/matplotlib-s8pq61f6 because the default path (/Users/ribells/.matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.
2024-08-13 08:43:54.747111: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Nose tip:
x: 0.32838702
y: 0.18372527

Annotated image saved to tmp/annotated_image0.png


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


# 2. Simple example of facial detection during live video stream

The next two cells show how to use the MediaPipe Solutions Python API (`mp.solutions.face_detection`) to detect faces: the first cell handles static image files, and the second processes a live webcam stream.

In [1]:
import cv2
import mediapipe as mp
mp_face_detection = mp.solutions.face_detection
mp_drawing = mp.solutions.drawing_utils

# For static images: add file paths here to run detection on them.
IMAGE_FILES = []
with mp_face_detection.FaceDetection(
    model_selection=1, min_detection_confidence=0.5) as face_detection:
  for idx, file in enumerate(IMAGE_FILES):
    # cv2.imread returns None (it does not raise) when the file cannot be read.
    image = cv2.imread(file)
    if image is None:
      print(f"Error: Could not read image file {file}")
      continue

    # Convert the BGR image to RGB and process it with MediaPipe Face Detection.
    results = face_detection.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    # Draw face detections of each face.
    if not results.detections:
      continue
    annotated_image = image.copy()
    for detection in results.detections:
      print('Nose tip:')
      # Fixed: the module alias was misspelled `mp_face_detction`, which raised
      # NameError as soon as IMAGE_FILES was non-empty.
      print(mp_face_detection.get_key_point(
          detection, mp_face_detection.FaceKeyPoint.NOSE_TIP))
      mp_drawing.draw_detection(annotated_image, detection)

    # cv2.imwrite signals failure through its boolean return value.
    output_file = '/tmp/annotated_image' + str(idx) + '.png'
    if not cv2.imwrite(output_file, annotated_image):
      print(f"Error: Could not write annotated image to {output_file}")


Matplotlib created a temporary config/cache directory at /var/folders/zf/49prmn_s7s5861rytkmr3_080000gn/T/matplotlib-2udy_tky because the default path (/Users/ribells/.matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.
2024-07-01 10:11:21.628142: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [None]:
# For webcam input:
# Stop after this many failed reads in a row so a camera that is "opened" but
# never delivers frames cannot keep the cell spinning forever.
MAX_CONSECUTIVE_EMPTY_FRAMES = 30

cap = cv2.VideoCapture(0)
try:
  with mp_face_detection.FaceDetection(
      model_selection=0, min_detection_confidence=0.5) as face_detection:
    empty_frames = 0
    while cap.isOpened():
      success, image = cap.read()
      if not success:
        empty_frames += 1
        print("Ignoring empty camera frame.")
        if empty_frames >= MAX_CONSECUTIVE_EMPTY_FRAMES:
          print("Too many consecutive empty frames; stopping capture.")
          break
        # If loading a video, use 'break' instead of 'continue'.
        continue
      empty_frames = 0

      # To improve performance, optionally mark the image as not writeable to
      # pass by reference.
      image.flags.writeable = False
      image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
      results = face_detection.process(image)

      # Draw the face detection annotations on the image.
      image.flags.writeable = True
      image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
      if results.detections:
        for detection in results.detections:
          mp_drawing.draw_detection(image, detection)
      # Flip the image horizontally for a selfie-view display.
      cv2.imshow('MediaPipe Face Detection', cv2.flip(image, 1))
      # Key code 27 is ESC: pressing it ends the loop.
      if cv2.waitKey(5) & 0xFF == 27:
        break
finally:
  # Always free the camera and close the preview window, even when the loop
  # exits through an exception or a kernel interrupt.
  cap.release()
  cv2.destroyAllWindows()

# 3. Pose Landmarks Detection with MediaPipe Tasks

This notebook shows you how to use MediaPipe Tasks Python API to detect pose landmarks from images.

https://github.com/google-ai-edge/mediapipe-samples/blob/main/examples/pose_landmarker/python/%5BMediaPipe_Python_Tasks%5D_Pose_Landmarker.ipynb

## Preparation

Let's start with installing MediaPipe.


In [7]:
#Note: install successful - only needed to run once
#!pip install -q mediapipe

Then download an off-the-shelf model bundle. Check out the [MediaPipe documentation](https://developers.google.com/mediapipe/solutions/vision/pose_landmarker#models) for more information about this model bundle.

In [2]:
# Download the pose_landmarker_heavy (float16) model bundle and save it locally
# as pose_landmarker.task (-q silences wget's progress output).
!wget -O pose_landmarker.task -q https://storage.googleapis.com/mediapipe-models/pose_landmarker/pose_landmarker_heavy/float16/1/pose_landmarker_heavy.task

## Visualization utilities

In [28]:
#@markdown To better demonstrate the Pose Landmarker API, we have created a set of visualization tools that will be used in this colab. These will draw the landmarks on a detected person, as well as the expected connections between those markers.

from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import numpy as np

def draw_landmarks_on_image(rgb_image, detection_result):
  """Return a copy of `rgb_image` with every detected pose drawn on it.

  Args:
    rgb_image: Image array to annotate; it is copied, never modified.
    detection_result: Object exposing `pose_landmarks`, a sequence with one
      entry per detected pose, each entry being a sequence of landmarks that
      carry `x`, `y` and `z` attributes.

  Returns:
    The annotated copy of the input image.
  """
  canvas = np.copy(rgb_image)

  # One pass per detected pose.
  for pose in detection_result.pose_landmarks:
    # drawing_utils consumes a NormalizedLandmarkList proto, so repackage the
    # landmarks of this pose into one.
    converted = []
    for point in pose:
      converted.append(
          landmark_pb2.NormalizedLandmark(x=point.x, y=point.y, z=point.z))
    pose_proto = landmark_pb2.NormalizedLandmarkList()
    pose_proto.landmark.extend(converted)

    # Render the landmarks plus the skeleton connections with default styling.
    default_style = solutions.drawing_styles.get_default_pose_landmarks_style()
    solutions.drawing_utils.draw_landmarks(
      canvas,
      pose_proto,
      solutions.pose.POSE_CONNECTIONS,
      default_style)

  return canvas

## Download test image

To demonstrate the Pose Landmarker API, you can download a sample image using the following code. The image is from [Pixabay](https://pixabay.com/photos/girl-woman-fitness-beautiful-smile-4051811/).

In [11]:
from PIL import Image
import matplotlib.pyplot as plt

# Fetch the sample photo from the Pixabay CDN and save it as image.jpg
# (-q silences wget's progress output).
!wget -q -O image.jpg https://cdn.pixabay.com/photo/2019/03/12/20/39/girl-4051811_960_720.jpg

import cv2
#from google.colab.patches import cv2_imshow

# Open the downloaded image and render it inline in the notebook.
# The context manager closes the file handle once the figure has been drawn;
# the previous `im.show()` launched an external viewer instead, and the pixel
# accessor it loaded (`px`) was never used.
with Image.open("image.jpg") as im:
    plt.imshow(im)
    plt.axis("off")
    plt.show()

Optionally, you can upload your own image. If you want to do so, uncomment and run the cell below.

In [None]:
# from google.colab import files
# uploaded = files.upload()

# for filename in uploaded:
#   content = uploaded[filename]
#   with open(filename, 'wb') as f:
#     f.write(content)

# if len(uploaded.keys()):
#   IMAGE_FILE = next(iter(uploaded))
#   print('Uploaded file:', IMAGE_FILE)

## Running inference and visualizing the results

The final step is to run pose landmark detection on your selected image. This involves creating your PoseLandmarker object, loading your image, running detection, and finally, the optional step of displaying the image with visualizations.

Check out the [MediaPipe documentation](https://developers.google.com/mediapipe/solutions/vision/pose_landmarker/python) to learn more about configuration options that this solution supports.


In [12]:
# STEP 1: Import the necessary modules.
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

# matplotlib is used for display because `cv2_imshow` is a Colab-only helper
# whose import is commented out in this notebook (calling it raises NameError).
import matplotlib.pyplot as plt

# STEP 2: Create a PoseLandmarker object.
base_options = python.BaseOptions(model_asset_path='pose_landmarker.task')
options = vision.PoseLandmarkerOptions(
    base_options=base_options,
    output_segmentation_masks=True)
detector = vision.PoseLandmarker.create_from_options(options)

# STEP 3: Load the input image.
image = mp.Image.create_from_file("image.jpg")

# STEP 4: Detect pose landmarks from the input image.
detection_result = detector.detect(image)

# STEP 5: Process the detection result. In this case, visualize it.
annotated_image = draw_landmarks_on_image(image.numpy_view(), detection_result)
# The annotated image is in RGB channel order (the original code converted it
# RGB->BGR only for OpenCV display), which is what matplotlib expects.
plt.imshow(annotated_image)
plt.axis("off")
plt.show()

AttributeError: module 'mediapipe.tasks.python.vision' has no attribute 'PoseLandmarkerOptions'

# NOTE: `PoseLandmarkerOptions` was not available in the MediaPipe package installed on this macOS machine (see the AttributeError above)

Visualize the pose segmentation mask.

In [None]:
# matplotlib is used for display because `cv2_imshow` is a Colab-only helper
# whose import is commented out in this notebook (calling it raises NameError).
import matplotlib.pyplot as plt

# Guard against a missing/empty mask list so the cell reports the situation
# instead of failing on the `[0]` lookup.
pose_masks = detection_result.segmentation_masks
if not pose_masks:
    print("No segmentation mask available for this detection result.")
else:
    # Mask of the first detected pose, replicated across three channels and
    # scaled by 255 so it can be shown as an ordinary image.
    segmentation_mask = pose_masks[0].numpy_view()
    visualized_mask = np.repeat(segmentation_mask[:, :, np.newaxis], 3, axis=2) * 255
    # Cast to uint8 so matplotlib interprets the values on a 0-255 scale.
    plt.imshow(visualized_mask.astype(np.uint8))
    plt.axis("off")
    plt.show()

# 4. Mediapipe Face Mesh Example

https://chuoling.github.io/mediapipe/solutions/face_mesh.html#mediapipe-face-mesh

In [None]:
import cv2
import mediapipe as mp
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_face_mesh = mp.solutions.face_mesh

def _draw_face_mesh(image, face_landmarks):
  """Draw the tesselation, contours and irises of one face onto `image` in place.

  Args:
    image: Image array that the mesh is drawn on (modified in place).
    face_landmarks: One entry of `results.multi_face_landmarks`.
  """
  layers = (
      (mp_face_mesh.FACEMESH_TESSELATION,
       mp_drawing_styles.get_default_face_mesh_tesselation_style()),
      (mp_face_mesh.FACEMESH_CONTOURS,
       mp_drawing_styles.get_default_face_mesh_contours_style()),
      (mp_face_mesh.FACEMESH_IRISES,
       mp_drawing_styles.get_default_face_mesh_iris_connections_style()),
  )
  for connections, connection_style in layers:
    mp_drawing.draw_landmarks(
        image=image,
        landmark_list=face_landmarks,
        connections=connections,
        landmark_drawing_spec=None,
        connection_drawing_spec=connection_style)


# Kept for anyone who wants custom landmark styling; the drawing calls below
# pass landmark_drawing_spec=None and rely on the default connection styles.
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)

# For static images: add file paths here to run the face mesh on them.
IMAGE_FILES = []
with mp_face_mesh.FaceMesh(
    static_image_mode=True,
    max_num_faces=1,
    refine_landmarks=True,
    min_detection_confidence=0.5) as face_mesh:
  for idx, file in enumerate(IMAGE_FILES):
    # cv2.imread returns None (it does not raise) when the file cannot be read.
    image = cv2.imread(file)
    if image is None:
      print(f"Error: Could not read image file {file}")
      continue

    # Convert the BGR image to RGB before processing.
    results = face_mesh.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    # Print and draw face mesh landmarks on the image.
    if not results.multi_face_landmarks:
      continue
    annotated_image = image.copy()
    for face_landmarks in results.multi_face_landmarks:
      print('face_landmarks:', face_landmarks)
      _draw_face_mesh(annotated_image, face_landmarks)

    # cv2.imwrite signals failure through its boolean return value.
    output_file = '/tmp/annotated_image' + str(idx) + '.png'
    if not cv2.imwrite(output_file, annotated_image):
      print(f"Error: Could not write annotated image to {output_file}")

# For webcam input:
# Stop after this many failed reads in a row so a camera that is "opened" but
# never delivers frames cannot keep the cell spinning forever.
MAX_CONSECUTIVE_EMPTY_FRAMES = 30

cap = cv2.VideoCapture(0)
try:
  with mp_face_mesh.FaceMesh(
      max_num_faces=1,
      refine_landmarks=True,
      min_detection_confidence=0.5,
      min_tracking_confidence=0.5) as face_mesh:
    empty_frames = 0
    while cap.isOpened():
      success, image = cap.read()
      if not success:
        empty_frames += 1
        print("Ignoring empty camera frame.")
        if empty_frames >= MAX_CONSECUTIVE_EMPTY_FRAMES:
          print("Too many consecutive empty frames; stopping capture.")
          break
        # If loading a video, use 'break' instead of 'continue'.
        continue
      empty_frames = 0

      # To improve performance, optionally mark the image as not writeable to
      # pass by reference.
      image.flags.writeable = False
      image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
      results = face_mesh.process(image)

      # Draw the face mesh annotations on the image.
      image.flags.writeable = True
      image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
      if results.multi_face_landmarks:
        for face_landmarks in results.multi_face_landmarks:
          _draw_face_mesh(image, face_landmarks)
      # Flip the image horizontally for a selfie-view display.
      cv2.imshow('MediaPipe Face Mesh', cv2.flip(image, 1))
      # Key code 27 is ESC: pressing it ends the loop.
      if cv2.waitKey(5) & 0xFF == 27:
        break
finally:
  # Always free the camera and close the preview window, even when the loop
  # exits through an exception or a kernel interrupt.
  cap.release()
  cv2.destroyAllWindows()

# Now we want to use our own customized face mesh instead of the default MediaPipe one

(even though the default mesh already has 468 mesh points)

Here's a video to consider:
https://www.youtube.com/watch?v=LGPBRH6Hqw8

And here is the Mediapipe face mesh documentation:
https://github.com/google-ai-edge/mediapipe/wiki/MediaPipe-Face-Mesh

Found a discussion about custom mesh, attempted to follow...
https://github.com/google-ai-edge/mediapipe/issues/1155

In [4]:
# Copyright 2020 Kanstantsin Sokal.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Converts OBJ file into `mediapipe.face_geometry.Mesh3d`.
#
# The OBJ file must be located in the same directory and named "model.obj".
# The output protobuf text will be written into "model.pbtxt".
def Main():
  """Convert ./model.obj into a text-format mesh written to ./model.pbtxt.

  Reads `v` (position), `vt` (texture coordinate) and `f` (face) records from
  "model.obj" in the current working directory; other records are ignored.
  Each distinct (position, texture-coordinate) index pair used by a face
  becomes one output vertex, and faces with more than three corners are split
  into a triangle fan around their first corner.

  The output lists `vertex_buffer` entries (x, y, z, u, 1 - v per vertex)
  followed by `index_buffer` entries (three per triangle).

  Fields may be separated by any run of whitespace. The input is parsed
  completely before "model.pbtxt" is opened, so a malformed OBJ file does not
  leave a truncated output behind, and both files are closed on every path.

  Raises:
    OSError: If "model.obj" cannot be read or "model.pbtxt" cannot be written.
    ValueError, IndexError: If a record is malformed, e.g. a face corner
      without a texture-coordinate index.
  """
  v_positions = []
  v_tex_coords = []
  faces = []

  # Maps (position index, tex-coord index) -> output vertex index; the list
  # keeps the same keys in first-seen order, which is the output vertex order.
  compressed_v_ids = {}
  compressed_vs = []

  with open("model.obj", "r") as file_in:
    for line in file_in:
      # split() with no argument tolerates tabs and repeated spaces.
      tokens = line.split()
      if not tokens:
        continue

      if tokens[0] == "v":
        v_positions.append(
            (float(tokens[1]), float(tokens[2]), float(tokens[3])))
      elif tokens[0] == "vt":
        v_tex_coords.append((float(tokens[1]), float(tokens[2])))
      elif tokens[0] == "f":
        v_compressed_ids = []
        for token in tokens[1:]:
          parts = token.split('/')
          # OBJ indices are 1-based; convert to 0-based list indices.
          key = (int(parts[0]) - 1, int(parts[1]) - 1)
          if key not in compressed_v_ids:
            compressed_v_ids[key] = len(compressed_vs)
            compressed_vs.append(key)
          v_compressed_ids.append(compressed_v_ids[key])

        # Triangle fan: (0, i, i + 1) for every interior corner i.
        for middle_id in range(1, len(v_compressed_ids) - 1):
          faces.append((v_compressed_ids[0],
                        v_compressed_ids[middle_id],
                        v_compressed_ids[middle_id + 1]))

  with open("model.pbtxt", "w") as file_out:
    file_out.write("vertex_type: VERTEX_PT\n")
    file_out.write("primitive_type: TRIANGLE\n")

    for v_position_id, v_tex_coord_id in compressed_vs:
      v_position = v_positions[v_position_id]
      v_tex_coord = v_tex_coords[v_tex_coord_id]

      # The second texture coordinate is written flipped (1 - v).
      values = (v_position[0], v_position[1], v_position[2],
                v_tex_coord[0], 1 - v_tex_coord[1])
      file_out.write(
          "".join("vertex_buffer: {:.6f}\n".format(value) for value in values))

    for face in faces:
      file_out.write(
          "".join("index_buffer: {}\n".format(index) for index in face))
  
# Run the conversion when this code is executed directly (not when imported).
if __name__ == "__main__":
  Main()

In [6]:
import google.protobuf.text_format as text_format
from mesh_3d_pb2 import Mesh3d

# Empty Mesh3d message that the text-format data is merged into.
message = Mesh3d()

# Read the text-format protobuf and merge its fields into `message`.
with open('model.pbtxt', 'r') as pbtxt_in:
    pbtxt_contents = pbtxt_in.read()
text_format.Merge(pbtxt_contents, message)

# Write the same message back out in binary wire format.
with open('model.pbbinary', 'wb') as binary_out:
    binary_out.write(message.SerializeToString())

ModuleNotFoundError: No module named 'mesh_3d_pb2'