In [1]:
import cv2
import numpy as np 
import mediapipe as mp
import tensorflow as tf

In [2]:
# Shorthand handles for the MediaPipe sub-modules used by the later cells
mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic

# Holistic tracker; detection and tracking confidence thresholds are both 0.5
holistic_model = mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5)

# Capture state shared with the recording loop:
# one entry of landmarks is appended per processed frame ...
all_landmarks_list = []
# ... and this counter bounds how many frames the loop records
frame_counter = 0

In [3]:
# OpenCV: open the default camera (device 0) exactly once.
# The original opened the device twice; the extra handle was never read from or
# released, which keeps the camera busy after the notebook finishes.
capture = cv2.VideoCapture(0)

# Keep the short name available without holding a second device handle.
cap = capture

# Fail fast with a clear message instead of letting the capture loop silently
# record zero frames.
if not capture.isOpened():
    capture.release()
    raise RuntimeError("Could not open camera device 0.")

In [4]:
# Landmark counts per body part, matching the counts assumed by the padding
# cell (33 pose + 468 face + 21 + 21 hands = 543 rows per frame).
# NOTE(review): 468 face points assumes the face mesh runs without iris
# refinement, which is how the model is constructed in this notebook.
NUM_POSE_LANDMARKS = 33
NUM_FACE_LANDMARKS = 468
NUM_HAND_LANDMARKS = 21
MAX_FRAMES = 100  # number of frames to record


def _landmark_rows(landmark_list, expected_count):
    """Return the [x, y, z] rows for one body part of one frame.

    Args:
        landmark_list: A MediaPipe landmark list (has a ``.landmark`` sequence),
            or a falsy value when the part was not detected in this frame.
        expected_count: Number of NaN rows to emit when the part is missing.

    Returns:
        A list of ``[x, y, z]`` lists. A missing part yields ``expected_count``
        rows of NaN so that every part keeps its own fixed slot in the frame.
    """
    if not landmark_list:
        return [[np.nan, np.nan, np.nan] for _ in range(expected_count)]
    return [[lm.x, lm.y, lm.z] for lm in landmark_list.landmark]


# Start from a clean slate so re-running this cell never mixes in stale frames
# or silently skips the loop because the counter is already at the limit.
frame_counter = 0
all_landmarks_list = []

try:
    while capture.isOpened() and frame_counter < MAX_FRAMES:
        ret, frame = capture.read()

        # Stop when the camera delivers no frame
        if not ret:
            break

        frame = cv2.resize(frame, (800, 600))
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Use holistic model to detect landmarks; the frame is flagged
        # read-only while it is being processed.
        image.flags.writeable = False
        results = holistic_model.process(image)
        image.flags.writeable = True

        # Convert back to BGR for rendering
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        # Draw face contours
        mp_drawing.draw_landmarks(
            image,
            results.face_landmarks,
            mp_holistic.FACEMESH_CONTOURS,
            landmark_drawing_spec=mp_drawing.DrawingSpec(color=(0,255,255), thickness=1, circle_radius=1),
            connection_drawing_spec=mp_drawing.DrawingSpec(color=(255,0,255), thickness=1, circle_radius=1)
        )

        # Draw both hands with the default drawing style
        for hand_landmarks in (results.right_hand_landmarks, results.left_hand_landmarks):
            mp_drawing.draw_landmarks(image, hand_landmarks, mp_holistic.HAND_CONNECTIONS)

        # Display the resulting image with landmarks
        cv2.imshow('Holistic Model Landmarks', image)

        # Collect landmarks in a fixed order: pose, face, left hand, right hand.
        # Missing parts are filled with NaN rows in place (rather than skipped),
        # so a given row index always refers to the same landmark in every frame.
        all_landmarks = []
        all_landmarks.extend(_landmark_rows(results.pose_landmarks, NUM_POSE_LANDMARKS))
        all_landmarks.extend(_landmark_rows(results.face_landmarks, NUM_FACE_LANDMARKS))
        all_landmarks.extend(_landmark_rows(results.left_hand_landmarks, NUM_HAND_LANDMARKS))
        all_landmarks.extend(_landmark_rows(results.right_hand_landmarks, NUM_HAND_LANDMARKS))

        # Append the landmarks of this frame to the list
        all_landmarks_list.append(all_landmarks)

        frame_counter += 1

        # Allow the user to stop early by pressing 'q'
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the preview window, even when the loop
    # is interrupted or raises.
    capture.release()
    cv2.destroyAllWindows()

In [5]:
# Per-part landmark counts assumed for the holistic model output
NUM_FACE_LANDMARKS = 468
NUM_HAND_LANDMARKS = 21
NUM_POSE_LANDMARKS = 33
NUM_LANDMARKS = NUM_FACE_LANDMARKS + 2 * NUM_HAND_LANDMARKS + NUM_POSE_LANDMARKS  # 543

# Flat landmark indices 0..542.
# NOTE(review): earlier comments labelled these ranges face, right hand,
# left hand, pose, while the capture loop appends pose, face, left hand,
# right hand. This list is not used to reorder anything below - confirm which
# layout the downstream consumer expects.
expected_landmark_order = list(range(NUM_LANDMARKS))

# Nothing to convert if the capture loop recorded no frames
if not all_landmarks_list:
    raise ValueError("No frames were captured; cannot build the landmark tensor.")

# Find the maximum number of landmarks in any frame
max_landmarks = max(len(landmarks) for landmarks in all_landmarks_list)
if max_landmarks > NUM_LANDMARKS:
    # A longer frame cannot be padded and would produce a ragged tensor
    raise ValueError(
        f"A frame has {max_landmarks} landmarks, more than the expected {NUM_LANDMARKS}."
    )

# Pad every frame to NUM_LANDMARKS rows with NaN so the shape is
# (num_frames, 543, 3). Padding is appended at the end of a frame, so if the
# capture loop skipped an undetected part, the parts after it are not
# slot-aligned across frames.
missing_row = [np.nan, np.nan, np.nan]
padded_landmarks = [
    landmarks + [missing_row] * (NUM_LANDMARKS - len(landmarks))
    for landmarks in all_landmarks_list
]

# Convert the list of landmarks to a TensorFlow tensor
all_landmarks_tensor = tf.convert_to_tensor(padded_landmarks, dtype=tf.float32)

print("Shape of all landmarks tensor before reshaping:", all_landmarks_tensor.shape)

# Flatten each frame to one row: (num_frames, 543 * 3) = (num_frames, 1629).
# The frame count comes from the data, because the capture loop can stop
# before 100 frames (key press or camera read failure).
num_frames = len(padded_landmarks)
all_landmarks_tensor_reshaped = tf.reshape(all_landmarks_tensor, (num_frames, NUM_LANDMARKS * 3))

print("Shape of all landmarks tensor after reshaping:", all_landmarks_tensor_reshaped.shape)

Shape of all landmarks tensor before reshaping: (100, 543, 3)
Shape of all landmarks tensor after reshaping: (100, 1629)
