# 1. Import Libraries

In [3]:
import os
import time
import cv2 as cv
import tensorflow as tf
import numpy as np
import sklearn
import matplotlib
import mediapipe as mp

# 3. Detect and Render Keypoints using MMPose Inference Models

In [4]:
mp_holistic = mp.solutions.holistic # Holistic Model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

# Detect keypoints using holistic model
def detect_keypoints(image, model):
    image = cv.cvtColor(image, cv.COLOR_BGR2RGB)    # Convert image from bgr (default channel from opencv feed) to rbg (media pipe detects in rgb)
    image.flags.writable = False                    # Set image to unwritable to save memory
    results = model.process(image)                  # Detecting keypoints
    image.flags.writable = True                     # Set back to writable
    image = cv.cvtColor(image, cv.COLOR_RGB2BGR)    # Convert back to bgr
    return image, results

# Render landmarks on frame in place
def draw_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACE_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(80, 110, 10), thickness=1, circle_radius=1),
                              mp_drawing.DrawingSpec(color=(80, 256, 120), thickness=1, circle_radius=1),
                             )
    mp_drawing.draw_landmarks(image, results.post_landmarks, mp_holistic.POSE_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(200, 0, 18), thickness=1, circle_radius=1),
                              mp_drawing.DrawingSpec(color=(100, 0, 18), thickness=1, circle_radius=1),
                             )
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(180, 140, 180), thickness=1, circle_radius=1),
                              mp_drawing.DrawingSpec(color=(200, 180, 180), thickness=1, circle_radius=1),
                             )
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(180, 140, 180), thickness=1, circle_radius=1),
                              mp_drawing.DrawingSpec(color=(200, 180, 180), thickness=1, circle_radius=1),
                             )
    

# Set up webcam for video capture
cam = cv.VideoCapture(0) # 0 is the device value (webcam), can be substituted for the name of a video file

# Access MediaPipe model
with mp_holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cam.isOpened():

        # Read feed
        ret, frame = cam.read()

        # Make detections (result contains all the different landmarks)
        image, results = detect_keypoints(frame, holistic)
        
        # Draw landmarks
        draw_landmarks(image, results)

        # Show frame to screen (name of window, frame)
        cv.imshow('OpenCV Feed', image)

        # Wait 10 ms for a keypress after the window is shown, if the key returned is q, break
        if cv.waitKey(10) & 0xFF == ord('q'):
            break

    # Release webcam
    cam.release()

    # Close window
    cv.destroyAllWindows()

print('ehllo')

TypeError: 'module' object is not callable

In [None]:
import cv2
cap_front = cv2.VideoCapture(0) #front
cap_back = cv2.VideoCapture(1) #back
active_capture = cap_front
while True:
    ret, frame = active_capture.read()
    
    key = cv2.waitKey(1)
    if key == ord("b"):
        active_capture = cap_back
    elif key == ord("f"):
        active_capture = cap_front
        cv2.imshow(" ",frame)
    if key == ord('q'):
        break
print('done'

In [None]:
x = 5
x

# 4. Extract Keypoint Data
- Concat all landmarks into a numpy array
- array of zeros if no landmarks are detected

In [None]:
result.post_landmarks

In [None]:
# Extract all landmark coordinates as a single flattened np array
# Handle errors when hands are not in the frame: replace landmarks with blank array

def extract_landmarks_arrays(results):
    if results.pose_landmarks:
        pose_array = np.array([[pos.x, pos.y, pos.z, pos.visibility] for pos in results.pose_landmarks.landmark]).flatten()
    else:
        pose_array = np.zeros(33 * 4)

    if results.face_landmarks:
        face_array = np.array([[pos.x, pos.y, pos.z] for pos in results.face_landmarks.landmark]).flatten()
    else:
        face_array = np.zeros(468 * 3)

    if results.left_hand_landmarks:
        lh_array = np.array([[pos.x, pos.y, pos.z] for pos in results.left_hand_landmarks.landmark]).flatten()
    else:
        lh_array = np.zeros(21 * 3)

    if results.right_hand_landmarks:
        rh_array = np.array([[pos.x, pos.y, pos.z] for pos in results.right_hand_landmarks.landmark]).flatten()
    else:
        rh_array = np.zeros(21 * 3)
    
    return np.concatenate(pose_array, face_array, lh_array, rh_array)

In [None]:
arr = np.array([[5, 6, 7], [8, 9, 0]])
arr
arr.flatten()
np.zeros(21 * 3)

# 2. Set Up Webcam
- setup video capture and loop through frames

In [None]:
# Set up webcam for video capture
cam = cv.VideoCapture(0) # 0 is the device value (webcam), can be substituted for the name of a video file

while cam.isOpened():

    # Read feed
    ret, frame = cam.read()

    # Show frame to screen (name of window, frame)
    cv.imshow('OpenCV Feed', frame)

    # Wait 10 ms for a keypress after the window is shown, if the key returned is q, break
    if cv.waitKey(10) & 0xFF == ord('q'):
        break

# Release webcam
cam.release()

# Close window
cv.destroyAllWindows()