In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

In [2]:
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

In [3]:
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # COLOR CONVERSION BGR 2 RGB
    image.flags.writeable = False  # Image is no longer writeable
    results = model.process(image)  # Make prediction
    image.flags.writeable = True  # Image is now writeable
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)  # COLOR COVERSION RGB 2 BGR
    return image, results


In [4]:
def draw_styled_landmarks(image, results):
    # face pose connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                               mp_drawing.DrawingSpec(color=(80, 22, 10), thickness=2, circle_radius=4),
                               mp_drawing.DrawingSpec(color=(80, 44, 121), thickness=2, circle_radius=2)
                               )
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                               mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4),
                               mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=2, circle_radius=2)
                               )
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                               mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=2, circle_radius=4),
                               mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=2, circle_radius=2)
                               )

In [5]:
cap = cv2.VideoCapture(0)
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()

        image, results = mediapipe_detection(frame, holistic)

        draw_styled_landmarks(image, results)

        cv2.imshow('OpenCV Feed', image)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()


In [6]:
results.pose_landmarks.landmark[0]

x: 0.5296849
y: 0.5940437
z: -0.91820306
visibility: 0.9998221

In [7]:
pose = []
for res in results.pose_landmarks.landmark:
    test = np.array([res.x , res.y , res.z , res.visibility])
    pose.append(test)

pose

[array([ 0.5296849 ,  0.59404367, -0.91820306,  0.99982208]),
 array([ 0.55173963,  0.52071422, -0.84448874,  0.99967629]),
 array([ 0.56490618,  0.52227306, -0.84436959,  0.99953926]),
 array([ 0.57710207,  0.52389568, -0.84457147,  0.99955493]),
 array([ 0.50219715,  0.5118776 , -0.86590689,  0.99976343]),
 array([ 0.48007184,  0.50669849, -0.86525214,  0.99975604]),
 array([ 0.45541731,  0.50248092, -0.8654806 ,  0.99983108]),
 array([ 0.58040857,  0.54556209, -0.35841358,  0.99934316]),
 array([ 0.39381614,  0.52207756, -0.44760242,  0.99985063]),
 array([ 0.5370847 ,  0.66451257, -0.75371784,  0.99975294]),
 array([ 0.48028454,  0.66129196, -0.77322763,  0.9998492 ]),
 array([ 0.67594045,  0.89894873, -0.07935663,  0.99670881]),
 array([ 0.23652521,  0.89221072, -0.3961502 ,  0.99927384]),
 array([0.72611129, 1.38412774, 0.14888044, 0.11164055]),
 array([-0.03315427,  1.23604417, -1.18417108,  0.97544879]),
 array([0.74240541, 1.74839616, 0.19758877, 0.09721   ]),
 array([ 0.15486

In [8]:
pose = np.array([[res.x , res.y ,res.z , res.visibility] for res in results.pose_landmarks.landmark])
lh = np.array([[res.x , res.y , res.z] for res in results.left_hand_landmarks.landmark]) if results.left_hand_landmarks else np.zeros(21*3)
rh = np.array([[res.x , res.y , res.z] for res in results.right_hand_landmarks.landmark]) if results.right_hand_landmarks else np.zeros(21*3)
face = np.array([[res.x , res.y ,res.z] for res in results.face_landmarks.landmark]) if results.face_landmarks else np.zeros(468*3)
face

array([[ 0.51880121,  0.67780244, -0.01375741],
       [ 0.53037387,  0.64541072, -0.04697132],
       [ 0.5217852 ,  0.65190393, -0.02020814],
       ...,
       [ 0.53619921,  0.55586493, -0.01318693],
       [ 0.5799129 ,  0.54235178,  0.01809301],
       [ 0.58447057,  0.53678483,  0.01946235]])

In [9]:
lh
# shape will give the idea of everything for the project 

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [10]:
rh

array([[ 1.51803687e-01,  7.41745710e-01,  4.50651413e-07],
       [ 2.20287144e-01,  7.24534452e-01, -2.14877818e-02],
       [ 2.85042465e-01,  6.83744311e-01, -3.37836221e-02],
       [ 3.33688796e-01,  6.45053387e-01, -4.63540964e-02],
       [ 3.75485063e-01,  6.06126308e-01, -5.84954657e-02],
       [ 2.75136590e-01,  5.36274076e-01, -9.82520729e-03],
       [ 3.14647913e-01,  4.57955837e-01, -2.89792754e-02],
       [ 3.40469182e-01,  4.13238823e-01, -4.69264425e-02],
       [ 3.65516931e-01,  3.74540150e-01, -5.99683896e-02],
       [ 2.45385647e-01,  5.15190721e-01, -1.54435029e-02],
       [ 2.79145390e-01,  4.23771501e-01, -3.04265562e-02],
       [ 3.05764854e-01,  3.68153512e-01, -4.60115261e-02],
       [ 3.30089360e-01,  3.23332489e-01, -5.81819043e-02],
       [ 2.12145582e-01,  5.14382541e-01, -2.52723973e-02],
       [ 2.42722318e-01,  4.28139776e-01, -4.08256426e-02],
       [ 2.70874202e-01,  3.80992264e-01, -5.13063110e-02],
       [ 2.98521906e-01,  3.40873867e-01

In [11]:
def extract_keypoints(results):
    keypoints = []
    
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark])
    if pose.shape[0] != 0:
        keypoints.append(pose)
    
    lh = np.array([[res.x, res.y, res.z, 0] for res in results.left_hand_landmarks.landmark]) if results.left_hand_landmarks else np.zeros((0, 4))
    if lh.shape[0] != 0:
        keypoints.append(lh)
    
    rh = np.array([[res.x, res.y, res.z, 0] for res in results.right_hand_landmarks.landmark]) if results.right_hand_landmarks else np.zeros((0, 4))
    if rh.shape[0] != 0:
        keypoints.append(rh)
    
    face = np.array([[res.x, res.y, res.z, 0] for res in results.face_landmarks.landmark]) if results.face_landmarks else np.zeros((0, 4))
    if face.shape[0] != 0:
        keypoints.append(face)
    
    if keypoints:
        keypoints = np.concatenate(keypoints, axis=0)
    else:
        keypoints = np.zeros((0, 4))  # Use (0, 4) to match the shape of the pose array
    
    return keypoints


In [12]:
# Path for exported data, numpy arrays
DATA_PATH = os.path.join('MP_Data') 

# Actions that we try to detect
actions = np.array(['hello', 'thanks', 'iloveyou'])

# Thirty videos worth of data
no_sequences = 30

# Videos are going to be 30 frames in length
sequence_length = 30
# hello
## 0
## 1
## 2
## ...
## 29
# thanks

# I love you
for action in actions: 
    for sequence in range(no_sequences):
        try: 
            os.makedirs(os.path.join(DATA_PATH, action, str(sequence)))
        except:
            pass

In [13]:
current_directory = os.getcwd()
print("Currentjiol p;'directory:", current_directory)

Currentjiol p;'directory: C:\Users\admin


In [14]:
cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    
    # NEW LOOP
    # Loop through actions
    for action in actions:
        # Loop through sequences aka videos
        for sequence in range(no_sequences):
            # Loop through video length aka sequence length
            for frame_num in range(sequence_length):

                # Read feed
                ret, frame = cap.read()

                # Make detections
                image, results = mediapipe_detection(frame, holistic)
#                 print(results)

                # Draw landmarks
                draw_styled_landmarks(image, results)
                
                # NEW Apply wait logic
                if frame_num == 0: 
                    cv2.putText(image, 'STARTING COLLECTION', (120,200), 
                               cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
                    cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12), 
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    # Show to screen
                    cv2.imshow('OpenCV Feed', image)
                    cv2.waitKey(2000)
                else: 
                    cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15,12), 
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    # Show to screen
                    cv2.imshow('OpenCV Feed', image)
                
                # NEW Export keypoints
                keypoints = extract_keypoints(results)
                npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
                np.save(npy_path, keypoints)

                # Break gracefully
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break
                    
    cap.release()
    cv2.destroyAllWindows()

In [15]:
cap.release()
cv2.destroyAllWindows()