In [2]:
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt
import mediapipe as mp

mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    results = model.process(image)
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    return image, results

def draw_landmarks(image, results):
    if results.face_landmarks:
        mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION)
    if results.pose_landmarks:
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS)
    if results.left_hand_landmarks:
        mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
    if results.right_hand_landmarks:
        mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
 
def draw_styled_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
                              mp_drawing.DrawingSpec(color=(80, 110, 10), thickness=1, circle_radius=1),  # color_landmark
                              mp_drawing.DrawingSpec(color=(80, 256, 121), thickness=1, circle_radius=1)  # color_joints
                              )
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS, 
                              mp_drawing.DrawingSpec(color=(80, 22, 10), thickness=1, circle_radius=1),  # color_landmark
                              mp_drawing.DrawingSpec(color=(80, 44, 121), thickness=1, circle_radius=1)
                             )
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=1, circle_radius=1),  # color_landmark
                              mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=1, circle_radius=1)
                             )
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(245, 117, 66), thickness=1, circle_radius=1),  # color_landmark
                              mp_drawing.DrawingSpec(color=(245, 66, 230), thickness=1, circle_radius=1)
                             )


cap = cv2.VideoCapture(0)
# last_frame = None  # Initialize a variable to store the last frame

with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()

        image, results = mediapipe_detection(frame, holistic)
        print(results)
        
        draw_styled_landmarks(image, results)  # Draw landmarks on the current frame

        # Display the frame with landmarks using cv2.imshow
        cv2.imshow('OpenCV Feed', image)

        # Uncomment the following lines if you want to display the frame using plt.imshow
        # plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        # plt.show()

        key = cv2.waitKey(10)
        if key == ord('q') or key == 27:  # Check for 'q' or 'Esc' key press
            break
    
    cap.release()
    cv2.destroyAllWindows()

<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.solution_base.SolutionOutputs'>
<class 'mediapipe.python.soluti

KeyboardInterrupt: 

In [None]:
results


In [32]:
results.face_landmarks

landmark {
  x: 0.55293465
  y: 0.7308941
  z: -0.049059875
}
landmark {
  x: 0.56108487
  y: 0.661867
  z: -0.077720396
}
landmark {
  x: 0.55482805
  y: 0.6853641
  z: -0.04452266
}
landmark {
  x: 0.5480711
  y: 0.5933145
  z: -0.055412386
}
landmark {
  x: 0.5628213
  y: 0.6399757
  z: -0.08120257
}
landmark {
  x: 0.5626516
  y: 0.6128451
  z: -0.07351634
}
landmark {
  x: 0.56006813
  y: 0.550756
  z: -0.028251348
}
landmark {
  x: 0.44273278
  y: 0.54055214
  z: 0.012662754
}
landmark {
  x: 0.5613653
  y: 0.4981142
  z: -0.014108506
}
landmark {
  x: 0.5634884
  y: 0.46913403
  z: -0.014332925
}
landmark {
  x: 0.5660232
  y: 0.37765115
  z: 0.010483644
}
landmark {
  x: 0.5521076
  y: 0.7413999
  z: -0.047579944
}
landmark {
  x: 0.55057836
  y: 0.7501323
  z: -0.042993337
}
landmark {
  x: 0.5490309
  y: 0.75429976
  z: -0.037011765
}
landmark {
  x: 0.5489307
  y: 0.75804067
  z: -0.03656099
}
landmark {
  x: 0.5489029
  y: 0.766582
  z: -0.039541747
}
landmark {
  x: 0.5482

In [30]:
len(results.pose_landmarks.landmark)

33

In [None]:
lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)


In [None]:
lh

In [None]:
rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)

In [None]:
rh

In [9]:
pose = []
for res in results.pose_landmarks.landmark:
    test = np.array([res.x, res.y, res.z, res.visibility])
    pose.append(test)

In [7]:
pose

[array([ 0.41024572,  0.67261881, -3.16635704,  0.99323833]),
 array([ 0.47846875,  0.54922748, -3.08443499,  0.99320036]),
 array([ 0.51845354,  0.5456171 , -3.08527803,  0.99452955]),
 array([ 0.55239809,  0.54374099, -3.08612061,  0.99312931]),
 array([ 0.34186724,  0.55596554, -3.05616331,  0.99202788]),
 array([ 0.30373138,  0.55804503, -3.05685091,  0.99208182]),
 array([ 0.27491045,  0.55933625, -3.0575881 ,  0.98938423]),
 array([ 0.62013847,  0.57545382, -2.20996118,  0.99588442]),
 array([ 0.2470123 ,  0.58743644, -2.06095743,  0.99415004]),
 array([ 0.49199301,  0.80938858, -2.80013394,  0.98752815]),
 array([ 0.3497104 ,  0.818591  , -2.76148653,  0.98209995]),
 array([ 0.84790909,  1.00259292, -1.55246758,  0.91921151]),
 array([ 0.13407949,  0.97478861, -1.06730688,  0.90146232]),
 array([ 1.10735786,  1.53410769, -2.14356136,  0.2702876 ]),
 array([-0.02202864,  1.56949985, -0.94549543,  0.08366067]),
 array([ 0.91932708,  1.6155802 , -3.17114496,  0.0796361 ]),
 array([

In [10]:
len(pose)

33

In [16]:
pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(132)
face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(21*3)
lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)

In [17]:
pose.shape

(132,)

In [18]:
#path for exported data, numpy arrays 

DATA_PATH = os.path.join('MP_Data')
#Actions that we try to deect
actions = np.array(['hello', 'thanks', 'iloveyou'])
#30 videos worth of data
no_sequences = 30
#videos are going to be 30 frames in length
sequence_length = 30
for action in actions:
    for sequence in range(no_sequences):
        try:
            os.makedirs(os.path.join(DATA_PATH, action, str(sequence)))
        except:
            pass

In [19]:
import os
print(os.getcwd())


C:\Users\Rani


In [18]:
np.zeros(21*3) #even if we dont get the land mark we are going to pass the array of 0s with the same size

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [21]:
def extract_keypoints(results): #extracting the keypoints and creating them into numpy array
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose, face, lh, rh])

In [22]:
result_test = extract_keypoints(results)
result_test

array([ 0.41024572,  0.67261881, -3.16635704, ...,  0.        ,
        0.        ,  0.        ])

In [23]:
np.save('0', result_test)

In [31]:
np.load('0.npy')

array([ 0.41024572,  0.67261881, -3.16635704, ...,  0.        ,
        0.        ,  0.        ])

In [32]:
# Path for exported data, numpy arrays
DATA_PATH = os.path.join('MP_Data') 

# Actions that we try to detect
actions = np.array(['hello', 'thanks', 'iloveyou'])

# Thirty videos worth of data
no_sequences = 30

# Videos are going to be 30 frames in length
sequence_length = 30

In [33]:
for action in actions: 
    for sequence in range(no_sequences):
        try: 
            os.makedirs(os.path.join(DATA_PATH, action, str(sequence)))
        except:
            pass

In [None]:
cap = cv2.VideoCapture(0)
# last_frame = None  # Initialize a variable to store the last frame

with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    # Loop through actions
    for action in actions:
        # Loop through sequences aka videos
        for sequence in range(no_sequences):  # Number of sequences
            # Loop through video length aka sequence length
            for frame_num in range(sequence_length):

                ret, frame = cap.read()

                image, results = mediapipe_detection(frame, holistic)
                print(results)

                draw_styled_landmarks(image, results)  # Draw landmarks on the current frame

                if frame_num == 0:
                    cv2.putText(image, 'STARTING COLLECTION', (120, 200),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 4, cv2.LINE_AA)
                    cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15, 12),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 225), 1, cv2.LINE_AA)
                    cv2.waitKey(2000)
                else:
                    cv2.putText(image, 'Collecting frames for {} Video Number {}'.format(action, sequence), (15, 12),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 225), 1, cv2.LINE_AA)
                    
                    cv2.imshow('OpenCV Feed', image)
                    
            #NEW EXPORT KEYPOINTS
            keypoints = extract_keypoints(results)
            npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
            np.save(npy_path, keypoints)
            if cv2.waitKey(10) & 0xFF == ord('q'):
                      break

                  
cap.release()
cv2.destroyAllWindows()



In [36]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
label_map = {label:num for num, label in enumerate(actions)}
label_map


{'hello': 0, 'thanks': 1, 'iloveyou': 2}

In [37]:
sequences, labels = [], []
for action in actions:
    for sequence in range(no_sequences):
        window = []
        for frame_num in range(sequence_length):
            res = np.load(os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(frame_num)))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])

FileNotFoundError: [Errno 2] No such file or directory: 'MP_Data\\hello\\0\\0.npy'