In [1]:
import cv2 #computer vision to work with webcam
import numpy as np
import os #used to work with file paths
from matplotlib import pyplot as plt #used for visualisation
import time
import mediapipe as mp #used to extract keypoints

In [2]:
# Short aliases for the two MediaPipe solution modules used throughout the notebook.
mp_drawing = mp.solutions.drawing_utils  # helpers for rendering landmarks onto an image
mp_holistic = mp.solutions.holistic      # Holistic model module (also exposes the connection sets used when drawing)

In [3]:
def mediapipe_detection(image,model):
    """Run `model` on a BGR frame and return the frame together with the detections.

    Args:
        image: frame in OpenCV's BGR channel order (as returned by `cap.read()`).
        model: object exposing `process(rgb_image)`; in this notebook a
            `mp_holistic.Holistic` instance.

    Returns:
        (image, results): a BGR copy of the frame (it has gone through a
        BGR -> RGB -> BGR round trip) and whatever `model.process` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The frame is flagged read-only only for the duration of the model call
    # (presumably so MediaPipe can use the buffer without copying - see the
    # MediaPipe Python examples), then made writeable again for later drawing.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results

In [4]:
def draw_landmarks(image,results):
    """Draw the face, pose and both hand landmark sets from `results` onto `image` in place.

    Args:
        image: frame to draw on (modified in place by `mp_drawing.draw_landmarks`).
        results: detection result exposing `face_landmarks`, `pose_landmarks`,
            `left_hand_landmarks` and `right_hand_landmarks`.
    """
    # Custom colours for the face overlay, passed positionally as the 4th and
    # 5th arguments of mp_drawing.draw_landmarks.
    face_landmark_spec = mp_drawing.DrawingSpec(color = (50,50,130),thickness = 1, circle_radius = 1)
    face_connection_spec = mp_drawing.DrawingSpec(color = (100,100,100),thickness = 1, circle_radius = 1)
    mp_drawing.draw_landmarks(image,results.face_landmarks,mp_holistic.FACEMESH_CONTOURS,
                              face_landmark_spec,
                              face_connection_spec)

    # Pose and hands use the library's default drawing specs.
    default_styled = (
        ('pose_landmarks', mp_holistic.POSE_CONNECTIONS),
        ('left_hand_landmarks', mp_holistic.HAND_CONNECTIONS),
        ('right_hand_landmarks', mp_holistic.HAND_CONNECTIONS),
    )
    for field_name, connections in default_styled:
        mp_drawing.draw_landmarks(image, getattr(results, field_name), connections)

In [5]:
def extract_keypoints(results):
    """Flatten the landmarks in `results` into a single 1-D feature vector.

    The vector is the concatenation, in this order, of:
      * pose:       (x, y, z, visibility) per landmark - zeros(33*4) when missing
      * face:       (x, y, z) per landmark             - zeros(468*3) when missing
      * left hand:  (x, y, z) per landmark             - zeros(21*3) when missing
      * right hand: (x, y, z) per landmark             - zeros(21*3) when missing

    Args:
        results: object exposing `pose_landmarks`, `face_landmarks`,
            `left_hand_landmarks` and `right_hand_landmarks`; each is either
            falsy or has a `.landmark` iterable of points.

    Returns:
        1-D numpy array with the four parts joined end to end.
    """
    def _flatten(landmark_set, fields, expected_points):
        # A missing body part becomes a zero block of the expected size.
        if not landmark_set:
            return np.zeros(expected_points * len(fields))
        rows = [[getattr(point, name) for name in fields] for point in landmark_set.landmark]
        return np.array(rows).flatten()

    xyz = ('x', 'y', 'z')
    parts = [
        _flatten(results.pose_landmarks, xyz + ('visibility',), 33),
        _flatten(results.face_landmarks, xyz, 468),
        _flatten(results.left_hand_landmarks, xyz, 21),
        _flatten(results.right_hand_landmarks, xyz, 21),
    ]
    return np.concatenate(parts)

In [6]:
# Data-collection settings shared by the cells below.

data_path = os.path.join('mpdata')  # root folder for the exported keypoint files
actions = np.array(['Thank_you'])   # labels to record; each one gets its own sub-folder

no_sequences = 30  # videos recorded per action (used for training)
len_sequence = 30  # frames captured for each video

video = list()     # frames appended during capture, replayed once collection ends

In [7]:
#create folders
# One directory per (action, sequence) pair: <data_path>/<action>/<sequence index>.
for action in actions:
    for sequence in range(no_sequences):
        # exist_ok=True keeps the cell re-runnable when the folders already
        # exist, while genuine failures (permissions, invalid path, ...) are
        # raised instead of being silently swallowed by a bare `except`.
        os.makedirs(os.path.join(data_path,action,str(sequence)), exist_ok=True)

In [None]:
#create videos
# Records `no_sequences` clips of `len_sequence` frames for every action,
# saving one keypoint .npy file per frame under
# <data_path>/<action>/<sequence>/<frame>.npy, then replays the captured frames.
# Press 'q' in the OpenCV window to stop collecting (and again to stop playback).
cap = cv2.VideoCapture(0) #to use internal camera

# White canvas used for the "getting ready" banner shown before playback.
blank_image = 255 * np.ones(shape=[512, 512, 3], dtype=np.uint8)

# `video` is created in an earlier cell; empty it so that re-running this cell
# does not replay frames left over from a previous run.
video.clear()

stop_requested = False #set once the user presses 'q' during collection

try:
    if not cap.isOpened():
        raise RuntimeError('could not open camera 0')

    #create media pipe model
    with mp_holistic.Holistic(min_detection_confidence = 0.5, min_tracking_confidence = 0.5) as holistic:
        for action in actions:
            for sequence in range(no_sequences):
                for frame_num in range(len_sequence):

                    ret,frame = cap.read() #capturing a snap
                    if not ret:
                        # Without this check a failed read hands None to
                        # cv2.cvtColor and fails with an opaque OpenCV error.
                        raise RuntimeError('failed to read a frame from the camera')

                    #make detections
                    image, results = mediapipe_detection(frame,holistic)

                    # Call draw_landmarks(image,results) here to overlay the
                    # detections on the preview.

                    #display results
                    status_text = 'collecting frames for {} video_number {}'.format(action,str(sequence))
                    if frame_num == 0:
                        # First frame of a clip: show a banner and pause so the
                        # user can get into position. The two lines sit on
                        # separate baselines so they do not overlap or get
                        # clipped at the top of the frame.
                        cv2.putText(image,'starting collecting data',(12,30), cv2.FONT_HERSHEY_SIMPLEX,1, (0,255,0),1, cv2.LINE_AA)
                        cv2.putText(image,status_text,(12,65),
                                    cv2.FONT_HERSHEY_SIMPLEX,1, (0,0,255),1, cv2.LINE_AA)
                        # The frame must be shown BEFORE the pause, otherwise
                        # the banner is never visible while waiting.
                        cv2.imshow('openCV Feed',image) #show image on screen
                        cv2.waitKey(2000)
                    else:
                        cv2.putText(image,status_text,(12,150),
                                    cv2.FONT_HERSHEY_SIMPLEX,1, (0,0,255),1, cv2.LINE_AA)
                        cv2.imshow('openCV Feed',image) #show image on screen

                    #export keypoints (np.save appends the .npy extension)
                    key_points = extract_keypoints(results)
                    npy_path = os.path.join(data_path,action,str(sequence),str(frame_num))
                    np.save(npy_path,key_points)

                    video.append(image)

                    if cv2.waitKey(10) & 0xFF == ord('q'):
                        stop_requested = True
                        break
                # A bare `break` only leaves the innermost loop, so the request
                # is propagated through the two outer loops explicitly.
                if stop_requested:
                    break
            if stop_requested:
                break

    #replay everything that was captured
    cv2.putText(blank_image,'getting ready to play the video',(12,50), cv2.FONT_HERSHEY_SIMPLEX,1, (255,255,0),1, cv2.LINE_AA)
    cv2.imshow('openCV Feed',blank_image)
    cv2.waitKey(4000)
    for image in video:
        cv2.putText(image,'playing the video',(12,20), cv2.FONT_HERSHEY_SIMPLEX,1, (0,255,0),1, cv2.LINE_AA)
        cv2.imshow('openCV Feed',image) #show image on screen
        # Show first, then wait: every frame (including the last) is displayed
        # for the same 10 + 150 ms as before, and 'q' is honoured for the whole
        # delay.
        if cv2.waitKey(160) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if something above
    # raised; otherwise the device stays locked until the kernel restarts.
    cap.release()
    cv2.destroyAllWindows()