In [1]:
import mediapipe as mp
import math
import cv2
import pyautogui

### Setting Up Mediapipe and drawing landmarks

In [2]:
# Setup the mediapipe model
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(min_detection_confidence=0.3)
mp_drawing = mp.solutions.drawing_utils

In [3]:
def draw_and_get_landmarks(image, pose):
    """Places landmarks on an image."""
    output_image = image.copy()
    
    # Identify all landmarks
    imageRGB = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = pose.process(imageRGB)
    
    # Retrieve the height and width of the input image.
    height, width, _ = image.shape
    
    # Append all landmarks and draw them on the OUTPUT image
    landmarks = []

    if results.pose_landmarks:
        mp_drawing.draw_landmarks(image=output_image, landmark_list=results.pose_landmarks,
                                  connections=mp_pose.POSE_CONNECTIONS)

        for landmark in results.pose_landmarks.landmark:
            landmarks.append((int(landmark.x * width), int(landmark.y * height),
                                  (landmark.z * width)))
    
    return output_image, landmarks

### Calculating angles between landmarks and detecting specific poses

In [4]:
def calculate_angle(landmark1, landmark2, landmark3):
    """Calculate the angle between 3 landmarks."""
    # Get the required landmarks coordinates.
    x1, y1, _ = landmark1
    x2, y2, _ = landmark2
    x3, y3, _ = landmark3
 
    # Calculate the angle between the three points
    angle = math.degrees(math.atan2(y3 - y2, x3 - x2) - math.atan2(y1 - y2, x1 - x2))
    
    # Make the angle positive
    return abs(angle)

In [5]:
def check_salute(shoulder, elbow, wrist, frame, display_frame=False):
    """Checks to see if the person is saluting."""
    is_saluting = True if 25 < calculate_angle(shoulder, elbow, wrist) < 40 else False
    if is_saluting and display_frame:
        cv2.putText(frame, "Saluting!", (10, 30), cv2.FONT_HERSHEY_PLAIN, 2, (0, 255, 0), 2)
    return is_saluting

def check_right_arm_up(shoulder, elbow, wrist, hip, frame, display_frame=False):
    """Checks to see if the person has their right arm at a 90 degree angle to their body."""
    is_arm_up = True if 85 < calculate_angle(elbow, shoulder, hip) < 95 else False
    if not is_arm_up:
        return False
    
    is_arm_flat = True if 175 < calculate_angle(wrist, elbow, shoulder) < 185 else False
    if display_frame:
        cv2.putText(frame, "Right T-Pose", (10, 30), cv2.FONT_HERSHEY_PLAIN, 2, (0, 255, 0), 2)
    return is_arm_flat


### Using pyautogui for movement

In [6]:
def move_forward():
    """Press the w key on the user's laptop."""
    pyautogui.keyDown('W')
    

def move_right():
    """Press the d key on the user's laptop."""
    pyautogui.keyDown('D')
    

def move_left():
    """Press the a key on the user's laptop."""
    pyautogui.keyDown('A')
    

def stop_movement():
    """Stops all movement."""
    pyautogui.keyUp('W')
    pyautogui.keyUp('D')
    pyautogui.keyUp('A')
    

### Connecting everything together with video

In [7]:
pose_video = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5, model_complexity=1)
 
# Initialize the VideoCapture object to read from the webcam.
video = cv2.VideoCapture(0)

# +++++++++++++++++++++++++++++++ UNCOMMENT THIS TO SEE THE VIDEO +++++++++++++++++++++++++++++++++
# cv2.namedWindow('Pose Detection', cv2.WINDOW_NORMAL)

# Initialize a variable to store the time of the previous frame.
time1 = 0
 
# Iterate until the video is accessed successfully.
while video.isOpened():
    
    # Read a frame.
    ok, frame = video.read()
    
    # Check if frame is not read properly.
    if not ok:
        break
    
    frame = cv2.flip(frame, 1)
    
    # Adjust the frame
    frame_height, frame_width, _ =  frame.shape
    frame = cv2.resize(frame, (int(frame_width * (640 / frame_height)), 640))
    
    # Perform Pose landmark detection.
    frame, landmarks = draw_and_get_landmarks(frame, pose_video)

    if landmarks:
        # Everything is mirrored here so I have to pass it in weirdly...
        is_saluting_right = check_salute(landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER.value], landmarks[mp_pose.PoseLandmark.LEFT_ELBOW.value], 
                                   landmarks[mp_pose.PoseLandmark.LEFT_WRIST.value], frame)
        is_saluting_left =  check_salute(landmarks[mp_pose.PoseLandmark.RIGHT_SHOULDER.value], landmarks[mp_pose.PoseLandmark.RIGHT_ELBOW.value], 
                                   landmarks[mp_pose.PoseLandmark.RIGHT_WRIST.value], frame)
        is_right_arm_up = check_right_arm_up(landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER.value], landmarks[mp_pose.PoseLandmark.LEFT_ELBOW.value], 
                                   landmarks[mp_pose.PoseLandmark.LEFT_WRIST.value], landmarks[mp_pose.PoseLandmark.LEFT_HIP.value], frame)
        
        if is_saluting_right:
            move_right()
        elif is_saluting_left:
            move_left()
        else:
            stop_movement()
    
    # Display the frame.
    # +++++++++++++++++++++++++++++++ UNCOMMENT THIS TO SEE THE VIDEO +++++++++++++++++++++++++++++++++
    # cv2.imshow('Pose Detection', frame)
    
    # Check to see if the user pressed 'esc'
    k = cv2.waitKey(1) & 0xFF
    if(k == 27):
        break
 
# Clean Up
video.release()
cv2.destroyAllWindows()

KeyboardInterrupt: 

### Ending the Demo (Optional)
TO end the demo when you have to gui window, just stop the jupyter cell from running. Then your camera should still be in use so to terminate that process, run the cell below:

In [None]:
# Clean Up
video.release()
cv2.destroyAllWindows()