Reference:
Lesson Week 3.2 MediaPipe with webcam.ipynb

In [1]:
import cv2
import mediapipe as mp

In [4]:
mp_drawing = mp.solutions.drawing_utils          # mediapipe Drawing Methods
mp_drawing_styles = mp.solutions.drawing_styles  # mediapipe Drawing Patterns
mp_pose = mp.solutions.pose                      # mediapipe Posture Detection


In [3]:
from tqdm import tqdm
import time
import numpy as np

Use mediapipe to read the coordinates of points between the current 2 frames.

In [4]:
cap = cv2.VideoCapture(0)
last_time = time.time()
pose_map ={}
for i in range(33):
    pose_map[i] = []
# Enable posture detection
with mp_pose.Pose(
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5) as pose:
    
    if not cap.isOpened():
        print("Cannot open camera")
        exit()
    while True:
        ret, img = cap.read()
        if not ret:
            print("Cannot receive frame")
            break
        
        # Set the frame rate to 30 fps
        now = time.time()
        if now - last_time < 1.0/30.0:
            continue
        last_time = now
        
        img = cv2.resize(img,(520,300))               # Reduced size for faster algorithms
        img2 = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)   # Convert BGR to RGB
        results = pose.process(img2)                  # Getting posture detection results
        
        if results.pose_landmarks:
            # Print body coordinates
            for id, lm in enumerate(results.pose_landmarks.landmark):
                if id in [0] and len(pose_map[id])>0:
                    last_lm = pose_map[id][-1] ## Take the previous landmark of the bone point ##
                    dist = np.sqrt((last_lm[0]-lm.x)**2 + (last_lm[1]-lm.y)**2 + (last_lm[2]-lm.z)**2)#Calculate the movement distance of each bone point, taking the nose as an example
                    speed = np.divide(dist,now-last_lm[3])#Calculate the speed of movement of each skeletal point, using the nose as an example
                    print(dist, speed)
                    h, w, c = img.shape
                    cx, cy = int(lm.x * w), int(lm.y * h)
                    last_cx, last_cy = int(last_lm[0] * w), int(last_lm[1] * h)
                    cv2.line(img, (cx, cy), (last_cx, last_cy), (0, 0, 255), 2)
                    cv2.circle(img, (cx, cy), 5, (255, 0, 0), cv2.FILLED)
                    cv2.putText(img, str(np.around(speed, decimals=3)), (cx, cy), cv2.FONT_HERSHEY_PLAIN, 3,(255, 0, 0), 3)
                    
                pose_map[id].append([lm.x, lm.y, lm.z, now])
                    
#             print(len(results.pose_landmarks.landmark))
#             for landmark in results.pose_landmarks.landmark:
#                 print(f"Landmark {landmark.x}: {landmark.y}, {landmark.z}")

        cv2.imshow('test', img)
        if cv2.waitKey(5) == ord('q'):
            break     # Press q to stop

print(pose_map)
cap.release()
cv2.destroyAllWindows()

0.30707999335111474 3.1924719464426476
0.02596435881945928 0.7624137249203182
0.06927334984734139 1.3937023367586838
0.05847431343199974 0.8655661943140848
0.012395849572939755 0.20400444738492374
0.03189457601442487 0.8804961940579095
0.021980668286281525 0.38061457795429726
0.028831828306141997 0.5808582157334592
0.04698916783983255 0.735080542105958
0.03926679280603446 0.6262976477589434
0.0036830029377076974 0.05783962660071495
0.015804636585245085 0.24586798329478365
0.015231041899094867 0.23337675702699387
0.008236080790496734 0.16965409052197553
0.003440392362248239 0.055318497882138044
0.023710705582032796 0.3642066632688997
0.019905271212530815 0.31784536042350636
0.05102142782296344 0.7952617823710273
0.0033283971910340114 0.04914494498402725
0.045236643090885105 0.9806757103639838
0.007474767466087959 0.11788030937766052
0.02917833026759372 0.4650031625980364
0.03535973062713671 0.5532180450540761
0.006493760997938356 0.18481294472398196
0.016136904643176297 0.26609797248123

0.006009979952837789 0.075338570426392
0.004116762257510392 0.06415364073462704
0.007393441182561209 0.11714879330049681
0.018229563117906505 0.2801185882893193
0.0906123272591352 1.3960880239515254
0.03210095930107987 0.5007053919084447
0.015082925828260776 0.3228594707323393
0.008199013340565194 0.12784404966071963
{0: [[0.4232945442199707, 0.9727423191070557, -0.9664937853813171, 1695563599.1121485], [0.423293799161911, 0.9707322120666504, -1.2735671997070312, 1695563599.2083373], [0.4234440326690674, 0.9709832668304443, -1.2995299100875854, 1695563599.2423928], [0.41998833417892456, 0.9666996002197266, -1.2304755449295044, 1695563599.2920973], [0.4168930649757385, 0.9613094329833984, -1.288618564605713, 1695563599.3596535], [0.4125157594680786, 0.9547585844993591, -1.2981884479522705, 1695563599.420416], [0.4076281189918518, 0.9490935206413269, -1.3291929960250854, 1695563599.4566395], [0.4078958034515381, 0.9472488164901733, -1.3072915077209473, 1695563599.51439], [0.4052078425884

In [1]:
#First implement a function that detects a person's posture in the camera in real time and classifies the current posture: manic/depressive phase

Get human body coordinates and output them in real time

In [6]:
from tensorflow.keras.models import load_model
model = load_model('lstm_emotion_detection.h5')
max_frames = 90
num_features = 75

In [7]:
def get_df_lstm(df_origin):
# Group by TYPE, FILE, FRAME and merge XYZ of each group into an array
    df_lstm = df_origin.groupby(['TYPE', 'FILE', 'FRAME'])['X', 'Y', 'Z'].apply(lambda x: x.values.flatten().tolist()).reset_index()
    df_lstm.drop(columns=['TYPE'], inplace=True)
    df_lstm.rename(columns={0: 'POSE'}, inplace=True)
    return df_lstm

In [8]:
import pandas as pd
is_manic = -1
# Use the trained model to classify the characters in the video as belonging to the hyperactive or depressive phase
def predict_crying(model, new_video_pose_data):
    global is_manic
    if len(new_video_pose_data) < 10:
        is_manic = -1
        return # Collected too little to analyse
    # Pre-processing of input data
    df_video_pose_data = pd.DataFrame(new_video_pose_data)
    # Limit list length to no more than max_frames
    df_video_pose_data = df_video_pose_data[df_video_pose_data['FRAME'] <= max_frames]
#     print(df_video_pose_data)
    df_lstm_phase = get_df_lstm(df_video_pose_data)
    poses_phase = df_lstm_phase.loc[:, 'POSE'].apply(lambda x: x).tolist()
    pose_data = np.array(poses_phase)
    padded_poses = np.zeros((1, max_frames, num_features))
    padded_poses[0, :pose_data.shape[0], :pose_data.shape[1]] = pose_data
    
    # Predictions
    prediction = model.predict(padded_poses)
    is_manic = (1 if prediction >= 0.5 else 0)
    print("is in manic phase" if is_manic else "is in depression phase ")
    return

In [9]:
import _thread
cap = cv2.VideoCapture(0)
last_time = time.time()

# Enable posture detection
mpPose = mp.solutions.pose 
pose = mpPose.Pose()
# Initialise drawing tools
mpDraw = mp.solutions.drawing_utils
is_manic = -1
duration = 5  # seconds
start_time = cv2.getTickCount()
landmarks_data = []
frame_num = 0
video_num = 1
now_phase = "unknow phase"
speed_map = {}
pose_map = {}
for i in range(33):
    pose_map[i] = []
while cap.isOpened():
    current_time = cv2.getTickCount()
    elapsed_time = (current_time - start_time) / cv2.getTickFrequency()

    if elapsed_time >= duration:
        # Start parsing the data
        print(video_num, frame_num, time.time())
        try:
            _thread.start_new_thread( predict_crying, (model, landmarks_data, ) )
        except:
            print("Error: Unable to start thread")
        # Initialise data
        start_time = cv2.getTickCount()
        landmarks_data = []
        frame_num = 0
        video_num += 1

    ret, img = cap.read()
    if not ret:
        print("Cannot receive frame")
        break
    
    # Set the frame rate to 30 fps
    now = time.time()
    if now - last_time < 1.0/30.0:
        continue
    last_time = now
    
    img = cv2.resize(img,(520,300))               # Reduced size for faster algorithms
    img2 = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)   # Convert BGR to RGB
    results = pose.process(img2)                  # Get posture detection results
    
    #Remove the bone spots on your legs
    points_to_remove = [25,26,27,28,29,30,31,32]
    points_to_speed = [0] # of dots showing speed
    if results.pose_landmarks:
       
        # If we want to perform a special operation on one of the 33 keypoints, we need to traverse the 33 keypoints first
        frame_num += 1
        for id, lm in enumerate(results.pose_landmarks.landmark):
        #The coordinates of the keypoints printed out are all in percentage form, we need to get the width and height of the video
            if id in points_to_remove:
                continue
            h, w, c = img.shape
            # Convert x times the width of the video and y times the height of the video to coordinate form
            cx, cy = int(lm.x * w), int(lm.y * h)
            # Add emotion type, video name, XYZ coordinate data to DataFrame
            landmarks_data.append({'TYPE': 'unknown', 'FILE': str(video_num), 'FRAME':frame_num, 'ID': id, 'X': lm.x, 'Y': lm.y, 'Z': lm.z})
            # Use cv2's circle function to make keypoints special
            # cv2.circle(img, (cx, cy), 5, (255, 0, 0), cv2.FILLED)

            if len(pose_map[id])>0:
                last_lm = pose_map[id][-1] ## Take the previous landmark of the bone point ##
                dist = np.sqrt((last_lm[0]-lm.x)**2 + (last_lm[1]-lm.y)**2 + (last_lm[2]-lm.z)**2)# Calculate the movement distance of each skeletal point, using the nose as an example
                speed = np.divide(dist,now-last_lm[3])*(10 ** 2)# Calculate the velocity of movement of each skeletal point, using the nose as an example
                speed_map[id] = speed
                # Use the values() method to get all the values in the dictionary and convert them to a list
                values = list(speed_map.values())
                # Calculate the sum of all values using the sum() function
                total = sum(values)
                # Use the len() function to calculate the number of values in the dictionary
                count = len(values)
                # Calculate the average
                average = total / count
                cv2.putText(img, str(np.around(average, decimals=2)), (10, h-50), cv2.FONT_HERSHEY_PLAIN, 2,
                                (255, 0, 0), 3)
                # print(dist, speed)
                last_cx, last_cy = int(last_lm[0] * w), int(last_lm[1] * h)
                if id in points_to_speed:
                    cv2.line(img, (cx, cy), (last_cx, last_cy), (0, 0, 255), 2)
                    cv2.circle(img, (cx, cy), 5, (255, 0, 0), cv2.FILLED)
                    cv2.putText(img, str(np.around(speed, decimals=2)), (cx, cy), cv2.FONT_HERSHEY_PLAIN, 2,(255, 0, 0), 3)
                
            pose_map[id].append([lm.x, lm.y, lm.z, now])
        #Use mpDraw to carve and connect key points of the human body
        mpDraw.draw_landmarks(img, results.pose_landmarks, mpPose.POSE_CONNECTIONS)
    if is_manic == 1 :
        now_phase = "positive/in manic phase" 
    elif is_manic == 0 :
        now_phase ="negative/in depression phase"
    else:
        now_phase = "unknow phase"
    cv2.putText(img, now_phase, (70, 50), cv2.FONT_HERSHEY_PLAIN, 2,
                    (255, 0, 0), 3)
    cv2.imshow('test', img)
    if cv2.waitKey(5) == ord('q'):
        break      # Press q to stop

cap.release()
cv2.destroyAllWindows()

1 105 1695595772.7883499


  df_lstm = df_origin.groupby(['TYPE', 'FILE', 'FRAME'])['X', 'Y', 'Z'].apply(lambda x: x.values.flatten().tolist()).reset_index()


is in manic phase
2 64 1695595777.823747
is in manic phase


  df_lstm = df_origin.groupby(['TYPE', 'FILE', 'FRAME'])['X', 'Y', 'Z'].apply(lambda x: x.values.flatten().tolist()).reset_index()


3 46 1695595782.8526957
is in manic phase


  df_lstm = df_origin.groupby(['TYPE', 'FILE', 'FRAME'])['X', 'Y', 'Z'].apply(lambda x: x.values.flatten().tolist()).reset_index()


4 86 1695595787.8764563
is in manic phase


  df_lstm = df_origin.groupby(['TYPE', 'FILE', 'FRAME'])['X', 'Y', 'Z'].apply(lambda x: x.values.flatten().tolist()).reset_index()


5 80 1695595792.879818
is in depression phase 


  df_lstm = df_origin.groupby(['TYPE', 'FILE', 'FRAME'])['X', 'Y', 'Z'].apply(lambda x: x.values.flatten().tolist()).reset_index()


6 117 1695595797.8925178
is in manic phase


  df_lstm = df_origin.groupby(['TYPE', 'FILE', 'FRAME'])['X', 'Y', 'Z'].apply(lambda x: x.values.flatten().tolist()).reset_index()


7 108 1695595802.900461
is in manic phase


  df_lstm = df_origin.groupby(['TYPE', 'FILE', 'FRAME'])['X', 'Y', 'Z'].apply(lambda x: x.values.flatten().tolist()).reset_index()


8 118 1695595807.9411545
is in depression phase 


  df_lstm = df_origin.groupby(['TYPE', 'FILE', 'FRAME'])['X', 'Y', 'Z'].apply(lambda x: x.values.flatten().tolist()).reset_index()


9 92 1695595812.962707
is in depression phase 


  df_lstm = df_origin.groupby(['TYPE', 'FILE', 'FRAME'])['X', 'Y', 'Z'].apply(lambda x: x.values.flatten().tolist()).reset_index()


10 108 1695595817.9855576
is in manic phase


  df_lstm = df_origin.groupby(['TYPE', 'FILE', 'FRAME'])['X', 'Y', 'Z'].apply(lambda x: x.values.flatten().tolist()).reset_index()


11 101 1695595823.0190735
is in manic phase


  df_lstm = df_origin.groupby(['TYPE', 'FILE', 'FRAME'])['X', 'Y', 'Z'].apply(lambda x: x.values.flatten().tolist()).reset_index()


12 24 1695595828.0444026
is in manic phase


  df_lstm = df_origin.groupby(['TYPE', 'FILE', 'FRAME'])['X', 'Y', 'Z'].apply(lambda x: x.values.flatten().tolist()).reset_index()
