In [2]:
import cv2
import mediapipe as mp

# Get MediaPipe's pose-estimation module
mp_pose = mp.solutions.pose

# Open the camera input (device index 0)
cap = cv2.VideoCapture(0)
def predict_action(pose_features):
    """Classify a pose using fixed comparisons on a flat feature list.

    This is a hand-written heuristic rather than a trained model; replace it
    with a model fitted to your own dataset for more accurate recognition.

    Args:
        pose_features: Indexable sequence of numbers. In this cell the caller
            builds it by appending x, y, z for every landmark in order, so
            index ``i`` holds component ``i % 3`` of landmark ``i // 3``.

    Returns:
        One of the strings 'Squat', 'Raise arms' or 'Idle'.

    NOTE(review): with the x/y/z layout above, the checks compare different
    axes with each other (index 2 and 14 are z values, 12 and 24 are x values,
    22 is a y value) -- confirm these indices are what was intended.
    """
    is_squat = (
        pose_features[2] > pose_features[12]
        and pose_features[14] < pose_features[24]
    )
    if is_squat:
        return 'Squat'

    if pose_features[22] < pose_features[12]:
        return 'Raise arms'

    return 'Idle'

# Read camera frames, estimate the pose on each one, label the frame with the
# predicted action and show it, until the camera closes, a read fails or ESC
# is pressed.
#
# The Pose estimator is created once for the whole loop instead of once per
# frame: building it for every frame is expensive and discards the state that
# `min_tracking_confidence` relies on.
try:
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered (camera unplugged / stream ended);
                # `frame` is not a valid image, so stop instead of crashing
                # in cvtColor.
                break

            # MediaPipe is fed an RGB copy; the original BGR `frame` is kept
            # for drawing and display so cv2.imshow renders correct colours.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = pose.process(frame_rgb)

            # Only classify when a pose was detected
            if results.pose_landmarks:
                # Flatten the landmarks into [x0, y0, z0, x1, y1, z1, ...]
                pose_features = []
                for landmark in results.pose_landmarks.landmark:
                    pose_features.extend((landmark.x, landmark.y, landmark.z))

                # Recognise the action from the flattened features
                action = predict_action(pose_features)

                # Overlay the recognised action on the frame
                cv2.putText(frame, action, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

            # Show the frame (annotated when a pose was found)
            cv2.imshow('MediaPipe Pose Estimation', frame)

            # Leave the loop when ESC (key code 27) is pressed
            if cv2.waitKey(10) & 0xFF == 27:
                break
finally:
    # Release the camera and close the window even if an exception was raised
    cap.release()
    cv2.destroyAllWindows()

def predict_action(pose_features):
    """Placeholder classifier: ignores ``pose_features`` and always returns 'Jump'.

    NOTE(review): this rebinds the name ``predict_action`` that is already
    defined earlier in this cell. Because it sits after the capture loop, the
    loop above calls the earlier definition when the cell runs top to bottom.
    Confirm whether this stub is still needed.
    """
    # Stand-in for recognising the action with a machine-learning model
    return 'Jump'  # assume a jump result is returned here


In [None]:
import cv2
import mediapipe as mp
import numpy as np
import tensorflow as tf
from playsound import playsound

# Get MediaPipe's pose-estimation module
mp_pose = mp.solutions.pose

# Open the camera input (device index 0)
cap = cv2.VideoCapture(0)

# Load the trained gesture-recognition model
model = tf.keras.models.load_model('handgesture.h5')

# Read camera frames, estimate the pose, crop the padded landmark bounding box,
# classify the crop with `model`, annotate the frame and show it, until the
# camera closes, a read fails or ESC is pressed.
#
# NOTE(review): the crop is the bounding box of *all* pose landmarks, not a
# hand region, although the model file is named 'handgesture.h5' -- confirm
# this is the input the model was trained on. Pixel values are passed as-is
# (no scaling); verify against the model's training preprocessing.

# True while the previous frame was classified as 'Victory'; used so the sound
# is played once when the gesture appears rather than on every frame
# (playsound blocks until the clip ends, which would freeze the video).
victory_active = False

try:
    # Create the Pose estimator once: rebuilding it per frame is expensive and
    # discards the state that `min_tracking_confidence` relies on.
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered (camera unplugged / stream ended);
                # stop instead of crashing in cvtColor.
                break

            # MediaPipe and the classifier get an RGB copy; the BGR `frame` is
            # kept for drawing and display so cv2.imshow shows correct colours.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = pose.process(frame_rgb)

            if results.pose_landmarks:
                # Flattened [x0, y0, z0, x1, ...] features. Not used further in
                # this cell; kept in case later cells read `pose_features`.
                pose_features = []
                for landmark in results.pose_landmarks.landmark:
                    pose_features.extend((landmark.x, landmark.y, landmark.z))

                # Scale the landmark x/y values by the frame size to get pixel coordinates
                frame_height, frame_width = frame.shape[:2]
                pose_landmarks = np.array([[lmk.x, lmk.y] for lmk in results.pose_landmarks.landmark])
                pose_landmarks = (pose_landmarks * np.array([frame_width, frame_height])).astype(np.int32)

                # Bounding box of the landmarks, padded by 20% and clamped to the frame
                x_min, y_min = (int(v) for v in pose_landmarks.min(axis=0))
                x_max, y_max = (int(v) for v in pose_landmarks.max(axis=0))
                width = x_max - x_min
                height = y_max - y_min
                x1, y1 = max(0, x_min - int(0.2 * width)), max(0, y_min - int(0.2 * height))
                x2, y2 = min(frame_width, x_max + int(0.2 * width)), min(frame_height, y_max + int(0.2 * height))

                action = 'Unknown'
                # Skip classification when the clamped box is empty (landmarks
                # collapsed to a point or entirely outside the frame);
                # cvtColor/resize raise on an empty image.
                if x2 > x1 and y2 > y1:
                    gesture = cv2.resize(cv2.cvtColor(frame_rgb[y1:y2, x1:x2], cv2.COLOR_RGB2GRAY), (64, 64))
                    # Add batch and channel axes: (64, 64) -> (1, 64, 64, 1)
                    gesture = gesture[np.newaxis, :, :, np.newaxis]
                    # verbose=0 keeps Keras from printing a progress bar per frame
                    prediction = model.predict(gesture, verbose=0)
                    if prediction[0][0] > 0.5:
                        action = 'Victory'

                # Play the notification sound only when 'Victory' first appears
                is_victory = action == 'Victory'
                if is_victory and not victory_active:
                    playsound('victory.mp3')
                victory_active = is_victory

                # Overlay the recognised action on the frame
                cv2.putText(frame, action, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

                # Draw the skeleton; draw_landmarks lives in
                # mp.solutions.drawing_utils, not in the pose module.
                mp.solutions.drawing_utils.draw_landmarks(frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
            else:
                # Pose lost: allow the sound to play again on the next detection
                victory_active = False

            # Show the frame (annotated when a pose was found)
            cv2.imshow('MediaPipe Pose Estimation', frame)

            # Leave the loop when ESC (key code 27) is pressed
            if cv2.waitKey(10) & 0xFF == 27:
                break
finally:
    # Release the camera and close the window even if an exception was raised
    cap.release()
    cv2.destroyAllWindows()