In [11]:
import cv2
import mediapipe as mp
import numpy as np
from collections import deque
from tensorflow.keras.models import load_model

mp_pose = mp.solutions.pose
pose = mp_pose.Pose()
mp_draw = mp.solutions.drawing_utils

model = load_model('action_recognition_model.h5')
model.summary()


I0000 00:00:1737628424.496995   11121 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1737628424.498889   12262 gl_context.cc:369] GL version: 3.2 (OpenGL ES 3.2 Mesa 23.2.1-1ubuntu3.1~22.04.2), renderer: Mesa Intel(R) Xe Graphics (TGL GT2)
W0000 00:00:1737628424.553234   12257 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1737628424.590387   12255 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


FileNotFoundError: [Errno 2] Unable to synchronously open file (unable to open file: name = 'action_recognition_model.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [None]:

actions = ['Walking', 'Running', 'Jumping', 'Standing', 'Sitting'] 
sequence = deque(maxlen=30) 

def extract_keypoints(results):
    if results.pose_landmarks:
        keypoints = np.array([[landmark.x, landmark.y, landmark.z] for landmark in results.pose_landmarks.landmark]).flatten()
    else:
        keypoints = np.zeros(99) 
    return keypoints

cap = cv2.VideoCapture(0)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Convert Frame to RGB for MediaPipe
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    result = pose.process(rgb_frame)

    # Extract Keypoints and Perform Action Recognition
    if result.pose_landmarks:
        # Extract keypoints and append to sequence
        keypoints = extract_keypoints(result)
        sequence.append(keypoints)

        # Predict action if sequence is full
        if len(sequence) == 30:
            input_data = np.expand_dims(np.array(sequence), axis=0)  # Add batch dimension
            predictions = model.predict(input_data)
            action = actions[np.argmax(predictions)]

            # Display Action on Screen
            cv2.putText(frame, f'Action: {action}', (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    # Draw Pose Landmarks on the Frame
    mp_draw.draw_landmarks(frame, result.pose_landmarks, mp_pose.POSE_CONNECTIONS)

    # Display the Frame
    cv2.imshow('Real-time Action Recognition', frame)

    # Break Loop on 'q' Key Press
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release Resources
cap.release()
cv2.destroyAllWindows()
