In [None]:
import cv2
import mediapipe as mp
import numpy as np
# Short aliases for the MediaPipe drawing helpers and the pose solution module.
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

# 0. Test simple video feed

In [None]:
# VIDEO FEED
# Smoke test: show raw frames from the default camera until 'q' is pressed.
cap = cv2.VideoCapture(0)
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame was delivered; imshow would raise on the empty frame,
            # so stop the loop instead.
            break
        cv2.imshow('Mediapipe Feed', frame)
        # waitKey also pumps the GUI event loop; the mask keeps only the key code.
        if cv2.waitKey(5) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even on error or interrupt.
    cap.release()
    cv2.destroyAllWindows()

# 1. Make detections

In [None]:
# cap = cv2.VideoCapture(0)
cap = cv2.VideoCapture('data/50_ways_to_fall.mp4')

try:
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break # use break if capturing a video file, continue if webcam

            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # mediapipe requires RGB
            # Marked read-only while the model runs on it (mediapipe's examples
            # do this so the image can be passed by reference).
            image.flags.writeable = False

            # Make detections
            results = pose.process(image)

            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # back to BGR for imshow

            # Render detections
            mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

            cv2.imshow('Mediapipe Feed', image)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Release the video and close the window even if a frame raises or the
    # kernel is interrupted mid-loop.
    cap.release()
    cv2.destroyAllWindows()

# 2. Determining joints

<img src='https://google.github.io/mediapipe/images/mobile/pose_tracking_full_body_landmarks.png' style='height:300px'>

In [None]:
# cap = cv2.VideoCapture(0)
cap = cv2.VideoCapture('data/50_ways_to_fall.mp4')

try:
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break # use break if capturing a video file, continue if webcam

            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # mediapipe requires RGB
            # Marked read-only while the model runs on it (mediapipe's examples
            # do this so the image can be passed by reference).
            image.flags.writeable = False

            # Make detections
            results = pose.process(image)

            # Extract landmarks. Frames without a detected pose are skipped
            # explicitly, so genuine errors (and kernel interrupts) are no
            # longer silently swallowed.
            if results.pose_landmarks is not None:
                # `landmarks` is left at notebook level on purpose: the cells
                # below inspect the landmarks of the last processed frame.
                landmarks = results.pose_landmarks.landmark
                print(landmarks)

            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # back to BGR for imshow

            # Render detections
            mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

            cv2.imshow('Mediapipe Feed', image)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Release the video and close the window even if a frame raises or the
    # kernel is interrupted mid-loop.
    cap.release()
    cv2.destroyAllWindows()

In [32]:
# Exploratory: print the built-in documentation of the pose solution module.
help(mp_pose)

Help on module mediapipe.python.solutions.pose in mediapipe.python.solutions:

NAME
    mediapipe.python.solutions.pose - MediaPipe Pose.

CLASSES
    enum.IntEnum(builtins.int, enum.Enum)
        PoseLandmark
    mediapipe.python.solution_base.SolutionBase(builtins.object)
        Pose
    
    class Pose(mediapipe.python.solution_base.SolutionBase)
     |  Pose(static_image_mode=False, model_complexity=1, smooth_landmarks=True, enable_segmentation=False, smooth_segmentation=True, min_detection_confidence=0.5, min_tracking_confidence=0.5)
     |  
     |  MediaPipe Pose.
     |  
     |  MediaPipe Pose processes an RGB image and returns pose landmarks on the most
     |  prominent person detected.
     |  
     |  Please refer to https://solutions.mediapipe.dev/pose#python-solution-api for
     |  usage examples.
     |  
     |  Method resolution order:
     |      Pose
     |      mediapipe.python.solution_base.SolutionBase
     |      builtins.object
     |  
     |  Methods define

In [48]:
# Exploratory: enumerate the members of the PoseLandmark enum.
list(mp_pose.PoseLandmark)

<enum 'PoseLandmark'>

In [28]:
# Look up the nose landmark by its enum index.
# NOTE: `landmarks` is whatever the video loop in section 2 last assigned, so
# that cell must have run (and detected a pose) before this one.
landmarks[mp_pose.PoseLandmark.NOSE.value]

x: 0.30772656202316284
y: 0.710430920124054
z: -0.004651469178497791
visibility: 0.9999597668647766

# 3. Calculate perceived width and height

In [85]:
def calculate_height_width(landmarks):
    '''
    Calculate the perceived height and width of a pose.

    Each dimension is the range between the extremes of the landmarks'
    y and x coordinates respectively, in the same units as the landmarks.

    Params:
    landmarks - iterable of pose landmarks, each exposing `.x` and `.y`
                (e.g. `results.pose_landmarks.landmark` as returned from
                mp.solutions.pose.Pose.process)

    Returns:
    (perceived_height, perceived_width)

    Raises:
    ValueError - if `landmarks` is empty
    '''
    # Materialise once so that any iterable works, and use every landmark
    # supplied instead of assuming there are exactly 33 of them.
    points = list(landmarks)
    xs = [point.x for point in points]
    ys = [point.y for point in points]
    perceived_width = max(xs) - min(xs)
    perceived_height = max(ys) - min(ys)
    return perceived_height, perceived_width
def annotate_top_left(frame, height, width):
    '''
    Write the perceived height and width as two lines of black text.

    NOTE(review): the text is drawn on the notebook-level variable `image`,
    not on the `frame` argument; `frame` is only read for its shape. The
    function therefore only works while a suitable `image` exists at
    notebook level (presumably it must also be writable) - confirm whether
    drawing on `frame` was intended.

    Params:
    frame - array whose `.shape[:2]` is used to scale the text position
    height, width - values interpolated into the 'Height = ...' and
                    'Width = ...' labels
    '''
    # Text origin is 2% of frame.shape[:2].
    # NOTE(review): shape[:2] is presumably (rows, cols) while the origin is
    # used as (x, y) - confirm the axis order is intended.
    cv2.putText(
        image, f'Height = {height}',
        tuple(np.multiply(0.02, frame.shape[:2]).astype(int)), 
        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0)
        )
    # Second line, offset to 4% of frame.shape[:2].
    cv2.putText(
        image, f'Width = {width}',
        tuple(np.multiply(0.04, frame.shape[:2]).astype(int)), 
        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0)
        )

# Quick check on the landmarks of the last processed frame.
# NOTE: relies on `landmarks` left behind by the video loop in section 2.
calculate_height_width(landmarks)

(0.24396061897277832, 0.2551570534706116)

In [86]:
# cap = cv2.VideoCapture(0)
cap = cv2.VideoCapture('data/50_ways_to_fall.mp4')

try:
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break # use break if capturing a video file, continue if webcam

            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # mediapipe requires RGB
            # Marked read-only while the model runs on it (mediapipe's examples
            # do this so the image can be passed by reference).
            image.flags.writeable = False

            # Make detections
            results = pose.process(image)

            # Make the image writable again *before* annotating:
            # annotate_top_left draws its text onto `image`, which cannot be
            # relied on while the array is still marked read-only.
            image.flags.writeable = True

            # Extract landmarks. Frames without a detected pose are skipped
            # explicitly, so genuine errors (and kernel interrupts) are no
            # longer silently swallowed.
            if results.pose_landmarks is not None:
                landmarks = results.pose_landmarks.landmark
                height, width = calculate_height_width(landmarks)
                annotate_top_left(frame, height, width)

            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # back to BGR for imshow

            # Render detections
            mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

            cv2.imshow('Mediapipe Feed', image)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Release the video and close the window even if a frame raises or the
    # kernel is interrupted mid-loop.
    cap.release()
    cv2.destroyAllWindows()

# 4. Count falls
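- A fall is counted when the perceived width exceeds the perceived height in the current frame, but did not in the previous frame.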

In [94]:
def calculate_height_width(landmarks):
    '''
    Calculate the perceived height and width of a pose.

    Each dimension is the range between the extremes of the landmarks'
    y and x coordinates respectively, in the same units as the landmarks.

    Params:
    landmarks - iterable of pose landmarks, each exposing `.x` and `.y`
                (e.g. `results.pose_landmarks.landmark` as returned from
                mp.solutions.pose.Pose.process)

    Returns:
    (perceived_height, perceived_width)

    Raises:
    ValueError - if `landmarks` is empty
    '''
    # Materialise once so that any iterable works, and use every landmark
    # supplied instead of assuming there are exactly 33 of them.
    points = list(landmarks)
    xs = [point.x for point in points]
    ys = [point.y for point in points]
    perceived_width = max(xs) - min(xs)
    perceived_height = max(ys) - min(ys)
    return perceived_height, perceived_width
def annotate_top_left(frame, height, width, fall_count):
    '''
    Write the perceived height, width and running fall count as three lines
    of text.

    NOTE(review): the text is drawn on the notebook-level variable `image`,
    not on the `frame` argument; `frame` is only read for its shape. The
    function therefore only works while a suitable `image` exists at
    notebook level (presumably it must also be writable) - confirm whether
    drawing on `frame` was intended.

    Params:
    frame - array whose `.shape[:2]` is used to scale the text position
    height, width - values interpolated into the 'Height = ...' and
                    'Width = ...' labels
    fall_count - value interpolated into the 'Fall count = ...' label
    '''
    # Text origin is 2% of frame.shape[:2].
    # NOTE(review): shape[:2] is presumably (rows, cols) while the origin is
    # used as (x, y) - confirm the axis order is intended.
    cv2.putText(
        image, f'Height = {height}',
        tuple(np.multiply(0.02, frame.shape[:2]).astype(int)), 
        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0)
        )
    # Second line, offset to 4% of frame.shape[:2].
    cv2.putText(
        image, f'Width = {width}',
        tuple(np.multiply(0.04, frame.shape[:2]).astype(int)), 
        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0)
        )
    # Third line, offset to 6%, in a different colour from the lines above.
    # NOTE(review): which colour (255,0,0) renders as depends on the channel
    # order of `image` at call time - confirm.
    cv2.putText(
        image, f'Fall count = {fall_count}',
        tuple(np.multiply(0.06, frame.shape[:2]).astype(int)), 
        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,0,0)
        )
def detect_fall(height_0, width_0, height_1, width_1):
    '''
    Report whether a fall happened between two consecutive frames.

    A fall is the transition from "not wider than tall" in the previous
    frame to "wider than tall" in the current frame:
    - current frame:  width_1 > height_1
    - previous frame: width_0 <= height_0

    Params:
    height_0, width_0 - height and width from the previous frame
    height_1, width_1 - height and width from the current frame
    '''
    wider_than_tall_now = width_1 > height_1
    # The previous frame is only consulted when the current one qualifies.
    return wider_than_tall_now and width_0 <= height_0


In [96]:
# cap = cv2.VideoCapture(0)
cap = cv2.VideoCapture('data/50_ways_to_fall.mp4')

try:
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        # init fall_count and height_0, width_0
        fall_count = 0
        height_0 = None
        width_0 = None
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break # use break if capturing a video file, continue if webcam

            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # mediapipe requires RGB
            # Marked read-only while the model runs on it (mediapipe's examples
            # do this so the image can be passed by reference).
            image.flags.writeable = False

            # Make detections
            results = pose.process(image)

            # Make the image writable again *before* annotating:
            # annotate_top_left draws its text onto `image`, which cannot be
            # relied on while the array is still marked read-only.
            image.flags.writeable = True

            # Extract landmarks. Frames without a detected pose are skipped
            # explicitly, so genuine errors (and kernel interrupts) are no
            # longer silently swallowed. The previous measurements are kept
            # across such frames.
            if results.pose_landmarks is not None:
                landmarks = results.pose_landmarks.landmark
                # current frame
                height_1, width_1 = calculate_height_width(landmarks)
                # detect_fall - compare against None so that a legitimate 0.0
                # measurement is not mistaken for "no previous frame yet"
                if height_0 is not None and width_0 is not None:
                    if detect_fall(height_0, width_0, height_1, width_1):
                        fall_count += 1
                height_0, width_0 = height_1, width_1
                annotate_top_left(frame, height_1, width_1, fall_count)

            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # back to BGR for imshow

            # Render detections
            mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

            cv2.imshow('Mediapipe Feed', image)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Release the video and close the window even if a frame raises or the
    # kernel is interrupted mid-loop.
    cap.release()
    cv2.destroyAllWindows()

- Able to detect simple, singular falls well.
- Overcounts falls during rapid movements.
- TODO: note some more observations with ting.