In [1]:
import cv2
import mediapipe as mp
import numpy as np
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

# 0. Test simple video feed

In [2]:
# VIDEO FEED
# Smoke test: show raw webcam frames until 'q' is pressed or the camera stops delivering.
cap = cv2.VideoCapture(0)
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # A failed grab leaves no usable frame and cv2.imshow cannot display it,
            # so stop instead of crashing inside the loop.
            break
        cv2.imshow('Mediapipe Feed', frame)
        if cv2.waitKey(5) & 0xFF == ord('q'):
            break
finally:
    # Always give the camera back and close the window, even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()

# 1. Make detections

In [None]:
# Run MediaPipe Pose on every frame of the clip and show the detected skeleton.
# cap = cv2.VideoCapture(0)
cap = cv2.VideoCapture('data/50_ways_to_fall.mp4')

try:
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break # use break if capturing a video file, continue if webcam

            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # mediapipe requires RGB
            # Read-only while MediaPipe processes the frame (its sample code does
            # this so the frame can be passed by reference).
            image.flags.writeable = False

            # Make detections
            results = pose.process(image)

            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # Render detections
            mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

            cv2.imshow('Mediapipe Feed', image)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Release the capture and close the window even when the loop raises.
    cap.release()
    cv2.destroyAllWindows()

# 2. Determining joints

<img src='https://google.github.io/mediapipe/images/mobile/pose_tracking_full_body_landmarks.png' style='height:300px'>

In [16]:
# Same detection loop, additionally printing the landmark list of every frame
# in which a pose was found. `landmarks` stays defined after the loop and is
# reused by later cells.
# cap = cv2.VideoCapture(0)
cap = cv2.VideoCapture('data/50_ways_to_fall.mp4')

try:
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break # use break if capturing a video file, continue if webcam

            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # mediapipe requires RGB
            # Read-only while MediaPipe processes the frame (its sample code does
            # this so the frame can be passed by reference).
            image.flags.writeable = False

            # Make detections
            results = pose.process(image)

            # Extract landmarks
            # pose_landmarks is None when no person was detected; test for that
            # explicitly rather than hiding every possible error behind `except`.
            if results.pose_landmarks is not None:
                landmarks = results.pose_landmarks.landmark
                print(landmarks)

            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # Render detections
            mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

            cv2.imshow('Mediapipe Feed', image)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Release the capture and close the window even when the loop raises.
    cap.release()
    cv2.destroyAllWindows()

[x: 0.5067785382270813
y: 0.15123003721237183
z: -0.21540065109729767
visibility: 0.9999669790267944
, x: 0.5124020576477051
y: 0.13691723346710205
z: -0.19900240004062653
visibility: 0.9998769760131836
, x: 0.5159053206443787
y: 0.1368686556816101
z: -0.1990187019109726
visibility: 0.9998745918273926
, x: 0.5182655453681946
y: 0.13732609152793884
z: -0.19899477064609528
visibility: 0.9998562335968018
, x: 0.5021677613258362
y: 0.13734155893325806
z: -0.19884061813354492
visibility: 0.9998730421066284
, x: 0.49892228841781616
y: 0.1379486322402954
z: -0.198841392993927
visibility: 0.9998714923858643
, x: 0.4956898093223572
y: 0.1388031542301178
z: -0.1988343447446823
visibility: 0.9998657703399658
, x: 0.5233373641967773
y: 0.14517876505851746
z: -0.10259611904621124
visibility: 0.9996359348297119
, x: 0.4920266568660736
y: 0.14670434594154358
z: -0.10378997027873993
visibility: 0.999683141708374
, x: 0.5136308073997498
y: 0.16798898577690125
z: -0.17966075241565704
visibility: 0.99992

# 3. Utilize landmarks

## Calculate perceived width and height

In [28]:
def calculate_height_width(landmarks):
    '''
    Calculate perceived width and height as the range between the extremes
    of the x and y coordinates respectively, over every landmark supplied.

    Input:
    landmarks - iterable of landmarks, each exposing `.x` and `.y`
                (e.g. `results.pose_landmarks.landmark` from
                mp.solutions.pose.Pose.process)

    Returns:
    perceived height, perceived width (in the same units as the coordinates)

    Raises:
    ValueError - if `landmarks` is empty
    '''
    # Materialise once so any iterable works and its length is not assumed.
    points = list(landmarks)
    if not points:
        raise ValueError('landmarks must contain at least one landmark')

    joints_x = [point.x for point in points]
    joints_y = [point.y for point in points]
    perceived_width = max(joints_x) - min(joints_x)
    perceived_height = max(joints_y) - min(joints_y)
    return perceived_height, perceived_width
def annotate_height_width(image, height, width):
    '''
    Write the perceived height and width as two text rows near the top-left
    corner of `image` (drawn in place).

    Input:
    image - array to draw on; rows are indexed by shape[0], columns by shape[1]
    height, width - values to display
    '''
    # image.shape is (rows, cols) = (height, width), while cv2.putText expects
    # the origin as (x, y): x must come from the width, y from the height.
    image_height, image_width = image.shape[0], image.shape[1]
    cv2.putText(
        image, f'Height = {height}',
        (int(0.02*image_width), int(0.04*image_height)),
        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0)
        )
    cv2.putText(
        image, f'Width = {width}',
        (int(0.02*image_width), int(0.07*image_height)),
        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0)
        )

# Quick check on the `landmarks` value last assigned by the landmark-extraction loop in an earlier cell.
calculate_height_width(landmarks)

(0.6652794778347015, 0.08808645606040955)

In [30]:
# Overlay the perceived height and width of the detected pose on every frame.
# cap = cv2.VideoCapture(0)
cap = cv2.VideoCapture('data/50_ways_to_fall.mp4')

try:
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break # use break if capturing a video file, continue if webcam

            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # mediapipe requires RGB
            # Read-only while MediaPipe processes the frame (its sample code does
            # this so the frame can be passed by reference).
            image.flags.writeable = False

            # Make detections
            results = pose.process(image)

            # Drawing needs a writeable array. Annotating while the flag was still
            # cleared could fail, and the old bare `except` hid that failure.
            image.flags.writeable = True

            # Extract landmarks
            # pose_landmarks is None when no person was detected in this frame.
            if results.pose_landmarks is not None:
                landmarks = results.pose_landmarks.landmark
                height, width = calculate_height_width(landmarks)
                # Still drawn before the RGB->BGR conversion, as in the original order.
                annotate_height_width(image, height, width)

            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # Render detections
            mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

            cv2.imshow('Mediapipe Feed', image)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Release the capture and close the window even when the loop raises.
    cap.release()
    cv2.destroyAllWindows()

## Draw an enclosing rectangle on the body

In [41]:
def get_enclosing_rectangle(landmarks):
    '''
    Build a rectangle around the body from a fixed set of outer joints:
    - nose (index 0)
    - left and right shoulders (11, 12), wrists (15, 16), heels (29, 30)

    Input:
    landmarks - indexable sequence of pose landmarks exposing `.x` and `.y`
                (e.g. `results.pose_landmarks.landmark` from
                mp.solutions.pose.Pose.process)

    Returns:
    rectangle - ((min_x, max_y), (max_x, min_y), height, width)
                where height = max_y - min_y and width = max_x - min_x,
                all in the same units as the landmark coordinates
    '''
    xs, ys = [], []
    for joint_ix in (0, 11, 12, 15, 16, 29, 30):
        joint = landmarks[joint_ix]
        xs.append(joint.x)
        ys.append(joint.y)

    # Compute each extreme once and reuse it for the corners and the extents.
    x_low, x_high = min(xs), max(xs)
    y_low, y_high = min(ys), max(ys)
    return (
        (x_low, y_high),
        (x_high, y_low),
        y_high - y_low,
        x_high - x_low)
def annotate_rectangle(image, rectangle):
    '''
    Draw `rectangle` on `image` (in place) and print its height and width as
    two text rows near the top-left corner.

    Input:
    image - array to draw on; shape[0] is used as height, shape[1] as width
    rectangle - (corner_a, corner_b, height, width) as returned by
                get_enclosing_rectangle; corner coordinates are scaled by the
                image size to obtain pixel positions
    '''
    frame_h = image.shape[0]
    frame_w = image.shape[1]

    def to_pixels(corner):
        # Scale an (x, y) corner by the frame size and truncate to whole pixels.
        return (int(corner[0] * frame_w), int(corner[1] * frame_h))

    first_corner = to_pixels(rectangle[0])
    second_corner = to_pixels(rectangle[1])
    cv2.rectangle(image, first_corner, second_corner, (54, 89, 255), 5)

    box_height = rectangle[2]
    box_width = rectangle[3]
    text_x = int(0.02*frame_w)
    cv2.putText(
        image, f'Rectangle Height = {box_height}',
        (text_x, int(0.04*frame_h)),
        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0)
        )
    cv2.putText(
        image, f'Rectangle Width = {box_width}',
        (text_x, int(0.07*frame_h)),
        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,0)
        )
# Quick check on the `landmarks` value left over from the most recently run detection loop.
get_enclosing_rectangle(landmarks)

((0.3728671371936798, 0.8116862177848816),
 (0.6407171487808228, 0.1946987509727478),
 0.6169874668121338,
 0.26785001158714294)

In [45]:
# Draw the enclosing rectangle (instead of the skeleton) on every frame.
# cap = cv2.VideoCapture(0)
cap = cv2.VideoCapture('data/50_ways_to_fall.mp4')

try:
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break # use break if capturing a video file, continue if webcam

            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # mediapipe requires RGB
            # Read-only while MediaPipe processes the frame (its sample code does
            # this so the frame can be passed by reference).
            image.flags.writeable = False

            # Make detections
            results = pose.process(image)

            # Drawing needs a writeable array. Annotating while the flag was still
            # cleared could fail, and the old bare `except` hid that failure.
            image.flags.writeable = True

            # Extract landmarks
            # pose_landmarks is None when no person was detected in this frame.
            if results.pose_landmarks is not None:
                landmarks = results.pose_landmarks.landmark
                rectangle = get_enclosing_rectangle(landmarks)
                # Still drawn before the RGB->BGR conversion, as in the original
                # order, so the colour tuples keep their channel meaning.
                annotate_rectangle(image, rectangle)

            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # Render detections
            # mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

            cv2.imshow('Mediapipe Feed', image)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Release the capture and close the window even when the loop raises.
    cap.release()
    cv2.destroyAllWindows()

# 4. Count falls

## Using perceived width and height

In [50]:
def annotate_fall_count(image, height, width, fall_count):
    '''
    Write the perceived height and width (via annotate_height_width) and the
    running fall count onto `image`, in place.

    Input:
    image - array to draw on; shape[0] is its height, shape[1] its width
    height, width - perceived body height and width to display
    fall_count - number of falls detected so far
    '''
    annotate_height_width(image, height, width)
    # image.shape is (rows, cols) = (height, width), while cv2.putText expects
    # the origin as (x, y): x must come from the width, y from the height.
    image_height, image_width = image.shape[0], image.shape[1]
    cv2.putText(
        image, f'Fall count = {fall_count}',
        (int(0.02*image_width), int(0.1*image_height)),
        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,0,0)
        )
def detect_fall(height_0, width_0, height_1, width_1):
    '''
    Report a fall on the transition from "not wider than tall" to
    "wider than tall" between two consecutive frames.

    Params:
    height_0, width_0 - height and width from the previous frame
    height_1, width_1 - height and width from the current frame

    Returns:
    truthy when width_1 > height_1 and width_0 <= height_0, falsy otherwise
    '''
    wider_than_tall_now = width_1 > height_1
    if not wider_than_tall_now:
        # The previous frame is only consulted when the current one qualifies.
        return wider_than_tall_now
    return width_0 <= height_0


In [51]:
# Count falls by comparing perceived height/width between consecutive frames
# in which a pose was detected.
# cap = cv2.VideoCapture(0)
cap = cv2.VideoCapture('data/50_ways_to_fall.mp4')

try:
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        # init fall_count and height_0, width_0
        fall_count = 0
        height_0 = None
        width_0 = None
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break # use break if capturing a video file, continue if webcam

            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # mediapipe requires RGB
            # Read-only while MediaPipe processes the frame (its sample code does
            # this so the frame can be passed by reference).
            image.flags.writeable = False

            # Make detections
            results = pose.process(image)

            # Drawing needs a writeable array. Annotating while the flag was still
            # cleared could fail, and the old bare `except` hid that failure.
            image.flags.writeable = True

            # Extract landmarks
            # pose_landmarks is None when no person was detected in this frame;
            # the previous measurements are then kept for the next comparison.
            if results.pose_landmarks is not None:
                landmarks = results.pose_landmarks.landmark
                # current frame
                height_1, width_1 = calculate_height_width(landmarks)
                # detect_fall
                # Compare against None explicitly so a legitimate 0.0 measurement
                # is not mistaken for "no previous frame".
                if height_0 is not None and width_0 is not None:
                    if detect_fall(height_0, width_0, height_1, width_1):
                        fall_count += 1
                height_0, width_0 = height_1, width_1
                # Still drawn before the RGB->BGR conversion, as in the original
                # order, so the colour tuples keep their channel meaning.
                annotate_fall_count(image, height_1, width_1, fall_count)

            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # Render detections
            mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

            cv2.imshow('Mediapipe Feed', image)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Release the capture and close the window even when the loop raises.
    cap.release()
    cv2.destroyAllWindows()

## Enclosing rectangle

In [52]:
def annotate_rectangle_fall_count(image, rectangle, fall_count):
    '''
    Draw the enclosing rectangle with its size labels (via annotate_rectangle)
    and add the running fall count as a further text row, all in place.

    Input:
    image - array to draw on; shape[0] is used as height, shape[1] as width
    rectangle - value returned by get_enclosing_rectangle
    fall_count - number of falls detected so far
    '''
    annotate_rectangle(image, rectangle)

    frame_h = image.shape[0]
    frame_w = image.shape[1]
    text_origin = (int(0.02*frame_w), int(0.1*frame_h))
    cv2.putText(
        image, f'Fall count = {fall_count}', text_origin,
        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255)
        )
def detect_rectangle_fall(rectangle_0, rectangle_1):
    '''
    Report a fall when the current rectangle is wider than tall while the
    previous one was not.

    Params:
    rectangle_0, rectangle_1 - return values of get_enclosing_rectangle from
                               consecutive frames (index 2 is the height,
                               index 3 the width)

    Returns:
    truthy when width_1 > height_1 and width_0 <= height_0, falsy otherwise
    '''
    wider_than_tall_now = rectangle_1[3] > rectangle_1[2]
    if not wider_than_tall_now:
        # The previous rectangle is only inspected when the current one qualifies.
        return wider_than_tall_now
    return rectangle_0[3] <= rectangle_0[2]
    


In [53]:
# Count falls by comparing the enclosing rectangle between consecutive frames
# in which a pose was detected.
# cap = cv2.VideoCapture(0)
cap = cv2.VideoCapture('data/50_ways_to_fall.mp4')

try:
    with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
        # init fall_count and rectangle_0
        fall_count = 0
        rectangle_0 = None
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break # use break if capturing a video file, continue if webcam

            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # mediapipe requires RGB
            # Read-only while MediaPipe processes the frame (its sample code does
            # this so the frame can be passed by reference).
            image.flags.writeable = False

            # Make detections
            results = pose.process(image)

            # Drawing needs a writeable array. Annotating while the flag was still
            # cleared could fail, and the old bare `except` hid that failure.
            image.flags.writeable = True

            # Extract landmarks
            # pose_landmarks is None when no person was detected in this frame;
            # the previous rectangle is then kept for the next comparison.
            if results.pose_landmarks is not None:
                landmarks = results.pose_landmarks.landmark
                # current frame
                rectangle_1 = get_enclosing_rectangle(landmarks)
                # detect_fall
                if rectangle_0 is not None:
                    if detect_rectangle_fall(rectangle_0, rectangle_1):
                        fall_count += 1
                rectangle_0 = rectangle_1
                # Still drawn before the RGB->BGR conversion, as in the original
                # order, so the colour tuples keep their channel meaning.
                annotate_rectangle_fall_count(image, rectangle_1, fall_count)

            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            # Render detections
            mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

            cv2.imshow('Mediapipe Feed', image)

            if cv2.waitKey(10) & 0xFF == ord('q'):
                break
finally:
    # Release the capture and close the window even when the loop raises.
    cap.release()
    cv2.destroyAllWindows()