In [13]:
# 1. System Setup for Fall Detection Application

# Import necessary standard libraries
import os
import time
import math
import argparse
import numpy as np
import cv2
from PIL import Image

# Import PyTorch libraries
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Set up GPU if available
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Define constants and configuration
CONFIG = {
    'model_path': 'models/yolov7-w6-pose.pt',  # Path to the pre-trained YOLOv7-w6-pose model
    'input_size': (960, 960),           # Input size for the model (width, height)
    'confidence_threshold': 0.25,       # Confidence threshold for detection
    'iou_threshold': 0.45,              # IoU threshold for NMS
    'device': device,                   # Device to run inference on
}

# Check if the model file exists
if not os.path.exists(CONFIG['model_path']):
    print(f"Model file not found at {CONFIG['model_path']}. Please download it first.")
    print("You can download it from: https://github.com/WongKinYiu/yolov7/releases")
else:
    print(f"Model file found at {CONFIG['model_path']}")

# Function to check camera availability
def check_camera(camera_id=0):
    cap = cv2.VideoCapture(camera_id)
    if not cap.isOpened():
        print(f"Error: Camera {camera_id} is not available")
        return False
    else:
        print(f"Camera {camera_id} is available")
        cap.release()
        return True

# Check camera
camera_available = check_camera()

print("System setup completed!")

Using device: cuda:0
Model file found at models/yolov7-w6-pose.pt
Camera 0 is available
System setup completed!


In [14]:
# 2. Video Processing Pipeline

def preprocess_frame(frame, input_size=(960, 960)):
    """
    Preprocess a frame for input to YOLOv7-W6-Pose.
    
    Args:
        frame (numpy.ndarray): Input frame from video/camera
        input_size (tuple): Target size for model input (width, height)
        
    Returns:
        torch.Tensor: Preprocessed frame tensor ready for model input
    """
    # Convert BGR to RGB (OpenCV uses BGR by default)
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    
    # Create a PIL image from the numpy array
    image = Image.fromarray(rgb_frame)
    
    # Original dimensions
    orig_width, orig_height = image.size
    
    # Calculate the letterbox dimensions to maintain aspect ratio
    ratio = min(input_size[0] / orig_width, input_size[1] / orig_height)
    new_width = int(orig_width * ratio)
    new_height = int(orig_height * ratio)
    
    # Resize the image
    resized_image = image.resize((new_width, new_height), Image.LANCZOS)
    
    # Create a new image with the target size and paste the resized image
    letterboxed_image = Image.new("RGB", input_size, (114, 114, 114))
    letterboxed_image.paste(resized_image, ((input_size[0] - new_width) // 2, 
                                           (input_size[1] - new_height) // 2))
    
    # Convert to tensor and normalize
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                             std=[0.229, 0.224, 0.225])
    ])
    
    # Apply transforms
    tensor = transform(letterboxed_image)
    
    # Add batch dimension
    tensor = tensor.unsqueeze(0)
    
    return tensor, ratio, (input_size[0] - new_width) // 2, (input_size[1] - new_height) // 2


def create_video_capture(source=0):
    """
    Create a video capture object for the specified source.
    
    Args:
        source: Camera index or video file path
        
    Returns:
        cv2.VideoCapture: VideoCapture object
    """
    # Create video capture object
    cap = cv2.VideoCapture(video_path, cv2.CAP_FFMPEG)
    
    # Check if camera/video opened successfully
    if not cap.isOpened():
        print(f"Error: Could not open video source {source}")
        return None
    
    # Get frame dimensions
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    
    print(f"Video source opened: {frame_width}x{frame_height} at {fps} FPS")
    
    return cap


def process_video_source(source=0, process_frame_func=None, display=True, output_file=None):
    """
    Process frames from a video source (camera or file).
    
    Args:
        source: Camera index or video file path
        process_frame_func: Function to process each frame
        display: Whether to display the output frame
        output_file: Path to save output video (if None, no saving)
        
    Returns:
        None
    """
    # Create video capture
    cap = create_video_capture(source)
    if cap is None:
        return
    
    # Get video properties
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    
    # Create video writer if output file is specified
    out = None
    if output_file is not None:
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_file, fourcc, fps, (frame_width, frame_height))
    
    # Process video frames
    frame_count = 0
    start_time = time.time()
    
    while True:
        # Read frame
        ret, frame = cap.read()
        
        # Break if end of video or error
        if not ret:
            break
        
        frame_count += 1
        
        # Process the frame if a processing function is provided
        if process_frame_func is not None:
            processed_frame, fall_detected = process_frame_func(frame)
        else:
            processed_frame = frame
            fall_detected = False
        
        # Write frame to output video if specified
        if out is not None:
            out.write(processed_frame)
        
        # Display the frame
        if display:
            # Add fall detection status to the frame
            if fall_detected:
                cv2.putText(processed_frame, "FALL DETECTED", (50, 50), 
                           cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            
            cv2.imshow('Video Processing', processed_frame)
            
            # Break if 'q' is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    
    # Calculate and print processing stats
    elapsed_time = time.time() - start_time
    if frame_count > 0 and elapsed_time > 0:
        fps_processing = frame_count / elapsed_time
        print(f"Processed {frame_count} frames in {elapsed_time:.2f} seconds ({fps_processing:.2f} FPS)")
    
    # Release resources
    cap.release()
    if out is not None:
        out.release()
    cv2.destroyAllWindows()

# Test the video processing pipeline
# if __name__ == "__main__":
#     # This will just display the camera feed without any processing
#     process_video_source(0, None, True, None)

In [15]:
# 3. Pose Estimation using YOLOv7-W6-Pose

# Load the YOLOv7-W6-Pose model
def load_model(model_path, device):
    """
    Load the YOLOv7-W6-Pose model from file.
    
    Args:
        model_path (str): Path to the model weights file
        device (torch.device): Device to load the model on
        
    Returns:
        model: Loaded YOLOv7 model
    """
    # Load model
    model = torch.load(model_path, map_location=device)['model']
    # Extract the model
    if isinstance(model, dict):
        model = model.float().to(device)  # FP32 model
    else:
        model.float().to(device)
    model.eval()  # Set to evaluation mode
    
    if device.type != 'cpu':
        model = model.half()  # Convert to FP16 if on GPU
    
    print(f"Model {model_path} loaded to {device}")
    return model

# Function for non-maximum suppression to filter pose detections
def non_max_suppression_pose(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, max_det=300):
    """
    Performs Non-Maximum Suppression (NMS) on the outputs of YOLOv7-W6-Pose.
    
    Args:
        prediction (torch.Tensor): Model predictions
        conf_thres (float): Confidence threshold
        iou_thres (float): IoU threshold
        classes (list): Filter by class
        max_det (int): Maximum number of detections
        
    Returns:
        list: List of detections with pose keypoints
    """
    # Handle empty prediction case
    if prediction.numel() == 0:
        return [torch.zeros((0, 57), device=prediction.device)] * prediction.shape[0]
    
    # Print prediction shape for debugging
    print(f"Prediction shape: {prediction.shape}")
    
    # Try to determine the number of classes and keypoints
    try:
        # For YOLOv7-W6-Pose standard format
        nc = prediction.shape[2] - 57  # Number of classes
        nkpt = 17  # Number of keypoints (COCO keypoints)
    except IndexError:
        # If prediction shape is not as expected, use default values
        print("Warning: Unable to determine classes from prediction shape. Using defaults.")
        nc = 1  # Default: single class (person)
        nkpt = 17  # Default: 17 keypoints (COCO format)

# Helper function to convert [cx, cy, w, h] to [x1, y1, x2, y2]
def xywh2xyxy(x):
    """
    Convert bounding box coordinates from [cx, cy, w, h] to [x1, y1, x2, y2].
    
    Args:
        x (torch.Tensor): Bounding box coordinates in [cx, cy, w, h] format
        
    Returns:
        torch.Tensor: Bounding box coordinates in [x1, y1, x2, y2] format
    """
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # x1 = cx - w/2
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # y1 = cy - h/2
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # x2 = cx + w/2
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # y2 = cy + h/2
    return y

# Function to perform pose estimation on a frame
def detect_pose(model, img, device, conf_thres=0.25, iou_thres=0.45):
    """
    Perform pose estimation on an image.
    
    Args:
        model: YOLOv7-W6-Pose model
        img (torch.Tensor): Preprocessed image tensor
        device (torch.device): Device to run inference on
        conf_thres (float): Confidence threshold
        iou_thres (float): IoU threshold
        
    Returns:
        tuple: (Detections with pose keypoints, inference time)
    """
    # Transfer to device
    img = img.to(device)
    
    # Half precision
    half = device.type != 'cpu'
    if half:
        img = img.half()
    
    # Get model output
    try:
        with torch.no_grad():  # Inference
            start_time = time.time()
            # The model might return a tuple or a single tensor
            output = model(img)
            end_time = time.time()
        
        # Handle different output types
        if isinstance(output, tuple):
            # If output is a tuple, use the first element
            # This assumes the first element contains the detections
            output = output[0]
        
        # Apply NMS
        output = non_max_suppression_pose(output, conf_thres, iou_thres)
        
    except Exception as e:
        print(f"Error during detection: {e}")
        output = None
        end_time = time.time()
    
    inference_time = end_time - start_time
    
    return output, inference_time

# Extract pose keypoints from detections
def extract_keypoints(detections, orig_shape, ratio, pad_x, pad_y):
    """
    Extract and format pose keypoints from detections.
    
    Args:
        detections (list): Detections from model after NMS
        orig_shape (tuple): Original image shape (height, width)
        ratio (float): Ratio from preprocessing
        pad_x (int): Padding in x direction
        pad_y (int): Padding in y direction
        
    Returns:
        list: List of dictionaries containing keypoints and bounding boxes
    """
    people = []
    
    # Check if detections is None or empty
    if detections is None or len(detections) == 0 or len(detections[0]) == 0:
        return people
    
    # Process each detection
    for detection in detections[0]:
        # Get confidence score
        conf = detection[4].cpu().numpy()
        
        # Skip if confidence is below threshold
        if conf < 0.25:
            continue
        
        # Extract bounding box
        box = detection[:4].cpu().numpy()
        
        # Adjust box coordinates for original image
        box[0] = (box[0] - pad_x) / ratio
        box[1] = (box[1] - pad_y) / ratio
        box[2] = (box[2] - pad_x) / ratio
        box[3] = (box[3] - pad_y) / ratio
        
        # Extract keypoints
        keypoints = []
        num_keypoints = 17
        for i in range(num_keypoints):
            # Extract x, y, confidence for each keypoint
            kp_x = detection[6 + i * 3].cpu().numpy()
            kp_y = detection[6 + i * 3 + 1].cpu().numpy()
            kp_conf = detection[6 + i * 3 + 2].cpu().numpy()
            
            # Adjust coordinates for original image
            kp_x = (kp_x - pad_x) / ratio
            kp_y = (kp_y - pad_y) / ratio
            
            keypoints.append((kp_x, kp_y, kp_conf))
        
        # Store person data
        person = {
            'bbox': box,
            'keypoints': keypoints,
            'confidence': conf
        }
        
        people.append(person)
    
    return people

# Define the keypoint indices according to COCO format
KEYPOINT_DICT = {
    'nose': 0,
    'left_eye': 1,
    'right_eye': 2,
    'left_ear': 3,
    'right_ear': 4,
    'left_shoulder': 5,
    'right_shoulder': 6,
    'left_elbow': 7,
    'right_elbow': 8,
    'left_wrist': 9,
    'right_wrist': 10,
    'left_hip': 11,
    'right_hip': 12,
    'left_knee': 13,
    'right_knee': 14,
    'left_ankle': 15,
    'right_ankle': 16
}

# Function to draw pose skeleton on image
def draw_pose(img, people):
    """
    Draw pose skeleton and bounding boxes on the image.
    
    Args:
        img (numpy.ndarray): Original image
        people (list): List of dictionaries containing keypoints and bounding boxes
        
    Returns:
        numpy.ndarray: Image with pose skeleton drawn
    """
    # Create a copy of the image
    img_with_pose = img.copy()
    
    # Define connections between keypoints
    skeleton = [
        ('nose', 'left_eye'), ('nose', 'right_eye'), ('left_eye', 'left_ear'),
        ('right_eye', 'right_ear'), ('left_shoulder', 'right_shoulder'),
        ('left_shoulder', 'left_hip'), ('right_shoulder', 'right_hip'),
        ('left_hip', 'right_hip'), ('left_shoulder', 'left_elbow'),
        ('left_elbow', 'left_wrist'), ('right_shoulder', 'right_elbow'),
        ('right_elbow', 'right_wrist'), ('left_hip', 'left_knee'),
        ('left_knee', 'left_ankle'), ('right_hip', 'right_knee'),
        ('right_knee', 'right_ankle')
    ]
    
    # Colors for visualization
    colors = {
        'bbox': (0, 255, 0),  # Green
        'keypoints': (0, 0, 255),  # Red
        'skeleton': (255, 0, 0)  # Blue
    }
    
    # Draw each person
    for person in people:
        # Draw bounding box
        box = person['bbox'].astype(int)
        cv2.rectangle(img_with_pose, (box[0], box[1]), (box[2], box[3]), colors['bbox'], 2)
        
        keypoints = person['keypoints']
        
        # Draw keypoints
        for i, (x, y, conf) in enumerate(keypoints):
            if conf > 0.25:  # Only draw keypoints with sufficient confidence
                cv2.circle(img_with_pose, (int(x), int(y)), 5, colors['keypoints'], -1)
        
        # Draw skeleton
        for kp1_name, kp2_name in skeleton:
            kp1_idx = KEYPOINT_DICT[kp1_name]
            kp2_idx = KEYPOINT_DICT[kp2_name]
            
            x1, y1, conf1 = keypoints[kp1_idx]
            x2, y2, conf2 = keypoints[kp2_idx]
            
            if conf1 > 0.25 and conf2 > 0.25:  # Only draw connections with confident keypoints
                cv2.line(img_with_pose, (int(x1), int(y1)), (int(x2), int(y2)), colors['skeleton'], 2)
    
    return img_with_pose

# Function to process a frame with pose estimation
def process_frame_with_pose(frame, model, device):
    """
    Process a frame with pose estimation.
    
    Args:
        frame (numpy.ndarray): Input frame
        model: YOLOv7-W6-Pose model
        device (torch.device): Device to run inference on
        
    Returns:
        tuple: (Processed frame with pose visualization, people detected with keypoints)
    """
    # Preprocess the frame
    input_tensor, ratio, pad_x, pad_y = preprocess_frame(frame, CONFIG['input_size'])
    
    # Perform pose detection
    detections, inference_time = detect_pose(
        model, 
        input_tensor, 
        device, 
        CONFIG['confidence_threshold'], 
        CONFIG['iou_threshold']
    )
    
    # Extract keypoints
    people = extract_keypoints(detections, frame.shape, ratio, pad_x, pad_y)
    
    # Draw pose skeleton on the frame
    frame_with_pose = draw_pose(frame, people)
    
    # Add inference time to the frame
    cv2.putText(
        frame_with_pose, 
        f'Inference: {inference_time*1000:.1f}ms', 
        (10, 30), 
        cv2.FONT_HERSHEY_SIMPLEX, 
        1, 
        (0, 255, 0), 
        2
    )
    
    return frame_with_pose, people

# Test pose estimation if running this cell directly
# if __name__ == "__main__":
#     # Only import if this file is run directly
#     from video_processing import process_video_source
    
#     # Load the model
#     model = load_model(CONFIG['model_path'], CONFIG['device'])
    
#     # Define a function to process each frame with the model
#     def process_frame_func(frame):
#         frame_with_pose, people = process_frame_with_pose(frame, model, CONFIG['device'])
#         return frame_with_pose, False  # No fall detection yet
    
#     # Process video from webcam
#     process_video_source(0, process_frame_func, True, None)

In [16]:
# 4. Fall Detection Algorithm - Including Fall vs. Lying Down Differentiation

class FallDetector:
    def __init__(self):
        """Initialize the fall detector with tracking for speed calculation."""
        self.prev_keypoints = None
    
    def calculate_length_factor(self, keypoints):
        """
        Calculate length factor based on the shoulder-to-torso distance.
        
        This implements Equation 1 from page 9 of the journal:
        Lfactor = √(xl - xTl)² + (yl - yTl)²
        
        Args:
            keypoints (list): List of keypoint tuples (x, y, confidence)
            
        Returns:
            float: Length factor for distance calculations
        """
        # Get left shoulder and left hip (torso) keypoints
        left_shoulder = keypoints[KEYPOINT_DICT['left_shoulder']]
        left_hip = keypoints[KEYPOINT_DICT['left_hip']]
        
        # Calculate Euclidean distance as in Equation 1
        shoulder_x, shoulder_y = left_shoulder[0], left_shoulder[1]
        hip_x, hip_y = left_hip[0], left_hip[1]
        
        length_factor = math.sqrt((shoulder_x - hip_x)**2 + (shoulder_y - hip_y)**2)
        return length_factor
    
    def calculate_vertical_speed(self, prev_keypoints, curr_keypoints):
        """
        Calculate vertical speed of movement to differentiate falls from lying down.
        
        As described on page 10: "The speed of key body points is calculated by
        measuring the displacement between their positions in consecutive frames."
        
        Args:
            prev_keypoints (list): Previous frame keypoints
            curr_keypoints (list): Current frame keypoints
            
        Returns:
            float: Vertical speed (displacement between frames)
        """
        if prev_keypoints is None:
            return 0
        
        # Use shoulders to calculate vertical speed as mentioned in the paper
        prev_left_shoulder = prev_keypoints[KEYPOINT_DICT['left_shoulder']]
        curr_left_shoulder = curr_keypoints[KEYPOINT_DICT['left_shoulder']]
        
        # Calculate vertical displacement
        vertical_displacement = abs(curr_left_shoulder[1] - prev_left_shoulder[1])
        return vertical_displacement
    
    def calculate_torso_angle(self, keypoints):
        """
        Calculate angle between torso and vertical.
        
        As mentioned on page 10: "A threshold of 45 degrees is used in the code.
        If the angle between the torso and legs drops below this value, it indicates
        that the person's body is approaching a horizontal position..."
        
        Args:
            keypoints (list): List of keypoint tuples (x, y, confidence)
            
        Returns:
            float: Angle in degrees
        """
        # Get shoulder and hip keypoints to define torso
        left_shoulder = keypoints[KEYPOINT_DICT['left_shoulder']]
        right_shoulder = keypoints[KEYPOINT_DICT['right_shoulder']]
        left_hip = keypoints[KEYPOINT_DICT['left_hip']]
        right_hip = keypoints[KEYPOINT_DICT['right_hip']]
        
        # Calculate midpoints
        mid_shoulder_x = (left_shoulder[0] + right_shoulder[0]) / 2
        mid_shoulder_y = (left_shoulder[1] + right_shoulder[1]) / 2
        mid_hip_x = (left_hip[0] + right_hip[0]) / 2
        mid_hip_y = (left_hip[1] + right_hip[1]) / 2
        
        # Calculate torso vector
        torso_x = mid_shoulder_x - mid_hip_x
        torso_y = mid_shoulder_y - mid_hip_y
        
        # Calculate angle with vertical (y-axis in image coordinates)
        # Note: In image coordinates, y increases downward
        angle_rad = math.atan2(torso_x, torso_y)  # Angle with vertical
        angle_deg = math.degrees(abs(angle_rad))
        
        return angle_deg
    
    def detect_fall(self, people):
        """
        Detect if a fall has occurred based on pose keypoints.
        
        This implements the fall detection algorithm described in the paper
        on pages 9-10, including the differentiation between falls and lying down.
        
        Args:
            people (list): List of dictionaries containing keypoints and bounding boxes
            
        Returns:
            bool: True if fall detected, False otherwise
        """
        # If no people detected, return False
        if not people:
            self.prev_keypoints = None
            return False
        
        # Use the first person detected
        person = people[0]
        keypoints = person['keypoints']
        
        # Get relevant keypoints
        left_shoulder = keypoints[KEYPOINT_DICT['left_shoulder']]
        right_shoulder = keypoints[KEYPOINT_DICT['right_shoulder']]
        left_hip = keypoints[KEYPOINT_DICT['left_hip']]
        right_hip = keypoints[KEYPOINT_DICT['right_hip']]
        left_ankle = keypoints[KEYPOINT_DICT['left_ankle']]
        right_ankle = keypoints[KEYPOINT_DICT['right_ankle']]
        
        # 1. Calculate the length factor (Equation 1, page 9)
        length_factor = self.calculate_length_factor(keypoints)
        
        # 2. Check if shoulders are lower than feet with adjustment (Equation 2, page 9)
        # yl ≤ yFl + α·Lfactor
        # Note: In image coordinates, y increases downward, so we flip the inequality
        alpha = 0.1  # Small adjustment factor as mentioned in the paper
        shoulder_below_feet = (
            left_shoulder[1] >= left_ankle[1] - alpha * length_factor or
            right_shoulder[1] >= right_ankle[1] - alpha * length_factor
        )
        
        # 3. Calculate body height and width (Equations 3 & 4, page 10)
        # Hbody = |yl - yFl|
        body_height = abs(left_shoulder[1] - left_ankle[1])
        # Wbody = |xl - xr|
        body_width = abs(left_shoulder[0] - right_shoulder[0])
        
        # 4. Check fall condition based on body dimensions (Equation 5, page 10)
        # Hbody < Wbody
        orientation_fallen = body_height < body_width
        
        # 5. Differentiate between fall and lying down (as described on page 10)
        # Calculate vertical speed between frames
        vertical_speed = 0
        if self.prev_keypoints is not None:
            vertical_speed = self.calculate_vertical_speed(self.prev_keypoints, keypoints)
        
        # Calculate torso angle
        torso_angle = self.calculate_torso_angle(keypoints)
        
        # From page 10: "If the vertical speed exceeds a specific threshold, it indicates a fall..."
        # From page 10: "A threshold of 45 degrees is used in the code."
        is_rapid_movement = vertical_speed > 15  # Threshold for rapid movement
        is_horizontal_position = torso_angle > 45  # Angle threshold as mentioned in paper
        
        # Fall is detected if basic conditions are met AND
        # either the movement is rapid OR the body is in a horizontal position
        fall_detected = (
            shoulder_below_feet and 
            orientation_fallen and 
            (is_rapid_movement or is_horizontal_position)
        )
        
        # Store current keypoints for next frame's speed calculation
        self.prev_keypoints = keypoints
        
        return fall_detected
    
    def annotate_frame(self, frame, fall_detected):
        """
        Annotate the frame with fall detection status.
        
        Args:
            frame (numpy.ndarray): Frame to annotate
            fall_detected (bool): Whether a fall is detected
            
        Returns:
            numpy.ndarray: Annotated frame
        """
        if fall_detected:
            # Draw red text for fall detection
            cv2.putText(
                frame,
                "FALL DETECTED!",
                (50, 50),
                cv2.FONT_HERSHEY_SIMPLEX,
                1,
                (0, 0, 255),  # Red color
                2
            )
        
        return frame

# Function to process a frame with fall detection
def process_frame_with_fall_detection(frame, model, device, fall_detector):
    """
    Process a frame with pose estimation and fall detection.
    
    Args:
        frame (numpy.ndarray): Input frame
        model: YOLOv7-W6-Pose model
        device (torch.device): Device to run inference on
        fall_detector (FallDetector): Fall detector instance
        
    Returns:
        tuple: (Processed frame with annotations, fall detection status)
    """
    # Perform pose estimation
    frame_with_pose, people = process_frame_with_pose(frame, model, device)
    
    # Detect falls using the algorithm from the journal
    fall_detected = fall_detector.detect_fall(people)
    
    # Annotate frame with fall detection status
    annotated_frame = fall_detector.annotate_frame(frame_with_pose, fall_detected)
    
    return annotated_frame, fall_detected

# # Test fall detection if running this cell directly
# if __name__ == "__main__":
#     # Only import if this file is run directly
#     from video_processing import process_video_source
    
#     # Load the model
#     model = load_model(CONFIG['model_path'], CONFIG['device'])
    
#     # Create fall detector
#     fall_detector = FallDetector()
    
#     # Define a function to process each frame with the model and fall detection
#     def process_frame_func(frame):
#         return process_frame_with_fall_detection(frame, model, device, fall_detector)
    
#     # Process video from webcam
#     process_video_source(0, process_frame_func, True, None)

In [17]:
# 5. Main Program Integration

def main():
    """
    Main function that integrates all components of the fall detection system.
    """
    print("Fall Detection System using YOLOv7-W6-Pose")
    print("------------------------------------------")
    
    # Parse command line arguments
    parser = argparse.ArgumentParser(description='Fall Detection System using YOLOv7-W6-Pose')
    parser.add_argument('--source', type=str, default='0', 
                        help='Source for video input (0 for webcam, or path to video file)')
    parser.add_argument('--output', type=str, default=None, 
                        help='Path to save processed video (None for no saving)')
    parser.add_argument('--display', action='store_true', 
                        help='Display video processing in real-time')
    args = parser.parse_args()
    
    # Convert source to int if it's a digit (camera index)
    if args.source.isdigit():
        args.source = int(args.source)
    
    # Check if model file exists
    if not os.path.exists(CONFIG['model_path']):
        print(f"Error: Model file not found at {CONFIG['model_path']}")
        print("Please download it from: https://github.com/WongKinYiu/yolov7/releases")
        return
    
    try:
        # Load YOLOv7-W6-Pose model
        print(f"Loading model from {CONFIG['model_path']}...")
        model = load_model(CONFIG['model_path'], CONFIG['device'])
        print("Model loaded successfully.")
        
        # Create fall detector
        fall_detector = FallDetector()
        
        # Define frame processing function
        def process_frame_func(frame):
            return process_frame_with_fall_detection(frame, model, CONFIG['device'], fall_detector)
        
        # Process video source
        print(f"Processing video from source: {args.source}")
        if args.output:
            print(f"Output will be saved to: {args.output}")
        
        process_video_source(args.source, process_frame_func, args.display, args.output)
        
        print("Processing completed.")
        
    except Exception as e:
        print(f"Error: {str(e)}")
        import traceback
        traceback.print_exc()

# # Run the main program if this script is executed directly
# if __name__ == "__main__":
#     main()

In [None]:
def evaluate_on_le2i_dataset():
    """
    Evaluate the fall detection system on the Le2i dataset with the specific directory structure.
    
    Returns:
        dict: Evaluation metrics
    """
    # Hard-coded path to Le2i dataset
    base_path = "datasets/le2i"
    print(f"Evaluating on Le2i dataset: {base_path}")
    
    # Load the model
    model = load_model(CONFIG['model_path'], CONFIG['device'])
    
    # Create fall detector
    fall_detector = FallDetector()
    
    # Results tracking
    true_positives = 0
    false_positives = 0
    true_negatives = 0
    false_negatives = 0
    
    # Define the environment folders and their structures
    environments = {
        # "All_Rooms": {
        #     "videos_path": os.path.join(base_path, "All_Rooms", "Videos"),
        #     "annotations_path": os.path.join(base_path, "All_Rooms", "Annotation_files")
        # },
        "Coffee_room_01": {
            "videos_path": os.path.join(base_path, "Coffee_room_01", "Coffee_room_01", "Videos"),
            "annotations_path": os.path.join(base_path, "Coffee_room_01", "Coffee_room_01", "Annotation_files")
        },
        "Coffee_room_02": {
            "videos_path": os.path.join(base_path, "Coffee_room_02", "Coffee_room_02", "Videos"),
            "annotations_path": os.path.join(base_path, "Coffee_room_02", "Coffee_room_02", "Annotations_files")
        },
        "Home_01": {
            "videos_path": os.path.join(base_path, "Home_01", "Home_01", "Videos"),
            "annotations_path": os.path.join(base_path, "Home_01", "Home_01", "Annotation_files")
        },
        "Home_02": {
            "videos_path": os.path.join(base_path, "Home_02", "Home_02", "Videos"),
            "annotations_path": os.path.join(base_path, "Home_02", "Home_02", "Annotation_files")
        },
        "Office": {
            "videos_path": os.path.join(base_path, "Office", "Office", "Videos"),
            "annotations_path": os.path.join(base_path, "Office", "Office", "Annotation_files")
        },
        "Lecture_room": {
            "videos_path": os.path.join(base_path, "Lecture_room", "Lecture room", "Videos"),
            "annotations_path": os.path.join(base_path, "Lecture_room", "Lecture room", "Annotation_files")
        }
    }
    
    # Process each environment
    for env_name, paths in environments.items():
        videos_path = paths["videos_path"]
        annotations_path = paths["annotations_path"]
        
        print(f"\nProcessing environment: {env_name}")
        
        # Check if the paths exist
        if not os.path.exists(videos_path):
            print(f"Warning: Videos path does not exist: {videos_path}")
            continue
            
        if not os.path.exists(annotations_path):
            print(f"Warning: Annotations path does not exist: {annotations_path}")
            continue
        
        # Get all video files
        video_files = [f for f in os.listdir(videos_path) if f.endswith(('.mp4', '.avi'))]
        print(f"Found {len(video_files)} videos in {env_name}")
        
        # Add counter for periodic model reloading
        video_counter = 0
        
        for video_file in video_files:
            video_counter += 1
            
            # Every 20 videos, reload the model to clear accumulated memory
            if video_counter % 20 == 0:
                print(f"Processed {video_counter} videos, reloading model to clear memory...")
                del model
                del fall_detector
                
                # Force garbage collection
                if CONFIG['device'].type != 'cpu':
                    torch.cuda.empty_cache()
                import gc
                gc.collect()
                
                # Reload model and fall detector
                model = load_model(CONFIG['model_path'], CONFIG['device'])
                fall_detector = FallDetector()
            
            video_path = os.path.join(videos_path, video_file)
            print(f"Processing {video_file}...")
            
            # Find corresponding annotation file (same name with .txt extension)
            annotation_file = os.path.splitext(video_file)[0] + '.txt'
            annotation_path = os.path.join(annotations_path, annotation_file)
            
            if not os.path.exists(annotation_path):
                print(f"Warning: No annotation file found for {video_file}")
                continue
            
            # Parse the annotation file
            fall_frames = []
            try:
                with open(annotation_path, 'r') as f:
                    lines = f.readlines()
                    # Skip the first two lines (metadata)
                    for line in lines[2:]:
                        parts = line.strip().split(',')
                        if len(parts) >= 2:
                            frame_num = int(parts[0])
                            action_code = int(parts[1])
                            # Action codes 7 and 8 represent falling and fallen states
                            if action_code in [7, 8]:
                                fall_frames.append(frame_num)
            except Exception as e:
                print(f"Error parsing annotation file {annotation_file}: {str(e)}")
                continue
            
            # Process the video
            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                print(f"Error: Could not open video {video_path}")
                continue
            
            # Process frames and detect falls
            frame_num = 0
            system_detected_falls = []
            
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                
                frame_num += 1
                
                # Process frame for fall detection
                _, people = process_frame_with_pose(frame, model, CONFIG['device'])
                frame_fall_detected = fall_detector.detect_fall(people)
                
                if frame_fall_detected:
                    system_detected_falls.append(frame_num)
            
            # Clean up
            cap.release()
            
            # Memory cleanup after each video
            if CONFIG['device'].type != 'cpu':
                torch.cuda.empty_cache()
            
            import gc
            gc.collect()
            
            # Compare results with ground truth
            # A fall is correctly detected if the system detected a fall in any frame
            # where the ground truth indicates a fall
            has_fall_in_ground_truth = len(fall_frames) > 0
            system_detected_fall = len(system_detected_falls) > 0
            
            if has_fall_in_ground_truth and system_detected_fall:
                true_positives += 1
                print(f"✓ True positive: Fall correctly detected in {video_file}")
            elif not has_fall_in_ground_truth and system_detected_fall:
                false_positives += 1
                print(f"✗ False positive: Fall incorrectly detected in {video_file}")
            elif has_fall_in_ground_truth and not system_detected_fall:
                false_negatives += 1
                print(f"✗ False negative: Failed to detect fall in {video_file}")
            else:  # not has_fall_in_ground_truth and not system_detected_fall
                true_negatives += 1
                print(f"✓ True negative: Correctly identified no fall in {video_file}")
        
        # Memory cleanup after each environment
        if CONFIG['device'].type != 'cpu':
            torch.cuda.empty_cache()
            
        import gc
        gc.collect()
        print(f"Completed environment: {env_name}, clearing memory...")
    
    # Calculate metrics
    metrics = {}
    
    # Calculate accuracy
    total = true_positives + true_negatives + false_positives + false_negatives
    if total > 0:
        metrics['accuracy'] = (true_positives + true_negatives) / total * 100
    
    # Calculate precision
    if true_positives + false_positives > 0:
        metrics['precision'] = true_positives / (true_positives + false_positives) * 100
    
    # Calculate recall (sensitivity)
    if true_positives + false_negatives > 0:
        metrics['recall'] = true_positives / (true_positives + false_negatives) * 100
    
    # Calculate specificity
    if true_negatives + false_positives > 0:
        metrics['specificity'] = true_negatives / (true_negatives + false_positives) * 100
    
    # Calculate F1 score
    if 'precision' in metrics and 'recall' in metrics:
        metrics['f1_score'] = 2 * (metrics['precision'] * metrics['recall']) / (metrics['precision'] + metrics['recall'])
    
    # Print confusion matrix
    print("\nConfusion Matrix:")
    print(f"True Positives: {true_positives}")
    print(f"False Positives: {false_positives}")
    print(f"True Negatives: {true_negatives}")
    print(f"False Negatives: {false_negatives}")
    
    # Print metrics
    print("\nEvaluation Metrics:")
    for metric, value in metrics.items():
        print(f"{metric.capitalize()}: {value:.2f}%")
    
    return metrics


metrics = evaluate_on_le2i_dataset()

Evaluating on Le2i dataset: datasets/le2i


  model = torch.load(model_path, map_location=device)['model']


Model models/yolov7-w6-pose.pt loaded to cuda:0

Processing environment: All_Rooms
Found 187 videos in All_Rooms
Processing video (1).avi...
Prediction shape: torch.Size([1, 57375, 57])
✗ False negative: Failed to detect fall in video (1).avi
Processing video (10).avi...
Prediction shape: torch.Size([1, 57375, 57])
Prediction shape: torch.Size([1, 57375, 57])
Prediction shape: torch.Size([1, 57375, 57])
Prediction shape: torch.Size([1, 57375, 57])
Prediction shape: torch.Size([1, 57375, 57])
Prediction shape: torch.Size([1, 57375, 57])
Prediction shape: torch.Size([1, 57375, 57])
Prediction shape: torch.Size([1, 57375, 57])
Prediction shape: torch.Size([1, 57375, 57])
Prediction shape: torch.Size([1, 57375, 57])
Prediction shape: torch.Size([1, 57375, 57])
Prediction shape: torch.Size([1, 57375, 57])
Prediction shape: torch.Size([1, 57375, 57])
Prediction shape: torch.Size([1, 57375, 57])
Prediction shape: torch.Size([1, 57375, 57])
Prediction shape: torch.Size([1, 57375, 57])
Predicti