# In [2]:
import cv2
import time
import torch
import numpy as np
import os
import math
from collections import deque
from utils.datasets import letterbox
from utils.torch_utils import select_device
from models.experimental import attempt_load
from utils.plots import output_to_keypoint, plot_skeleton_kpts
from utils.general import non_max_suppression_kpt, strip_optimizer
from torchvision import transforms

class FallDetector:
    def __init__(self, poseweights='yolov7-w6-pose.pt', device='0'):
        """
        Set up the pose model, the decision thresholds, and the tracking state.

        Thresholds follow the paper
        "Enhanced Fall Detection Using YOLOv7-W6-Pose for Real-Time Elderly Monitoring":
        - LENGTH_FACTOR_ALPHA (α): scale on the length factor in the height condition (Section 3.1)
        - VELOCITY_THRESHOLD: minimum averaged vertical shoulder speed (Section 3.2)
        - LEG_ANGLE_THRESHOLD: hip-knee-ankle angle limit in degrees (Section 3.2)
        - TORSO_ANGLE_THRESHOLD: torso-to-vertical angle limit in degrees (Section 3.2)
        - ASPECT_RATIO_THRESHOLD: width/height ratio limit (Section 3.1)
        - CONFIDENCE_THRESHOLD: minimum per-keypoint confidence

        Args:
            poseweights: Path to the pose weights handed to ``attempt_load``.
            device: Device specifier handed to ``select_device``.

        Side effects:
            Prints a start-up message and creates an ``output`` directory in
            the current working directory if it is missing.
        """
        print(f"Initializing Fall Detector with weights: {poseweights} on device: {device}")

        # Resolve the compute device; `half` is True for anything that is not the CPU
        resolved_device = select_device(device)
        self.device = resolved_device
        self.half = resolved_device.type != 'cpu'

        # Load the pose network and switch it to inference mode
        pose_model = attempt_load(poseweights, map_location=resolved_device)
        self.model = pose_model
        self.model.eval()

        # Make sure the 'output' directory exists
        os.makedirs('output', exist_ok=True)

        # --- Temporal tracking state ---
        self.prev_keypoints = self.prev_frame_time = None
        self.fall_start_time = self.prev_shoulder_y = None
        self.velocity_buffer = deque(maxlen=3)  # last vertical-speed magnitudes
        self.fall_buffer = deque(maxlen=2)      # last per-frame fall candidates
        self.fall_detected = False              # latest confirmed decision

        # --- Decision thresholds ---
        self.CONFIDENCE_THRESHOLD = 0.4    # keypoints below this are rejected
        self.LENGTH_FACTOR_ALPHA = 0.5     # α in the height condition
        # NOTE(review): detect_fall divides the pixel displacement by elapsed
        # seconds, so this value is effectively compared against px/second.
        self.VELOCITY_THRESHOLD = 1.0
        self.LEG_ANGLE_THRESHOLD = 45      # degrees
        self.TORSO_ANGLE_THRESHOLD = 50    # degrees
        self.ASPECT_RATIO_THRESHOLD = 0.8  # width / height
        self.TARGET_FPS = 25
    
    def calculate_euclidean_distance(self, point1, point2):
        """
        Return the straight-line distance between two 2-D points.

        The detector uses this for the length factor (Lfactor) of the height
        condition described in Section 3.1 of the paper.

        Args:
            point1, point2: Indexable points; only elements 0 (x) and 1 (y) are read.
        Returns:
            The Euclidean distance between the two points.
        """
        dx = point1[0] - point2[0]
        dy = point1[1] - point2[1]
        return math.hypot(dx, dy)

    def calculate_angle(self, a, b, c):
        """
        Calculate the angle at vertex ``b`` formed by the points ``a`` and ``c``.

        Used for the leg (hip-knee-ankle) angles of Section 3.2 of the paper.

        Args:
            a, b, c: Indexable 2-D points; ``b`` is the vertex. Only elements
                0 (x) and 1 (y) of each point are read.
        Returns:
            The angle in degrees, in [0, 180]. If the points are malformed
            (missing, too short, non-numeric) the method returns 180, i.e. the
            value of a fully extended limb, so a bad measurement cannot fire
            the "small leg angle" condition by itself.
        """
        try:
            ba = np.array([a[0]-b[0], a[1]-b[1]])
            bc = np.array([c[0]-b[0], c[1]-b[1]])
            # The small epsilon keeps the division defined when a limb segment
            # has zero length (the cosine then evaluates to 0, i.e. 90 degrees).
            cosine_angle = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc) + 1e-6)
            # Clip guards arccos against values marginally outside [-1, 1]
            return np.degrees(np.arccos(np.clip(cosine_angle, -1.0, 1.0)))
        except (TypeError, IndexError, KeyError, ValueError, ArithmeticError):
            # Only data problems are absorbed; KeyboardInterrupt/SystemExit
            # and programming errors now propagate instead of being swallowed.
            return 180  # return maximum angle if calculation fails

    def calculate_torso_angle(self, shoulders, hips):
        """
        Measure how far the torso deviates from the image's vertical axis.

        The torso is the vector from the shoulder midpoint to the hip
        midpoint; it is compared with the unit vector (0, 1). This implements
        the torso orientation check of Section 3.2 of the paper.

        Args:
            shoulders: list of shoulder points [(x,y), (x,y)]
            hips: list of hip points [(x,y), (x,y)]
        Returns:
            Angle in degrees between the torso vector and (0, 1); 90 when the
            two midpoints coincide and no direction can be determined.
        """
        shoulder_mid = np.mean(shoulders, axis=0)
        hip_mid = np.mean(hips, axis=0)

        # Vector pointing from the shoulder midpoint to the hip midpoint
        torso = np.array([hip_mid[0] - shoulder_mid[0],
                          hip_mid[1] - shoulder_mid[1]])
        torso_length = np.linalg.norm(torso)

        # Degenerate torso: midpoints overlap, report the neutral angle
        if torso_length < 1e-6:
            return 90

        axis = np.array([0, 1])
        cos_theta = np.clip(np.dot(torso, axis) / (torso_length + 1e-6), -1.0, 1.0)
        return np.degrees(np.arccos(cos_theta))

    def detect_fall(self, keypoints):
        """
        Main fall detection function implementing the paper's algorithm from Sections 3.1 and 3.2
        Combines multiple conditions (height, velocity, angles, aspect ratio) to detect falls
        
        Args:
            keypoints: Array of 17 keypoints with (x,y,confidence)
        Returns:
            tuple: (is_fall, state, condition_info)
        """
        # Keypoint indices as defined in the paper
        NOSE = 0
        LEFT_SHOULDER = 5
        RIGHT_SHOULDER = 6
        LEFT_HIP = 11
        RIGHT_HIP = 12
        LEFT_KNEE = 13
        RIGHT_KNEE = 14
        LEFT_ANKLE = 15
        RIGHT_ANKLE = 16
        
        try:
            # Extract keypoints with confidence check
            kp = {}
            
            # Reshape keypoints to get (x, y, conf) format for each keypoint
            reshaped_kpts = keypoints.reshape(-1, 3)
            
            # Extract specific keypoints
            kp['nose'] = reshaped_kpts[NOSE]
            kp['left_shoulder'] = reshaped_kpts[LEFT_SHOULDER]
            kp['right_shoulder'] = reshaped_kpts[RIGHT_SHOULDER]
            kp['left_hip'] = reshaped_kpts[LEFT_HIP]
            kp['right_hip'] = reshaped_kpts[RIGHT_HIP]
            kp['left_knee'] = reshaped_kpts[LEFT_KNEE]
            kp['right_knee'] = reshaped_kpts[RIGHT_KNEE]
            kp['left_ankle'] = reshaped_kpts[LEFT_ANKLE]
            kp['right_ankle'] = reshaped_kpts[RIGHT_ANKLE]
            
            # Confidence check for all keypoints
            if any(point[2] < self.CONFIDENCE_THRESHOLD for point in kp.values()):
                return False, "low_confidence", []

            # Get coordinates (convert to tuples for clarity)
            ls = (kp['left_shoulder'][0], kp['left_shoulder'][1])
            rs = (kp['right_shoulder'][0], kp['right_shoulder'][1])
            lh = (kp['left_hip'][0], kp['left_hip'][1])
            rh = (kp['right_hip'][0], kp['right_hip'][1])
            lk = (kp['left_knee'][0], kp['left_knee'][1])
            rk = (kp['right_knee'][0], kp['right_knee'][1])
            la = (kp['left_ankle'][0], kp['left_ankle'][1])
            ra = (kp['right_ankle'][0], kp['right_ankle'][1])

            """ 1. HEIGHT CONDITION (Paper Section 3.1) """
            # Calculate length factor (Lfactor) as Euclidean distance
            torso_mid = ((lh[0] + rh[0])/2, (lh[1] + rh[1])/2)
            Lfactor = self.calculate_euclidean_distance(ls, torso_mid)
            
            # Get vertical positions
            max_feet_y = max(la[1], ra[1])
            min_shoulder_y = min(ls[1], rs[1])
            
            # Paper's height condition: yl ≤ yFl + α·Lfactor
            height_cond = min_shoulder_y >= (max_feet_y - self.LENGTH_FACTOR_ALPHA * Lfactor)
            
            """ 2. VELOCITY CONDITION (Paper Section 3.2) """
            current_time = time.time()
            vertical_speed = 0
            current_min_y = min(ls[1], rs[1])
            
            if self.prev_shoulder_y is not None and self.prev_frame_time is not None:
                time_elapsed = current_time - self.prev_frame_time
                if time_elapsed > 0:
                    vertical_speed = (current_min_y - self.prev_shoulder_y) / time_elapsed
                    self.velocity_buffer.append(abs(vertical_speed))
            
            avg_speed = sum(self.velocity_buffer)/len(self.velocity_buffer) if self.velocity_buffer else 0
            speed_cond = avg_speed >= self.VELOCITY_THRESHOLD
            
            """ 3. ANGLE CONDITIONS (Paper Section 3.2) """
            left_leg_angle = self.calculate_angle(lh, lk, la)
            right_leg_angle = self.calculate_angle(rh, rk, ra)
            leg_angle_cond = min(left_leg_angle, right_leg_angle) < self.LEG_ANGLE_THRESHOLD
            
            # Torso orientation (not explicitly in paper but mentioned in text)
            torso_angle = self.calculate_torso_angle([ls, rs], [lh, rh])
            torso_cond = torso_angle > self.TORSO_ANGLE_THRESHOLD
            
            """ 4. ASPECT RATIO CONDITION (Paper Section 3.1) """
            # Body orientation ratio: width/height
            body_width = abs(ls[0] - rs[0])
            head_to_feet = abs(kp['nose'][1] - max_feet_y)
            orientation_ratio = body_width / (head_to_feet + 1e-6)
            aspect_cond = orientation_ratio > self.ASPECT_RATIO_THRESHOLD
            
            """ FALL DECISION LOGIC (Paper Section 3) """
            # Combined conditions - at least 2 must be true
            conditions_met = sum([height_cond, speed_cond, leg_angle_cond, torso_cond, aspect_cond])
            
            # State determination
            current_state = "normal"
            conditions_info = []
            
            if height_cond:
                if speed_cond:  # Rapid descent
                    current_state = "falling"
                    self.fall_start_time = current_time
                    conditions_info.append(f"speed:{avg_speed:.1f}px/s")
                elif torso_cond and self.fall_start_time and (current_time - self.fall_start_time < 1.0):
                    current_state = "fallen"
                    conditions_info.append("horizontal")
            
            if leg_angle_cond:
                conditions_info.append(f"leg_angle:{min(left_leg_angle, right_leg_angle):.0f}°")
            
            if aspect_cond:
                conditions_info.append(f"aspect:{orientation_ratio:.2f}")
            
            # Final decision with confirmation buffer
            is_fall = conditions_met >= 2
            self.fall_buffer.append(is_fall)
            final_detection = sum(self.fall_buffer) >= 2 if len(self.fall_buffer) >= 1 else is_fall
            
            if final_detection:
                current_state = "fallen"
                self.fall_detected = True
            else:
                self.fall_detected = False
            
            # Update tracking variables
            self.prev_keypoints = kp
            self.prev_shoulder_y = current_min_y
            self.prev_frame_time = current_time
            
            # Diagnostic information
            conditions_info.extend([
                f"height:{'Y' if height_cond else 'N'}",
                f"speed:{'Y' if speed_cond else 'N'}",
                f"leg_angle:{'Y' if leg_angle_cond else 'N'}",
                f"torso:{'Y' if torso_cond else 'N'}",
                f"aspect:{'Y' if aspect_cond else 'N'}",
                f"conf:{min(p[2] for p in kp.values()):.2f}"
            ])
            
            return final_detection, current_state, conditions_info
            
        except Exception as e:
            print(f"Detection error: {str(e)}")
            return False, "error", [f"Error: {str(e)}"]

    def process_frame(self, frame):
        """
        Run pose estimation and fall detection on a single frame.

        The frame is converted BGR -> RGB, letterboxed, passed through the pose
        model, and every detected person is drawn and classified with
        ``detect_fall``. The annotated image is the letterboxed network input
        converted back to BGR, so its size may differ from the input frame.

        Args:
            frame: Video frame to process (treated as a BGR image array).

        Returns:
            frame: Annotated (letterboxed) BGR image with detections drawn
            is_fall: True if any detected person was classified as fallen
            state: State of the last person flagged as a fall, else "normal"
            condition_info: Diagnostic strings of the last person flagged as a fall
        """
        # Preprocess: BGR -> RGB, then letterbox (cvtColor already returns a
        # new array, so the input frame is never modified)
        frame_width = frame.shape[1]
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image = letterbox(image, frame_width, stride=64, auto=True)[0]

        # Convert to a batched float tensor on the model's device
        image = transforms.ToTensor()(image).unsqueeze(0)
        image = image.to(self.device).float()

        # Inference
        with torch.no_grad():
            output, _ = self.model(image)

        # Post-process: NMS, then flatten to one row per detected person
        output = non_max_suppression_kpt(output, 0.25, 0.65, nc=self.model.yaml['nc'], nkpt=self.model.yaml['nkpt'], kpt_label=True)
        output = output_to_keypoint(output)

        # Rebuild a drawable BGR image from the network input tensor
        img = image[0].permute(1, 2, 0) * 255
        img = img.cpu().numpy().astype(np.uint8)
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

        # Frame-level results (overwritten by any person flagged as a fall)
        is_fall = False
        current_state = "normal"
        condition_info = []

        # Process each person detected
        for idx in range(output.shape[0]):
            # Columns 7+ of a row hold that person's (x, y, conf) keypoint triples
            key_points = output[idx, 7:]

            # Draw skeleton and keypoints
            plot_skeleton_kpts(img, key_points.T, 3)

            # Bounding box from the keypoints whose confidence exceeds 0.5
            kpts = key_points.reshape(-1, 3)
            x_values = [kpt[0] for kpt in kpts if kpt[2] > 0.5]
            y_values = [kpt[1] for kpt in kpts if kpt[2] > 0.5]

            if x_values and y_values:
                # Pad the tight keypoint box; the top-left corner is clamped at 0
                padding = 10
                xmin = max(0, min(x_values) - padding)
                ymin = max(0, min(y_values) - padding)
                xmax = max(x_values) + padding
                ymax = max(y_values) + padding

                # Aspect ratio is shown for reference only (not used in detection)
                width = xmax - xmin
                height = ymax - ymin
                bbox_aspect_ratio = width / height if height > 0 else 0

                cx = int((xmin + xmax) // 2)
                cy = int((ymin + ymax) // 2)

                # For debugging: show aspect ratio on frame
                cv2.putText(img, f"Ratio: {bbox_aspect_ratio:.2f}", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
            else:
                # Fallback to the detector's own box. Rows produced by
                # output_to_keypoint are laid out as
                # [batch_id, class_id, x_center, y_center, w, h, conf, *kpts],
                # so the box lives in columns 2-5 as centre/size (columns 0-1
                # are ids, not coordinates).
                box_cx, box_cy, box_w, box_h = output[idx, 2:6]
                xmin = max(0, box_cx - box_w / 2)
                ymin = max(0, box_cy - box_h / 2)
                xmax = box_cx + box_w / 2
                ymax = box_cy + box_h / 2
                cx, cy = int(box_cx), int(box_cy)

            # Detect fall for this person
            person_fall, person_state, person_conditions = self.detect_fall(key_points)

            if person_fall:
                # Any fallen person marks the whole frame as a fall
                is_fall = True
                current_state = person_state
                condition_info = person_conditions

                # Add visual indication of fall
                status_text = f"FALL DETECTED: {person_state.upper()}"
                cv2.putText(img, status_text, (50, 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

                # Red bounding box for a fall
                cv2.rectangle(img, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 0, 255), 2)

                # Filled marker at the box centre
                cv2.rectangle(img, (cx-10, cy-10), (cx+10, cy+10), (84, 61, 247), -1)

                # Show the first 3 diagnostic strings only
                for i, cond in enumerate(person_conditions[:3]):
                    cv2.putText(img, cond, (10, 60 + i*25),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 1)
            else:
                # Green bounding box when no fall is reported
                cv2.rectangle(img, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 255, 0), 1)

                # Show the (non-fall) state
                cv2.putText(img, f"State: {person_state}", (10, 60),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)

        return img, is_fall, current_state, condition_info

def run_fall_detection(poseweights='yolov7-w6-pose.pt', source='pose.mp4', device='cpu', display=True, save_output=True):
    """
    Run fall detection on a video or webcam feed

    Args:
        poseweights: Path to the YOLOv7 pose weights
        source: Path to video file or webcam ID; the ID may be given as an
            int (0) or as a numeric string ("0")
        device: Device to run inference on ('cpu' or '0', '1', etc. for GPU)
        display: Whether to show video with detections in real-time
        save_output: Whether to save the output video (written under 'output/')

    Returns:
        None. Progress and statistics are printed; if the source cannot be
        opened an error is printed and the function returns early.
    """
    # Initialize the fall detector
    detector = FallDetector(poseweights=poseweights, device=device)

    # Parse the input source: ints and numeric strings select a webcam
    if isinstance(source, int):
        input_path = source
    elif str(source).isnumeric():
        input_path = int(source)
    else:
        input_path = source

    # Open video capture
    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        print(f"Error: Could not open video source {source}")
        cap.release()
        return

    out = None
    output_path = None
    frame_count = 0
    total_fps = 0

    try:
        # Get video properties
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not (fps > 0):
            # Some sources (typically webcams) report 0/NaN; a writer opened
            # with that rate produces an unplayable file.
            fps = getattr(detector, 'TARGET_FPS', 25)

        # Setup output video writer if requested
        if save_output:
            os.makedirs('output', exist_ok=True)
            if isinstance(input_path, int):
                # For webcam
                output_path = os.path.join('output', "webcam_fall_detection.mp4")
            else:
                # For video file: strip only the final extension so names
                # containing dots are preserved
                filename = os.path.splitext(os.path.basename(input_path))[0]
                output_path = os.path.join('output', f"{filename}_fall_detection.mp4")

            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))
            print(f"Output will be saved to: {output_path}")

        print("Starting fall detection...")

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1
            print(f"Processing frame {frame_count}")

            # Process frame for fall detection
            start_time = time.time()
            processed_frame, is_fall, current_state, condition_info = detector.process_frame(frame)
            elapsed = time.time() - start_time

            # Calculate FPS (guard against a zero-length interval on coarse clocks)
            processing_fps = 1 / elapsed if elapsed > 0 else 0.0
            total_fps += processing_fps

            # The processed frame is letterboxed; resize it back to the source size
            processed_frame_resized = cv2.resize(processed_frame, (frame_width, frame_height))

            # Add FPS info
            cv2.putText(processed_frame_resized, f"FPS: {processing_fps:.2f}", (frame_width - 150, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

            # Display the frame if requested
            if display:
                cv2.imshow('Fall Detection', processed_frame_resized)

                # Exit on 'q' press
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

            # Save frame to output video if requested
            if out is not None:
                out.write(processed_frame_resized)
    finally:
        # Release resources even if processing raised
        cap.release()
        if out is not None:
            out.release()
        if display:
            # Only tear down windows when some may have been created; the call
            # is unavailable in GUI-less OpenCV builds
            cv2.destroyAllWindows()

    # Print statistics
    if frame_count > 0:
        avg_fps = total_fps / frame_count
        print(f"Processed {frame_count} frames")
        print(f"Average FPS: {avg_fps:.2f}")
        if save_output:
            print(f"Output saved to: {output_path}")

def process_dataset_folder(dataset_path, detector, is_sorted=False):
    """
    Process the Le2i fall detection dataset with the hierarchical folder structure
    
    Args:
        dataset_path: Path to the dataset root directory
        detector: Initialized FallDetector instance
        is_sorted: Boolean indicating if the dataset is in sorted structure (Fall/Non Fall folders)
    """
    import os
    import cv2
    import time
    import numpy as np
    from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
    
    # Path to Le2i dataset
    le2i_path = os.path.join(dataset_path, "le2i")
    
    # Initialize tracking variables for performance metrics
    true_labels = []
    predicted_labels = []
    
    # Total counters for statistics
    total_videos = 0
    total_frames = 0
    processing_times = []
    
    print(f"Starting batch processing of Le2i dataset at {le2i_path}")
    
    if is_sorted:
        # Process the sorted structure (Fall/Non Fall folders)
        fall_folder = os.path.join(le2i_path, "Le2i_Sorted", "Fall")
        nonfall_folder = os.path.join(le2i_path, "Le2i_Sorted", "Non Fall")  # Corrected folder name with space
        
        # Check if Fall folder exists
        if os.path.exists(fall_folder):
            print(f"Processing Fall folder: {fall_folder}")
            
            # Get all environment subfolders
            env_folders = [d for d in os.listdir(fall_folder) if os.path.isdir(os.path.join(fall_folder, d))]
            print(f"Found {len(env_folders)} environment folders in Fall folder: {', '.join(env_folders)}")
            
            # Process each environment folder
            for env_folder in env_folders:
                env_path = os.path.join(fall_folder, env_folder)
                videos_folder = os.path.join(env_path, "Videos")
                
                if os.path.exists(videos_folder):
                    print(f"Processing videos in {env_folder}/Videos")
                    
                    # Get all video files
                    video_files = [f for f in os.listdir(videos_folder) 
                                  if f.endswith(('.mp4', '.avi', '.mov', '.mkv', '.MP4', '.AVI', '.MOV', '.MKV'))]
                    
                    print(f"Found {len(video_files)} video files in {env_folder}/Videos")
                    
                    for video_file in video_files:
                        video_path = os.path.join(videos_folder, video_file)
                        print(f"Processing fall video: {env_folder}/{video_file}")
                        
                        # Extract true labels (1 for fall)
                        true_label = 1
                        
                        try:
                            # Process the video and get predictions
                            result, frames_processed, avg_time = process_single_video(video_path, detector, true_label)
                            
                            # Extend the lists with results
                            true_labels.extend([true_label] * len(result))
                            predicted_labels.extend(result)
                            
                            # Update statistics
                            total_videos += 1
                            total_frames += frames_processed
                            processing_times.append(avg_time)
                        except Exception as e:
                            print(f"Error processing video {video_file}: {str(e)}")
                else:
                    print(f"Videos folder not found in {env_folder}")
        else:
            print(f"Fall folder not found at {fall_folder}")
        
        # Check if NonFall folder exists
        if os.path.exists(nonfall_folder):
            print(f"Processing Non Fall folder: {nonfall_folder}")
            
            # Get all environment subfolders
            env_folders = [d for d in os.listdir(nonfall_folder) if os.path.isdir(os.path.join(nonfall_folder, d))]
            print(f"Found {len(env_folders)} environment folders in Non Fall folder: {', '.join(env_folders)}")
            
            # Process each environment folder
            for env_folder in env_folders:
                env_path = os.path.join(nonfall_folder, env_folder)
                videos_folder = os.path.join(env_path, "Videos")
                
                if os.path.exists(videos_folder):
                    print(f"Processing videos in {env_folder}/Videos")
                    
                    # Get all video files
                    video_files = [f for f in os.listdir(videos_folder) 
                                  if f.endswith(('.mp4', '.avi', '.mov', '.mkv', '.MP4', '.AVI', '.MOV', '.MKV'))]
                    
                    print(f"Found {len(video_files)} video files in {env_folder}/Videos")
                    
                    for video_file in video_files:
                        video_path = os.path.join(videos_folder, video_file)
                        print(f"Processing non-fall video: {env_folder}/{video_file}")
                        
                        # Extract true labels (0 for non-fall)
                        true_label = 0
                        
                        try:
                            # Process the video and get predictions
                            result, frames_processed, avg_time = process_single_video(video_path, detector, true_label)
                            
                            # Extend the lists with results
                            true_labels.extend([true_label] * len(result))
                            predicted_labels.extend(result)
                            
                            # Update statistics
                            total_videos += 1
                            total_frames += frames_processed
                            processing_times.append(avg_time)
                        except Exception as e:
                            print(f"Error processing video {video_file}: {str(e)}")
                else:
                    print(f"Videos folder not found in {env_folder}")
        else:
            print(f"Non Fall folder not found at {nonfall_folder}")
    else:
        # Process the traditional structure with mixed videos and ground truth files
        videos_folder = os.path.join(le2i_path, "Videos")
        
        # Check if directory exists
        if not os.path.exists(videos_folder):
            print(f"Error: Videos directory not found at {videos_folder}")
            return
            
        # List of environments in Le2i dataset
        environments = ["Home", "Coffee_room", "Office", "Lecture_room"]
        
        for env in environments:
            env_folder = os.path.join(videos_folder, env)
            if os.path.exists(env_folder):
                print(f"Processing environment: {env}")
                
                # Get all video files
                video_files = [f for f in os.listdir(env_folder) if f.endswith(('.mp4', '.avi'))]
                
                for video_file in video_files:
                    video_path = os.path.join(env_folder, video_file)
                    print(f"Processing video: {video_file}")
                    
                    # Determine if this is a fall video based on filename
                    # In Le2i dataset, videos with 'fall' in the name are fall videos
                    true_label = 1 if 'fall' in video_file.lower() else 0
                    
                    # Process the video and get predictions
                    result, frames_processed, avg_time = process_single_video(video_path, detector, true_label)
                    
                    # Extend the lists with results
                    true_labels.extend([true_label] * len(result))
                    predicted_labels.extend(result)
                    
                    # Update statistics
                    total_videos += 1
                    total_frames += frames_processed
                    processing_times.append(avg_time)
    
    # Calculate and display performance metrics based on paper's key metrics
    if true_labels and predicted_labels:
        calculate_and_display_metrics(true_labels, predicted_labels)
    
    # Display overall processing statistics
    print("\nProcessing Statistics:")
    print(f"Total videos processed: {total_videos}")
    print(f"Total frames processed: {total_frames}")
    if processing_times:
        avg_processing_time = sum(processing_times) / len(processing_times)
        print(f"Average processing time per frame: {avg_processing_time:.4f} seconds")
        print(f"Average FPS: {1/avg_processing_time:.2f}")


def _reset_detector_state(detector):
    """Clear the detector's temporal tracking so one video cannot influence the next."""
    for name in ('velocity_buffer', 'fall_buffer'):
        buffer = getattr(detector, name, None)
        if buffer is not None:
            buffer.clear()
    for name in ('prev_keypoints', 'prev_frame_time', 'fall_start_time', 'prev_shoulder_y'):
        if hasattr(detector, name):
            setattr(detector, name, None)
    if hasattr(detector, 'fall_detected'):
        detector.fall_detected = False


def process_single_video(video_path, detector, true_label):
    """
    Process a single video file and return predictions

    The video-level prediction is 1 as soon as *any* frame is classified as a
    fall, otherwise 0. ``true_label`` is only used for logging; it does not
    influence the prediction.

    Args:
        video_path: Path to the video file
        detector: Initialized FallDetector instance; its temporal tracking
            state is reset before the first frame
        true_label: Ground truth label (1 for fall, 0 for non-fall)

    Returns:
        predictions: One-element list holding the video-level prediction
        frames_processed: Number of frames read from the video
        avg_processing_time: Average processing time per successfully processed frame

        ``([0], 0, 0)`` is returned when the file is missing, cannot be
        opened, or no frame could be processed.
    """
    print(f"Opening video file: {video_path}")
    # Verify the file exists
    if not os.path.exists(video_path):
        print(f"Error: Video file does not exist: {video_path}")
        return [0], 0, 0

    cap = cv2.VideoCapture(video_path)
    frames_processed = 0
    predictions = []
    processing_times = []

    try:
        if not cap.isOpened():
            print(f"Error: Could not open video {video_path}")
            print(f"Video file exists: {os.path.exists(video_path)}")
            print(f"Video file size: {os.path.getsize(video_path) if os.path.exists(video_path) else 'N/A'} bytes")
            return [0], 0, 0

        # Get video properties for debugging
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        print(f"Video properties: {width}x{height}, {fps} FPS, {total_frames} frames")

        # Velocity history and the confirmation buffer left over from a
        # previous video must not leak into this one
        _reset_detector_state(detector)

        # Process each frame
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frames_processed += 1

            start_time = time.time()
            try:
                processed_frame, is_fall, current_state, condition_info = detector.process_frame(frame)
            except Exception as e:
                # A frame that cannot be processed counts as "no fall"
                print(f"Error processing frame {frames_processed}: {str(e)}")
                predictions.append(0)
            else:
                processing_times.append(time.time() - start_time)
                # Record prediction (1 for fall, 0 for non-fall)
                predictions.append(1 if is_fall else 0)

            # Show progress every 5 frames
            if frames_processed % 5 == 0:
                print(f"Processed {frames_processed}/{total_frames} frames from {os.path.basename(video_path)}")
    finally:
        # Release video capture even if something unexpected was raised
        cap.release()

    # Check if we processed any frames
    if not processing_times:
        print("No frames were successfully processed.")
        return [0], 0, 0

    # Calculate average processing time
    avg_processing_time = sum(processing_times) / len(processing_times)

    # A single frame classified as a fall marks the whole video as a fall.
    # For a non-fall video this means one false alarm yields a false positive.
    video_prediction = 1 if 1 in predictions else 0

    print(f"Video prediction: {video_prediction} (True label: {true_label})")
    print(f"Processed {frames_processed} frames with avg processing time: {avg_processing_time:.4f} seconds")

    return [video_prediction], frames_processed, avg_processing_time


def calculate_and_display_metrics(true_labels, predicted_labels):
    """
    Calculate and display binary classification metrics for fall detection.

    Labels are binary: 1 marks a fall and 0 marks a non-fall. The confusion
    matrix is tallied directly from the label pairs, so the function has no
    third-party dependency. Pairs in which either label is neither 0 nor 1
    are left out of the counts.

    Args:
        true_labels: List of ground truth labels
        predicted_labels: List of predicted labels (same length as true_labels)

    Returns:
        None. All results are printed to stdout. If either list is empty or
        the lengths differ, a message is printed and nothing is computed.
    """
    # Check if there are enough labels to calculate metrics
    if not true_labels or not predicted_labels:
        print("No data available to calculate metrics.")
        return

    if len(true_labels) != len(predicted_labels):
        print(f"Error: Mismatch in label lengths. True: {len(true_labels)}, Predicted: {len(predicted_labels)}")
        return

    # Tally the 2x2 confusion matrix (positive class = 1, negative class = 0)
    tp = tn = fp = fn = 0
    for actual, predicted in zip(true_labels, predicted_labels):
        if actual == 1 and predicted == 1:
            tp += 1
        elif actual == 0 and predicted == 0:
            tn += 1
        elif actual == 0 and predicted == 1:
            fp += 1
        elif actual == 1 and predicted == 0:
            fn += 1

    # Every ratio falls back to 0.0 when its denominator is empty, so a
    # dataset with a single class still produces a full report.
    total = tp + tn + fp + fn
    accuracy = (tp + tn) / total if total else 0.0
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    recall = tp / (tp + fn) if (tp + fn) else 0.0
    specificity = tn / (tn + fp) if (tn + fp) else 0.0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) else 0.0

    # Display confusion matrix in a more readable format
    print("\nConfusion Matrix:")
    print(f"True Positives (TP): {tp}")
    print(f"True Negatives (TN): {tn}")
    print(f"False Positives (FP): {fp}")
    print(f"False Negatives (FN): {fn}")

    # Display metrics with percentage format matching the paper
    print("\nPerformance Metrics:")
    print(f"Accuracy: {accuracy:.2%}")
    print(f"Precision: {precision:.2%}")
    print(f"Recall (Sensitivity): {recall:.2%}")
    print(f"Specificity: {specificity:.2%}")
    print(f"F1 Score: {f1:.2%}")

    # Side-by-side comparison; the right-hand column holds the reference
    # figures this report is compared against.
    comparison_rows = (
        ("Accuracy", accuracy, "96.15%"),
        ("Precision", precision, "97.00%"),
        ("Recall", recall, "97.98%"),
        ("Specificity", specificity, "90.32%"),
        ("F1 Score", f1, "97.48%"),
    )
    print("\nComparison with Paper's Metrics:")
    print("                  | Our Method | Paper's Method")
    print("------------------|------------|---------------")
    for label, value, paper_value in comparison_rows:
        # Fixed-width fields keep the '|' separators aligned whether the
        # value renders as "0.00%", "96.15%" or "100.00%".
        print(f"{label:<18}| {value:<10.2%} | {paper_value}")
    
def _ask_value(prompt, default):
    """Prompt for a free-text value; return ``default`` when the answer is blank.

    Surrounding whitespace is stripped so that a stray space does not turn a
    valid answer (or an intended default) into an unrecognised one.
    """
    return input(prompt).strip() or default


def _ask_yes_no(prompt, default="y"):
    """Prompt for a yes/no answer and return it as a bool.

    A blank answer selects ``default``. "y" and "yes" (in any letter case)
    count as yes; every other answer counts as no.
    """
    return _ask_value(prompt, default).lower() in ("y", "yes")


def _confirm_and_run_stream(poseweights, device, source, source_label, display, save_output):
    """Print the run summary for a video/webcam source and start it once confirmed.

    Args:
        poseweights: Path to the pose weights file
        device: Device string ("cpu" or a GPU device ID)
        source: Value handed to run_fall_detection as the input source
        source_label: Human-readable description of the source for the summary
        display: Whether the processed video is shown while running
        save_output: Whether the output video is saved
    """
    print("\nRunning fall detection with:")
    print(f"- Weights: {poseweights}")
    print(f"- Device: {device}")
    print(f"- Source: {source_label}")
    print(f"- Display: {'Yes' if display else 'No'}")
    print(f"- Save output: {'Yes' if save_output else 'No'}")

    if not _ask_yes_no("\nConfirm? (y/n) [default: y]: "):
        print("Operation cancelled")
        return

    # First strip optimizer to ensure model works correctly
    strip_optimizer(device, poseweights)

    # Run fall detection
    run_fall_detection(
        poseweights=poseweights,
        source=source,
        device=device,
        display=display,
        save_output=save_output
    )


def _run_batch_interactive(poseweights, device):
    """Ask for the dataset root, validate the Le2i layout and run batch processing.

    Returns early (after printing an error) when the dataset root or its
    "le2i" sub-folder does not exist, or when the user declines to confirm.
    """
    default_dataset_path = "datasets"
    dataset_path = _ask_value(
        f"Enter dataset root path [default: {default_dataset_path}]: ",
        default_dataset_path,
    )

    # Check if the dataset path exists
    if not os.path.exists(dataset_path):
        print(f"Error: Dataset path '{dataset_path}' does not exist.")
        return

    # Check for Le2i dataset structure
    le2i_path = os.path.join(dataset_path, "le2i")
    if not os.path.exists(le2i_path):
        print(f"Error: Le2i dataset not found at {le2i_path}")
        return

    # A "Le2i_Sorted" sub-folder marks the sorted (Fall/Non Fall) layout
    is_sorted = os.path.exists(os.path.join(le2i_path, "Le2i_Sorted"))
    if is_sorted:
        print("Found Le2i dataset with sorted structure (Fall/Non Fall folders)")
    else:
        print("Found Le2i dataset with traditional structure")

    print("\nRunning batch processing with:")
    print(f"- Weights: {poseweights}")
    print(f"- Device: {device}")
    print(f"- Dataset path: {le2i_path}")
    print(f"- Structure: {'Sorted' if is_sorted else 'Traditional'}")

    if not _ask_yes_no("\nConfirm? (y/n) [default: y]: "):
        print("Operation cancelled")
        return

    # Initialize fall detector
    detector = FallDetector(poseweights=poseweights, device=device)

    # Process the entire dataset. The dataset root (not le2i_path) is passed;
    # process_dataset_folder presumably resolves the "le2i" sub-folder itself
    # -- confirm against its definition.
    process_dataset_folder(dataset_path, detector, is_sorted)


def run_interactive():
    """
    Interactive function to run fall detection with user input

    Prompts, in order, for the weights file, the device (GPU ID or CPU) and
    the input source, then runs one of three modes:

    1. Video file - asks for the path and the display/save options
    2. Webcam - asks for the webcam ID and the save option; always displays
    3. Batch process - validates and processes the Le2i dataset folder

    Every mode prints a summary and asks for confirmation before running.
    Blank answers select the default shown in the prompt, and whitespace
    around an answer is ignored. Any other source choice prints an error
    message and returns.
    """
    # Get the weights file
    poseweights = _ask_value(
        "Enter path to weights file [default: yolov7-w6-pose.pt]: ",
        "yolov7-w6-pose.pt",
    )

    # Get device type
    if _ask_yes_no("Use GPU? (y/n) [default: y]: "):
        device = _ask_value("Enter GPU device ID [default: 0]: ", "0")
    else:
        device = "cpu"

    # Get source type
    print("\nSelect input source:")
    print("1: Video file")
    print("2: Webcam")
    print("3: Batch process (Le2i dataset)")
    source_choice = input("Enter choice [1/2/3]: ").strip()

    if source_choice == "1":
        # Video file
        default_video = "sample_video.mp4"
        source = _ask_value(
            f"Enter video file path [default: {default_video}]: ",
            default_video,
        )
        display = _ask_yes_no(
            "Display video with pose estimation in real-time? (y/n) [default: y]: "
        )
        save_output = _ask_yes_no("Save output video? (y/n) [default: y]: ")
        _confirm_and_run_stream(poseweights, device, source, source, display, save_output)

    elif source_choice == "2":
        # Webcam (the ID is passed on as a string, exactly as entered)
        source = _ask_value("Enter webcam ID [default: 0]: ", "0")
        save_output = _ask_yes_no("Save output video? (y/n) [default: y]: ")
        # Always display for webcam
        _confirm_and_run_stream(
            poseweights, device, source, f"Webcam {source}", True, save_output
        )

    elif source_choice == "3":
        # Batch process (Le2i dataset)
        _run_batch_interactive(poseweights, device)

    else:
        print("Invalid choice. Please run again and select a valid option.")

if __name__ == "__main__":
    # Start the interactive prompt only when this file is executed directly,
    # not when it is imported as a module.
    run_interactive()

Enter path to weights file [default: yolov7-w6-pose.pt]:  
Use GPU? (y/n) [default: y]:  
Enter GPU device ID [default: 0]:  



Select input source:
1: Video file
2: Webcam
3: Batch process (Le2i dataset)


Enter choice [1/2/3]:  3
Enter dataset root path [default: datasets]:  


Found Le2i dataset with sorted structure (Fall/Non Fall folders)

Running batch processing with:
- Weights: yolov7-w6-pose.pt
- Device: 0
- Dataset path: datasets\le2i
- Structure: Sorted



Confirm? (y/n) [default: y]:  y


Initializing Fall Detector with weights: yolov7-w6-pose.pt on device: 0


  ckpt = torch.load(w, map_location=map_location)  # load


Fusing layers... 


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Starting batch processing of Le2i dataset at datasets\le2i
Processing Fall folder: datasets\le2i\Le2i_Sorted\Fall
Found 6 environment folders in Fall folder: Coffee_room_01, Coffee_room_02, Home_01, Home_02, Lecture_room, Office
Processing videos in Coffee_room_01/Videos
Found 48 video files in Coffee_room_01/Videos
Processing fall video: Coffee_room_01/video (1).avi
Opening video file: datasets\le2i\Le2i_Sorted\Fall\Coffee_room_01\Videos\video (1).avi
Video properties: 320x240, 25.0 FPS, 157 frames
Processed 5/157 frames from video (1).avi
Processed 10/157 frames from video (1).avi
Processed 15/157 frames from video (1).avi
Processed 20/157 frames from video (1).avi
Processed 25/157 frames from video (1).avi
Processed 30/157 frames from video (1).avi
Processed 35/157 frames from video (1).avi
Processed 40/157 frames from video (1).avi
Processed 45/157 frames from video (1).avi
Processed 50/157 frames from video (1).avi
Processed 55/157 frames from video (1).avi
Processed 60/157 frames