# In [3]:
import cv2
import numpy as np
import torch
from torchvision import transforms
from utils.datasets import letterbox
from utils.general import non_max_suppression_kpt
from utils.plots import output_to_keypoint, plot_skeleton_kpts
from models.yolo import Model
from PIL import Image, ImageEnhance
import random

# Allow-list the YOLOv7 `Model` class for torch's restricted (weights-only) unpickler.
# NOTE(review): the torch.load call below passes weights_only=False, so this
# allow-list presumably has no effect on that call -- confirm whether it is still needed.
torch.serialization.add_safe_globals([Model])

# Pick the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load the YOLOv7-pose checkpoint; the network itself is stored under the 'model' key.
# SECURITY: weights_only=False performs a full pickle load, which can execute
# arbitrary code embedded in the file -- only load checkpoints from a trusted source.
weights = torch.load('yolov7-w6-pose.pt', map_location=device, weights_only=False)
model = weights['model']
# Cast to float32 and switch to evaluation mode; the return value is discarded.
_ = model.float().eval()

# On GPU, run the network in half precision on the selected device.
if torch.cuda.is_available():
    model.half().to(device)

def augment_image(image):
    """
    Randomly perturb an image: brightness, then contrast, then additive Gaussian noise.
    :param image: Input image (numpy array in BGR format).
    :return: Augmented image (numpy array in BGR format, dtype uint8).
    """
    # PIL's enhancers are applied to an RGB copy of the OpenCV (BGR) image.
    pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    # Brightness first, contrast second; each factor is drawn from [0.8, 1.2].
    for enhancer_cls in (ImageEnhance.Brightness, ImageEnhance.Contrast):
        factor = random.uniform(0.8, 1.2)
        pil_image = enhancer_cls(pil_image).enhance(factor)

    rgb = np.array(pil_image)

    # Zero-mean Gaussian noise; the variance is kept small (at most 0.005 on a
    # 0..1 scale) and the noise is scaled to the 0..255 pixel range.
    noise_variance = random.uniform(0, 0.005)
    noise_sigma = noise_variance ** 0.5
    noise = np.random.normal(0, noise_sigma, rgb.shape)
    noisy = np.clip(rgb + noise * 255, 0, 255).astype(np.uint8)

    # Hand the result back in OpenCV's BGR channel order.
    return cv2.cvtColor(noisy, cv2.COLOR_RGB2BGR)

def apply_clahe(image):
    """
    Enhance contrast by running CLAHE on the lightness channel only.
    :param image: Input image (numpy array in BGR format).
    :return: Image with CLAHE applied (numpy array in BGR format).
    """
    # Work in LAB so only the L channel is equalized; A and B pass through untouched.
    lightness, chan_a, chan_b = cv2.split(cv2.cvtColor(image, cv2.COLOR_BGR2LAB))

    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    equalized_lab = cv2.merge((equalizer.apply(lightness), chan_a, chan_b))

    return cv2.cvtColor(equalized_lab, cv2.COLOR_LAB2BGR)

def preprocess_image(image, apply_augmentation=True, apply_clahe_flag=True):
    """
    Run the optional preprocessing stages in order: augmentation, then CLAHE.
    :param image: Input image (numpy array in BGR format).
    :param apply_augmentation: Whether to apply augmentation.
    :param apply_clahe_flag: Whether to apply CLAHE.
    :return: Preprocessed image (numpy array in BGR format); the input itself
        is returned when both stages are disabled.
    """
    augmented = augment_image(image) if apply_augmentation else image
    return apply_clahe(augmented) if apply_clahe_flag else augmented

def detect_fall(keypoints, threshold=0.5):
    """
    Detect fall based on keypoints.
    :param keypoints: Flat sequence of keypoints laid out as consecutive
        (x, y, confidence) triplets, indexed in COCO order
        (17 keypoints -> 51 values).
    :param threshold: Confidence threshold for keypoints.
    :return: True if fall is detected, False otherwise. Also False when any
        required joint is missing from `keypoints` or is below `threshold`.
    """
    # Indices for keypoints (COCO format), as (left, right) pairs.
    SHOULDERS = (5, 6)
    HIPS = (11, 12)
    KNEES = (13, 14)

    def joint(index):
        # Each keypoint occupies three consecutive slots: x, y, confidence.
        return keypoints[index * 3: (index + 1) * 3]

    left_shoulder, right_shoulder = (joint(i) for i in SHOULDERS)
    left_hip, right_hip = (joint(i) for i in HIPS)
    left_knee, right_knee = (joint(i) for i in KNEES)
    joints = (left_shoulder, right_shoulder, left_hip, right_hip, left_knee, right_knee)

    # A truncated or empty keypoint vector produces short slices; treat that as
    # "no usable pose" instead of raising IndexError on the confidence lookup.
    if any(len(j) < 3 for j in joints):
        return False

    # Skip if any keypoint is not confident.
    if any(j[2] < threshold for j in joints):
        return False

    # Calculate average y positions
    shoulder_y = (left_shoulder[1] + right_shoulder[1]) / 2
    hip_y = (left_hip[1] + right_hip[1]) / 2
    knee_y = (left_knee[1] + right_knee[1]) / 2

    # Fall condition: hips and knees both have a larger y than the shoulders.
    # NOTE(review): if y grows downward (OpenCV image coordinates), this also
    # holds for a person standing upright -- confirm the intended heuristic.
    return bool(hip_y > shoulder_y and knee_y > shoulder_y)

def calculate_metrics(ground_truth, predictions):
    """
    Compute precision, recall, F1-score and accuracy from binary labels.
    :param ground_truth: List of ground truth labels (1 = Fall, 0 = No Fall).
    :param predictions: List of predicted labels (1 = Fall, 0 = No Fall).
    :return: Tuple (precision, recall, f1_score, accuracy); any metric whose
        denominator is zero is reported as 0.
    """
    # Labels are paired positionally; pairs with labels other than 0/1 are not counted.
    pairs = list(zip(ground_truth, predictions))

    def tally(gt_label, pred_label):
        return sum(1 for gt, pred in pairs if gt == gt_label and pred == pred_label)

    true_pos = tally(1, 1)
    false_pos = tally(0, 1)
    true_neg = tally(0, 0)
    false_neg = tally(1, 0)

    predicted_pos = true_pos + false_pos
    actual_pos = true_pos + false_neg
    counted = true_pos + true_neg + false_pos + false_neg

    precision = true_pos / predicted_pos if predicted_pos > 0 else 0
    recall = true_pos / actual_pos if actual_pos > 0 else 0

    if (precision + recall) > 0:
        f1_score = 2 * (precision * recall) / (precision + recall)
    else:
        f1_score = 0

    accuracy = (true_pos + true_neg) / counted if counted > 0 else 0

    return precision, recall, f1_score, accuracy

# Real-time fall detection
cap = cv2.VideoCapture(0)

# Paired evaluation samples. A prediction is stored only for frames the
# operator labels, so both lists always have the same length and line up
# index-for-index.
ground_truth_list = []
predictions_list = []

# Most recent metrics; they stay None until at least one frame has been labelled.
precision = recall = f1_score = accuracy = None

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Preprocess the frame
        # NOTE(review): random augmentation is applied to live frames here -- confirm that is intended.
        preprocessed_frame = preprocess_image(frame, apply_augmentation=True, apply_clahe_flag=True)

        # Resize and normalize the frame for YOLOv7-pose.
        image = letterbox(preprocessed_frame, 960, stride=64, auto=True)[0]
        # OpenCV frames are BGR; convert to RGB so the RGB->BGR step below really
        # yields BGR for cv2.imshow. This presumably also matches the channel
        # order the network was trained with -- confirm against the YOLOv7 loaders.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = transforms.ToTensor()(image).unsqueeze(0)  # add the batch dimension

        if torch.cuda.is_available():
            image = image.half().to(device)

        # Run inference
        with torch.no_grad():
            output, _ = model(image)
            output = non_max_suppression_kpt(output, 0.25, 0.65, nc=model.yaml['nc'], nkpt=model.yaml['nkpt'], kpt_label=True)
            output = output_to_keypoint(output)

        # Rebuild a displayable uint8 BGR image from the network input tensor.
        nimg = image[0].permute(1, 2, 0) * 255
        nimg = nimg.cpu().numpy().astype(np.uint8)
        nimg = cv2.cvtColor(nimg, cv2.COLOR_RGB2BGR)

        fall_detected = False
        for idx in range(output.shape[0]):
            # Columns from index 7 onward are passed on as the keypoint vector.
            keypoints = output[idx, 7:].T
            plot_skeleton_kpts(nimg, keypoints, 3)

            # Detect fall
            if detect_fall(keypoints):
                fall_detected = True
                cv2.putText(nimg, "Fall Detected!", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        # Draw the latest metrics BEFORE showing the frame; anything drawn after
        # cv2.imshow would never reach the screen because nimg is rebuilt per frame.
        if precision is not None:
            cv2.putText(nimg, f"Precision: {precision:.2f}", (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
            cv2.putText(nimg, f"Recall: {recall:.2f}", (50, 130), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
            cv2.putText(nimg, f"F1-score: {f1_score:.2f}", (50, 160), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
            cv2.putText(nimg, f"Accuracy: {accuracy:.2f}", (50, 190), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

        # Display instructions for manual ground truth input
        cv2.putText(nimg, "Press 'f' for Fall, 'n' for No Fall, 'q' to Quit", (50, 400), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

        # Display the result
        cv2.imshow("Fall Detection", nimg)

        # Wait for key press to input ground truth
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):  # Quit
            break

        if key in (ord('f'), ord('n')):
            # Record the label together with this frame's prediction so the two
            # lists cannot drift apart on frames where no key was pressed.
            ground_truth_list.append(1 if key == ord('f') else 0)
            predictions_list.append(1 if fall_detected else 0)
            precision, recall, f1_score, accuracy = calculate_metrics(ground_truth_list, predictions_list)

        # Clear GPU cache
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
finally:
    # Release the camera and close the window even if a frame raised.
    cap.release()
    cv2.destroyAllWindows()

# Print final metrics (only defined once at least one frame has been labelled)
if ground_truth_list:
    print("Final Metrics:")
    print(f"Precision: {precision:.2f}")
    print(f"Recall: {recall:.2f}")
    print(f"F1-score: {f1_score:.2f}")
    print(f"Accuracy: {accuracy:.2f}")
else:
    print("No labelled frames; metrics unavailable.")

# Output: IndexError: list index out of range