In [2]:
import cv2
import numpy as np
import torch
from torchvision import transforms
from utils.datasets import letterbox
from utils.general import non_max_suppression_kpt
from utils.plots import output_to_keypoint, plot_skeleton_kpts
from models.yolo import Model

# Register the project's Model class with torch's safe-globals allowlist
torch.serialization.add_safe_globals([Model])

# Use the first CUDA GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# Load the YOLOv7-pose checkpoint and take the object stored under its 'model' key.
# NOTE(review): weights_only=False performs full unpickling of the file, so
# only load checkpoints from a trusted source.
weights = torch.load('yolov7-w6-pose.pt', map_location=device, weights_only=False)

# float() and eval() both return the module itself, so `model` is the loaded object
model = weights['model'].float().eval()

# On a GPU, switch the model to half precision and place it on the device
if device.type == "cuda":
    model.half().to(device)

def detect_fall(keypoints, threshold=0.5):
    """
    Detect a fall from one person's pose keypoints.

    Coordinates are assumed to be image pixels with y growing downward (the
    usual convention for OpenCV frames). Under that convention an upright
    person always has hips and knees at larger y than the shoulders, so
    "hips/knees below shoulders" cannot be used as the fall condition.
    Instead a fall is reported when the body is closer to horizontal than to
    vertical: measured from the shoulder midpoint, both the hip midpoint and
    the knee midpoint drop less (in y) than they are offset sideways (in x).
    This also covers an inverted body (hips/knees higher in the image than
    the shoulders), because the drop is then negative.

    :param keypoints: Flat sequence of COCO keypoints stored as consecutive
        (x, y, confidence) triples; indices up to keypoint 14 are read.
    :param threshold: Minimum confidence every used keypoint must reach.
    :return: True if fall is detected, False otherwise (including when any
        shoulder, hip or knee keypoint is below the confidence threshold).
    """
    # Indices for keypoints (COCO format)
    LEFT_SHOULDER = 5
    RIGHT_SHOULDER = 6
    LEFT_HIP = 11
    RIGHT_HIP = 12
    LEFT_KNEE = 13
    RIGHT_KNEE = 14

    def _triple(index):
        # Slice the (x, y, confidence) triple of one keypoint.
        start = index * 3
        return keypoints[start:start + 3]

    def _midpoint(first, second):
        # Average the x and y coordinates of a left/right keypoint pair.
        return (first[0] + second[0]) / 2, (first[1] + second[1]) / 2

    left_shoulder = _triple(LEFT_SHOULDER)
    right_shoulder = _triple(RIGHT_SHOULDER)
    left_hip = _triple(LEFT_HIP)
    right_hip = _triple(RIGHT_HIP)
    left_knee = _triple(LEFT_KNEE)
    right_knee = _triple(RIGHT_KNEE)

    # Skip the person entirely if any required keypoint is not confident
    required = (left_shoulder, right_shoulder, left_hip, right_hip,
                left_knee, right_knee)
    if any(point[2] < threshold for point in required):
        return False

    shoulder_x, shoulder_y = _midpoint(left_shoulder, right_shoulder)
    hip_x, hip_y = _midpoint(left_hip, right_hip)
    knee_x, knee_y = _midpoint(left_knee, right_knee)

    # Vertical drop vs. horizontal offset, both measured from the shoulders.
    torso_drop = hip_y - shoulder_y
    torso_span = abs(hip_x - shoulder_x)
    leg_drop = knee_y - shoulder_y
    leg_span = abs(knee_x - shoulder_x)

    # bool() keeps the return type a plain Python bool even for numpy inputs
    return bool(torso_drop < torso_span and leg_drop < leg_span)

def calculate_metrics(ground_truth, predictions):
    """
    Compute binary-classification metrics from paired labels.

    Pairs are formed with zip, so extra items in the longer input are
    ignored. A pair whose labels are not both 0 or 1 is not counted.

    :param ground_truth: List of ground truth labels (1 = Fall, 0 = No Fall).
    :param predictions: List of predicted labels (1 = Fall, 0 = No Fall).
    :return: Tuple (precision, recall, f1_score, accuracy); each value is 0
        when its denominator is zero.
    """
    def _ratio(numerator, denominator):
        # Guarded division: 0 when there is nothing to divide by.
        return numerator / denominator if denominator > 0 else 0

    true_pos = false_pos = true_neg = false_neg = 0
    for actual, predicted in zip(ground_truth, predictions):
        if actual == 1:
            if predicted == 1:
                true_pos += 1
            elif predicted == 0:
                false_neg += 1
        elif actual == 0:
            if predicted == 1:
                false_pos += 1
            elif predicted == 0:
                true_neg += 1

    precision = _ratio(true_pos, true_pos + false_pos)
    recall = _ratio(true_pos, true_pos + false_neg)
    f1_score = _ratio(2 * (precision * recall), precision + recall)
    accuracy = _ratio(true_pos + true_neg,
                      true_pos + true_neg + false_pos + false_neg)

    return precision, recall, f1_score, accuracy

# Video capture
cap = cv2.VideoCapture(0)
ground_truth_list = []
predictions_list = []

# Loop-invariant helpers, built once instead of once per frame
to_tensor = transforms.ToTensor()
use_half = torch.cuda.is_available()

# try/finally guarantees the camera and the window are released even if
# inference or drawing raises (or the cell is interrupted).
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # OpenCV delivers BGR frames. Convert to RGB before preprocessing so
        # the model receives RGB input and the RGB->BGR conversion applied
        # before display restores the true colours instead of swapping
        # red and blue.
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Preprocess the frame: letterbox, convert to a tensor, add a batch dim
        image = letterbox(frame, 960, stride=64, auto=True)[0]
        image = to_tensor(image).unsqueeze(0)
        if use_half:
            image = image.half().to(device)

        # Run inference
        with torch.no_grad():
            output, _ = model(image)
            # 0.25 / 0.65 are presumably the confidence and IoU thresholds
            # of the NMS helper - confirm against utils.general.
            output = non_max_suppression_kpt(output, 0.25, 0.65, nc=model.yaml['nc'], nkpt=model.yaml['nkpt'], kpt_label=True)
            output = output_to_keypoint(output)

        # Rebuild a drawable uint8 BGR image from the network input tensor
        nimg = image[0].permute(1, 2, 0) * 255
        nimg = nimg.cpu().numpy().astype(np.uint8)
        nimg = cv2.cvtColor(nimg, cv2.COLOR_RGB2BGR)

        fall_detected = False
        for idx in range(output.shape[0]):
            # Columns from index 7 onward of each row are passed on as the
            # flat keypoint vector for that detection
            keypoints = output[idx, 7:].T
            plot_skeleton_kpts(nimg, keypoints, 3)

            # Detect fall
            if detect_fall(keypoints):
                fall_detected = True

        # Draw the banner once per frame, however many people triggered it
        if fall_detected:
            cv2.putText(nimg, "Fall Detected!", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        # Simulate ground truth (for demonstration purposes).
        # NOTE: the "ground truth" is a copy of the prediction, so the metrics
        # below carry no information (accuracy is always 1.00; precision and
        # recall are 1.00 once any fall is predicted, else 0.00). Replace
        # this with real per-frame labels to obtain a meaningful evaluation.
        prediction = 1 if fall_detected else 0
        ground_truth = prediction
        ground_truth_list.append(ground_truth)
        predictions_list.append(prediction)

        # Calculate and display metrics in real-time
        precision, recall, f1_score, accuracy = calculate_metrics(ground_truth_list, predictions_list)
        cv2.putText(nimg, f"Precision: {precision:.2f}", (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
        cv2.putText(nimg, f"Recall: {recall:.2f}", (50, 130), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
        cv2.putText(nimg, f"F1-score: {f1_score:.2f}", (50, 160), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
        cv2.putText(nimg, f"Accuracy: {accuracy:.2f}", (50, 190), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

        # Display the result
        cv2.imshow("Fall Detection", nimg)

        # Break if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()

# Print final metrics (both lists are appended in lockstep, so they always match)
print("Final Metrics:")
precision, recall, f1_score, accuracy = calculate_metrics(ground_truth_list, predictions_list)
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-score: {f1_score:.2f}")
print(f"Accuracy: {accuracy:.2f}")
#

Final Metrics:
Precision: 0.00
Recall: 0.00
F1-score: 0.00
Accuracy: 1.00
