In [13]:
import cv2
import numpy as np
import os
import torch
import pandas as pd
from models.experimental import attempt_load
from utils.general import non_max_suppression, scale_coords
from utils.datasets import letterbox
from utils.plots import plot_skeleton_kpts
from sklearn.metrics import confusion_matrix

# Load model YOLOv7-W6 Pose
def load_model(weights_path):
    model = attempt_load(weights_path, map_location=torch.device('cpu'))
    return model

# Hitung sudut antara 3 titik (kepala, pinggang, kaki)
def calculate_angle(a, b, c):
    a, b, c = np.array(a), np.array(b), np.array(c)
    ba, bc = a - b, c - b
    cosine_angle = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc))
    return np.degrees(np.arccos(cosine_angle))

# Baca ground truth dari file anotasi
def read_ground_truth(annotation_file):
    if os.path.exists(annotation_file):
        try:
            df = pd.read_csv(annotation_file, header=None, sep=r'\s+|,', engine='python')
            print(df.head())  # Cek apakah data terbaca dengan benar
            return set(df.iloc[:, 0].astype(int).values)  # Ambil kolom pertama
        except pd.errors.ParserError as e:
            print(f"Error parsing {annotation_file}: {e}")
            return set()
    return set()

# Deteksi pose dan fall detection
def detect_pose(model, image, img_size=640, fall_threshold=60):
    img = letterbox(image, img_size, stride=64, auto=True)[0]
    img = img[:, :, ::-1].transpose(2, 0, 1)
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).float() / 255.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)
    
    with torch.no_grad():
        pred = model(img)[0]
    pred = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45)
    
    fall_detected = False
    for det in pred:
        if len(det):
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], image.shape).round()
            for *xyxy, conf, cls, kpts in det:
                if kpts.dim() == 2 and kpts.shape[1] == 3:
                    head, waist, left_foot = kpts[0].cpu().numpy(), kpts[11].cpu().numpy(), kpts[15].cpu().numpy()
                    angle = calculate_angle(head[:2], waist[:2], left_foot[:2])
                    if angle < fall_threshold:
                        fall_detected = True
                        cv2.putText(image, "FALL DETECTED", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    plot_skeleton_kpts(image, kpts, steps=3)
    return image, fall_detected

# Buffer untuk menyimpan riwayat posisi hip
hip_positions = []

def detect_fall_video(keypoints, threshold=0.5, frame_window=5, fall_threshold=20):
    """
    Mendeteksi jatuh berdasarkan pergerakan hip dalam video.
    
    Args:
        keypoints (list): Array dari keypoints (17 keypoints, masing-masing x, y, confidence).
        threshold (float): Confidence minimal agar keypoints valid.
        frame_window (int): Jumlah frame yang disimpan untuk mendeteksi pergerakan.
        fall_threshold (int): Seberapa besar perubahan hip_y dianggap jatuh.

    Returns:
        bool: True jika terdeteksi jatuh, False jika tidak.
    """

    # Indeks keypoints (COCO format)
    LEFT_HIP = 11
    RIGHT_HIP = 12

    # Ambil koordinat hip
    left_hip = keypoints[LEFT_HIP * 3: (LEFT_HIP + 1) * 3]
    right_hip = keypoints[RIGHT_HIP * 3: (RIGHT_HIP + 1) * 3]

    # Pastikan confidence cukup tinggi
    if left_hip[2] < threshold or right_hip[2] < threshold:
        return False

    # Hitung posisi rata-rata hip
    hip_y = (left_hip[1] + right_hip[1]) / 2

    # Simpan posisi hip di buffer
    hip_positions.append(hip_y)

    # Batasi buffer hanya menyimpan `frame_window` terakhir
    if len(hip_positions) > frame_window:
        hip_positions.pop(0)

    # Jika buffer belum penuh, belum bisa deteksi fall
    if len(hip_positions) < frame_window:
        return False

    # Hitung perubahan posisi hip
    delta_hip = hip_positions[0] - hip_positions[-1]  # Perbedaan antara awal dan akhir

    # Jika hip turun drastis, deteksi jatuh
    return delta_hip > fall_threshold
    
# Evaluasi dengan Confusion Matrix
def evaluate_model(predictions, ground_truths):
    y_pred, y_true = np.array(predictions), np.array(ground_truths)
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    print("Confusion Matrix:\n", cm)

# Proses video dengan anotasi
def process_video(video_path, annotation_file, model):
    cap = cv2.VideoCapture(video_path)
    frame_count, predictions, ground_truths = 0, [], []
    fall_frames = read_ground_truth(annotation_file)
    
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        
        result_frame, fall_detected = detect_pose(model, frame)
        predictions.append(int(fall_detected))
        ground_truths.append(1 if frame_count in fall_frames else 0)
        
        cv2.imshow("Result", result_frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        frame_count += 1
    
    cap.release()
    cv2.destroyAllWindows()
    return predictions, ground_truths

# Main pipeline
if __name__ == "__main__":
    weights_path = "yolov7-w6-pose.pt"
    dataset_path = r"C:\Users\LENOVO\Documents\A Skripsi\datasets\FallDataset\Dataset"
    model = load_model(weights_path)
    
    video_count, all_predictions, all_ground_truths = 0, [], []
    for folder in os.listdir(dataset_path):
        video_folder = os.path.join(dataset_path, folder, "Videos")
        annotation_folder = os.path.join(dataset_path, folder, "Annotation_files")
        if os.path.exists(video_folder) and os.path.exists(annotation_folder):
            for video_file in sorted(os.listdir(video_folder))[:10]:  # Ambil 10 video pertama
                if video_file.endswith(".avi"):
                    video_path = os.path.join(video_folder, video_file)
                    annotation_file = os.path.join(annotation_folder, video_file.replace(".avi", ".txt"))
                    
                    print(f"Processing {video_file}...")
                    predictions, ground_truths = process_video(video_path, annotation_file, model)
                    
                    all_predictions.extend(predictions)
                    all_ground_truths.extend(ground_truths)
                    video_count += 1
                    if video_count >= 10:
                        break
    evaluate_model(all_predictions, all_ground_truths)


  ckpt = torch.load(w, map_location=map_location)  # load


Fusing layers... 
Processing video (1).avi...
Error parsing C:\Users\LENOVO\Documents\A Skripsi\datasets\FallDataset\Dataset\Coffee_room_01\Annotation_files\video (1).txt: Expected 1 fields in line 4, saw 6. Error could possibly be due to quotes being ignored when a multi-char delimiter is used.
Processing video (10).avi...
Error parsing C:\Users\LENOVO\Documents\A Skripsi\datasets\FallDataset\Dataset\Coffee_room_01\Annotation_files\video (10).txt: Expected 1 fields in line 4, saw 6. Error could possibly be due to quotes being ignored when a multi-char delimiter is used.
Processing video (11).avi...
Error parsing C:\Users\LENOVO\Documents\A Skripsi\datasets\FallDataset\Dataset\Coffee_room_01\Annotation_files\video (11).txt: Expected 1 fields in line 3, saw 6. Error could possibly be due to quotes being ignored when a multi-char delimiter is used.
Processing video (12).avi...
Error parsing C:\Users\LENOVO\Documents\A Skripsi\datasets\FallDataset\Dataset\Coffee_room_01\Annotation_files\v