In [1]:
from ultralytics import YOLO
import torch
import cv2
import matplotlib.pyplot as plt
import os
import numpy as np

In [3]:
# Pick the compute device: CUDA when torch reports it as available, CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [5]:
### Configuration for slicing videos into fixed-length feature windows
### (presumably the input of a later LSTM training step - confirm with the training notebook).
# Root folder of the trained YOLO run; the window files are written below it too.
BASE_PATH = "results/fall_detect_yolo11n_pose_balanced"
# Path to the yolo trained model
MODEL_PATH = f"{BASE_PATH}/yolo11n_pose_train/weights/best.pt"
WINDOW = 30  # number of per-frame feature vectors in one saved window
STRIDE = 10   # only every STRIDE-th full window is written to disk
K_CONSEC_FALL = 8  # a window is labelled 1 when it has this many consecutive class-1 frames
K_CONSEC_ATT = 5  # otherwise it is labelled 2 when it has this many consecutive class-2 frames
# The parameters are encoded in the output folder name so different settings do not overwrite each other.
CONFIG_LABEL = f"{WINDOW}_s{STRIDE}_kf{K_CONSEC_FALL}_ka{K_CONSEC_ATT}"
# NOTE: OUTPUT_DIR ends with a trailing "/".
OUTPUT_DIR = f"{BASE_PATH}/windows_{CONFIG_LABEL}/"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Loading of trained model
model = YOLO(MODEL_PATH)

In [7]:
def extract_box_features(box):
    """Derive geometric descriptors from one bounding box.

    Args:
        box: Iterable of four numbers ``(x1, y1, x2, y2)``.

    Returns:
        float32 array of length 10:
        ``[x1, y1, x2, y2, x_center, y_center, width, height, area, aspect_ratio]``.
        ``aspect_ratio`` is width / height, or 0 when the height is 0.
    """
    left, top, right, bottom = box
    w = right - left
    h = bottom - top

    # Guard against a degenerate (zero-height) box before dividing.
    if h != 0:
        ratio = w / h
    else:
        ratio = 0

    cx = left + w / 2
    cy = top + h / 2
    geometry = [left, top, right, bottom, cx, cy, w, h, w * h, ratio]
    return np.array(geometry, dtype=np.float32)

In [9]:
def normalize_box_features(features, frame_shape):
    """Rescale raw box features to frame-relative units.

    Args:
        features: Indexable of at least 10 values laid out as
            ``[x1, y1, x2, y2, x_center, y_center, width, height, area, aspect_ratio]``.
        frame_shape: Shape of the frame; only the first two entries
            ``(height, width)`` are read.

    Returns:
        float32 array of length 10: horizontal quantities divided by the frame
        width, vertical ones by the frame height, the area by the frame area,
        and the aspect ratio copied unchanged.
    """
    frame_h, frame_w = frame_shape[:2]
    frame_area = frame_w * frame_h

    # Divisor for each of the first nine entries, in feature order:
    # x1, y1, x2, y2, x_center, y_center, width, height, area.
    divisors = (frame_w, frame_h) * 4 + (frame_area,)
    scaled = [features[idx] / divisor for idx, divisor in enumerate(divisors)]

    # aspect_ratio is already a ratio, so it is passed through as is.
    scaled.append(features[9])
    return np.array(scaled, dtype=np.float32)

In [11]:
def extract_pose_features(pose_data, frame_shape, num_keypoints=18):
    """Flatten one person's keypoints into a frame-normalized feature vector.

    Args:
        pose_data: Sequence/array of ``(x, y, confidence)`` rows, one per
            keypoint, or ``None`` / empty when no pose is available.
        frame_shape: Shape of the frame; only the first two entries
            ``(height, width)`` are read.
        num_keypoints: Number of keypoints assumed when ``pose_data`` is
            missing, so the zero-filled fallback has the same length as a real
            vector. The default (18 -> 54 values) reproduces the previously
            hard-coded fallback size; pass the keypoint count of the pose model
            in use if it differs.

    Returns:
        float32 array ``[x0/W, y0/H, c0, x1/W, y1/H, c1, ...]``; all zeros of
        length ``3 * num_keypoints`` when there is no pose data.
    """
    frame_height, frame_width = frame_shape[:2]

    if pose_data is None or len(pose_data) == 0:
        return np.zeros(3 * num_keypoints, dtype=np.float32)

    # Low-confidence keypoints are intentionally kept (not filtered out) so
    # every vector has the same length; the confidence itself is a feature.
    pose = np.asarray(pose_data, dtype=np.float32)
    # Per-column divisors: x by width, y by height, confidence left unchanged.
    scale = np.array([frame_width, frame_height, 1.0], dtype=np.float32)
    return (pose / scale).reshape(-1)

In [13]:
def diff_features(curr: np.ndarray, prev: np.ndarray) -> np.ndarray:
    """Frame-to-frame change of a feature vector (``curr - prev``).

    Returns a float32 array shaped like ``curr``. When there is no previous
    vector, or its shape differs from ``curr``, the change is reported as all
    zeros instead.
    """
    comparable = prev is not None and prev.shape == curr.shape
    if comparable:
        delta = curr - prev
        return delta.astype(np.float32)
    return np.zeros_like(curr, dtype=np.float32)

In [15]:
def has_k_consecutive(seq, target, k):
    """Tell whether ``seq`` contains a run of at least ``k`` consecutive ``target`` values.

    The run length is only checked right after a matching element, so an
    empty sequence (or one without any match) always yields ``False``.
    """
    streak = 0
    for item in seq:
        matched = item == target
        if not matched:
            # Any other value breaks the current run.
            streak = 0
            continue
        streak += 1
        if streak >= k:
            return True
    return False

In [17]:
def _reset_tracker_state():
    """Best-effort reset of the tracker(s) kept by the shared ``model``.

    With ``persist=True`` the tracker survives between ``model.track`` calls,
    so it would otherwise carry tracks from the previous video into the next.
    NOTE(review): relies on ``model.predictor.trackers[*].reset()``; every
    access is guarded, so this is a no-op if the installed ultralytics version
    does not expose them - confirm against the installed version.
    """
    predictor = getattr(model, "predictor", None)
    for tracker in getattr(predictor, "trackers", None) or ():
        reset = getattr(tracker, "reset", None)
        if callable(reset):
            reset()


def _window_label(labels):
    """Collapse the per-frame class labels of one window into a single label.

    Class 1 wins when it appears in at least K_CONSEC_FALL consecutive frames,
    then class 2 with K_CONSEC_ATT consecutive frames; otherwise the most
    frequent label of the window is used.
    """
    if has_k_consecutive(labels, 1, K_CONSEC_FALL):
        return 1
    if has_k_consecutive(labels, 2, K_CONSEC_ATT):
        return 2
    return max(set(labels), key=labels.count)  # fallback: most common


def process_video(video_path):
    """Slice one video into per-track feature windows saved as ``.npy`` files.

    Every frame is passed to ``model.track``. For each tracked detection whose
    class is 0, 1 or 2 a feature vector is built from the normalized box
    features, the normalized pose keypoints and the frame-to-frame difference
    of both. Vectors are buffered per track id in a sliding buffer of WINDOW
    entries; once a track's buffer is full, every STRIDE-th window of that
    track is written to
    ``OUTPUT_DIR/<video name>/<video name>_<index>_<label>.npy`` where
    ``index`` is the running count of full windows seen in this video and
    ``label`` comes from ``_window_label``.

    Frames without tracked detections (or without keypoints) are skipped, so a
    window may span non-contiguous frames.

    Args:
        video_path: Path of the video file to process.

    Returns:
        None. The only result is the set of files written to disk.
    """
    video_name = os.path.splitext(os.path.basename(video_path))[0]
    output_path = os.path.join(OUTPUT_DIR, video_name)
    os.makedirs(output_path, exist_ok=True)

    valid_labels = (0, 1, 2)
    sequences_by_id = {}      # track id -> last WINDOW (features, label) pairs
    prev_features_by_id = {}  # track id -> static features of its previous frame
    windows_by_id = {}        # track id -> number of full windows seen so far
    total_windows = 0         # full windows seen in this video (file index)

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f"Warning: could not open video {video_path}")
            return

        # Start every video with a clean tracker.
        _reset_tracker_state()

        while True:
            valid, frame = cap.read()
            if not valid:
                break

            # persist=True keeps the tracker alive between successive calls;
            # without it the tracker is rebuilt for every frame and the ids
            # are not stable identities across frames.
            result = model.track(frame, persist=True, verbose=False)[0]

            if result.boxes is None or result.boxes.id is None or result.keypoints is None:
                continue

            boxes = result.boxes.xyxy.cpu().numpy()
            classes = result.boxes.cls.cpu().numpy().astype(int)
            keypoints = result.keypoints.data.cpu().numpy()
            track_ids = result.boxes.id.cpu().numpy().astype(int)

            for box, label, kp, track_id in zip(boxes, classes, keypoints, track_ids):
                if label not in valid_labels:
                    continue

                if track_id not in sequences_by_id:
                    sequences_by_id[track_id] = []
                    prev_features_by_id[track_id] = None
                    windows_by_id[track_id] = 0

                current_sequence = sequences_by_id[track_id]

                norm_box = normalize_box_features(extract_box_features(box), frame.shape)
                pose_kp = extract_pose_features(kp, frame.shape)

                combined_static_features = np.concatenate([norm_box, pose_kp])
                # Zero for the first frame of a track (no previous features yet).
                velocity_features = diff_features(
                    combined_static_features, prev_features_by_id[track_id]
                )
                prev_features_by_id[track_id] = combined_static_features.copy()

                all_features = np.concatenate([combined_static_features, velocity_features])
                current_sequence.append((all_features, label))

                # Keep only the most recent WINDOW entries.
                if len(current_sequence) > WINDOW:
                    current_sequence.pop(0)
                if len(current_sequence) < WINDOW:
                    continue

                # The stride is counted per track so that simultaneous tracks
                # cannot starve each other; the label and the array are only
                # built for windows that are actually written.
                if windows_by_id[track_id] % STRIDE == 0:
                    features = np.array([ft for ft, _ in current_sequence])
                    labels = [lb for _, lb in current_sequence]
                    final_label = _window_label(labels)
                    np.save(os.path.join(
                        output_path,
                        f"{video_name}_{total_windows:03d}_{final_label}.npy"
                    ), features)

                windows_by_id[track_id] += 1
                total_windows += 1
    finally:
        # Always free the capture, even if tracking or saving raised.
        # No cv2.destroyAllWindows(): this function never opens a window and
        # the call fails on headless OpenCV builds.
        cap.release()

In [19]:
# Windows creation: run process_video on every .mp4 file of videos_dir,
# printing a progress line roughly every 10% of the videos.
videos_dir = 'videos_aug/'
print("Processing videos...")
video_count = 0

# List the directory once, in a deterministic order, and count only the files
# that are really going to be processed so the percentages are accurate.
files = sorted(os.listdir(videos_dir))
video_files = [f for f in files if f.endswith('.mp4')]
total_videos = len(video_files)
# Video counts at which the 10%, 20%, ..., 100% progress lines are printed.
checkpoints = [int(total_videos * i / 10) for i in range(1, 11)]

for file in video_files:
    video_path = os.path.join(videos_dir, file)
    process_video(video_path)
    video_count += 1

    # Report only once the video has actually been processed.
    if video_count in checkpoints:
        percent = (video_count / total_videos) * 100
        print(f"Processed {percent:.0f}% ({video_count}/{total_videos})")

print("Videos processed successfully!")

Processing videos...
Processed 10% (48/480)
Processed 20% (96/480)
Processed 30% (144/480)
Processed 40% (192/480)
Processed 50% (240/480)
Processed 60% (288/480)
Processed 70% (336/480)
Processed 80% (384/480)
Processed 90% (432/480)
Processed 100% (480/480)
Videos processed successfully!


In [25]:
# Check Windows files (npy files): load one saved window and inspect it.
# os.path.join is used because OUTPUT_DIR already ends with "/", so plain
# string concatenation would produce a doubled separator.
sample_path = os.path.join(
    OUTPUT_DIR, "001_aug1_flip_gamma", "001_aug1_flip_gamma_010_0.npy"
)
dados = np.load(sample_path)
print(dados[:1])  # feature vector of the first frame of the window
print("Shape 1: ", dados.shape)

[[    0.59319     0.36539     0.70941     0.93856      0.6513     0.65197     0.11623     0.57317    0.066619      0.3605     0.63194      0.4019     0.96163     0.63932     0.38244      0.9594     0.62282     0.38468     0.96503     0.65428     0.37357     0.96289     0.61346     0.37915     0.96498      0.6369
      0.45787     0.97374     0.68717     0.46212     0.96718      0.5921     0.47803     0.96541     0.68985     0.61662     0.96707     0.60333      0.6239     0.96226     0.66385     0.69883     0.95996     0.63364     0.70042      0.9553     0.68067     0.64374     0.96314     0.61463     0.64689     0.96711
      0.71028     0.74038       0.939     0.58771     0.74951     0.92429     0.67975     0.92366     0.87388     0.63106      0.9301     0.88874 -0.00077981  0.00024447 -8.1599e-05 -0.00041777 -0.00043076 -8.6606e-05  0.00069818 -0.00066221  0.00032367   0.0025791  8.2672e-05  0.00043428  4.9353e-05    0.000126
   0.00031364  7.5817e-05  8.5413e-05  0.00037688 -1.6093e