In [1]:
import cv2
import numpy as np
import torch
from models.experimental import attempt_load
from utils.general import non_max_suppression, scale_coords
from utils.datasets import letterbox
from utils.plots import plot_one_box

# Load YOLOv7 model
weights = 'yolov7-w6-pose.pt'
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = attempt_load(weights, map_location=device)

# Function to process each frame
def process_frame(frame, model):
    # Resize and normalize frame
    img = letterbox(frame, 640, stride=64, auto=True)[0]
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(device)
    img = img.float() / 255.0  # Normalize
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    # Inference
    with torch.no_grad():
        pred = model(img)[0]
    pred = non_max_suppression(pred, 0.25, 0.45)

    # Process detections
    for det in pred:
        if det is not None and len(det):
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], frame.shape).round()
            for *xyxy, conf, cls in det:
                plot_one_box(xyxy, frame, label=f'{model.names[int(cls)]} {conf:.2f}', color=(255, 0, 0))

    return frame, det

# Function to calculate Euclidean distance
def euclidean_distance(p1, p2):
    return np.sqrt((p1[0] - p2[0])**2 + (p1[1] - p2[1])**2)

# Function to detect fall
def detect_fall(keypoints):
    # Extract keypoints
    left_shoulder = keypoints[5]
    right_shoulder = keypoints[6]
    left_hip = keypoints[11]
    right_hip = keypoints[12]
    left_foot = keypoints[15]
    right_foot = keypoints[16]

    # Calculate length factor
    length_factor = euclidean_distance(left_shoulder, left_hip)

    # Check shoulder height relative to feet
    if left_shoulder[1] <= left_foot[1] + 0.1 * length_factor:
        return True

    # Check body dimensions
    body_height = abs(left_shoulder[1] - left_foot[1])
    body_width = abs(left_shoulder[0] - right_shoulder[0])
    if body_height < body_width:
        return True

    return False

# Read video and annotation
video_path = 'C:/Users/LENOVO/Documents/A Skripsi/datasets/FallDataset/Dataset/Coffee_room_01/Videos/video (1).avi'
cap = cv2.VideoCapture(video_path)
annotation_path = 'C:/Users/LENOVO/Documents/A Skripsi/datasets/FallDataset/Dataset/Coffee_room_01/Annotation_files/video (1).txt'
with open(annotation_path, 'r') as f:
    annotations = f.readlines()

# Get video FPS and calculate delay between frames
fps = cap.get(cv2.CAP_PROP_FPS)
desired_fps = 25
delay = int(1000 / desired_fps)  # Delay in milliseconds

frame_count = 0
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    frame_count += 1
    processed_frame, detections = process_frame(frame, model)

    # Check if current frame is within fall annotation
    for ann in annotations:
        start_frame, end_frame, height, width, center_x, center_y = map(int, ann.strip().split())
        if start_frame <= frame_count <= end_frame:
            # Apply fall detection logic
            if detections is not None:
                for det in detections:
                    keypoints = det[5:].reshape(-1, 2)
                    if detect_fall(keypoints):
                        print(f"Fall detected in frame {frame_count}")

    cv2.imshow('Frame', processed_frame)
    if cv2.waitKey(delay) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

  ckpt = torch.load(w, map_location=map_location)  # load


Fusing layers... 


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


ValueError: not enough values to unpack (expected 6, got 1)

In [3]:
import cv2
import numpy as np
import torch
from models.experimental import attempt_load
from utils.general import non_max_suppression, scale_coords
from utils.datasets import letterbox
from utils.plots import plot_one_box

# Load YOLOv7 model
weights = 'yolov7-w6-pose.pt'
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = attempt_load(weights, map_location=device)

# Function to process each frame
def process_frame(frame, model):
    img = letterbox(frame, 640, stride=64, auto=True)[0]
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(device)
    img = img.float() / 255.0  # Normalize
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    with torch.no_grad():
        pred = model(img)[0]
    pred = non_max_suppression(pred, 0.25, 0.45)

    for det in pred:
        if det is not None and len(det):
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], frame.shape).round()
            for *xyxy, conf, cls in det:
                plot_one_box(xyxy, frame, label=f'{model.names[int(cls)]} {conf:.2f}', color=(255, 0, 0))

    return frame, pred

# Function to calculate Euclidean distance
def euclidean_distance(p1, p2):
    return np.sqrt((p1[0] - p2[0])**2 + (p1[1] - p2[1])**2)

# Function to detect fall
def detect_fall(keypoints):
    left_shoulder = keypoints[5]
    right_shoulder = keypoints[6]
    left_hip = keypoints[11]
    right_hip = keypoints[12]
    left_foot = keypoints[15]
    right_foot = keypoints[16]
    
    length_factor = euclidean_distance(left_shoulder, left_hip)
    if left_shoulder[1] <= left_foot[1] + 0.1 * length_factor:
        return True
    
    body_height = abs(left_shoulder[1] - left_foot[1])
    body_width = abs(left_shoulder[0] - right_shoulder[0])
    if body_height < body_width:
        return True
    
    return False

# Read video
video_path = 'C:/Users/LENOVO/Documents/A Skripsi/datasets/FallDataset/Dataset/Coffee_room_01/Videos/video (1).avi'
cap = cv2.VideoCapture(video_path)

fps = int(cap.get(cv2.CAP_PROP_FPS))
frame_interval = fps // 25  # Process every 25 fps

frame_count = 0
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    frame_count += 1
    if frame_count % frame_interval == 0:
        processed_frame, detections = process_frame(frame, model)
        if detections is not None:
            for det in detections:
                keypoints = det[5:].reshape(-1, 2)
                if detect_fall(keypoints):
                    print(f"Fall detected in frame {frame_count}")
        
        cv2.imshow('Frame', processed_frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()


Fusing layers... 


IndexError: index 5 is out of bounds for dimension 0 with size 0

In [2]:
import cv2
import numpy as np
import torch
from models.experimental import attempt_load
from utils.general import non_max_suppression, scale_coords
from utils.datasets import letterbox
from utils.plots import plot_one_box

# Load YOLOv7 model
weights = 'yolov7-w6-pose.pt'
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = attempt_load(weights, map_location=device)

# Function to process each frame
def process_frame(frame, model):
    # Resize and normalize frame
    img = letterbox(frame, 640, stride=64, auto=True)[0]
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(device)
    img = img.float() / 255.0  # Normalize
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    # Inference
    with torch.no_grad():
        pred = model(img)[0]
    pred = non_max_suppression(pred, 0.25, 0.45)

    # Process detections
    detections = []
    for det in pred:
        if det is not None and len(det):
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], frame.shape).round()
            for *xyxy, conf, cls, kpts in det:
                if int(cls) < len(model.names):  # Check if class index is valid
                    label = f'{model.names[int(cls)]} {conf:.2f}'
                    plot_one_box(xyxy, frame, label=label, color=(255, 0, 0))
                    detections.append(det.cpu().numpy())
    
    return frame, detections if detections else None

# Function to calculate Euclidean distance
def euclidean_distance(p1, p2):
    return np.sqrt((p1[0] - p2[0])**2 + (p1[1] - p2[1])**2)

# Function to detect fall
def detect_fall(keypoints):
    # Extract keypoints
    left_shoulder = keypoints[5]
    right_shoulder = keypoints[6]
    left_hip = keypoints[11]
    right_hip = keypoints[12]
    left_foot = keypoints[15]
    right_foot = keypoints[16]

    # Calculate length factor
    length_factor = euclidean_distance(left_shoulder, left_hip)

    # Check shoulder height relative to feet
    if left_shoulder[1] <= left_foot[1] + 0.1 * length_factor:
        return True

    # Check body dimensions
    body_height = abs(left_shoulder[1] - left_foot[1])
    body_width = abs(left_shoulder[0] - right_shoulder[0])
    if body_height < body_width:
        return True

    return False

# Read video and annotation
video_path = 'C:/Users/LENOVO/Documents/A Skripsi/datasets/FallDataset/Dataset/Coffee_room_01/Videos/video (1).avi'
cap = cv2.VideoCapture(video_path)
annotation_path = 'C:/Users/LENOVO/Documents/A Skripsi/datasets/FallDataset/Dataset/Coffee_room_01/Annotation_files/video (1).txt'

# Read annotations and filter invalid lines
annotations = []
with open(annotation_path, 'r') as f:
    for line in f:
        parts = line.strip().split()
        if len(parts) == 6:  # Ensure there are exactly 6 values
            annotations.append(list(map(int, parts)))

# Get video FPS and calculate delay between frames
fps = cap.get(cv2.CAP_PROP_FPS)
desired_fps = 25
delay = int(1000 / desired_fps)  # Delay in milliseconds

frame_count = 0
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    frame_count += 1
    processed_frame, detections = process_frame(frame, model)

    # Check if current frame is within fall annotation
    for ann in annotations:
        start_frame, end_frame, height, width, center_x, center_y = ann
        if start_frame <= frame_count <= end_frame:
            # Apply fall detection logic
            if detections is not None:
                for det in detections:
                    keypoints = det[5:].reshape(-1, 2)
                    if detect_fall(keypoints):
                        print(f"Fall detected in frame {frame_count}")

    cv2.imshow('Frame', processed_frame)
    if cv2.waitKey(delay) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

Fusing layers... 


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [12]:
import cv2
import torch
import numpy as np
from models.experimental import attempt_load
from utils.general import non_max_suppression, scale_coords

def main():
    # Load YOLOv7 Pose Model
    model = attempt_load('yolov7-w6-pose.pt', map_location='cpu')  # Change 'cpu' to 'cuda' if available
    stride = int(model.stride.max())  # Model stride

    # Video Capture
    cap = cv2.VideoCapture('C:/Users/LENOVO/Documents/A Skripsi/datasets/FallDataset/Dataset/Coffee_room_01/Videos/video (1).avi')
    if not cap.isOpened():
        print("Error: Could not open video file")
        return

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Preprocess the image
        img = cv2.resize(frame, (640, 640))
        img = img[:, :, ::-1].transpose(2, 0, 1)  # Convert BGR to RGB and reorder dimensions
        img = np.ascontiguousarray(img)
        img_tensor = torch.from_numpy(img).float().div(255.0).unsqueeze(0).to('cpu')  # Normalize

        # Inference
        with torch.no_grad():
            pred = model(img_tensor)[0]
        
        # Apply NMS (Non-Maximum Suppression)
        pred = non_max_suppression(pred, 0.25, 0.45, classes=0)  # Filter for 'person' class only
        
        # Process detections
        for det in pred:
            if det is not None and len(det):
                # Rescale boxes to original image size
                det[:, :4] = scale_coords(img_tensor.shape[2:], det[:, :4], frame.shape).round()
                
                for det_item in det:
                    xyxy = det_item[:4]  # Bounding box
                    conf = det_item[4]  # Confidence score
                    cls = det_item[5]  # Class ID
                    kpts = det_item[6:].reshape(-1, 3)  # Keypoints (x, y, conf)

                    # Draw keypoints
                    for x, y, kp_conf in kpts:
                        if kp_conf > 0.5:  # Confidence threshold
                            cv2.circle(frame, (int(x), int(y)), 5, (0, 255, 0), -1)

                    # Draw bounding box
                    cv2.rectangle(frame, 
                                  (int(xyxy[0]), int(xyxy[1])), 
                                  (int(xyxy[2]), int(xyxy[3])), 
                                  (255, 0, 0), 2)

        # Display result
        cv2.imshow('Pose Detection', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

if __name__ == "__main__":
    main()


Fusing layers... 


In [3]:
import cv2
import torch
import numpy as np
from models.experimental import attempt_load
from utils.general import non_max_suppression, scale_coords

def main():
    # Load YOLOv7 Pose Model
    model = attempt_load('yolov7-w6-pose.pt', map_location='cpu')  # Change 'cpu' to 'cuda' if available
    stride = int(model.stride.max())  # Model stride

    # Video Capture
    cap = cv2.VideoCapture('C:/Users/LENOVO/Documents/A Skripsi/datasets/FallDataset/Dataset/Coffee_room_01/Videos/video (1).avi')
    if not cap.isOpened():
        print("Error: Could not open video file")
        return

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Preprocess the image
        img = cv2.resize(frame, (640, 640))
        img = img[:, :, ::-1].transpose(2, 0, 1)  # Convert BGR to RGB and reorder dimensions
        img = np.ascontiguousarray(img)
        img_tensor = torch.from_numpy(img).float().div(255.0).unsqueeze(0).to('cpu')  # Normalize

        # Inference
        with torch.no_grad():
            pred = model(img_tensor)[0]
        
        # Apply NMS (Non-Maximum Suppression)
        pred = non_max_suppression(pred, 0.25, 0.45, classes=0)  # Filter for 'person' class only
        
        # Process detections
        for det in pred:
            if det is not None and len(det):
                # Rescale boxes to original image size
                det[:, :4] = scale_coords(img_tensor.shape[2:], det[:, :4], frame.shape).round()
                
                for det_item in det:
                    xyxy = det_item[:4]  # Bounding box
                    conf = det_item[4]  # Confidence score
                    cls = det_item[5]  # Class ID
                    kpts = det_item[6:].reshape(-1, 3)  # Keypoints (x, y, conf)

                    # Draw keypoints
                    for x, y, kp_conf in kpts:
                        if kp_conf > 0.5:  # Confidence threshold
                            cv2.circle(frame, (int(x), int(y)), 5, (0, 255, 0), -1)

                    # Draw bounding box
                    cv2.rectangle(frame, 
                                  (int(xyxy[0]), int(xyxy[1])), 
                                  (int(xyxy[2]), int(xyxy[3])), 
                                  (255, 0, 0), 2)

        # Display result
        cv2.imshow('Pose Detection', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

if __name__ == "__main__":
    main()


Fusing layers... 
