In [None]:
import os
import glob

# Configuration
input_dir = "runs/detect/exp/labels"  # Folder containing YOLO detection .txt files
output_file = "converted_annotations.txt"  # Output file in your desired format
total_frames = 157  # Total number of frames in your video

def convert_yolo_to_abs(yolo_coords, img_width=640, img_height=480):
    """Convert YOLO format [class, x_center, y_center, width, height] to absolute coordinates"""
    class_id = int(yolo_coords[0])
    x_center = float(yolo_coords[1]) * img_width
    y_center = float(yolo_coords[2]) * img_height
    width = float(yolo_coords[3]) * img_width
    height = float(yolo_coords[4]) * img_height
    return class_id, width, height, x_center, y_center

# Get all detection files and sort them numerically
detection_files = sorted(glob.glob(os.path.join(input_dir, "*.txt")), 
                       key=lambda x: int(os.path.splitext(os.path.basename(x))[0]))

with open(output_file, "w") as out_f:
    # Write total frames count (as in your example)
    out_f.write(f"{total_frames}\n")
    
    # Process each frame
    for frame_num in range(1, total_frames + 1):
        frame_file = os.path.join(input_dir, f"{frame_num}.txt")
        
        # Default values when no detection
        class_id = 1
        width = height = x_center = y_center = 0
        
        # If detection exists for this frame
        if os.path.exists(frame_file):
            with open(frame_file, "r") as in_f:
                lines = in_f.readlines()
                if lines:  # Take first detection only (modify if you need multiple)
                    yolo_coords = lines[0].strip().split()
                    class_id, width, height, x_center, y_center = convert_yolo_to_abs(yolo_coords)
        
        # Write in your desired format: frame_num,class_id,width,height,x_center,y_center
        out_f.write(f"{frame_num},{class_id},{int(width)},{int(height)},{int(x_center)},{int(y_center)}\n")

print(f"Conversion complete. Results saved to {output_file}")

In [4]:
import os
import glob

# Configuration
input_dir = "runs/detect/exp/labels"  # YOLO output folder
output_file = "person_annotations.txt"  # Output file
video_width = 640  # Adjust to your video dimensions
video_height = 480  # Adjust to your video dimensions
total_frames = 285  # Total frames in your video

def convert_yolo_to_abs(yolo_coords):
    """Convert YOLO format to absolute coordinates for persons only"""
    class_id = int(yolo_coords[0])
    # Only process person detections (class 0)
    if class_id != 0:
        return None
    
    x_center = float(yolo_coords[1]) * video_width
    y_center = float(yolo_coords[2]) * video_height
    width = float(yolo_coords[3]) * video_width
    height = float(yolo_coords[4]) * video_height
    confidence = float(yolo_coords[5]) if len(yolo_coords) > 5 else 1.0
    
    # Return only if confidence > 0.5 (adjust as needed)
    if confidence > 0.5:
        return width, height, x_center, y_center
    return None

with open(output_file, "w") as out_f:
    # Write total frames count
    out_f.write(f"{total_frames}\n")
    
    for frame_num in range(1, total_frames + 1):
        txt_file = os.path.join(input_dir, f"{frame_num}.txt")
        width = height = x_center = y_center = 0
        class_id = 1  # Default class if no detection
        
        if os.path.exists(txt_file):
            with open(txt_file, "r") as in_f:
                for line in in_f:
                    coords = line.strip().split()
                    result = convert_yolo_to_abs(coords)
                    if result:
                        width, height, x_center, y_center = result
                        class_id = 1  # Your desired class ID for persons
                        break  # Take first confident person detection
        
        # Write in your format
        out_f.write(f"{frame_num},{class_id},{int(width)},{int(height)},{int(x_center)},{int(y_center)}\n")

print(f"Person detections saved to {output_file}")

Person detections saved to person_annotations.txt


In [1]:
import cv2
import pandas as pd
import torch

# Path ke video
video_path = r"C:\Users\LENOVO\Documents\A Skripsi\datasets\FallDataset\Dataset\Lecture room\Videos\video (1).avi"

# Buka video
cap = cv2.VideoCapture(video_path)

# Periksa apakah video berhasil dibuka
if not cap.isOpened():
    print("Error: Tidak dapat membuka video.")
    exit()

# Simpan hasil annotation
annotations = []

frame_id = 1
while True:
    ret, frame = cap.read()
    if not ret:
        break  # Hentikan jika video selesai
    
    # Placeholder untuk deteksi objek menggunakan YOLOv7-W6-Pose
    # Misalnya, kita mendapatkan kelas, x_min, y_min, x_max, y_max
    class_id = 1  # Sesuaikan dengan hasil deteksi
    x_min, y_min, x_max, y_max = 0, 0, 0, 0  # Sesuaikan dengan hasil deteksi
    
    # Simpan hasil dalam format yang diminta
    annotations.append([frame_id, class_id, x_min, y_min, x_max, y_max])
    
    frame_id += 1

# Simpan ke file CSV
annotation_df = pd.DataFrame(annotations, columns=["Frame", "Class", "X_min", "Y_min", "X_max", "Y_max"])
annotation_df.to_csv("annotations.csv", index=False, header=False)

print("Annotation file telah dibuat: annotations.csv")

# Tutup video
cap.release()
cv2.destroyAllWindows()


Annotation file telah dibuat: annotations.csv


In [2]:
import torch
print(torch.__version__)



ModuleNotFoundError: No module named 'torch'