In [None]:
from ultralytics import YOLO
import cv2
import os

def _pose_label_lines(boxes, keypoints, confidences, width, height, conf_threshold):
    """Build one YOLO-pose label line per detected person.

    Args:
        boxes: Per-person ``(x_center, y_center, width, height)`` boxes,
            already normalized to the image size.
        keypoints: Per-person sequences of ``(x, y)`` keypoints in pixels.
        confidences: Per-person sequences of keypoint confidences, or ``None``
            when no per-keypoint confidence is available.
        width: Image width in pixels, used to normalize keypoint x values.
        height: Image height in pixels, used to normalize keypoint y values.
        conf_threshold: A keypoint is flagged visible (2) when its confidence
            is strictly greater than this value, otherwise 0.

    Returns:
        A list of strings (without trailing newline) in the form
        ``class x_center y_center width height kp1_x kp1_y kp1_v ...``.
    """
    lines = []
    for person_idx, (box, person_kpts) in enumerate(zip(boxes, keypoints)):
        # Class id is always 0: the dataset has a single "person" class.
        fields = ["0"] + [f"{value:.6f}" for value in box[:4]]
        person_conf = None if confidences is None else confidences[person_idx]
        for kp_idx, kp in enumerate(person_kpts):
            # Without confidence data every keypoint is treated as visible.
            visible = person_conf is None or person_conf[kp_idx] > conf_threshold
            fields.append(
                f"{kp[0] / width:.6f} {kp[1] / height:.6f} {2 if visible else 0}"
            )
        lines.append(" ".join(fields))
    return lines


def _annotate_frame(model, frame, sample_name, images_dir, labels_dir, conf_threshold):
    """Run pose estimation on one frame and write its image/label pair.

    Nothing is written when the model finds no person in the frame, so every
    saved image has a matching, non-empty label file.

    Returns:
        True when an image and label file were written, False otherwise.
    """
    result = model(frame, verbose=False)[0]
    if result.keypoints is None or result.boxes is None or len(result.boxes) == 0:
        return False

    height, width = frame.shape[:2]
    kpt_conf = result.keypoints.conf
    lines = _pose_label_lines(
        result.boxes.xywhn.cpu().numpy(),
        result.keypoints.xy.cpu().numpy(),
        None if kpt_conf is None else kpt_conf.cpu().numpy(),
        width,
        height,
        conf_threshold,
    )
    if not lines:
        return False

    img_path = os.path.join(images_dir, f"{sample_name}.jpg")
    # cv2.imwrite reports failure through its return value instead of raising.
    if not cv2.imwrite(img_path, frame):
        print(f"Warning: could not write {img_path}; skipping frame.")
        return False

    label_path = os.path.join(labels_dir, f"{sample_name}.txt")
    with open(label_path, 'w') as f:
        f.writelines(line + '\n' for line in lines)
    return True


def auto_annotate_videos(video_dir, output_dir, frame_stride=10,
                         conf_threshold=0.5, model_path='yolo11m-pose.pt'):
    """Generate initial pose annotations for videos using a pre-trained YOLO model.

    Every ``frame_stride``-th frame of each video in ``video_dir`` is run
    through the pose model. Frames with at least one detection are saved to
    ``<output_dir>/images`` and a YOLO-pose label file with one line per
    detected person is saved to ``<output_dir>/labels``.

    Args:
        video_dir: Directory containing the source videos (.mp4, .avi, .mov;
            matched case-insensitively).
        output_dir: Dataset root; ``images`` and ``labels`` sub-directories
            are created if missing.
        frame_stride: Process one frame out of this many to limit dataset size.
        conf_threshold: Keypoint confidence above which a keypoint is marked
            visible (2) rather than not labeled (0).
        model_path: Weights file passed to ``YOLO``.

    Raises:
        ValueError: If ``frame_stride`` is smaller than 1.
    """
    if frame_stride < 1:
        raise ValueError("frame_stride must be a positive integer")

    # Sorted so runs are reproducible regardless of filesystem listing order.
    video_files = sorted(
        name for name in os.listdir(video_dir)
        if name.lower().endswith(('.mp4', '.avi', '.mov'))
    )

    images_dir = os.path.join(output_dir, 'images')
    labels_dir = os.path.join(output_dir, 'labels')
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)

    if not video_files:
        # Avoid loading (and possibly downloading) the model for nothing.
        print(f"No video files found in {video_dir!r}; nothing to annotate.")
        return

    model = YOLO(model_path)

    for video_file in video_files:
        stem = os.path.splitext(video_file)[0]
        cap = cv2.VideoCapture(os.path.join(video_dir, video_file))
        try:
            if not cap.isOpened():
                print(f"Warning: could not open {video_file}; skipping.")
                continue

            frame_count = 0
            while True:
                success, frame = cap.read()
                if not success:
                    break

                if frame_count % frame_stride == 0:
                    sample_name = f"{stem}_frame{frame_count:04d}"
                    _annotate_frame(model, frame, sample_name,
                                    images_dir, labels_dir, conf_threshold)

                frame_count += 1
        finally:
            # Release the capture even if inference or file writing fails.
            cap.release()

    print("Auto-annotation complete. Review and correct labels before training.")

# Example run: annotate every video under raw_videos/ and write the
# resulting images/labels into pt_exercise_dataset/.
auto_annotate_videos(
    video_dir='raw_videos/',
    output_dir='pt_exercise_dataset/',
)