# Pose Estimation Tool Comparison

This notebook runs multiple pose estimation tools (AlphaPose, OpenPose, YOLOv8, BlazePose) on frame sets and saves results to organized output directories.

## Configuration

Before running, configure the following in the **Config** cell:
- `FRAME_SET`: Which frame set to process (e.g., "frames2")
- `OUTPUT_SUBDIR`: Output subdirectory number (e.g., "2")

All outputs will be saved to `outputs/{tool_name}/{OUTPUT_SUBDIR}/`

## Usage

1. Run the **Imports** and **Config** cells first
2. Run helper function cells (extract_frames, find_frames, ensure_out, etc.)
3. Run cells for each tool you want to use
4. Results are automatically saved to the configured output directories

### Imports

In [None]:
# Standard library imports
import os
import sys
import random
import subprocess
import shlex
import time
import json
import shutil
import re
import itertools
import csv
import pathlib
from pathlib import Path
from typing import List, Dict, Optional, Tuple
from collections import defaultdict

# Third-party imports
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
from ultralytics import YOLO
import mediapipe as mp

### Config

In [None]:
# --- Run selection ---------------------------------------------------------
# FRAME_SET names the folder under frames/ that the tool cells read from
# (examples: "frames1", "frames2", "frames3", etc.).
FRAME_SET = "frames2"
# OUTPUT_SUBDIR is the run number appended to every tool's output folder;
# change it to keep the results of different runs apart.
OUTPUT_SUBDIR = "2"

# --- Project directories ---------------------------------------------------
# Everything is anchored at the directory the notebook was started from.
ROOT = Path.cwd()
FRAMES_DIR = ROOT.joinpath("frames")
OUT_DIR = ROOT.joinpath("outputs")

# Make sure both top-level folders exist before any cell writes into them.
for _project_dir in (FRAMES_DIR, OUT_DIR):
    _project_dir.mkdir(exist_ok=True)

Working directory: c:\Users\sarah\OneDrive - Georgia Institute of Technology\VIP\landmarking


## Getting Frames from Video

#### Function for splitting a video into frames

In [None]:
def extract_frames(video_path, num_frames=6, output_dir=FRAMES_DIR):
    """
    Sample frames spread across a video and save them as frame_01.jpg, frame_02.jpg, etc.

    The video timeline is divided into ``num_frames`` equal segments and one
    frame index is drawn at random from each segment, so the frames are
    spread over the whole video but are not exactly evenly spaced.

    Args:
        video_path (str or Path): Path to the input video file.
        num_frames (int): Number of frames to extract.
        output_dir (str or Path): Directory to save the extracted frames.
    Returns:
        tuple: (saved_paths, frame_indices) - list of saved frame paths and the
               original video frame index of each saved frame. Both lists have
               the same length; frames that could not be read or written are
               left out of both. ([], []) is returned when the video is
               missing, cannot be opened, or reports no frames.
    """
    if not Path(video_path).exists():
        print(f"Video file {video_path} does not exist.")
        return [], []

    # Create output directory if it doesn't exist
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    # str() keeps this working when a pathlib.Path is passed in.
    cap = cv2.VideoCapture(str(video_path))
    try:
        if not cap.isOpened():
            print(f"Could not open video file {video_path}.")
            return [], []

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
        if total_frames <= 0:
            print(f"Video file {video_path} reports no frames.")
            return [], []
        print(f"Total frames in video: {total_frames}")

        # Pick one random frame index from each of num_frames equal segments
        indices = []
        for k in range(num_frames):
            start = int(total_frames * k / num_frames)
            end = int(total_frames * (k + 1) / num_frames) - 1
            if end < start:
                # Segment is empty (more requested frames than video frames):
                # fall back to the segment start, clamped to the last frame.
                idx = min(start, total_frames - 1)
            else:
                idx = random.randint(start, end)
            indices.append(idx)

        indices.sort()
        print("Chosen frame indices:", indices)

        # Extract and save frames
        saved_paths = []
        saved_indices = []
        for i, idx in enumerate(indices):
            cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
            ret, frame = cap.read()
            if not ret:
                print(f"Could not read frame {idx}, skipping.")
                continue

            # Save as frame_01.jpg, frame_02.jpg, etc. (numbered by sample slot)
            output_file = output_path / f"frame_{i+1:02d}.jpg"
            if not cv2.imwrite(str(output_file), frame):
                print(f"Could not write {output_file}, skipping.")
                continue
            saved_paths.append(output_file)
            saved_indices.append(idx)
            print(f"Saved: {output_file} (from original frame {idx})")

        return saved_paths, saved_indices
    finally:
        # Release the capture on every exit path, including exceptions.
        cap.release()

#### Function for loading frames that were already extracted

In [None]:
def find_frames(frames_directory="frames/frames1"):
    """
    Find all frame images (frame_*.jpg) in the specified frames directory.

    Frames are ordered by the number in their filename rather than
    alphabetically, so "frame_2.jpg" comes before "frame_10.jpg" even when the
    names are not zero-padded.

    Args:
        frames_directory (str): Path to the directory containing frame images
    Returns:
        tuple: (saved_paths, extracted_indices) - list of frame paths and their indices.
               extracted_indices[i] is the number parsed from saved_paths[i].
               Files matching frame_*.jpg whose name contains no digits are
               still returned, but placed after all numbered frames and
               without an entry in extracted_indices.
    """
    frames_dir = pathlib.Path(frames_directory)

    numbered = []    # (frame_number, path) pairs
    unnumbered = []  # matching files whose name contains no digits
    for path in frames_dir.glob("frame_*.jpg"):
        # path.stem is e.g. "frame_01" for "frame_01.jpg"; the last run of
        # digits in it is taken as the frame number.
        numbers = re.findall(r'\d+', path.stem)
        if numbers:
            numbered.append((int(numbers[-1]), path))
        else:
            unnumbered.append(path)

    # Numeric order first; the name breaks ties such as frame_1 / frame_01.
    numbered.sort(key=lambda item: (item[0], item[1].name))
    unnumbered.sort()

    saved_paths = [path for _, path in numbered] + unnumbered
    extracted_indices = [number for number, _ in numbered]
    print(f"Found existing frames in '{frames_directory}': {len(saved_paths)} frames")

    return saved_paths, extracted_indices

#### Create frames or find existing ones

In [None]:
# Sample 6 frames from videos/input3.mp4 and write them to frames/frames3
# (both paths are relative to the current working directory).
# NOTE(review): the target folder is hard-coded to "frames3" and does not
# follow FRAME_SET from the Config cell — confirm this is intended.
saved_paths, indices = extract_frames("videos/input3.mp4", num_frames=6, output_dir="frames/frames3")

In [None]:
# Load the frames already on disk for the configured FRAME_SET instead of
# extracting new ones; binds saved_paths and indices for the cells below.
saved_paths, indices = find_frames(f"frames/{FRAME_SET}")

## Helper functions

In [None]:
def ensure_out(subdir_name, subdir_num=None):
    """
    Ensure output directory exists and return path.

    The directory is outputs/<subdir_name>/<subdir_num>, relative to the
    current working directory; missing parent folders are created.

    Args:
        subdir_name (str): Name of the output subdirectory (e.g., "blazepose_mediapipe")
        subdir_num (str or int, optional): Subdirectory number (e.g., "2" or 2).
            If None, uses OUTPUT_SUBDIR from config.
    Returns:
        Path: Path to the output directory
    """
    if subdir_num is None:
        subdir_num = OUTPUT_SUBDIR
    # str() lets callers pass the run number as an int; joining a Path with a
    # bare int would raise TypeError.
    output_dir = pathlib.Path("outputs") / subdir_name / str(subdir_num)
    output_dir.mkdir(parents=True, exist_ok=True)
    return output_dir

In [None]:
def parse_keypoints_from_result(r):
    """
    Pull the per-person keypoint arrays out of a YOLOv8 pose result.

    Args:
        r: YOLOv8 result object; its keypoints are read from r.keypoints.data
    Returns:
        list: One numpy array per detected person, taken row by row from
              r.keypoints.data. Empty when the result has no keypoints
              attribute, when it is None, or when extraction fails (the
              error is printed, not raised).
    Note:
        Presumably each array is (num_keypoints, 3) with (x, y, confidence)
        in COCO 17-keypoint layout — this function does not check it.
    """
    persons = []

    keypoints = getattr(r, 'keypoints', None)
    if keypoints is None:
        return persons

    try:
        # Move the tensor to host memory, then split along the first axis
        kps_data = keypoints.data.cpu().numpy()
        persons.extend(kps_data[person_idx] for person_idx in range(len(kps_data)))
    except Exception as e:
        print(f"Error extracting keypoints: {e}")

    return persons

## AlphaPose

In [None]:
# Discovery cell: locate the frames and the AlphaPose checkout used by the
# cells below.
saved_paths, extracted_indices = find_frames(f"frames/{FRAME_SET}")

project_root = Path.cwd()
alphapose_dir = project_root / "alphapose"
demo_script = alphapose_dir / "scripts" / "demo_api.py"


# Use the frames found by find_frames, capped at the first six
selected_images = saved_paths[:6]

# Config and checkpoint are resolved inside the same checkout as demo_script.
# Using alphapose_dir for all three keeps the folder name's letter case
# consistent, which matters on case-sensitive filesystems.
cfg_path = alphapose_dir / "configs" / "coco" / "resnet" / "256x192_res50_lr1e-3_1x.yaml"
checkpoint_path = alphapose_dir / "pretrained_models" / "fast_res50_256x192.pth"

# Value for AlphaPose's --gpus flag.
# NOTE(review): "-1" presumably selects CPU-only inference — confirm against the AlphaPose docs.
gpus = "-1"

### Run AlphaPose (one image at a time)

In [None]:
# The discovery cell must have run first: it defines the paths and settings
# used below. Referencing the names raises NameError if any is missing.
try:
    demo_script, cfg_path, checkpoint_path, selected_images, gpus
except NameError:
    raise RuntimeError("Run the discovery cell first so demo_script, cfg_path, checkpoint_path, selected_images and gpus exist.") from None

# demo_script is <alphapose_root>/scripts/demo_api.py, so two levels up is the checkout root
alphapose_root = Path(demo_script).parents[1]

cfg_path = alphapose_root / "configs" / "coco" / "resnet" / "256x192_res50_lr1e-3_1x.yaml"

# Fail early with a clear message instead of letting the subprocess die on a missing file
if not cfg_path.exists():
    raise FileNotFoundError(f"Config does not exist at {cfg_path}")
if not Path(checkpoint_path).exists():
    raise FileNotFoundError(f"Checkpoint does not exist at {checkpoint_path}")

overlay_dir = alphapose_root / "examples" / "res" / "vis"
overlay_dir.mkdir(parents=True, exist_ok=True)

# Put the AlphaPose checkout on PYTHONPATH for the child process (once only)
env = os.environ.copy()
cur_pythonpath = env.get("PYTHONPATH", "")
alphapose_abs = str(alphapose_root.resolve())
if alphapose_abs not in cur_pythonpath.split(os.pathsep):
    env["PYTHONPATH"] = alphapose_abs + (os.pathsep + cur_pythonpath if cur_pythonpath else "")

python_exec = sys.executable
base_args = [
    python_exec,
    str(demo_script),
    "--cfg", str(cfg_path),
    "--checkpoint", str(checkpoint_path),
    "--save_img",
    "--format", "coco",
    "--gpus", str(gpus),
]

# Run AlphaPose on just one image: the second selected frame (index 1)
if len(selected_images) < 2:
    raise IndexError(f"Need at least 2 frames to use selected_images[1]; found {len(selected_images)}.")
img_path = selected_images[1]
cmd = base_args + ["--image", str(img_path.resolve())]

start = time.time()
try:
    proc = subprocess.run(cmd, env=env, cwd=str(alphapose_root), capture_output=True, text=True, timeout=900)
except subprocess.TimeoutExpired:
    print("Timeout for image", img_path)
except (OSError, subprocess.SubprocessError) as e:
    print("Exception for image", img_path, ":", repr(e))
else:
    elapsed = time.time() - start
    rc = proc.returncode

    if rc != 0:
        print(f"Failed with return code: {rc}")
        # Output was captured, so show the end of stderr; otherwise the
        # reason for the failure is invisible in the notebook.
        if proc.stderr:
            print(proc.stderr[-2000:])
    else:
        print(f"AlphaPose finished for {img_path.name} in {elapsed:.1f}s")

In [None]:
# Copy the AlphaPose results for the processed frame from AlphaPose's own
# output folder into outputs/alphapose/<OUTPUT_SUBDIR>/.
project_root = Path.cwd()
alphapose_root = Path(demo_script).parents[1]
intermediate_output_dir = alphapose_root / "examples" / "res"
overlay_dir = intermediate_output_dir / "vis"

final_output_dir = project_root / "outputs" / "alphapose" / OUTPUT_SUBDIR
final_overlays = final_output_dir / "overlays"
final_json = final_output_dir / "json"

final_overlays.mkdir(parents=True, exist_ok=True)
final_json.mkdir(parents=True, exist_ok=True)

# Same frame the run cell processed (index 1)
original_image_name = selected_images[1].stem

# Take the first image in overlay_dir (sorted by name) whose name contains
# the frame's base name.
image_suffixes = (".jpg", ".jpeg", ".png")
overlay_source = None
if overlay_dir.exists():
    overlay_source = next(
        (
            f for f in sorted(overlay_dir.glob("*"))
            if f.suffix.lower() in image_suffixes and original_image_name in f.stem
        ),
        None,
    )

if overlay_source is not None:
    dst = final_overlays / "02_result.jpg"
    shutil.copy2(overlay_source, dst)
else:
    # Report the miss instead of finishing silently with nothing copied
    print(f"No AlphaPose overlay found for {original_image_name} in {overlay_dir}")

# The results file may be in the output folder or in the checkout root;
# the first location that exists wins.
json_sources = [
    intermediate_output_dir / "alphapose-results.json",
    alphapose_root / "alphapose-results.json"
]

json_source = next((candidate for candidate in json_sources if candidate.exists()), None)
if json_source is not None:
    dst = final_json / "02_result.json"
    shutil.copy2(json_source, dst)
else:
    print("No alphapose-results.json found in:", ", ".join(str(candidate.parent) for candidate in json_sources))

## BlazePose

In [None]:
# Run MediaPipe BlazePose on every frame of the configured frame set, saving
# one overlay image per frame and a single JSON file with all landmarks.
saved_paths, extracted_indices = find_frames(f"frames/{FRAME_SET}")

mp_pose = mp.solutions.pose

blazepose_out = ensure_out("blazepose_mediapipe")
json_data = defaultdict(list)

with mp_pose.Pose(static_image_mode=True, min_detection_confidence=0.3) as pose:
    for fp in saved_paths:
        img = cv2.imread(str(fp))
        if img is None:
            # cv2.imread returns None for files it cannot decode; skip them
            # rather than crashing on img.shape.
            print(f"Could not read {fp}, skipping.")
            continue

        h, w = img.shape[:2]
        # cv2 loads BGR; the image is converted to RGB before pose.process
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = pose.process(img_rgb)
        vis = img.copy()

        if results.pose_landmarks:
            landmarks = results.pose_landmarks.landmark

            # Flat [x, y, score, x, y, score, ...] list in pixel coordinates
            keypoints = []
            total_score = 0.0

            for lm in landmarks:
                # Landmark x/y are scaled by the image width/height to get pixels
                x = lm.x * w
                y = lm.y * h
                visibility = lm.visibility

                keypoints.extend([float(x), float(y), float(visibility)])
                total_score += visibility

                # Draw the landmark on the overlay in the same pass
                cv2.circle(vis, (int(x), int(y)), 3, (0, 255, 0), -1)

            # Average visibility is used as the score of this detection
            avg_score = total_score / len(landmarks) if len(landmarks) else 0

            json_data[str(fp)].append({
                "score": float(avg_score),
                "keypoints": keypoints
            })

        # Save visualization (the unmodified frame when no pose was detected)
        outimg = blazepose_out / f"{fp.stem}_blazepose.jpg"
        cv2.imwrite(str(outimg), vis)

# Save JSON
json_path = blazepose_out / "blazepose_landmarks.json"
with open(json_path, 'w', encoding="utf-8") as f:
    json.dump(dict(json_data), f, indent=4)


## OpenPose

In [None]:
# Run the OpenPose demo binary on the whole configured frame set, writing
# overlays and per-frame JSON to outputs/openpose/<OUTPUT_SUBDIR>/.
frames_directory = f"frames/{FRAME_SET}"
saved_paths, extracted_indices = find_frames(frames_directory)

project_root = Path.cwd()

frames_dir_absolute = (project_root / frames_directory).resolve()

# Allow an earlier cell to predefine openpose_bin; otherwise use the default location
try:
    openpose_bin
except NameError:
    openpose_bin = project_root / "openpose" / "bin" / "OpenPoseDemo.exe"

# Accept a plain string path as well as a Path
openpose_bin = Path(openpose_bin)

if not openpose_bin.exists():
    raise FileNotFoundError(f"OpenPose binary not found at {openpose_bin}")

final_output_dir = project_root / "outputs" / "openpose" / OUTPUT_SUBDIR
final_overlays = final_output_dir / "overlays"
final_json = final_output_dir / "json"
final_overlays.mkdir(parents=True, exist_ok=True)
final_json.mkdir(parents=True, exist_ok=True)

# The binary is expected at <openpose_root>/bin/, with models at <openpose_root>/models
openpose_root_guess = openpose_bin.parents[1]
models_dir = openpose_root_guess / "models"

if not models_dir.exists():
    # Try alternative location
    models_dir = project_root / "openpose" / "models"
    if not models_dir.exists():
        raise FileNotFoundError(f"Models directory not found at {models_dir}")

cmd = [
    str(openpose_bin),
    "--image_dir", str(frames_dir_absolute),
    "--write_images", str(final_overlays),
    "--write_json", str(final_json),
    "--model_folder", str(models_dir),
    "--hand",
    "--face"
]


# Run OpenPose
try:
    proc = subprocess.run(cmd, cwd=str(openpose_bin.parent), capture_output=True, text=True)
except (OSError, subprocess.SubprocessError) as e:
    print("Exception when running OpenPose:", e)
else:
    if proc.returncode != 0:
        print(f"OpenPose failed with return code: {proc.returncode}")
        # Output was captured, so show the end of it; otherwise the reason
        # for the failure is invisible in the notebook.
        failure_output = proc.stderr or proc.stdout
        if failure_output:
            print(failure_output[-2000:])

## YOLOv8

In [None]:
# Run YOLOv8 pose on every frame of the configured frame set and write, per
# frame, an annotated image and a keypoint JSON file.
frames_directory = f"frames/{FRAME_SET}"
saved_paths, extracted_indices = find_frames(frames_directory)

yolo_out = ensure_out("yolov8")

MODEL_NAME = "yolov8n-pose.pt"
model = YOLO(MODEL_NAME)

for fp in saved_paths:
    img = cv2.imread(str(fp))
    if img is None:
        # Unreadable frame: nothing to predict on or draw over
        continue

    img_height, img_width = img.shape[:2]

    # Predict with YOLOv8 pose
    results = model.predict(source=str(fp), save=False, verbose=False)

    # One JSON document per frame; every detected person is appended to "people"
    frame_json_data = {"version": 1.3, "people": []}

    for result_idx, r in enumerate(results):
        vis = img.copy()

        for pid, person_kps in enumerate(parse_keypoints_from_result(r)):
            # Flat [x, y, confidence, ...] list, built while drawing
            pose_keypoints_2d = []

            for kp_idx, kp in enumerate(person_kps):
                x = float(kp[0])
                y = float(kp[1])
                confidence = float(kp[2])
                pose_keypoints_2d += [x, y, confidence]

                x_pixel = int(round(x))
                y_pixel = int(round(y))

                # Only draw confident keypoints that fall inside the image
                drawable = (
                    confidence > 0.1
                    and 0 <= x_pixel < img_width
                    and 0 <= y_pixel < img_height
                )
                if drawable:
                    color = (0, 255, 0)
                    cv2.circle(vis, (x_pixel, y_pixel), 4, color, -1)
                    cv2.circle(vis, (x_pixel, y_pixel), 6, (255, 255, 255), 1)
                    cv2.putText(vis, str(kp_idx), (x_pixel+5, y_pixel-5),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 255), 1)

            # Face and hand entries are zero-filled; only the pose list carries detections
            frame_json_data["people"].append({
                "person_id": [pid],
                "pose_keypoints_2d": pose_keypoints_2d,
                "face_keypoints_2d": [0] * 210,
                "hand_left_keypoints_2d": [0] * 63,
                "hand_right_keypoints_2d": [0] * 63,
                "pose_keypoints_3d": [],
                "face_keypoints_3d": [],
                "hand_left_keypoints_3d": [],
                "hand_right_keypoints_3d": []
            })

        # Save visualization
        outimg = yolo_out / f"{fp.stem}_yolo.jpg"
        cv2.imwrite(str(outimg), vis)

        # Save the keypoints gathered so far for this frame
        json_path = yolo_out / f"{fp.stem}_keypoints.json"
        with open(json_path, 'w') as f:
            json.dump(frame_json_data, f, indent=2)
