In [1]:
!pip install mediapipe==0.10.* opencv-python yt-dlp
!apt-get install ffmpeg


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.


# Imports & Setup

In [2]:
from pathlib import Path
import cv2
import numpy as np
import mediapipe as mp
import math, json, os
from dataclasses import dataclass, field
from typing import Dict, Optional
import yt_dlp

# CONFIG

In [3]:
# Clip to analyse. Values starting with "http" are downloaded by get_video_path;
# anything else is passed to OpenCV as a local path.
INPUT_VIDEO = "https://youtube.com/shorts/vSX3IRxGnNY"
# Receives the downloaded input, the annotated video and evaluation.json.
OUTPUT_DIR = Path("/content/output")
# Pass/fail icons drawn next to each cue by draw_overlays.
CHECK_IMG_PATH = "/content/sample_data/check.png"
CROSS_IMG_PATH = "/content/sample_data/cross.png"
# Every frame is resized to this width; the height is scaled by the same factor.
TARGET_WIDTH = 960
# Frame rate handed to the video writer for the annotated output.
TARGET_FPS = 30

# Create the output directory up front so later cells can write into it.
OUTPUT_DIR.mkdir(exist_ok=True, parents=True)

# Load PNG icons

In [4]:
# IMREAD_UNCHANGED loads the files as-is, which keeps a PNG's alpha channel;
# overlay_png needs that 4th channel for blending.
# cv2.imread returns None when a file cannot be read; overlay_png treats a
# None icon as "draw nothing".
check_img = cv2.imread(CHECK_IMG_PATH, cv2.IMREAD_UNCHANGED)
cross_img = cv2.imread(CROSS_IMG_PATH, cv2.IMREAD_UNCHANGED)

def overlay_png(bg, fg, x, y, scale=1.0):
    """Alpha-blend a 4-channel image onto ``bg`` in place.

    Parameters
    ----------
    bg : numpy.ndarray
        Background image with at least 3 channels; modified in place.
    fg : numpy.ndarray or None
        Foreground image. Must be H x W x 4 (colour + alpha); anything else
        (``None``, grayscale, 3-channel) is silently skipped.
    x, y : int
        Position of the foreground's top-left corner in ``bg`` coordinates.
        May be negative or extend past the border; only the overlapping part
        is drawn.
    scale : float
        Factor applied to the foreground size before blending.

    Returns
    -------
    None
    """
    # Nothing to draw without an image that carries an alpha channel.
    if fg is None or fg.ndim != 3 or fg.shape[2] != 4:
        return

    h, w = int(fg.shape[0] * scale), int(fg.shape[1] * scale)
    if h <= 0 or w <= 0:
        # cv2.resize rejects empty target sizes.
        return
    if (h, w) != fg.shape[:2]:
        fg = cv2.resize(fg, (w, h))

    # Clip the destination rectangle to the background bounds.
    y1, y2 = max(0, y), min(bg.shape[0], y + h)
    x1, x2 = max(0, x), min(bg.shape[1], x + w)
    if y1 >= y2 or x1 >= x2:
        return

    # Take the matching window of the foreground: when x or y is negative the
    # visible part starts inside the icon, not at its top-left corner.
    fg_crop = fg[y1 - y:y2 - y, x1 - x:x2 - x]

    # Keep alpha as (h, w, 1) so it broadcasts over the three colour channels.
    alpha = fg_crop[:, :, 3:4] / 255.0
    region = bg[y1:y2, x1:x2, :3]
    bg[y1:y2, x1:x2, :3] = alpha * fg_crop[:, :, :3] + (1.0 - alpha) * region

# Thresholds & Stats

In [5]:
@dataclass
class Thresholds:
    """Pass/fail limits used by draw_overlays for the per-frame cues.

    Angles are in degrees (the geometry helpers convert with math.degrees).
    """
    # Elbow cue passes when elbow_min <= front elbow angle <= elbow_max.
    elbow_min: float = 80.0
    elbow_max: float = 150.0
    # Spine cue passes when the lean from vertical is at most this value.
    spine_lean_max: float = 20.0
    # Head-knee cue passes when the horizontal nose-knee distance, divided by
    # the frame width, is at most this value.
    head_knee_max_px_ratio: float = 0.06
    # Foot cue passes when |foot angle - foot_angle_target| <= foot_angle_tol.
    foot_angle_target: float = 25.0
    foot_angle_tol: float = 20.0

@dataclass
class RunningStats:
    """Per-frame metric samples collected by analyze_video for final scoring.

    Each list receives one value per frame in which that metric could be
    computed; frames where a metric is None contribute nothing.
    """
    # default_factory gives every instance its own list (no shared mutable default).
    elbow_angles: list = field(default_factory=list)
    spine_leans: list = field(default_factory=list)
    head_knee_dxs: list = field(default_factory=list)
    foot_angles: list = field(default_factory=list)


# Mediapipe setup

In [6]:
# Short aliases for the MediaPipe modules used below:
# mp_drawing draws the detected skeleton onto frames, mp_pose provides the
# Pose estimator, the PoseLandmark enum and POSE_CONNECTIONS.
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

# Geometry helpers

In [7]:
def _angle_deg(a, b, c):
    ba = a - b
    bc = c - b
    if np.linalg.norm(ba) == 0 or np.linalg.norm(bc) == 0:
        return None
    cosang = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc))
    cosang = max(min(cosang, 1.0), -1.0)
    return math.degrees(math.acos(cosang))

def _angle_vs_vertical(p1, p2):
    v = p2 - p1
    if np.linalg.norm(v) == 0:
        return None
    dot = v[0] * 0 + v[1] * (-1)
    cosang = dot / (np.linalg.norm(v) * 1.0)
    cosang = max(min(cosang, 1.0), -1.0)
    return abs(math.degrees(math.acos(cosang)))

def _line_angle_vs_x_axis(p1, p2):
    v = p2 - p1
    if np.linalg.norm(v) == 0:
        return None
    ang = math.degrees(math.atan2(v[1], v[0]))
    return abs(ang)


# Landmark helpers

In [8]:
def landmarks_to_xy(frame_w, frame_h, pose_landmarks):
    """Convert normalised pose landmarks into named pixel coordinates.

    Each landmark's ``x``/``y`` is multiplied by the frame width/height. The
    result maps landmark names (e.g. "LEFT_ELBOW") to ``(x, y)`` tuples and
    additionally contains "MID_HIP" and "MID_SHOULDER", the midpoints of the
    left/right hip and shoulder pairs.
    """
    wanted = (
        "NOSE",
        "LEFT_SHOULDER", "RIGHT_SHOULDER",
        "LEFT_ELBOW", "RIGHT_ELBOW",
        "LEFT_WRIST", "RIGHT_WRIST",
        "LEFT_HIP", "RIGHT_HIP",
        "LEFT_KNEE", "RIGHT_KNEE",
        "LEFT_ANKLE", "RIGHT_ANKLE",
        "LEFT_FOOT_INDEX", "RIGHT_FOOT_INDEX",
    )

    # Pixel position of every detected landmark, keyed by its list index.
    pixel_by_index = {}
    for index, lm in enumerate(pose_landmarks.landmark):
        pixel_by_index[index] = (lm.x * frame_w, lm.y * frame_h)

    landmark_enum = mp_pose.PoseLandmark
    named = {
        name: pixel_by_index[int(getattr(landmark_enum, name))]
        for name in wanted
    }

    def midpoint(first, second):
        # Average two (x, y) tuples and hand back a plain tuple of floats.
        return tuple(((np.array(first) + np.array(second)) / 2.0).tolist())

    # Both members of each pair are guaranteed to be present at this point.
    named["MID_HIP"] = midpoint(named["LEFT_HIP"], named["RIGHT_HIP"])
    named["MID_SHOULDER"] = midpoint(named["LEFT_SHOULDER"], named["RIGHT_SHOULDER"])
    return named
def compute_metrics(landmarks, frame_w):
    """Derive the four posture metrics for one frame.

    Parameters
    ----------
    landmarks : dict
        Maps landmark names (as produced by landmarks_to_xy) to ``(x, y)``
        pixel positions. Missing names are tolerated.
    frame_w : int or float
        Frame width in pixels, used to normalise the head-knee distance.

    Returns
    -------
    dict
        Keys "front_elbow_deg", "spine_lean_deg", "head_knee_dx_ratio" and
        "foot_angle_deg". A value is None when the landmarks it needs are
        missing or the geometry is degenerate.
    """
    def point(name):
        # Landmark as an array, or None when it is absent.
        raw = landmarks.get(name)
        return None if raw is None else np.array(raw)

    def first_present(*names):
        # First landmark among `names` that exists (left side is tried first).
        for name in names:
            raw = landmarks.get(name)
            if raw is not None:
                return raw
        return None

    metrics = {}

    # Front elbow angle: the smaller of the two shoulder-elbow-wrist angles.
    elbow_angles = []
    for side in ("LEFT", "RIGHT"):
        shoulder = point(f"{side}_SHOULDER")
        elbow = point(f"{side}_ELBOW")
        wrist = point(f"{side}_WRIST")
        if shoulder is None or elbow is None or wrist is None:
            continue
        ang = _angle_deg(shoulder, elbow, wrist)
        if ang is not None:
            elbow_angles.append(ang)
    metrics["front_elbow_deg"] = min(elbow_angles) if elbow_angles else None

    # Spine lean: tilt of the hip-to-shoulder line away from vertical.
    mid_hip = point("MID_HIP")
    mid_shoulder = point("MID_SHOULDER")
    if mid_hip is not None and mid_shoulder is not None:
        metrics["spine_lean_deg"] = _angle_vs_vertical(mid_hip, mid_shoulder)
    else:
        metrics["spine_lean_deg"] = None

    # Head over knee: horizontal nose-knee distance as a fraction of the width.
    head = landmarks.get("NOSE")
    knee = first_present("LEFT_KNEE", "RIGHT_KNEE")
    if head is not None and knee is not None and frame_w:
        metrics["head_knee_dx_ratio"] = abs(head[0] - knee[0]) / frame_w
    else:
        # Also covers frame_w == 0, which would otherwise divide by zero.
        metrics["head_knee_dx_ratio"] = None

    # Foot angle: ankle and toe must come from the SAME leg, so pick the first
    # side for which both are available instead of choosing them independently.
    metrics["foot_angle_deg"] = None
    for side in ("LEFT", "RIGHT"):
        ankle = point(f"{side}_ANKLE")
        foot = point(f"{side}_FOOT_INDEX")
        if ankle is not None and foot is not None:
            metrics["foot_angle_deg"] = _line_angle_vs_x_axis(ankle, foot)
            break

    return metrics


# Draw overlays

In [9]:
def draw_overlays(frame, metrics, th: Thresholds):
    """Draw one pass/fail cue per available metric onto ``frame`` (in place).

    Each cue is a check or cross icon followed by "<label>: <value>" in green
    (pass) or red (fail). Cues are stacked top to bottom starting at y=50 with
    50 px spacing; metrics whose value is None are skipped and leave no gap.

    Parameters
    ----------
    frame : numpy.ndarray
        BGR image to draw on.
    metrics : dict
        Output of compute_metrics.
    th : Thresholds
        Limits that decide whether each cue passes.
    """
    # (metric key, label, pass test, value format).
    # cv2.putText's Hershey fonts cannot render non-ASCII characters, so the
    # unit is spelled "deg" instead of using the degree sign.
    cues = (
        ("front_elbow_deg", "Elbow",
         lambda v: th.elbow_min <= v <= th.elbow_max, "{:.1f} deg"),
        ("spine_lean_deg", "Spine Lean",
         lambda v: v <= th.spine_lean_max, "{:.1f} deg"),
        ("head_knee_dx_ratio", "Head-Knee Align",
         lambda v: v <= th.head_knee_max_px_ratio, "{:.3f}"),
        ("foot_angle_deg", "Foot Angle",
         lambda v: abs(v - th.foot_angle_target) <= th.foot_angle_tol, "{:.1f} deg"),
    )

    y_offset = 50
    for key, label, passes, fmt in cues:
        value = metrics.get(key)
        if value is None:
            continue
        if passes(value):
            icon, colour = check_img, (0, 255, 0)   # green (BGR)
        else:
            icon, colour = cross_img, (0, 0, 255)   # red (BGR)
        overlay_png(frame, icon, 10, y_offset, scale=0.1)
        cv2.putText(frame, f"{label}: {fmt.format(value)}", (60, y_offset + 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, colour, 2)
        y_offset += 50

# =========================
# Video download
# =========================
def get_video_path(input_src, out_dir):
    """Return a local path for ``input_src``, downloading it first if it is a URL.

    Parameters
    ----------
    input_src : str
        Either an http(s) URL or a local file path.
    out_dir : str or pathlib.Path
        Directory that receives the download (saved as ``input.mp4``).

    Returns
    -------
    str
        Path of the downloaded file for URLs; ``input_src`` unchanged otherwise.
    """
    # Match the URL scheme explicitly so a local file such as "http_clip.mp4"
    # is not mistaken for a URL.
    if not str(input_src).startswith(("http://", "https://")):
        return input_src

    out_file = Path(out_dir) / "input.mp4"
    ydl_opts = {
        "outtmpl": str(out_file),
        # Prefer an mp4; fall back to formats that contain video. An audio-only
        # fallback would produce a file the pose analysis cannot use.
        "format": "mp4/best/bestvideo",
        # The output name is fixed, so without this yt-dlp would keep a stale
        # file from a previous run when INPUT_VIDEO changes.
        "overwrites": True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([input_src])
    return str(out_file)

# Main analysis

In [10]:
def analyze_video():
    """Annotate the configured video with pose cues and write a final evaluation.

    Reads INPUT_VIDEO (downloading it when it is a URL), resizes every frame to
    TARGET_WIDTH, runs MediaPipe Pose, draws the skeleton and the pass/fail
    cues, and writes the result to ``OUTPUT_DIR / "annotated_video.mp4"``.
    Per-frame metrics are averaged into scores stored in
    ``OUTPUT_DIR / "evaluation.json"``.

    Returns
    -------
    pathlib.Path or None
        Path of the annotated video, or None when the input cannot be read.
    """
    th = Thresholds()  # built once; the limits do not change between frames
    stats = RunningStats()
    src_path = get_video_path(INPUT_VIDEO, OUTPUT_DIR)
    out_path = OUTPUT_DIR / "annotated_video.mp4"

    cap = cv2.VideoCapture(src_path)
    writer = None
    try:
        ok, frame = cap.read()
        if not ok:
            print("Error: cannot read video")
            return

        # Output size: fixed width, height scaled to keep the aspect ratio.
        in_h, in_w = frame.shape[:2]
        scale = TARGET_WIDTH / in_w
        out_h = int(in_h * scale)

        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        # NOTE: frames are written at TARGET_FPS whatever the source rate is,
        # so playback speed changes when the source is not TARGET_FPS.
        writer = cv2.VideoWriter(str(out_path), fourcc, TARGET_FPS, (TARGET_WIDTH, out_h))

        with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
            # The first frame is already in hand, so process it directly rather
            # than seeking back to frame 0 (seeking is not reliable everywhere).
            while ok:
                frame = cv2.resize(frame, (TARGET_WIDTH, out_h))
                results = pose.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                if results.pose_landmarks:
                    mp_drawing.draw_landmarks(frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)
                    landmarks = landmarks_to_xy(TARGET_WIDTH, out_h, results.pose_landmarks)
                    m = compute_metrics(landmarks, TARGET_WIDTH)

                    # Keep only the metrics that could be computed for this frame.
                    for samples, key in (
                        (stats.elbow_angles, "front_elbow_deg"),
                        (stats.spine_leans, "spine_lean_deg"),
                        (stats.head_knee_dxs, "head_knee_dx_ratio"),
                        (stats.foot_angles, "foot_angle_deg"),
                    ):
                        if m[key] is not None:
                            samples.append(m[key])

                    draw_overlays(frame, m, th)

                writer.write(frame)
                ok, frame = cap.read()
    finally:
        # Release native handles on every path, including errors and early exit.
        cap.release()
        if writer is not None:
            writer.release()

    # Score against the same limits the overlay cues use. The elbow target is
    # the middle of the allowed range and its tolerance half the range width.
    elbow_target = (th.elbow_min + th.elbow_max) / 2
    elbow_tol = (th.elbow_max - th.elbow_min) / 2
    evaluation = {
        "Footwork": score_and_feedback(stats.foot_angles, th.foot_angle_target, th.foot_angle_tol),
        "Head Position": score_and_feedback(stats.head_knee_dxs, 0, th.head_knee_max_px_ratio, inverse=True),
        "Swing Control": score_and_feedback(stats.elbow_angles, elbow_target, elbow_tol),
        "Balance": score_and_feedback(stats.spine_leans, 0, th.spine_lean_max, inverse=True),
        # NOTE(review): same inputs as "Swing Control", so both scores are
        # always equal - confirm whether a separate metric was intended.
        "Follow-through": score_and_feedback(stats.elbow_angles, elbow_target, elbow_tol)
    }
    with open(OUTPUT_DIR / "evaluation.json", "w") as f:
        json.dump(evaluation, f, indent=2)

    print(f"Done! Video saved to {out_path}, evaluation saved to evaluation.json")
    return out_path


# Simple scoring function

In [11]:
def score_and_feedback(values, target, tol, inverse=False):
    """Turn a list of metric samples into a 1-10 score with short feedback.

    Parameters
    ----------
    values : list of float
        Per-frame samples of one metric.
    target : float
        Ideal value. Used only when ``inverse`` is False.
    tol : float
        Deviation at which the score bottoms out at 1. Must be positive.
    inverse : bool
        False: penalise the absolute distance of the average from ``target``.
        True: "smaller is better" - penalise the average itself relative to
        ``tol`` (``target`` is not consulted).

    Returns
    -------
    dict
        ``{"score", "average", "feedback"}``; for empty input
        ``{"score": 0, "feedback": "No data available"}``.

    Raises
    ------
    ValueError
        If ``tol`` is not positive (and ``values`` is non-empty).
    """
    if not values:
        return {"score": 0, "feedback": "No data available"}
    if tol <= 0:
        raise ValueError(f"tol must be positive, got {tol!r}")

    avg_val = sum(values) / len(values)
    deviation = avg_val if inverse else abs(avg_val - target)
    raw_score = 10 - int((deviation / tol) * 10)
    # Clamp to the 1..10 scale: a negative average in inverse mode would
    # otherwise produce a score above 10.
    score = min(10, max(1, raw_score))
    feedback = "Good form" if score >= 8 else "Needs improvement"
    return {"score": score, "average": avg_val, "feedback": feedback}

# Run

In [12]:
# Entry point. In a notebook kernel __name__ is "__main__", so running this
# cell starts the full analysis; the guard keeps an import of the exported
# script from triggering it.
if __name__ == "__main__":
    analyze_video()


[youtube] Extracting URL: https://youtube.com/shorts/vSX3IRxGnNY
[youtube] vSX3IRxGnNY: Downloading webpage
[youtube] vSX3IRxGnNY: Downloading tv client config
[youtube] vSX3IRxGnNY: Downloading player 6b03aad7-main
[youtube] vSX3IRxGnNY: Downloading tv player API JSON
[youtube] vSX3IRxGnNY: Downloading ios player API JSON
[youtube] vSX3IRxGnNY: Downloading m3u8 information
[info] vSX3IRxGnNY: Downloading 1 format(s): 18
[download] Destination: /content/output/input.mp4
[download] 100% of  337.54KiB in 00:00:00 at 1.56MiB/s   
Done! Video saved to /content/output/annotated_video.mp4, evaluation saved to evaluation.json
