# 720 AI Assistant MVP — Video Pose Analysis (MediaPipe)

This notebook processes a **video** (not just photos):
- Runs **MediaPipe PoseLandmarker** per frame
- Saves an **annotated video** with landmarks
- Computes simple technique metrics and prints **insights/cues**

> Update `INPUT_VIDEO_PATH` to your local file name (upload it to the same folder as the notebook or mount it).

In [100]:
import sys
sys.version

'3.11.0 (main, Oct 24 2022, 18:26:48) [MSC v.1933 64 bit (AMD64)]'

In [101]:
!pip install -q mediapipe opencv-python


[notice] A new release of pip available: 22.3 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [102]:
!pip install -q pandas


[notice] A new release of pip available: 22.3 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [103]:
import urllib.request, os, pathlib

MODEL_URL = "https://storage.googleapis.com/mediapipe-models/pose_landmarker/pose_landmarker_heavy/float16/1/pose_landmarker_heavy.task"
MODEL_PATH = "pose_landmarker_heavy.task"

# Fetch the pose model once and reuse the local copy on later runs.
# The download goes to a temporary name and is only moved to MODEL_PATH when
# it has finished, so an interrupted download can never be mistaken for a
# complete model by the "already exists" check on the next run.
if not os.path.exists(MODEL_PATH):
    partial_path = MODEL_PATH + ".part"
    try:
        urllib.request.urlretrieve(MODEL_URL, partial_path)
        os.replace(partial_path, MODEL_PATH)
    finally:
        # Only present here if the download or the move failed.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print("Downloaded:", MODEL_PATH)
else:
    print("Model already exists:", MODEL_PATH)


Model already exists: pose_landmarker_heavy.task


In [104]:
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import numpy as np

def draw_landmarks_on_image(rgb_image, detection_result):
    """Return a copy of ``rgb_image`` with every detected pose drawn on it.

    Args:
        rgb_image: image array; it is copied and never modified.
        detection_result: object whose ``pose_landmarks`` attribute is an
            iterable of poses, each pose being an iterable of landmarks that
            expose ``x``, ``y``, ``z`` and, optionally, ``visibility``.

    Returns:
        The annotated copy. When there are no poses it is a plain copy.
    """
    poses = detection_result.pose_landmarks
    canvas = np.copy(rgb_image)

    drawing = solutions.drawing_utils

    # Both colour tuples have identical first and last channels, so they look
    # the same whether the image is in RGB or BGR order.
    point_style = drawing.DrawingSpec(
        color=(255, 0, 255),  # fuchsia
        thickness=2,
        circle_radius=3
    )
    edge_style = drawing.DrawingSpec(
        color=(0, 255, 0),  # green
        thickness=2
    )

    def to_proto(lm):
        # A landmark without a ``visibility`` attribute is stored with 0.0.
        return landmark_pb2.NormalizedLandmark(
            x=lm.x,
            y=lm.y,
            z=lm.z,
            visibility=getattr(lm, "visibility", 0.0)
        )

    for pose in poses:
        # drawing_utils consumes the protobuf list type, so each pose is
        # repackaged before it is drawn.
        proto = landmark_pb2.NormalizedLandmarkList()
        proto.landmark.extend([to_proto(lm) for lm in pose])

        drawing.draw_landmarks(
            canvas,
            proto,
            solutions.pose.POSE_CONNECTIONS,
            landmark_drawing_spec=point_style,
            connection_drawing_spec=edge_style
        )

    return canvas


## Run on a video

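1) Set `INPUT_VIDEO_PATH` (and `OUTPUT_VIDEO_PATH` if you want a different output name)
2) Run the cell — it will create:
- the annotated video at `OUTPUT_VIDEO_PATH` (currently `Double360AttemptAnnotated.mp4`)
- `metrics.csv`
- Printed insights (from the cell that follows)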

In [None]:
import cv2
import numpy as np
import pandas as pd
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from dataclasses import dataclass
from typing import Optional, Dict, List

# ========= 1) INPUT =========
# Alternative sample clip: "Swing720AttemptRaw.mp4"
INPUT_VIDEO_PATH = "Double360AttemptRaw.mp4"   # <-- change this to your file
OUTPUT_VIDEO_PATH = "Double360AttemptAnnotated.mp4"   # annotated copy written by the processing loop
OUTPUT_METRICS = "metrics.csv"   # per-frame metrics table written after the loop

# ========= 2) PoseLandmarker (VIDEO mode) =========
# Short aliases for the MediaPipe Tasks classes used below.
BaseOptions = mp.tasks.BaseOptions
PoseLandmarker = mp.tasks.vision.PoseLandmarker
PoseLandmarkerOptions = mp.tasks.vision.PoseLandmarkerOptions
VisionRunningMode = mp.tasks.vision.RunningMode

# VIDEO running mode pairs with landmarker.detect_for_video(image, timestamp_ms)
# in the processing loop. num_poses=1 matches that loop, which only reads the
# first pose of each result.
options = PoseLandmarkerOptions(
    base_options=BaseOptions(model_asset_path=MODEL_PATH),  # model file fetched by the download cell
    running_mode=VisionRunningMode.VIDEO,
    num_poses=1
)
landmarker = PoseLandmarker.create_from_options(options)

# ========= 3) Landmark indices =========
# Positions inside one pose's landmark list, as consumed by lm_xy().
# The values follow MediaPipe's pose landmark numbering (L_ = left, R_ = right)
# — TODO confirm against the model documentation if the model is swapped.
NOSE = 0
L_SHOULDER, R_SHOULDER = 11, 12
L_ELBOW, R_ELBOW = 13, 14
L_WRIST, R_WRIST = 15, 16
L_HIP, R_HIP = 23, 24
L_ANKLE, R_ANKLE = 27, 28

def lm_xy(lms, idx):
    """Return landmark ``idx`` of ``lms`` as a float32 array ``[x, y]``.

    Only the ``x`` and ``y`` attributes of the landmark are read.
    """
    point = lms[idx]
    return np.asarray((point.x, point.y), dtype=np.float32)

def dist(a, b):
    """Euclidean distance between two NumPy points, as a Python float."""
    offset = a - b
    return float(np.linalg.norm(offset))

@dataclass
class FrameMetrics:
    """Technique proxies produced by compute_frame_metrics for one frame.

    Every ratio field is divided by that frame's 2-D shoulder width, so it is
    expressed in shoulder widths rather than in image units.
    """
    t: float              # timestamp supplied by the caller (frame_idx / fps in the processing loop)
    head_drop: float      # (nose.y - shoulder midpoint.y) / shoulder width; > 0 when the nose is below the shoulders (y grows downward)
    elbow_tuck: float     # mean elbow-to-shoulder-midpoint distance / shoulder width (lower = elbows closer in)
    arm_open: float       # mean wrist-to-same-side-shoulder distance / shoulder width (higher = arms more open)
    torso_lean: float     # angle in radians between the hip->shoulder line and the vertical axis (higher = more inclined)
    kick_lateral: float   # summed horizontal offset of both ankles from the hip midpoint / shoulder width
    rot_proxy: float      # absolute change of the shoulder-line angle since the previous pose frame, in radians (higher = faster)

def compute_frame_metrics(lms, t: float, prev_state: Optional[Dict]=None):
    """Derive the per-frame technique proxies from one pose's landmarks.

    Args:
        lms: indexable landmark sequence whose items expose ``.x`` and ``.y``.
        t: timestamp stored unchanged in the returned ``FrameMetrics``.
        prev_state: the state dict returned for the previous pose frame, or
            ``None``/empty for the first one.

    Returns:
        ``(metrics, state)``: the ``FrameMetrics`` for this frame and a dict
        holding this frame's ``"shoulder_angle"``, to be passed back in as
        ``prev_state`` on the next call.
    """
    nose_pt = lm_xy(lms, NOSE)
    sh_l, sh_r = lm_xy(lms, L_SHOULDER), lm_xy(lms, R_SHOULDER)
    el_l, el_r = lm_xy(lms, L_ELBOW), lm_xy(lms, R_ELBOW)
    wr_l, wr_r = lm_xy(lms, L_WRIST), lm_xy(lms, R_WRIST)
    hip_l, hip_r = lm_xy(lms, L_HIP), lm_xy(lms, R_HIP)
    ank_l, ank_r = lm_xy(lms, L_ANKLE), lm_xy(lms, R_ANKLE)

    sh_center = (sh_l + sh_r) / 2.0
    hip_center = (hip_l + hip_r) / 2.0

    # Shoulder width is the unit of length for every ratio below; it is
    # floored so the divisions stay finite when the shoulders overlap.
    unit = dist(sh_l, sh_r)
    if not unit > 1e-6:
        unit = 1e-6

    def mean_gap(p1, q1, p2, q2):
        # Average of the two point-to-point distances.
        return (dist(p1, q1) + dist(p2, q2)) / 2.0

    # 1) Head drop: nose below the shoulder midpoint (image y grows downward).
    head_drop = float((nose_pt[1] - sh_center[1]) / unit)

    # 2) Elbow tuck: how far the elbows sit from the shoulder midpoint.
    elbow_tuck = float(mean_gap(el_l, sh_center, el_r, sh_center) / unit)

    # 3) Arm open: how far each wrist is from its own shoulder.
    arm_open = float(mean_gap(wr_l, sh_l, wr_r, sh_r) / unit)

    # 4) Torso lean: angle between the hip->shoulder line and the vertical.
    #    abs() folds "pointing up" and "pointing down" onto the same angle.
    spine = sh_center - hip_center
    spine_len = np.linalg.norm(spine) + 1e-6
    up_axis = np.array([0.0, -1.0], dtype=np.float32)
    alignment = abs(np.dot(spine / spine_len, up_axis))
    torso_lean = float(np.arccos(np.clip(alignment, 0, 1)))  # radians

    # 5) Kick lateral: horizontal offset of both ankles from the hip midpoint.
    ankle_offsets = abs(ank_l[0] - hip_center[0]) + abs(ank_r[0] - hip_center[0])
    kick_lateral = float(ankle_offsets / unit)

    # 6) Rotation: change of the shoulder-line angle since the previous call,
    #    wrapped to [-pi, pi) before taking the magnitude.
    across = sh_r - sh_l
    shoulder_angle = float(np.arctan2(across[1], across[0]))
    rot_proxy = 0.0
    if prev_state and "shoulder_angle" in prev_state:
        delta = shoulder_angle - prev_state["shoulder_angle"]
        delta = (delta + np.pi) % (2*np.pi) - np.pi
        rot_proxy = abs(float(delta))

    metrics = FrameMetrics(
        t=t,
        head_drop=head_drop,
        elbow_tuck=elbow_tuck,
        arm_open=arm_open,
        torso_lean=torso_lean,
        kick_lateral=kick_lateral,
        rot_proxy=rot_proxy,
    )
    return metrics, {"shoulder_angle": shoulder_angle}

# ========= 4) Read video and write annotated output =========
# Every frame is run through the landmarker. Frames with a pose get metrics
# and an overlay; frames without one are copied through unchanged so the
# output keeps the input's length and timing.
cap = cv2.VideoCapture(INPUT_VIDEO_PATH)
writer = None

rows: List[dict] = []
prev_state = None
frame_idx = 0
pose_frames = 0

try:
    if not cap.isOpened():
        raise FileNotFoundError(f"Could not open video: {INPUT_VIDEO_PATH}")

    # Fall back to 30 FPS when OpenCV reports 0 for the property.
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    writer = cv2.VideoWriter(OUTPUT_VIDEO_PATH, fourcc, fps, (w, h))
    # A writer that failed to open accepts write() calls silently, which would
    # end with "Saved: ..." printed for a file that was never produced.
    if not writer.isOpened():
        raise RuntimeError(f"Could not open video writer: {OUTPUT_VIDEO_PATH}")

    while True:
        ok, frame_bgr = cap.read()
        if not ok:
            break

        # OpenCV decodes to BGR; the landmarker is fed an SRGB image.
        frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame_rgb)

        # VIDEO mode takes a per-frame timestamp in milliseconds.
        t_ms = int((frame_idx / fps) * 1000)
        result = landmarker.detect_for_video(mp_image, t_ms)

        if result.pose_landmarks:
            pose_frames += 1
            lms = result.pose_landmarks[0]
            t = frame_idx / fps
            # prev_state only advances on pose frames, so rot_proxy compares
            # against the last frame in which a pose was found.
            m, prev_state = compute_frame_metrics(lms, t, prev_state)
            rows.append(m.__dict__)

            annotated_rgb = draw_landmarks_on_image(frame_rgb, result)
            annotated_bgr = cv2.cvtColor(annotated_rgb, cv2.COLOR_RGB2BGR)
            writer.write(annotated_bgr)
        else:
            # no pose -> write original frame
            writer.write(frame_bgr)

        frame_idx += 1
finally:
    # Release the native handles even when a frame fails mid-run; otherwise
    # the input stays locked and the output container is left unfinalised.
    cap.release()
    if writer is not None:
        writer.release()
    # This cell creates a fresh landmarker on every run, so the one used here
    # is finished with and its native resources can be freed.
    landmarker.close()

df = pd.DataFrame(rows)
df.to_csv(OUTPUT_METRICS, index=False)

print("Done.")
print("Frames:", frame_idx, "| Frames with pose:", pose_frames)
print("Saved:", OUTPUT_VIDEO_PATH)
print("Saved:", OUTPUT_METRICS)
df.head()


Done.
Frames: 116 | Frames with pose: 116
Saved: Double360AttemptAnnotated.mp4
Saved: metrics.csv


Unnamed: 0,t,head_drop,elbow_tuck,arm_open,torso_lean,kick_lateral,rot_proxy
0,0.0,-0.203572,0.76866,0.522605,0.042786,0.890464,0.0
1,0.033333,-0.234393,0.751195,0.572096,0.037701,0.910758,0.008092
2,0.066667,-0.239692,0.736794,0.584711,0.04657,0.94638,0.032601
3,0.1,-0.234794,0.711428,0.518487,0.050126,0.955768,0.016159
4,0.133333,-0.214933,0.709593,0.476353,0.075245,0.908793,0.014357


In [106]:
# Summary table (ES) + coaching cues (ES), each cue with its numeric reason
import numpy as np
import pandas as pd
from IPython.display import display

def pct_over(arr, thr):
    """Fraction of the entries of ``arr`` strictly greater than ``thr``.

    An empty ``arr`` gives 0.0.
    """
    if not len(arr):
        return 0.0
    above = arr > thr
    return float(np.mean(above))

def insights_from_metrics(df: pd.DataFrame):
    """Summarise per-frame metrics into a Spanish table and coaching cues.

    Args:
        df: one row per pose frame with the columns ``head_drop``,
            ``elbow_tuck``, ``arm_open``, ``torso_lean`` (radians),
            ``kick_lateral`` and ``rot_proxy``. ``None`` or an empty frame
            yields a single "no poses" row and a single cue.

    Returns:
        ``(tabla, cues)``. ``tabla`` is a DataFrame with the columns
        ``Métrica``, ``Descripción (ES)``, ``Tu valor`` and ``Recomendado``,
        one row per metric. ``cues`` is a non-empty list of Spanish strings.

    Raises:
        KeyError: if ``df`` is non-empty but lacks one of the metric columns.
    """
    if df is None or df.empty:
        tabla = pd.DataFrame([{
            "Métrica": "-",
            "Descripción (ES)": "No se detectaron poses. Verifica que el cuerpo completo sea visible y haya buena iluminación.",
            "Tu valor": "-",
            "Recomendado": "-"
        }])
        cues = ["No se detectaron poses. Asegúrate de que el cuerpo completo sea visible y el video no esté oscuro/borroso."]
        return tabla, cues

    # -----------------------------
    # Reference ranges, labels and cue texts (Spanish), one entry per metric.
    #   "max"  -> upper bound; a cue fires when more than "frac" of the frames
    #             exceed it.
    #   "min"  -> lower bound; a cue fires when the mean falls below it.
    # Insertion order is the row order of the table and the order of the cues.
    # -----------------------------
    REF = {
        "head_drop": {
            "name_es": "Caída de cabeza",
            "desc_es": "Indica si bajas la cabeza (mirar al piso) durante el movimiento.",
            "max": 0.10,
            "frac": 0.35,
            "flag_es": "Cabeza hacia abajo",
            "cue_es": "mentón neutro y mirada al frente durante el vuelo."
        },
        "elbow_tuck": {
            "name_es": "Codos recogidos",
            "desc_es": "Distancia de codos al torso (más bajo = más cerrado/compacto).",
            "max": 0.55,
            "frac": 0.40,
            "flag_es": "Codos no recogidos",
            "cue_es": "pega codos a las costillas y manténlos ahí hasta casi terminar la rotación."
        },
        "arm_open": {
            "name_es": "Apertura de brazos",
            "desc_es": "Qué tan abiertos están los brazos (más bajo = más cerrado).",
            "max": 0.85,
            "frac": 0.45,
            "flag_es": "Apertura temprana de brazos",
            "cue_es": "mantente compacto más tiempo; abre solo para preparar el aterrizaje."
        },
        "torso_lean": {
            "name_es": "Inclinación del torso",
            "desc_es": "Ángulo del torso respecto a la vertical (más alto = más inclinado).",
            "max": 0.60,  # rad ≈ 34°
            "frac": 0.30,
            "flag_es": "Torso muy inclinado",
            "cue_es": "aprieta el core, costillas abajo y no lances el pecho al despegar."
        },
        "kick_lateral": {
            "name_es": "Patada lateral",
            "desc_es": "Cuánto se van las piernas hacia los lados (más bajo = más control).",
            "max": 1.00,
            "frac": 0.35,
            "flag_es": "Patada muy lateral",
            "cue_es": "dirige la patada más hacia atrás y controla caderas (sin abrirte hacia los lados)."
        },
        "rot_proxy": {
            "name_es": "Velocidad de rotación",
            "desc_es": "Qué tan rápido rotas en el aire (más alto = mejor rotación).",
            "min": 0.02,
            "flag_es": "Rotación lenta",
            "cue_es": "inicia antes el snap de hombros+caderas y cierra brazos rápido al despegar."
        }
    }

    # Per-frame values and their means, keyed like REF.
    values = {key: df[key].to_numpy(np.float32) for key in REF}
    means = {key: np.mean(arr) for key, arr in values.items()}

    def _shown(key, value, digits=2):
        # torso_lean is stored in radians but shown to the user in degrees.
        if key == "torso_lean":
            return f"{np.degrees(value):.1f}°"
        return f"{value:.{digits}f}"

    # -----------------------------
    # Table: metric, short description, mean value, recommended bound
    # -----------------------------
    table_rows = []
    for key, ref in REF.items():
        lower_bound = "min" in ref
        digits = 3 if lower_bound else 2
        bound = ref["min"] if lower_bound else ref["max"]
        sign = "≥" if lower_bound else "≤"
        if key == "torso_lean":
            your_value = _shown(key, means[key])
        else:
            your_value = round(float(means[key]), digits)
        table_rows.append({
            "Métrica": ref["name_es"],
            "Descripción (ES)": ref["desc_es"],
            "Tu valor": your_value,
            "Recomendado": f"{sign} {_shown(key, bound, digits)}"
        })
    tabla = pd.DataFrame(table_rows)

    # -----------------------------
    # Cues with reasons (Spanish)
    # -----------------------------
    cues = []
    for key, ref in REF.items():
        if "max" in ref:
            share = pct_over(values[key], ref["max"])
            if share > ref["frac"]:
                # The trigger is the share of frames over the bound, not the
                # mean, so the reason states that share; the mean alone can
                # sit on the "good" side of the bound while the cue fires.
                cues.append(
                    f"❗ **{ref['flag_es']}**\n"
                    f"Razón: el {share:.0%} de los frames supera el umbral; "
                    f"tu promedio = {_shown(key, means[key])} "
                    f"(recomendado ≤ {_shown(key, ref['max'])}).\n"
                    f"Cue: {ref['cue_es']}"
                )
        elif float(means[key]) < ref["min"]:
            cues.append(
                f"❗ **{ref['flag_es']}**\n"
                f"Razón: tu promedio = {_shown(key, means[key], 3)} "
                f"(recomendado ≥ {_shown(key, ref['min'], 3)}).\n"
                f"Cue: {ref['cue_es']}"
            )

    if not cues:
        cues.append(
            "✅ **Sin banderas mayores**\n"
            "Tus promedios están dentro de los rangos recomendados. "
            "Siguiente paso: análisis por fases (despegue/vuelo/aterrizaje) para cues más precisos."
        )

    return tabla, cues


# ---- Run: build the summary, show the table, then print the cues ----
tabla, cues = insights_from_metrics(df)

display(tabla)
# Heading and cue list go out in one call; sep="\n" reproduces the line break
# that a separate print() of the heading would have emitted.
print("\n--- CUES ---\n", "\n\n".join(cues), sep="\n")


Unnamed: 0,M√©trica,Descripci√≥n (ES),Tu valor,Recomendado
0,Ca√≠da de cabeza,Indica si bajas la cabeza (mirar al piso) dura...,-0.01,‚â§ 0.10
1,Codos recogidos,Distancia de codos al torso (m√°s bajo = m√°s ce...,1.36,‚â§ 0.55
2,Apertura de brazos,Qu√© tan abiertos est√°n los brazos (m√°s bajo = ...,1.76,‚â§ 0.85
3,Inclinaci√≥n del torso,√Ångulo del torso respecto a la vertical (m√°s a...,17.6¬∞,‚â§ 34.4¬∞
4,Patada lateral,Cu√°nto se van las piernas hacia los lados (m√°s...,1.73,‚â§ 1.00
5,Velocidad de rotaci√≥n,Qu√© tan r√°pido rotas en el aire (m√°s alto = me...,0.14,‚â• 0.020



--- CUES ---

‚ùó **Codos no recogidos**
Raz√≥n: tu promedio = 1.36 (recomendado ‚â§ 0.55).
Cue: pega codos a las costillas y mant√©nlos ah√≠ hasta casi terminar la rotaci√≥n.
