# Tutorial: Dance Tempo Estimation

This notebook demonstrates multi-anchor tempo estimation by:
1. Loading COCO-format keypoint data
2. Extracting left/right hands, feet, and torso
3. Preprocessing signals (detrend + normalize)
4. Combining signals (both_hands_y, both_feet_y, torso_y)
5. Running tempo estimation


In [1]:
import os
import sys
import json
import random
import numpy as np
from pathlib import Path

# Ensure the package root is importable (repo root is parent of this notebook dir)
# notebook_dir = Path().resolve()
# parent_dir = notebook_dir.parent
# if str(parent_dir) not in sys.path:
#     sys.path.insert(0, str(parent_dir))

# Seed Python's `random` module so the random sample-file pick made when
# loading keypoints is repeatable across runs.
random.seed(42)

# Imports from DanceIR
# NOTE(review): MARKER_DICT is imported but not referenced in the cells visible here.
from danceir.io import KeypointLoader
from danceir.pipelines import estimate_tempo_from_keypoints
from danceir.config import MARKER_DICT
from danceir.evaluation import dance_tempo_evaluation

# Constants
# Passed as `fps` to estimate_tempo_from_keypoints for the keypoint-file demo
# (presumably the frame rate of the keypoint sequences, in frames per second).
FPS = 60.0


## Load Keypoint Data


In [2]:
# Path to keypoints directory
keypoints_dir = "./keypoints2d"

# Optional mapping from music_id -> tempo (if available)
mapping_path = Path("music_id.json")

# Bind both names up front so the checks below work on a fresh kernel even when
# the mapping file is missing, and so a re-run never reports a stale tempo that
# belonged to a previously selected file.
music_id_to_tempo = {}
tempo = None

if mapping_path.exists():
    with open(mapping_path, "r") as f:
        music_id_to_tempo = json.load(f)

# Pick a random keypoint file.
# glob() yields entries in arbitrary (filesystem-dependent) order, so sort first;
# otherwise the seeded random index would not select the same file everywhere.
sample_files = sorted(Path(keypoints_dir).glob("*.pkl"))
if not sample_files:
    raise FileNotFoundError(f"No .pkl files found in {keypoints_dir}. Place COCO-format keypoints there.")

rand_idx = random.randint(0, len(sample_files) - 1)
sample_file = sample_files[rand_idx]
print(f"Loading: {sample_file.name}")

# Derive music_id from filename (e.g., *_mXX_* pattern): it is the second-to-last
# underscore-separated token. Names without an underscore have no such token.
name_parts = sample_file.name.split("_")
music_id = name_parts[-2] if len(name_parts) >= 2 else None

# Lookup reference tempo if mapping is provided
if music_id_to_tempo and music_id in music_id_to_tempo:
    tempo = music_id_to_tempo[music_id]

# Load keypoint data
# NOTE(review): the loader presumably unpickles the file; only load .pkl files
# from trusted sources, since unpickling can execute arbitrary code.
loader = KeypointLoader()
keypoints_data = loader.load_keypoints_pickle(sample_file)
keypoints_2d = loader.extract_all_keypoints_2d(keypoints_data)

print(f"Extracted keypoints shape: {keypoints_2d.shape}")
if tempo is not None:
    print(f"Reference tempo from mapping: {tempo} BPM")

Loading: gKR_sBM_cAll_d29_mKR1_ch08.pkl
Extracted keypoints shape: (640, 17, 2)
Reference tempo from mapping: 90 BPM


## Dance Tempo Estimation


In [3]:
# Marker groups: each tuple lists the keypoint indices that are combined into
# one anchor signal. The torso is not listed here; it is added via use_com=True.
marker_groups = [
    (9, 10),    # left_wrist + right_wrist (both hands)
    (15, 16),   # left_ankle + right_ankle (both feet)
]

# Run the complete pipeline on the loaded 2D keypoints
result = estimate_tempo_from_keypoints(
    keypoints_2d,
    fps=FPS,
    axis='y',
    marker_groups=marker_groups,
    anchor_method='zero_velocity',
    height_thres=0.1,
    use_com=True,
    com_type='torso',
    com_axis=None,  # Inherits from axis='y'
)

# Report the combined-anchor estimate, plus the reference tempo when one exists
tempo_info = result['combined_anchors']
print("\nResults:")
if globals().get('tempo') is not None:
    print("  - Reference tempo:", tempo)
print(
    f"  - Global tempo: {tempo_info['gtempo']} BPM",
    f"  - Best segment: {tempo_info['best_segment']}",
    sep="\n",
)


Results:
  - Reference tempo: 90
  - Global tempo: 91.8 BPM
  - Best segment: left_ankle_right_ankle


In [4]:
# Score the estimated global tempo against the reference tempo, when a reference
# was found for the selected file.
if 'tempo' in globals() and tempo is not None:
    # Use a dedicated name: rebinding `result` here would replace the pipeline
    # output dict from the estimation cell with an unrelated evaluation object.
    eval_result = dance_tempo_evaluation(tempo, tempo_info['gtempo'])
    print(f"Accuracy: {eval_result.accuracy}%")
    print(f"Mean DTS: {eval_result.mean_dts:.3f}")
else:
    print("Reference tempo not available; skipping evaluation.")

Accuracy: 100.0%
Mean DTS: 0.780


## Load video from YouTube

In [5]:
import yt_dlp  # pip install yt-dlp
from extract_2dpose import extract_pose_mediapipe


# Copy youtube video url of a dance video and paste it here
# Only one dancer in the video is supported
# NOTE: the last recorded run of this cell failed because this example video was
# removed by its uploader; replace the URL with an available video before running.
url = "https://www.youtube.com/shorts/-Z2TrmiOORg?feature=share"

# Single place for the download target so the cleanup and the downloader agree
video_path = "video.mp4"

# Remove a file left over from a previous run (presumably so an old download is
# not reused). isfile() avoids calling os.remove() on a directory of that name.
if os.path.isfile(video_path):
    os.remove(video_path)


ydl_opts = {"format": "mp4", "outtmpl": video_path}
try:
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
except yt_dlp.utils.DownloadError:
    # Give the reader an actionable hint, then re-raise the original error so the
    # following cells are not run against a missing video file.
    print(f"Could not download {url!r}. Set `url` to an available single-dancer video and re-run this cell.")
    raise


[youtube] Extracting URL: https://www.youtube.com/shorts/-Z2TrmiOORg?feature=share
[youtube] -Z2TrmiOORg: Downloading webpage
[youtube] -Z2TrmiOORg: Downloading android sdkless player API JSON
[youtube] -Z2TrmiOORg: Downloading tv client config
[youtube] -Z2TrmiOORg: Downloading player b75a8e80-main
[youtube] -Z2TrmiOORg: Downloading tv player API JSON
[youtube] -Z2TrmiOORg: Downloading web safari player API JSON


ERROR: [youtube] -Z2TrmiOORg: Video unavailable. This video has been removed by the uploader


DownloadError: ERROR: [youtube] -Z2TrmiOORg: Video unavailable. This video has been removed by the uploader

In [None]:
# Run pose extraction on the downloaded video; the call returns the pose array
# and a second value that is later passed to the tempo estimator as `fps`.
poses, pfps = extract_pose_mediapipe(
    "video.mp4",
    output_npy="pose_data.npy",
    display=False,
)

# Standardise the first two channels of the third axis: subtract their mean and
# divide by their standard deviation, both taken over axes 0 and 1. The small
# epsilon guards against division by zero.
mean = poses[:, :, :2].mean(axis=(0, 1))
std = poses[:, :, :2].std(axis=(0, 1))
poses[:, :, :2] = (poses[:, :, :2] - mean) / (std + 1e-8)

In [None]:
marker_groups = [(9, 10), (15, 16)]  # both_hands, both_feet
# Torso will be added via use_com=True

# Run the same pipeline on the video-derived poses, using the value returned
# by the pose extractor as `fps`.
result = estimate_tempo_from_keypoints(
    poses[:, :, :2],  # first two channels only
    marker_groups=marker_groups,
    axis='y',
    use_com=True,
    com_type='torso',
    com_axis=None,  # Inherits from axis='y'
    fps=pfps,
    anchor_method='zero_velocity',
    height_thres=0.1
)


tempo_info = result['combined_anchors']
print("\nResults:")
print(f"  - Global tempo: {tempo_info['gtempo']} BPM")
print(f"  - Best segment: {tempo_info['best_segment']}")
print("\nPer-anchor tempo estimates:")

# Look the segment names up once (with the original fallback list) and use them
# to label each anchor, so the per-anchor tempos can be matched to a segment.
segment_names = tempo_info.get('segment_names', ['segment_0', 'segment_1', 'segment_2'])
for i, anchor_info in enumerate(tempo_info['per_anchor_tempo_features']):
    # Fall back to a generic label if there are more anchors than names
    label = segment_names[i] if i < len(segment_names) else f"segment_{i}"
    print(f"  - Anchor {i} ({label}): {anchor_info['median_tempo']:.2f} BPM")