In [2]:
import os
import json
import pickle
import numpy as np
import pandas as pd
from tqdm import tqdm
from scipy import stats
import matplotlib.pyplot as plt

# from utils import compute_tempo as ctempo
from utils.anchor_io import *

In [3]:
# ---------------------------------------------------------------------------
# Configuration: reference tempi, result containers, analysis parameters, paths
# ---------------------------------------------------------------------------

# Reference tempo table; it is indexed by the `music_id` token of each filename
# when the result rows are assembled.
with open("music_id_tempo.json", "r") as file:
    aist_tempo = json.load(file)

# One result table (and later one output pickle) is produced per segment key.
segment_keys = ["torso_y",
                # pairwise sums of single-axis onsets
                "both_hand_x", "both_hand_y", "both_foot_x", "both_foot_y",
                "lefthand_xy", "righthand_xy", "leftfoot_xy", "rightfoot_xy",
                # single marker, single axis
                "left_hand_x", "right_hand_x", "left_hand_y", "right_hand_y",
                "left_foot_x", "right_foot_x", "left_foot_y", "right_foot_y",

                # second-level combinations of the pairwise sums
                "bothhand_x_bothfoot_x", "bothhand_y_bothfoot_y",
                "lefthand_xy_righthand_xy", "leftfoot_xy_rightfoot_xy",
                "bothhand_x_bothhand_y", "bothfoot_x_bothfoot_y",

                # resultant-based onsets
                "both_hand_resultant", "both_foot_resultant", "left_hand_resultant",
                "right_hand_resultant", "left_foot_resultant", "right_foot_resultant"]

# Column-oriented accumulator: result[segment][column] is a list with one entry
# per processed file. The comprehension builds fresh lists for every segment.
result = { key: {
    "filename": [],
    "dance_genre": [],
    "situation": [],
    "camera_id": [],
    "dancer_id": [],
    "music_id": [],
    "choreo_id": [],
    "music_tempo": [],
    "estimated_bpm_per_window": [],
    "magnitude_per_window": [],
    "bpm_avg": [],
    "bpm_mode": [],
    "bpm_median": [],
} for key in segment_keys }

# Tempogram windowing, expressed in seconds and converted to frames.
fps = 60                            # frame rate handed to the tempo routines
w_sec = 5                           # analysis window length in seconds
h_sec = w_sec/2                     # hop is half a window (float; also used in output names)
window_size = int(fps*w_sec)
hop_size = int(fps*h_sec)

# Candidate tempi: integers from a up to, but not including, b.
a = 45
b = 140
tempi_range = np.arange(a,b,1)
anchor_type = "anchor_zero"
mode = "uni"

output_dir = "tempo_estimation_output"
create_output_dir(output_dir, f"tempo_{a}_{b}")

save_dir = os.path.join(output_dir, f"tempo_{a}_{b}/")


aist2d_path = "./aist_dataset/aist_annotation/keypoints2d"
# os.listdir returns entries in arbitrary order; sort so that the processing
# order (and therefore the row order of every saved table) is reproducible.
aist_filelist = sorted(os.listdir(aist2d_path))

In [None]:
# Legacy inline version of the per-file tempo-estimation loop. It fills the
# notebook-level `result` tables in place, so re-running this cell without
# re-running the configuration cell appends duplicate rows.

# The module-level import of `ctempo` is commented out in the imports cell, so
# on a fresh kernel every `ctempo.*` call below raises NameError unless a
# star-import happens to provide the name. Import it here only when missing.
# NOTE(review): confirm that `utils.compute_tempo` is still the right module.
try:
    ctempo
except NameError:
    from utils import compute_tempo as ctempo

# Loop-invariant settings (hoisted out of the per-file loop).
key = 'sensor_onsets'       #   sensor_abs_pos_filtered
thres = 0.2     # time threshold
key1 = 'resultant_onsets'


def _merge_onsets(first, second):
    """Add two onset signals and filter the sum with the shared time threshold."""
    return ctempo.filter_dir_onsets_by_threshold((first + second), threshold_s=thres, fps=fps)


count = 0
for filename in tqdm(aist_filelist):

    # Filename tokens: genre_situation_camera_dancer_music_choreo.pkl
    file_info = filename.split("_")
    dance_genre = file_info[0]
    situation = file_info[1]
    camera_id = file_info[2]
    dancer_id = file_info[3]
    music_id = file_info[4]
    # str.strip(".pkl") strips a *set of characters* from both ends rather than
    # the suffix, so drop the extension explicitly instead.
    choreo_id = os.path.splitext(file_info[5])[0]

    paths = get_all_anchor_paths(anchor_type, mode, filename)

    # Skip files for which no anchor data has been extracted.
    if not os.path.exists(paths["markers"]["left_wrist"]["ax0"]):
        continue

    torso_y = load_pickle(paths["com"]["com_torso"]["ax1"])

    left_hand_x = load_pickle(paths["markers"]["left_wrist"]["ax0"])
    left_hand_y = load_pickle(paths["markers"]["left_wrist"]["ax1"])

    right_hand_x = load_pickle(paths["markers"]["right_wrist"]["ax0"])
    right_hand_y = load_pickle(paths["markers"]["right_wrist"]["ax1"])

    left_foot_x = load_pickle(paths["markers"]["left_ankle"]["ax0"])
    left_foot_y = load_pickle(paths["markers"]["left_ankle"]["ax1"])

    right_foot_x = load_pickle(paths["markers"]["right_ankle"]["ax0"])
    right_foot_y = load_pickle(paths["markers"]["right_ankle"]["ax1"])

    novelty_length = left_hand_x['raw_signal'].shape[0]

    # First-level combinations: sum two single-axis onset signals, then filter.
    bothhand_x = _merge_onsets(left_hand_x[key], right_hand_x[key])
    bothhand_y = _merge_onsets(left_hand_y[key], right_hand_y[key])

    bothfoot_x = _merge_onsets(left_foot_x[key], right_foot_x[key])
    bothfoot_y = _merge_onsets(left_foot_y[key], right_foot_y[key])

    lefthand_xy = _merge_onsets(left_hand_x[key], left_hand_y[key])
    righthand_xy = _merge_onsets(right_hand_x[key], right_hand_y[key])

    leftfoot_xy = _merge_onsets(left_foot_x[key], left_foot_y[key])
    rightfoot_xy = _merge_onsets(right_foot_x[key], right_foot_y[key])

    # Second-level combinations: built from the already-filtered sums above.
    bothhand_x_bothfoot_x = _merge_onsets(bothhand_x, bothfoot_x)
    bothhand_y_bothfoot_y = _merge_onsets(bothhand_y, bothfoot_y)

    lefthand_xy_righthand_xy = _merge_onsets(lefthand_xy, righthand_xy)
    leftfoot_xy_rightfoot_xy = _merge_onsets(leftfoot_xy, rightfoot_xy)

    bothhand_x_bothhand_y = _merge_onsets(bothhand_x, bothhand_y)
    bothfoot_x_bothfoot_y = _merge_onsets(bothfoot_x, bothfoot_y)

    # Resultant part
    left_hand_resultant = load_pickle(paths["markers"]["left_wrist"]["resultant"])
    right_hand_resultant = load_pickle(paths["markers"]["right_wrist"]["resultant"])

    left_foot_resultant = load_pickle(paths["markers"]["left_ankle"]["resultant"])
    right_foot_resultant = load_pickle(paths["markers"]["right_ankle"]["resultant"])

    both_hand_resultant = _merge_onsets(left_hand_resultant[key1], right_hand_resultant[key1])
    both_foot_resultant = _merge_onsets(left_foot_resultant[key1], right_foot_resultant[key1])

    # Onset signal per segment; the keys must match `segment_keys`, because
    # they index into `result`.
    segment_ax = {
        "torso_y": torso_y[key],
        "both_hand_x": bothhand_x, "both_hand_y": bothhand_y,
        "both_foot_x": bothfoot_x, "both_foot_y": bothfoot_y,
        "lefthand_xy": lefthand_xy, "righthand_xy": righthand_xy,
        "leftfoot_xy": leftfoot_xy, "rightfoot_xy": rightfoot_xy,

        "left_hand_x": left_hand_x[key], "right_hand_x": right_hand_x[key],
        "left_hand_y": left_hand_y[key], "right_hand_y": right_hand_y[key],

        "left_foot_x": left_foot_x[key], "right_foot_x": right_foot_x[key],
        "left_foot_y": left_foot_y[key], "right_foot_y": right_foot_y[key],

        "bothhand_x_bothfoot_x": bothhand_x_bothfoot_x, "bothhand_y_bothfoot_y": bothhand_y_bothfoot_y,
        "lefthand_xy_righthand_xy": lefthand_xy_righthand_xy, "leftfoot_xy_rightfoot_xy": leftfoot_xy_rightfoot_xy,
        "bothhand_x_bothhand_y": bothhand_x_bothhand_y, "bothfoot_x_bothfoot_y": bothfoot_x_bothfoot_y,

        "both_hand_resultant": both_hand_resultant, "both_foot_resultant": both_foot_resultant,
        "left_hand_resultant": left_hand_resultant[key1], "right_hand_resultant": right_hand_resultant[key1],
        "left_foot_resultant": left_foot_resultant[key1], "right_foot_resultant": right_foot_resultant[key1],
    }

    for seg_key, seg in segment_ax.items():

        sensor_onsets = ctempo.binary_to_peak(seg, peak_duration=0.1)

        tempogram_ab, tempogram_raw, time_axis_seconds, tempo_axis_bpm = ctempo.compute_tempogram(
            sensor_onsets, fps,
            window_length=window_size, hop_size=hop_size, tempi=tempi_range)

        tempo_data_info = ctempo.dance_tempo_estimation_single(
            tempogram_ab[0], tempogram_raw[0], fps,
            novelty_length, window_size, hop_size, tempi_range)

        estimated_bpm_per_window = tempo_data_info["bpm_arr"]
        magnitude_per_window = tempo_data_info["mag_arr"]

        # Summary statistics over the per-window estimates.
        tempo_avg = np.round(np.average(estimated_bpm_per_window), 2)     # mean
        tempo_mode = stats.mode(estimated_bpm_per_window.flatten())[0]
        tempo_median = np.median(estimated_bpm_per_window.flatten())

        # Append one entry to every column of this segment's table.
        row = {
            "filename": os.path.splitext(filename)[0],
            "dance_genre": dance_genre,
            "situation": situation,
            "camera_id": camera_id,
            "dancer_id": dancer_id,
            "music_id": music_id,
            "choreo_id": choreo_id,
            "music_tempo": aist_tempo[music_id],
            "estimated_bpm_per_window": estimated_bpm_per_window,
            "magnitude_per_window": magnitude_per_window,
            "bpm_avg": tempo_avg,
            "bpm_mode": tempo_mode,
            "bpm_median": tempo_median,
        }
        for column, value in row.items():
            result[seg_key][column].append(value)

    count += 1
print("total processed:",count)

# Persist one DataFrame per segment under <save_dir>/<anchor_type>/.
for seg_key in segment_keys:

    fname1 = f"{anchor_type}/{seg_key}_{mode}_W{w_sec}_H{h_sec}_{a}_{b}.pkl"
    fpath1 = os.path.join(save_dir, fname1)
    # to_pickle does not create missing parent directories.
    os.makedirs(os.path.dirname(fpath1), exist_ok=True)
    df_seg = pd.DataFrame(result[seg_key])
    df_seg.to_pickle(fpath1)

In [4]:
from utils.dance_tempo_pipeline import *

# -------------------------------------------------------------
# 6. Main processing loop
# -------------------------------------------------------------
def process_all_files(aist_filelist, anchor_type, mode, fps, window_size, hop_size, tempi_range, save_dir,
                      music_tempo=None, thres=0.2):
    """Estimate tempo for every file and pickle one DataFrame per segment.

    For each filename the anchor paths are resolved; files whose left-wrist
    ``ax0`` anchor file does not exist are skipped. For the remaining files the
    marker data is loaded, combined, and passed to
    ``compute_tempo_for_segments``. One row per file is collected per segment
    and written to ``<save_dir>/<anchor_type>/<segment>_<mode>.pkl``.

    Parameters
    ----------
    aist_filelist : iterable of str
        Filenames to process.
    anchor_type, mode : str
        Forwarded to ``get_all_anchor_paths``; also used in the output paths.
    fps, window_size, hop_size, tempi_range
        Forwarded unchanged to the helper functions.
    save_dir : str
        Root directory for the per-segment pickles.
    music_tempo : mapping, optional
        Reference tempo indexed by ``meta["music_id"]``. Defaults to the
        notebook-level ``aist_tempo`` mapping, which was previously read as an
        implicit global.
    thres : float, optional
        Threshold forwarded to ``compute_combinations`` and ``load_resultant``
        (default 0.2, the value that used to be hard-coded).

    Returns
    -------
    None
        Results are written to disk; the processed-file count is printed.

    Raises
    ------
    KeyError
        If a file's ``music_id`` is missing from ``music_tempo``.
    """
    if music_tempo is None:
        # Backward-compatible fallback to the mapping loaded in the config cell.
        music_tempo = aist_tempo

    columns = (
        "filename", "dance_genre", "situation", "camera_id", "dancer_id",
        "music_id", "choreo_id", "music_tempo", "estimated_bpm_per_window",
        "magnitude_per_window", "bpm_avg", "bpm_mode", "bpm_median",
    )
    result = {}
    count = 0

    for filename in tqdm(aist_filelist):
        paths = get_all_anchor_paths(anchor_type, mode, filename)
        if not os.path.exists(paths["markers"]["left_wrist"]["ax0"]):
            continue

        meta = parse_filename(filename)
        data = load_marker_data(paths)
        combined = compute_combinations(data, fps, thres=thres)
        resultants = load_resultant(paths, thres=thres, fps=fps)

        # On a key collision the resultant entry replaces the combined one.
        segment_ax = {**combined, **resultants}
        novelty_length = data["left_hand_x"].shape[0]

        tempo_data = compute_tempo_for_segments(segment_ax, fps, window_size, hop_size, tempi_range, novelty_length)

        for seg_key, info in tempo_data.items():
            # Lazily create the column lists the first time a segment is seen.
            if seg_key not in result:
                result[seg_key] = {k: [] for k in columns}
            table = result[seg_key]

            table["filename"].append(filename.replace(".pkl", ""))
            # meta keys must be a subset of `columns`; anything else raises KeyError.
            for k, v in meta.items():
                table[k].append(v)
            table["music_tempo"].append(music_tempo[meta["music_id"]])
            table["estimated_bpm_per_window"].append(info["bpm_arr"])
            table["magnitude_per_window"].append(info["mag_arr"])
            table["bpm_avg"].append(info["bpm_avg"])
            table["bpm_mode"].append(info["bpm_mode"])
            table["bpm_median"].append(info["bpm_median"])

        count += 1

    print("Total processed:", count)
    # Save results
    for seg_key, df_data in result.items():
        df_seg = pd.DataFrame(df_data)
        fname = f"{anchor_type}/{seg_key}_{mode}.pkl"
        out_path = os.path.join(save_dir, fname)
        # to_pickle does not create missing parent directories.
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
        df_seg.to_pickle(out_path)

In [5]:
process_all_files(aist_filelist, anchor_type, mode, fps, window_size, hop_size, tempi_range, save_dir)

100%|██████████| 1510/1510 [06:42<00:00,  3.75it/s]


Total processed: 1341
