In [1]:
import os
import json
import pickle
import numpy as np
import pandas as pd
from tqdm import tqdm
from scipy import stats
import matplotlib.pyplot as plt

from compute_tempo import *
# from extract_dance_onsets import *
from rms_extract_dance_onsets import *


# from aist_pos1s_EsTempo import *
# coco={    
# 0: "nose", 1: "left_eye", 2: "right_eye", 3: "left_ear",4: "right_ear",5: "left_shoulder",
# 6: "right_shoulder",7: "left_elbow",8: "right_elbow",9: "left_wrist",10: "right_wrist",
# 11: "left_hip",12: "right_hip",13: "left_knee",14: "right_knee",15: "left_ankle",16: "right_ankle",}  

def load_pickle(filepath):
    """Read a single pickled object from `filepath` and return it.

    NOTE(review): unpickling can execute arbitrary code, so only point this at
    files produced by this project.
    """
    with open(filepath, "rb") as handle:
        return pickle.load(handle)

def save_to_pickle(filepath, data):
    """Pickle `data` into the file at `filepath`, replacing any existing file."""
    with open(filepath, "wb") as sink:
        pickle.Pickler(sink).dump(data)
        
def create_onset_dir(tempo_dir):
    """Create the onset output tree below `tempo_dir`.

    For each metric folder ("pos" and "vel") this makes the folder itself and
    its "ax0", "ax1", "combination" and "resultant" sub-folders. Folders that
    already exist are left as they are.
    """
    for metric_name in ("pos", "vel"):
        metric_root = f"{tempo_dir}/{metric_name}"
        os.makedirs(metric_root, exist_ok=True)
        for leaf in ("ax0", "ax1", "combination", "resultant"):
            os.makedirs(f"{metric_root}/{leaf}", exist_ok=True)
        
# create_onset_dir("/itf-fi-ml/home/sagardu/aist_tempo_est/rms_extracted_body_onsets")

## March 11 Updates

## RMS Onset extraction

In [2]:
# Onset-extraction sweep.
# Each config lists the extraction modes, the keypoint ids to process
# (COCO numbering: 9/10 = left/right wrist, 15/16 = left/right ankle) and the
# metric(s) to extract. "sub_dir" is only a descriptive label and "a"/"b" are
# not consumed by the extraction call; both are kept so the configs keep
# their original shape.
config1 = {"sub_dir": ["hand"], "mode": ["zero_uni", "zero_bi"],
           "markerA_id": [9, 10], "a": 60, "b": 140, "metric": ["pos"]}
config2 = {"sub_dir": ["foot"], "mode": ["zero_uni", "zero_bi"],
           "markerA_id": [15, 16], "a": 60, "b": 140, "metric": ["pos"]}

configs = [config1, config2]

# Make sure the output tree exists so the cell also runs on a fresh checkout
# (idempotent: existing folders are kept).
create_onset_dir("./rms_extracted_body_onsets")

for cfg in configs:
    # The former loop over cfg["sub_dir"] was dropped: sub_dir is never passed
    # to extract_body_onsets, so it could only repeat identical calls.
    for mode in cfg["mode"]:
        for markerA_id in cfg["markerA_id"]:
            for metric in cfg["metric"]:
                savepath = f"./rms_extracted_body_onsets/{metric}"
                extract_body_onsets(mode, markerA_id, savepath, h_thres=0.1,
                                    vel_mode="on" if metric == "vel" else "off")

  0%|          | 0/1510 [00:00<?, ?it/s]

100%|██████████| 1510/1510 [01:09<00:00, 21.59it/s]
100%|██████████| 1510/1510 [00:32<00:00, 46.81it/s]
100%|██████████| 1510/1510 [00:33<00:00, 45.50it/s]
100%|██████████| 1510/1510 [00:32<00:00, 45.86it/s]
100%|██████████| 1510/1510 [00:33<00:00, 45.11it/s]
100%|██████████| 1510/1510 [00:32<00:00, 47.09it/s]
100%|██████████| 1510/1510 [00:31<00:00, 47.81it/s]
100%|██████████| 1510/1510 [00:32<00:00, 47.12it/s]


## Estimate Tempo - Discrete

In [2]:
# Lookup table read from JSON; it is indexed by music id further down to fill
# the "music_tempo" column.
json_filename = "music_id_tempo.json"
with open(json_filename, "r") as tempo_file:
    aist_tempo = json.load(tempo_file)
    
def create_dir(main_dir, tempo_dir):
    """Create the result folders for one tempo range below `main_dir`.

    Makes `<main_dir>/<tempo_dir>/pos`, `<main_dir>/<tempo_dir>/vel` and the
    same two folders under `<main_dir>/<tempo_dir>/tempo_data/`. Folders that
    already exist are kept.
    """
    for group in ("", "tempo_data/"):
        for metric_name in ("pos", "vel"):
            target = os.path.join(main_dir, f"{tempo_dir}/{group}{metric_name}")
            os.makedirs(target, exist_ok=True)
        
# Every onset stream that gets its own tempo estimate and its own result table.
segment_keys = ["both_hand_x", "both_hand_y", "both_foot_x", "both_foot_y",
                "lefthand_xy", "righthand_xy", "leftfoot_xy", "rightfoot_xy",
                "left_hand_x", "right_hand_x", "left_hand_y", "right_hand_y",
                "left_foot_x", "right_foot_x", "left_foot_y", "right_foot_y",

                "bothhand_x_bothfoot_x", "bothhand_y_bothfoot_y",
                "lefthand_xy_righthand_xy", "leftfoot_xy_rightfoot_xy",
                "bothhand_x_bothhand_y", "bothfoot_x_bothfoot_y",

                "both_hand_resultant", "both_foot_resultant", "left_hand_resultant",
                "right_hand_resultant", "left_foot_resultant", "right_foot_resultant"]

# Columns of each per-segment table; one list entry is appended per recording.
result_columns = ["filename", "dance_genre", "situation", "camera_id",
                  "dancer_id", "music_id", "choreo_id", "music_tempo",
                  "estimated_bpm_per_window", "magnitude_per_window",
                  "bpm_avg", "bpm_mode", "bpm_median"]
result = {key: {column: [] for column in result_columns} for key in segment_keys}

# Analysis window: w_sec seconds long, hopped by half a window, expressed in
# samples at `fps` samples per second.
fps = 60
w_sec = 5
h_sec = w_sec/2
window_size = int(fps*w_sec)
hop_size = int(fps*h_sec)

# Candidate tempi a, a+1, ..., b-1 (np.arange excludes the upper bound).
a = 60
b = 140
tempi_range = np.arange(a,b,1)
metric = "pos"
mode = "zero_uni"

# Results root. This used to be an absolute path on one machine while save_dir
# below was relative, so the folders were only created in the right place when
# the notebook ran from that exact directory. Both are now derived from the
# same relative root.
main_dir = "./saved_result_rms"
create_dir(main_dir, f"tempo_{a}_{b}")

save_dir = f"{main_dir}/tempo_{a}_{b}/"     # RMS
onset_dir = f"./rms_extracted_body_onsets/{metric}/"        # RMS
f_path = "./aist_dataset/aist_annotation/keypoints2d"
aist_filelist = os.listdir(f_path)


# Per-recording tempo estimation from discrete onsets.
# `count` ends up as the number of recordings that were actually processed.
count = 0
for filename in tqdm(aist_filelist):

    # Skip recordings without extracted onsets; the left-wrist ax0 file is used
    # as the marker for the whole set. Checking this first also keeps stray
    # directory entries from reaching the name parsing below.
    test_path = os.path.join(onset_dir, "ax0", f"left_wrist_{mode}_{filename}")
    if not os.path.exists(test_path):
        continue

    # Metadata is read from six "_"-separated fields of the file name.
    # os.path.splitext removes exactly the extension; the previous
    # str.strip(".pkl") strips any leading/trailing '.', 'p', 'k' or 'l'
    # characters and could eat into the name itself.
    file_stem = os.path.splitext(filename)[0]
    file_info = file_stem.split("_")
    dance_genre = file_info[0]
    situation = file_info[1]
    camera_id = file_info[2]
    dancer_id = file_info[3]
    music_id = file_info[4]
    choreo_id = file_info[5]

    # Per-marker onset dicts, one file per axis folder ("ax0" -> *_x, "ax1" -> *_y).
    left_hand_x  = load_pickle(os.path.join(onset_dir, "ax0", f"left_wrist_{mode}_{filename}"))
    left_hand_y  = load_pickle(os.path.join(onset_dir, "ax1", f"left_wrist_{mode}_{filename}"))

    right_hand_x = load_pickle(os.path.join(onset_dir, "ax0", f"right_wrist_{mode}_{filename}"))
    right_hand_y = load_pickle(os.path.join(onset_dir, "ax1", f"right_wrist_{mode}_{filename}"))

    left_foot_x  = load_pickle(os.path.join(onset_dir, "ax0", f"left_ankle_{mode}_{filename}"))
    left_foot_y  = load_pickle(os.path.join(onset_dir, "ax1", f"left_ankle_{mode}_{filename}"))

    right_foot_x = load_pickle(os.path.join(onset_dir, "ax0", f"right_ankle_{mode}_{filename}"))
    right_foot_y = load_pickle(os.path.join(onset_dir, "ax1", f"right_ankle_{mode}_{filename}"))

    # Length of the underlying signal, handed to the tempo estimator below.
    novelty_length = left_hand_x['raw_signal'].shape[0]

    key = 'sensor_onsets'       #   sensor_abs_pos_filtered
    thres = 0.2     # time threshold, passed on as threshold_s

    # Pairwise combinations: the two onset tracks are summed and the sum is
    # passed through filter_dir_onsets_by_threshold.
    bothhand_x = filter_dir_onsets_by_threshold((left_hand_x[key] + right_hand_x[key]), threshold_s=thres, fps=fps)
    bothhand_y = filter_dir_onsets_by_threshold((left_hand_y[key] + right_hand_y[key]), threshold_s=thres, fps=fps)

    bothfoot_x = filter_dir_onsets_by_threshold((left_foot_x[key] + right_foot_x[key]), threshold_s=thres, fps=fps)
    bothfoot_y = filter_dir_onsets_by_threshold((left_foot_y[key] + right_foot_y[key]), threshold_s=thres, fps=fps)

    lefthand_xy = filter_dir_onsets_by_threshold((left_hand_x[key] + left_hand_y[key]), threshold_s=thres, fps=fps)
    righthand_xy = filter_dir_onsets_by_threshold((right_hand_x[key] + right_hand_y[key]), threshold_s=thres, fps=fps)

    leftfoot_xy = filter_dir_onsets_by_threshold((left_foot_x[key] + left_foot_y[key]), threshold_s=thres, fps=fps)
    rightfoot_xy = filter_dir_onsets_by_threshold((right_foot_x[key] + right_foot_y[key]), threshold_s=thres, fps=fps)

    # Second-level combinations built from the already combined tracks.
    bothhand_x_bothfoot_x = filter_dir_onsets_by_threshold((bothhand_x + bothfoot_x), threshold_s=thres, fps=fps)
    bothhand_y_bothfoot_y = filter_dir_onsets_by_threshold((bothhand_y + bothfoot_y), threshold_s=thres, fps=fps)

    lefthand_xy_righthand_xy = filter_dir_onsets_by_threshold((lefthand_xy + righthand_xy), threshold_s=thres, fps=fps)
    leftfoot_xy_rightfoot_xy = filter_dir_onsets_by_threshold((leftfoot_xy + rightfoot_xy), threshold_s=thres, fps=fps)

    bothhand_x_bothhand_y = filter_dir_onsets_by_threshold((bothhand_x + bothhand_y), threshold_s=thres, fps=fps)
    bothfoot_x_bothfoot_y = filter_dir_onsets_by_threshold((bothfoot_x + bothfoot_y), threshold_s=thres, fps=fps)

    # Resultant part: onsets stored in the "resultant" folder.
    key1 = 'resultant_onsets'
    left_hand_resultant  = load_pickle(os.path.join(onset_dir, "resultant", f"left_wrist_{mode}_{filename}"))
    right_hand_resultant = load_pickle(os.path.join(onset_dir, "resultant", f"right_wrist_{mode}_{filename}"))

    left_foot_resultant  = load_pickle(os.path.join(onset_dir, "resultant", f"left_ankle_{mode}_{filename}"))
    right_foot_resultant = load_pickle(os.path.join(onset_dir, "resultant", f"right_ankle_{mode}_{filename}"))

    both_hand_resultant = filter_dir_onsets_by_threshold((left_hand_resultant[key1] + right_hand_resultant[key1]), threshold_s=thres, fps=fps)
    both_foot_resultant = filter_dir_onsets_by_threshold((left_foot_resultant[key1] + right_foot_resultant[key1]), threshold_s=thres, fps=fps)

    # One onset track per entry of segment_keys.
    segment_ax = {
        "both_hand_x": bothhand_x, "both_hand_y": bothhand_y, "both_foot_x": bothfoot_x, "both_foot_y": bothfoot_y,
        "lefthand_xy": lefthand_xy, "righthand_xy": righthand_xy, "leftfoot_xy": leftfoot_xy, "rightfoot_xy": rightfoot_xy,

        "left_hand_x": left_hand_x[key], "right_hand_x": right_hand_x[key],
        "left_hand_y": left_hand_y[key], "right_hand_y": right_hand_y[key],

        "left_foot_x": left_foot_x[key], "right_foot_x": right_foot_x[key],
        "left_foot_y": left_foot_y[key], "right_foot_y": right_foot_y[key],

        "bothhand_x_bothfoot_x": bothhand_x_bothfoot_x, "bothhand_y_bothfoot_y": bothhand_y_bothfoot_y,
        "lefthand_xy_righthand_xy": lefthand_xy_righthand_xy, "leftfoot_xy_rightfoot_xy": leftfoot_xy_rightfoot_xy,
        "bothhand_x_bothhand_y": bothhand_x_bothhand_y, "bothfoot_x_bothfoot_y": bothfoot_x_bothfoot_y,

        "both_hand_resultant": both_hand_resultant, "both_foot_resultant": both_foot_resultant,
        "left_hand_resultant": left_hand_resultant[key1], "right_hand_resultant": right_hand_resultant[key1],
        "left_foot_resultant": left_foot_resultant[key1], "right_foot_resultant": right_foot_resultant[key1],
    }

    # Estimator output per segment for the current recording only (reset for
    # every file).
    tempo_data = {}
    for seg_key, seg in segment_ax.items():

        sensor_onsets = binary_to_peak(seg, peak_duration=0.05)

        tempogram_ab, tempogram_raw, time_axis_seconds, tempo_axis_bpm = compute_tempogram(
            sensor_onsets, fps, window_length=window_size, hop_size=hop_size, tempi=tempi_range)

        tempo_data_maxmethod = dance_beat_tempo_estimation_maxmethod(
            tempogram_ab, tempogram_raw, fps, novelty_length, window_size, hop_size, tempi_range)

        tempo_data[seg_key] = tempo_data_maxmethod

        estimated_bpm_per_window = tempo_data_maxmethod["bpm_arr"]
        magnitude_per_window = tempo_data_maxmethod["mag_arr"]

        # Summary statistics over the per-window estimates.
        tempo_avg = np.round(np.average(estimated_bpm_per_window), 2)     # mean
        tempo_mode = stats.mode(estimated_bpm_per_window.flatten())[0]
        tempo_median = np.median(estimated_bpm_per_window.flatten())

        # Append one row (one value per column list) to this segment's table.
        row = {
            "filename": file_stem,
            "dance_genre": dance_genre,
            "situation": situation,
            "camera_id": camera_id,
            "dancer_id": dancer_id,
            "music_id": music_id,
            "choreo_id": choreo_id,
            "music_tempo": aist_tempo[music_id],
            "estimated_bpm_per_window": estimated_bpm_per_window,
            "magnitude_per_window": magnitude_per_window,
            "bpm_avg": tempo_avg,
            "bpm_mode": tempo_mode,
            "bpm_median": tempo_median,
        }
        for column, value in row.items():
            result[seg_key][column].append(value)

    count += 1
print("total processed:",count)

# Write one pickled DataFrame per segment; the file name encodes segment,
# mode, window/hop length and the tempo range.
for seg_key in segment_keys:
    df_seg = pd.DataFrame(result[seg_key])
    fpath1 = os.path.join(save_dir, f"{metric}/{seg_key}_{mode}_W{w_sec}_H{h_sec}_{a}_{b}.pkl")
    df_seg.to_pickle(fpath1)

  0%|          | 0/1510 [00:00<?, ?it/s]

100%|██████████| 1510/1510 [07:22<00:00,  3.41it/s]


total processed: 1341


#### Estimate Tempo - Tempogram combination method

## RMS estimate tempo

In [None]:
# Lookup table read from JSON; it is indexed by music id further down to fill
# the "music_tempo" column.
json_filename = "music_id_tempo.json"
with open(json_filename, "r") as tempo_file:
    aist_tempo = json.load(tempo_file)
    
def create_dir(main_dir, tempo_dir):
    """Create the result folders for one tempo range below `main_dir`.

    Makes `<main_dir>/<tempo_dir>/pos`, `<main_dir>/<tempo_dir>/vel` and the
    same two folders under `<main_dir>/<tempo_dir>/tempo_data/`. Folders that
    already exist are kept.
    """
    for group in ("", "tempo_data/"):
        for metric_name in ("pos", "vel"):
            target = os.path.join(main_dir, f"{tempo_dir}/{group}{metric_name}")
            os.makedirs(target, exist_ok=True)

# ------------------------------------------------------------
#  Peak-based adaptive weights (2-axis version)
# ------------------------------------------------------------
def adaptive_axis_weights_by_peak(tempograms_abs):
    """Weight each tempogram by its global maximum.

    Parameters
    ----------
    tempograms_abs : sequence of array-like
        One tempogram per axis.

    Returns
    -------
    np.ndarray
        One weight per input: each tempogram's maximum divided by the sum of
        all maxima. A NaN maximum counts as 0. If the sum is not positive,
        every axis gets the same weight 1/len(tempograms_abs).
    """
    axis_peaks = np.array(list(map(np.max, tempograms_abs)), dtype=float)
    axis_peaks = np.nan_to_num(axis_peaks, nan=0.0)
    peak_sum = axis_peaks.sum()
    if peak_sum <= 0:
        # Nothing to rank the axes by: fall back to uniform weights.
        return np.full_like(axis_peaks, 1.0) / axis_peaks.size
    return axis_peaks / peak_sum

import numpy as np

def adaptive_axis_weights_by_peak_framewise(tempograms_abs):
    """
    Frame-wise adaptive weights based on per-frame peaks.

    Parameters
    ----------
    tempograms_abs : list of np.ndarray
        Each tempogram has shape (freq_bins, time_frames).
        All must have the same shape.

    Returns
    -------
    weights : np.ndarray
        Shape (len(tempograms_abs), time_frames).
        At each time frame, weights across axes sum to 1.0. Frames whose peak
        total is not positive get uniform weights, matching the fallback of
        adaptive_axis_weights_by_peak (previously such frames got all-zero
        weights, which broke the sum-to-one contract).
    """
    # Stack: shape (num_axes, freq_bins, time_frames)
    T = np.stack(tempograms_abs, axis=0)

    # Per-frame peak per axis -> shape (num_axes, time_frames); NaN peaks count as 0
    peaks = np.nan_to_num(np.max(T, axis=1), nan=0.0)

    # Frame-wise normalisation
    totals = np.sum(peaks, axis=0, keepdims=True)  # (1, time_frames)
    degenerate = totals <= 0

    # Divide by 1 on degenerate frames to avoid 0/0, then overwrite those
    # frames with the uniform weight.
    safe_totals = np.where(degenerate, 1.0, totals)
    weights = np.where(degenerate, 1.0 / peaks.shape[0], peaks / safe_totals)

    return weights

        
# One result table per adaptive pair; the tags match the third element of each
# adap_map entry used in the main loop.
segment_keys = ['adaptv_Bhandfoot_y', "adaptv_LRfoot_xy",
                "adaptv_LRhand_xy",  "adaptv_Bfoot_x_y",
                "adaptv_Bhandfoot_x", "adaptv_Bhand_x_y",

                "adaptv_LRfoot_res", "adaptv_LRhand_res", "adaptv_Bhandfoot_res",
                ]

# Columns of each per-tag table; one list entry is appended per recording.
result_columns = ["filename", "dance_genre", "situation", "camera_id",
                  "dancer_id", "music_id", "choreo_id", "music_tempo",
                  "estimated_bpm_per_window", "magnitude_per_window",
                  "bpm_avg", "bpm_mode", "bpm_median"]
result = {key: {column: [] for column in result_columns} for key in segment_keys}

# Analysis window: w_sec seconds long, hopped by half a window, expressed in
# samples at `fps` samples per second.
fps = 60
w_sec = 5
h_sec = w_sec/2
window_size = int(fps*w_sec)
hop_size = int(fps*h_sec)

# Candidate tempi a, a+1, ..., b-1 (np.arange excludes the upper bound).
a = 60; b = 140
tempi_range = np.arange(a,b,1)
metric = "pos"
mode = "zero_bi"

# Results root. This used to be an absolute path on one machine while save_dir
# below was relative, so the folders were only created in the right place when
# the notebook ran from that exact directory. Both are now derived from the
# same relative root.
main_dir = "./saved_result_rms_adaptive"
create_dir(main_dir, f"tempo_{a}_{b}")

save_dir = f"{main_dir}/tempo_{a}_{b}/"
f_path = "./aist_dataset/aist_annotation/keypoints2d"
aist_filelist = os.listdir(f_path)

pos_onset_dir = "./rms_extracted_body_onsets/pos/"
# vel_onset_dir = f"./extracted_body_onsets/vel/"

# Per-recording tempo estimation from adaptively combined tempogram pairs.
# `count` ends up as the number of recordings that were actually processed
# (it used to be incremented before the skip check and so counted every
# directory entry).
count = 0
for filename in tqdm(aist_filelist):

    # Skip recordings without extracted onsets; the left-wrist ax0 file is used
    # as the marker for the whole set. Checking this first also keeps stray
    # directory entries from reaching the name parsing below.
    test_path = os.path.join(pos_onset_dir, "ax0", f"left_wrist_{mode}_{filename}")
    if not os.path.exists(test_path):
        continue

    # Metadata is read from six "_"-separated fields of the file name.
    # os.path.splitext removes exactly the extension; the previous
    # str.strip(".pkl") strips any leading/trailing '.', 'p', 'k' or 'l'
    # characters and could eat into the name itself.
    file_stem = os.path.splitext(filename)[0]
    file_info = file_stem.split("_")
    dance_genre = file_info[0]
    situation = file_info[1]
    camera_id = file_info[2]
    dancer_id = file_info[3]
    music_id = file_info[4]
    choreo_id = file_info[5]

    # Per-marker onset dicts, one file per axis folder ("ax0" -> *_x, "ax1" -> *_y).
    left_hand_x  = load_pickle(os.path.join(pos_onset_dir, "ax0", f"left_wrist_{mode}_{filename}"))
    left_hand_y  = load_pickle(os.path.join(pos_onset_dir, "ax1", f"left_wrist_{mode}_{filename}"))

    right_hand_x = load_pickle(os.path.join(pos_onset_dir, "ax0", f"right_wrist_{mode}_{filename}"))
    right_hand_y = load_pickle(os.path.join(pos_onset_dir, "ax1", f"right_wrist_{mode}_{filename}"))

    left_foot_x  = load_pickle(os.path.join(pos_onset_dir, "ax0", f"left_ankle_{mode}_{filename}"))
    left_foot_y  = load_pickle(os.path.join(pos_onset_dir, "ax1", f"left_ankle_{mode}_{filename}"))

    right_foot_x = load_pickle(os.path.join(pos_onset_dir, "ax0", f"right_ankle_{mode}_{filename}"))
    right_foot_y = load_pickle(os.path.join(pos_onset_dir, "ax1", f"right_ankle_{mode}_{filename}"))

    # Length of the underlying signal, handed to the tempo estimator below.
    novelty_length = left_hand_x['raw_signal'].shape[0]

    key = 'sensor_onsets'  # or 'sensor_abs_vel_filtered', depending on your data
    thres = 0.2            # time threshold, passed on as threshold_s

    # Pairwise combinations: the two onset tracks are summed and the sum is
    # passed through filter_dir_onsets_by_threshold.
    bothhand_x = filter_dir_onsets_by_threshold((left_hand_x[key] + right_hand_x[key]), threshold_s=thres, fps=fps)
    bothhand_y = filter_dir_onsets_by_threshold((left_hand_y[key] + right_hand_y[key]), threshold_s=thres, fps=fps)

    bothfoot_x = filter_dir_onsets_by_threshold((left_foot_x[key] + right_foot_x[key]), threshold_s=thres, fps=fps)
    bothfoot_y = filter_dir_onsets_by_threshold((left_foot_y[key] + right_foot_y[key]), threshold_s=thres, fps=fps)

    lefthand_xy = filter_dir_onsets_by_threshold((left_hand_x[key] + left_hand_y[key]), threshold_s=thres, fps=fps)
    righthand_xy = filter_dir_onsets_by_threshold((right_hand_x[key] + right_hand_y[key]), threshold_s=thres, fps=fps)

    leftfoot_xy = filter_dir_onsets_by_threshold((left_foot_x[key] + left_foot_y[key]), threshold_s=thres, fps=fps)
    rightfoot_xy = filter_dir_onsets_by_threshold((right_foot_x[key] + right_foot_y[key]), threshold_s=thres, fps=fps)

    # Resultant part: onsets stored in the "resultant" folder.
    key1 = 'resultant_onsets'
    left_hand_resultant  = load_pickle(os.path.join(pos_onset_dir, "resultant", f"left_wrist_{mode}_{filename}"))
    right_hand_resultant = load_pickle(os.path.join(pos_onset_dir, "resultant", f"right_wrist_{mode}_{filename}"))

    left_foot_resultant  = load_pickle(os.path.join(pos_onset_dir, "resultant", f"left_ankle_{mode}_{filename}"))
    right_foot_resultant = load_pickle(os.path.join(pos_onset_dir, "resultant", f"right_ankle_{mode}_{filename}"))

    both_hand_resultant = filter_dir_onsets_by_threshold((left_hand_resultant[key1] + right_hand_resultant[key1]), threshold_s=thres, fps=fps)
    both_foot_resultant = filter_dir_onsets_by_threshold((left_foot_resultant[key1] + right_foot_resultant[key1]), threshold_s=thres, fps=fps)

    # (first track, second track, result tag) for every adaptive pair.
    # Entries '3' and '5' previously passed the whole loaded dict instead of
    # its 'resultant_onsets' entry; they now index it like every other use of
    # the resultant data.
    adap_map = {
        '1': [bothhand_y,                bothfoot_y,                 'adaptv_Bhandfoot_y'],
        '2': [leftfoot_xy,               rightfoot_xy,               'adaptv_LRfoot_xy'],
        '3': [left_foot_resultant[key1], right_foot_resultant[key1], 'adaptv_LRfoot_res'],
        '4': [lefthand_xy,               righthand_xy,               'adaptv_LRhand_xy'],
        '5': [left_hand_resultant[key1], right_hand_resultant[key1], 'adaptv_LRhand_res'],
        '6': [bothfoot_x,                bothfoot_y,                 'adaptv_Bfoot_x_y'],
        '7': [bothhand_x,                bothfoot_x,                 'adaptv_Bhandfoot_x'],
        '8': [bothhand_x,                bothhand_y,                 'adaptv_Bhand_x_y'],
        '9': [both_hand_resultant,       both_foot_resultant,        'adaptv_Bhandfoot_res'],
    }

    for first_track, second_track, tag in adap_map.values():
        sensor_onsets1 = binary_to_peak(first_track, peak_duration=0.05)
        sensor_onsets2 = binary_to_peak(second_track, peak_duration=0.05)

        # One tempogram per track of the pair.
        tempogram_ab1, tempogram_raw1, _, _ = compute_tempogram(
            sensor_onsets1, fps, window_length=window_size, hop_size=hop_size, tempi=tempi_range)

        tempogram_ab2, tempogram_raw2, _, _ = compute_tempogram(
            sensor_onsets2, fps, window_length=window_size, hop_size=hop_size, tempi=tempi_range)

        tempograms_raw = [tempogram_raw1[0], tempogram_raw2[0]]
        tempograms_abs = [tempogram_ab1[0], tempogram_ab2[0]]    # abs of tempogram_raw

        # Frame-wise weights, shape (2, time_frames); [i, None, :] broadcasts
        # one weight row across all tempo bins of tempogram i.
        weights = adaptive_axis_weights_by_peak_framewise(tempograms_abs)
        tempogram_ab_comb = [
            weights[0, None, :] * tempograms_abs[0] +
            weights[1, None, :] * tempograms_abs[1]
        ]
        tempogram_raw_comb = [
            weights[0, None, :] * tempograms_raw[0] +
            weights[1, None, :] * tempograms_raw[1]
        ]

        tempo_data_maxmethod = dance_beat_tempo_estimation_maxmethod(
            tempogram_ab_comb, tempogram_raw_comb, fps,
            novelty_length, window_size, hop_size, tempi_range)

        estimated_bpm_per_window = tempo_data_maxmethod["bpm_arr"]
        magnitude_per_window = tempo_data_maxmethod["mag_arr"]

        # Summary statistics over the per-window estimates.
        tempo_avg = np.round(np.average(estimated_bpm_per_window), 2)     # mean
        tempo_mode = stats.mode(estimated_bpm_per_window.flatten())[0]
        tempo_median = np.median(estimated_bpm_per_window.flatten())

        # Append one row (one value per column list) to this tag's table.
        row = {
            "filename": file_stem,
            "dance_genre": dance_genre,
            "situation": situation,
            "camera_id": camera_id,
            "dancer_id": dancer_id,
            "music_id": music_id,
            "choreo_id": choreo_id,
            "music_tempo": aist_tempo[music_id],
            "estimated_bpm_per_window": estimated_bpm_per_window,
            "magnitude_per_window": magnitude_per_window,
            "bpm_avg": tempo_avg,
            "bpm_mode": tempo_mode,
            "bpm_median": tempo_median,
        }
        for column, value in row.items():
            result[tag][column].append(value)

    count += 1


    # fpath1 = os.path.join(save_dir, fname1)
    # df_seg = pd.DataFrame(result)
    # df_seg.to_pickle(fpath1)
    
# Write one pickled DataFrame per adaptive tag; the file name encodes tag,
# mode, window/hop length and the tempo range.
for seg_key in segment_keys:
    df_seg = pd.DataFrame(result[seg_key])
    fpath1 = os.path.join(save_dir, f"{metric}/{seg_key}_{mode}_W{w_sec}_H{h_sec}_{a}_{b}.pkl")
    df_seg.to_pickle(fpath1)

100%|██████████| 1510/1510 [04:21<00:00,  5.78it/s]


### Using raw input to tempogram

In [21]:
# Lookup table read from JSON; it is indexed by music id further down to fill
# the "music_tempo" column.
json_filename = "music_id_tempo.json"
with open(json_filename, "r") as tempo_file:
    aist_tempo = json.load(tempo_file)
    
def create_dir(main_dir, tempo_dir):
    """Create the result folders for one tempo range below `main_dir`.

    Makes `<main_dir>/<tempo_dir>/pos`, `<main_dir>/<tempo_dir>/vel` and the
    same two folders under `<main_dir>/<tempo_dir>/tempo_data/`. Folders that
    already exist are kept.
    """
    for group in ("", "tempo_data/"):
        for metric_name in ("pos", "vel"):
            target = os.path.join(main_dir, f"{tempo_dir}/{group}{metric_name}")
            os.makedirs(target, exist_ok=True)

# ------------------------------------------------------------
#  Peak-based adaptive weights (2-axis version)
# ------------------------------------------------------------
def adaptive_axis_weights_by_peak(tempograms_abs):
    """Weight each tempogram by its global maximum.

    Parameters
    ----------
    tempograms_abs : sequence of array-like
        One tempogram per axis.

    Returns
    -------
    np.ndarray
        One weight per input: each tempogram's maximum divided by the sum of
        all maxima. A NaN maximum counts as 0. If the sum is not positive,
        every axis gets the same weight 1/len(tempograms_abs).
    """
    axis_peaks = np.array(list(map(np.max, tempograms_abs)), dtype=float)
    axis_peaks = np.nan_to_num(axis_peaks, nan=0.0)
    peak_sum = axis_peaks.sum()
    if peak_sum <= 0:
        # Nothing to rank the axes by: fall back to uniform weights.
        return np.full_like(axis_peaks, 1.0) / axis_peaks.size
    return axis_peaks / peak_sum

import numpy as np

def adaptive_axis_weights_by_peak_framewise(tempograms_abs):
    """
    Frame-wise adaptive weights based on per-frame peaks.

    Parameters
    ----------
    tempograms_abs : list of np.ndarray
        Each tempogram has shape (freq_bins, time_frames).
        All must have the same shape.

    Returns
    -------
    weights : np.ndarray
        Shape (len(tempograms_abs), time_frames).
        At each time frame, weights across axes sum to 1.0. Frames whose peak
        total is not positive get uniform weights, matching the fallback of
        adaptive_axis_weights_by_peak (previously such frames got all-zero
        weights, which broke the sum-to-one contract).
    """
    # Stack: shape (num_axes, freq_bins, time_frames)
    T = np.stack(tempograms_abs, axis=0)

    # Per-frame peak per axis -> shape (num_axes, time_frames); NaN peaks count as 0
    peaks = np.nan_to_num(np.max(T, axis=1), nan=0.0)

    # Frame-wise normalisation
    totals = np.sum(peaks, axis=0, keepdims=True)  # (1, time_frames)
    degenerate = totals <= 0

    # Divide by 1 on degenerate frames to avoid 0/0, then overwrite those
    # frames with the uniform weight.
    safe_totals = np.where(degenerate, 1.0, totals)
    weights = np.where(degenerate, 1.0 / peaks.shape[0], peaks / safe_totals)

    return weights

        
# One result table per adaptive pair; the tags match the third element of each
# adap_map entry used in the main loop.
segment_keys = ['adaptv_Bhandfoot_y', "adaptv_LRfoot_xy",
                "adaptv_LRhand_xy",  "adaptv_Bfoot_x_y",
                "adaptv_Bhandfoot_x", "adaptv_Bhand_x_y",

                "adaptv_LRfoot_res", "adaptv_LRhand_res", "adaptv_Bhandfoot_res",
                ]

# Columns of each per-tag table; one list entry is appended per recording.
result_columns = ["filename", "dance_genre", "situation", "camera_id",
                  "dancer_id", "music_id", "choreo_id", "music_tempo",
                  "estimated_bpm_per_window", "magnitude_per_window",
                  "bpm_avg", "bpm_mode", "bpm_median"]
result = {key: {column: [] for column in result_columns} for key in segment_keys}

# Analysis window: w_sec seconds long, hopped by half a window, expressed in
# samples at `fps` samples per second.
fps = 60
w_sec = 5
h_sec = w_sec/2
window_size = int(fps*w_sec)
hop_size = int(fps*h_sec)

# Candidate tempi a, a+1, ..., b-1 (np.arange excludes the upper bound).
a = 60; b = 140
tempi_range = np.arange(a,b,1)
metric = "pos"
mode = "zero_bi"

# Results root. This used to be an absolute path on one machine while save_dir
# below was relative, so the folders were only created in the right place when
# the notebook ran from that exact directory. Both are now derived from the
# same relative root.
# NOTE(review): root, mode, tags and file-name pattern are identical to the
# onset-based adaptive cell, so running this cell overwrites that cell's result
# files. Confirm whether the raw-input results should get their own folder.
main_dir = "./saved_result_rms_adaptive"
create_dir(main_dir, f"tempo_{a}_{b}")

save_dir = f"{main_dir}/tempo_{a}_{b}/"
f_path = "./aist_dataset/aist_annotation/keypoints2d"
aist_filelist = os.listdir(f_path)

pos_onset_dir = "./rms_extracted_body_onsets/pos/"
# vel_onset_dir = f"./extracted_body_onsets/vel/"


# Per-recording tempo estimation where the stored signals are fed to the
# tempogram directly (no binary_to_peak step).
# `count` ends up as the number of recordings that were actually processed
# (it used to be incremented before the skip check and so counted every
# directory entry).
count = 0
for filename in tqdm(aist_filelist):

    # Skip recordings without extracted onsets; the left-wrist ax0 file is used
    # as the marker for the whole set. Checking this first also keeps stray
    # directory entries from reaching the name parsing below.
    test_path = os.path.join(pos_onset_dir, "ax0", f"left_wrist_{mode}_{filename}")
    if not os.path.exists(test_path):
        continue

    # Metadata is read from six "_"-separated fields of the file name.
    # os.path.splitext removes exactly the extension; the previous
    # str.strip(".pkl") strips any leading/trailing '.', 'p', 'k' or 'l'
    # characters and could eat into the name itself.
    file_stem = os.path.splitext(filename)[0]
    file_info = file_stem.split("_")
    dance_genre = file_info[0]
    situation = file_info[1]
    camera_id = file_info[2]
    dancer_id = file_info[3]
    music_id = file_info[4]
    choreo_id = file_info[5]

    # Per-marker data dicts, one file per axis folder ("ax0" -> *_x, "ax1" -> *_y).
    left_hand_x  = load_pickle(os.path.join(pos_onset_dir, "ax0", f"left_wrist_{mode}_{filename}"))
    left_hand_y  = load_pickle(os.path.join(pos_onset_dir, "ax1", f"left_wrist_{mode}_{filename}"))

    right_hand_x = load_pickle(os.path.join(pos_onset_dir, "ax0", f"right_wrist_{mode}_{filename}"))
    right_hand_y = load_pickle(os.path.join(pos_onset_dir, "ax1", f"right_wrist_{mode}_{filename}"))

    left_foot_x  = load_pickle(os.path.join(pos_onset_dir, "ax0", f"left_ankle_{mode}_{filename}"))
    left_foot_y  = load_pickle(os.path.join(pos_onset_dir, "ax1", f"left_ankle_{mode}_{filename}"))

    right_foot_x = load_pickle(os.path.join(pos_onset_dir, "ax0", f"right_ankle_{mode}_{filename}"))
    right_foot_y = load_pickle(os.path.join(pos_onset_dir, "ax1", f"right_ankle_{mode}_{filename}"))

    # Length of the underlying signal, handed to the tempo estimator below.
    novelty_length = left_hand_x['raw_signal'].shape[0]

    key = 'sensor_abs_pos_norm'  # or 'sensor_abs_vel_filtered', depending on your data
    thres = 0.2            # time threshold, only used for the resultant pairs below

    # Plain sums of the stored signals (no onset filtering in this cell).
    bothhand_x = left_hand_x[key] + right_hand_x[key]
    bothhand_y = left_hand_y[key] + right_hand_y[key]

    bothfoot_x = left_foot_x[key] + right_foot_x[key]
    bothfoot_y = left_foot_y[key] + right_foot_y[key]

    lefthand_xy = left_hand_x[key] + left_hand_y[key]
    righthand_xy = right_hand_x[key] + right_hand_y[key]

    leftfoot_xy = left_foot_x[key] + left_foot_y[key]
    rightfoot_xy = right_foot_x[key] + right_foot_y[key]

    # Resultant part.
    # NOTE(review): unlike the axis pairs above, the resultant pairs still use
    # the 'resultant_onsets' entries (threshold-filtered for the combined pair).
    # Confirm this mix of raw and onset inputs is intended.
    key1 = 'resultant_onsets'
    left_hand_resultant  = load_pickle(os.path.join(pos_onset_dir, "resultant", f"left_wrist_{mode}_{filename}"))
    right_hand_resultant = load_pickle(os.path.join(pos_onset_dir, "resultant", f"right_wrist_{mode}_{filename}"))

    left_foot_resultant  = load_pickle(os.path.join(pos_onset_dir, "resultant", f"left_ankle_{mode}_{filename}"))
    right_foot_resultant = load_pickle(os.path.join(pos_onset_dir, "resultant", f"right_ankle_{mode}_{filename}"))

    both_hand_resultant = filter_dir_onsets_by_threshold((left_hand_resultant[key1] + right_hand_resultant[key1]), threshold_s=thres, fps=fps)
    both_foot_resultant = filter_dir_onsets_by_threshold((left_foot_resultant[key1] + right_foot_resultant[key1]), threshold_s=thres, fps=fps)

    # (first signal, second signal, result tag) for every adaptive pair.
    adap_map = {
        '1': [bothhand_y,                bothfoot_y,                 'adaptv_Bhandfoot_y'],
        '2': [leftfoot_xy,               rightfoot_xy,               'adaptv_LRfoot_xy'],
        '3': [left_foot_resultant[key1], right_foot_resultant[key1], 'adaptv_LRfoot_res'],
        '4': [lefthand_xy,               righthand_xy,               'adaptv_LRhand_xy'],
        '5': [left_hand_resultant[key1], right_hand_resultant[key1], 'adaptv_LRhand_res'],
        '6': [bothfoot_x,                bothfoot_y,                 'adaptv_Bfoot_x_y'],
        '7': [bothhand_x,                bothfoot_x,                 'adaptv_Bhandfoot_x'],
        '8': [bothhand_x,                bothhand_y,                 'adaptv_Bhand_x_y'],
        '9': [both_hand_resultant,       both_foot_resultant,        'adaptv_Bhandfoot_res'],
    }

    for sensor_onsets1, sensor_onsets2, tag in adap_map.values():

        # Each signal is reshaped to a single column before the tempogram call.
        tempogram_ab1, tempogram_raw1, _, _ = compute_tempogram(
            sensor_onsets1.reshape(-1,1), fps,
            window_length=window_size, hop_size=hop_size, tempi=tempi_range)

        tempogram_ab2, tempogram_raw2, _, _ = compute_tempogram(
            sensor_onsets2.reshape(-1,1), fps,
            window_length=window_size, hop_size=hop_size, tempi=tempi_range)

        tempograms_raw = [tempogram_raw1[0], tempogram_raw2[0]]
        tempograms_abs = [tempogram_ab1[0], tempogram_ab2[0]]    # abs of tempogram_raw

        # Frame-wise weights, shape (2, time_frames); [i, None, :] broadcasts
        # one weight row across all tempo bins of tempogram i.
        weights = adaptive_axis_weights_by_peak_framewise(tempograms_abs)
        tempogram_ab_comb = [
            weights[0, None, :] * tempograms_abs[0] +
            weights[1, None, :] * tempograms_abs[1]
        ]
        tempogram_raw_comb = [
            weights[0, None, :] * tempograms_raw[0] +
            weights[1, None, :] * tempograms_raw[1]
        ]

        tempo_data_maxmethod = dance_beat_tempo_estimation_maxmethod(
            tempogram_ab_comb, tempogram_raw_comb, fps,
            novelty_length, window_size, hop_size, tempi_range)

        estimated_bpm_per_window = tempo_data_maxmethod["bpm_arr"]
        magnitude_per_window = tempo_data_maxmethod["mag_arr"]

        # Summary statistics over the per-window estimates.
        tempo_avg = np.round(np.average(estimated_bpm_per_window), 2)     # mean
        tempo_mode = stats.mode(estimated_bpm_per_window.flatten())[0]
        tempo_median = np.median(estimated_bpm_per_window.flatten())

        # Append one row (one value per column list) to this tag's table.
        row = {
            "filename": file_stem,
            "dance_genre": dance_genre,
            "situation": situation,
            "camera_id": camera_id,
            "dancer_id": dancer_id,
            "music_id": music_id,
            "choreo_id": choreo_id,
            "music_tempo": aist_tempo[music_id],
            "estimated_bpm_per_window": estimated_bpm_per_window,
            "magnitude_per_window": magnitude_per_window,
            "bpm_avg": tempo_avg,
            "bpm_mode": tempo_mode,
            "bpm_median": tempo_median,
        }
        for column, value in row.items():
            result[tag][column].append(value)

    count += 1


    # fpath1 = os.path.join(save_dir, fname1)
    # df_seg = pd.DataFrame(result)
    # df_seg.to_pickle(fpath1)
    
# Write one pickled DataFrame per adaptive tag; the file name encodes tag,
# mode, window/hop length and the tempo range.
for seg_key in segment_keys:
    df_seg = pd.DataFrame(result[seg_key])
    fpath1 = os.path.join(save_dir, f"{metric}/{seg_key}_{mode}_W{w_sec}_H{h_sec}_{a}_{b}.pkl")
    df_seg.to_pickle(fpath1)

  0%|          | 0/1510 [00:00<?, ?it/s]

100%|██████████| 1510/1510 [02:54<00:00,  8.64it/s]
