In [1]:
import os
import pandas as pd

In [2]:
import os
import pandas as pd

# Input locations: folder of per-recording drum onset CSVs and the start-time table
drum_dir = r"D:\pyspace\Djembe\2025\mocap_formats\data\drum_onsets"
start_times_path = r"D:\pyspace\Djembe\2025\mocap_formats\data\start_times_part12.csv"

# Onset columns that are reported and trimmed in every drum CSV
onset_columns = ['Dun', 'J1', 'J2']

# Only rows that actually carry a Part 2 start time take part in the trimming
df_times = pd.read_csv(start_times_path)
df_p2 = df_times.dropna(subset=['start_time_p2_seconds'])

for recording, threshold in zip(df_p2['recording'], df_p2['start_time_p2_seconds']):
    # The drum CSV is named after the recording without its last "_"-separated
    # token (e.g. "BKO_E1_D1_01_Suku_P2" -> "BKO_E1_D1_01_Suku.csv")
    base_name = recording.rsplit('_', 1)[0]
    csv_path = os.path.join(drum_dir, f"{base_name}.csv")

    # Recordings without a matching drum CSV are reported and skipped
    if not os.path.isfile(csv_path):
        print(f"Warning: file not found: {csv_path}")
        continue

    df = pd.read_csv(csv_path)
    print(f"\nProcessing {base_name}")
    print(f"Threshold: {threshold}")
    print(f"Original number of rows: {len(df)}")

    # Report how many values each onset column holds before anything is removed
    print("\nColumn lengths before trimming:")
    for col in onset_columns:
        print(f"{col}: {df[col].count()} non-NaN values")

    # Each column is handled on its own: values at or below the threshold are
    # kept, everything else becomes NaN (the row count itself does not change)
    for col in onset_columns:
        df[col] = df[col].where(df[col] <= threshold)

    # Same report again so the effect of the trimming is visible in the log
    print("\nColumn lengths after trimming:")
    for col in onset_columns:
        print(f"{col}: {df[col].count()} non-NaN values")

    # The trimmed table overwrites the CSV it was read from
    df.to_csv(csv_path, index=False)
    print(f"Trimmed and saved: {csv_path}")


Processing BKO_E1_D1_01_Suku
Threshold: 363.07
Original number of rows: 2821

Column lengths before trimming:
Dun: 1523 non-NaN values
J1: 2326 non-NaN values
J2: 2821 non-NaN values

Column lengths after trimming:
Dun: 1186 non-NaN values
J1: 1859 non-NaN values
J2: 2156 non-NaN values
Trimmed and saved: D:\pyspace\Djembe\2025\mocap_formats\data\drum_onsets\BKO_E1_D1_01_Suku.csv

Processing BKO_E1_D1_07_Suku
Threshold: 218.994
Original number of rows: 1296

Column lengths before trimming:
Dun: 755 non-NaN values
J1: 1277 non-NaN values
J2: 1296 non-NaN values

Column lengths after trimming:
Dun: 580 non-NaN values
J1: 992 non-NaN values
J2: 947 non-NaN values
Trimmed and saved: D:\pyspace\Djembe\2025\mocap_formats\data\drum_onsets\BKO_E1_D1_07_Suku.csv

Processing BKO_E1_D1_08_Suku
Threshold: 175.488
Original number of rows: 1141

Column lengths before trimming:
Dun: 600 non-NaN values
J1: 1141 non-NaN values
J2: 1125 non-NaN values

Column lengths after trimming:
Dun: 455 non-NaN va

In [11]:
import os
import re
import csv
import math

# Directory scanned below for "*_P1.txt" / "*_P2.txt" part annotation files
parts_dir = r"D:\pyspace\Djembe\2025\mocap_formats\data\parts"
# CSV written by this cell: one row per annotation file with its first
# Part 1 / Part 2 marker times converted to seconds.
# NOTE(review): the onset-trimming cell reads "start_times_part12.csv", not
# this "..._seconds.csv" file — confirm which file name is intended.
output_csv = r"D:\pyspace\Djembe\2025\mocap_formats\data\start_times_part12_seconds.csv"

def timestamp_to_seconds(ts):
    """
    Turn an 'HH:MM:SS,mmm' timestamp string into a float number of seconds.

    The string is split on ':' into hours, minutes and a 'SS,mmm' remainder,
    and the remainder is split on ',' into seconds and milliseconds. A string
    that does not split into exactly these parts, or whose parts are not
    integers, raises ValueError.
    """
    hours, minutes, remainder = ts.split(':')
    seconds, millis = remainder.split(',')
    whole_seconds = int(hours) * 3600 + int(minutes) * 60 + int(seconds)
    # Dividing by a float makes the result a float even when millis is 0
    return whole_seconds + int(millis) / 1000.0

# Marker patterns: a line that starts (after optional whitespace) with an
# 'HH:MM:SS,mmm' timestamp, followed by whitespace and the quoted part label.
# Compiled once here instead of being re-specified for every line.
_P1_MARKER = re.compile(r'^\s*([\d]{2}:[\d]{2}:[\d]{2},\d{3})\s+"1"')
_P2_MARKER = re.compile(r'^\s*([\d]{2}:[\d]{2}:[\d]{2},\d{3})\s+"2"')

# Build one summary CSV with the first Part 1 / Part 2 marker time of every
# annotation file. The encoding is given explicitly (matching how the
# annotation files are read) so the output does not depend on the platform's
# default encoding.
with open(output_csv, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(['recording', 'start_time_p1_seconds', 'start_time_p2_seconds'])

    # include both *_P1.txt and *_P2.txt files;
    # os.listdir() returns names in arbitrary order, so sort them to get the
    # same row order on every run
    for fname in sorted(os.listdir(parts_dir)):
        if not fname.endswith(('_P1.txt', '_P2.txt')):
            continue

        # The recording name is the file name without its ".txt" extension
        recording_name = fname.rsplit('.txt', 1)[0]
        full_path = os.path.join(parts_dir, fname)

        p1_ts = None
        p2_ts = None

        with open(full_path, 'r', encoding='utf-8') as f:
            for line in f:
                # Only the first matching line of each kind is recorded
                if p1_ts is None:
                    m1 = _P1_MARKER.match(line)
                    if m1:
                        p1_ts = m1.group(1)
                if p2_ts is None:
                    m2 = _P2_MARKER.match(line)
                    if m2:
                        p2_ts = m2.group(1)
                # Stop reading as soon as both markers have been found
                if p1_ts is not None and p2_ts is not None:
                    break

        # convert to seconds, use NaN if missing
        secs1 = timestamp_to_seconds(p1_ts) if p1_ts is not None else math.nan
        secs2 = timestamp_to_seconds(p2_ts) if p2_ts is not None else math.nan

        writer.writerow([recording_name, secs1, secs2])


### Build Tempo Data

In [3]:
import os
import pandas as pd

# Source folder with the virtual-cycle CSVs and the parent folder for results
cycles_dir = r"D:\pyspace\Djembe\2025\mocap_formats\data\virtual_cycles"
save_dir= r"D:\pyspace\Djembe\2025\mocap_formats\data"

# Results (cycle_onset, ioi, tempo) are written to a "tempo" subfolder,
# which is created if it does not exist yet
output_dir = os.path.join(save_dir, "tempo")
os.makedirs(output_dir, exist_ok=True)

for filename in os.listdir(cycles_dir):
    # Anything that is not a CSV (case-insensitive extension check) is ignored
    is_csv = filename.lower().endswith(".csv")
    if not is_csv:
        continue

    cycles = pd.read_csv(os.path.join(cycles_dir, filename))

    # Files without the onset column cannot be processed; report and move on
    if "Virtual Onset" not in cycles.columns:
        print(f"Skipping {filename}: no 'Virtual Onset' column.")
        continue

    onsets = cycles["Virtual Onset"]
    tempo_table = pd.DataFrame({"cycle_onset": onsets})

    # IOI (cycle duration): time from each onset to the following one.
    # The last row has no following onset, so its ioi (and tempo) is NaN.
    tempo_table["ioi"] = onsets.shift(-1) - onsets

    # Tempo in BPM per cycle, using the notebook's 4-beats-per-cycle formula:
    # 4 * (60 / IOI) = 240 / IOI
    tempo_table["tempo"] = 240.0 / tempo_table["ioi"]

    # The result keeps the name of the file it was derived from
    output_path = os.path.join(output_dir, filename)
    tempo_table.to_csv(output_path, index=False)

    print(f"Processed {filename} → saved to {output_path}")


Processed BKO_E1_D1_01_Suku_C.csv → saved to D:\pyspace\Djembe\2025\mocap_formats\data\tempo\BKO_E1_D1_01_Suku_C.csv
Processed BKO_E1_D1_02_Maraka_C.csv → saved to D:\pyspace\Djembe\2025\mocap_formats\data\tempo\BKO_E1_D1_02_Maraka_C.csv
Processed BKO_E1_D1_03_Wasulunka_C.csv → saved to D:\pyspace\Djembe\2025\mocap_formats\data\tempo\BKO_E1_D1_03_Wasulunka_C.csv
Processed BKO_E1_D1_04_Dansa_C.csv → saved to D:\pyspace\Djembe\2025\mocap_formats\data\tempo\BKO_E1_D1_04_Dansa_C.csv
Processed BKO_E1_D1_05_Sandia_C.csv → saved to D:\pyspace\Djembe\2025\mocap_formats\data\tempo\BKO_E1_D1_05_Sandia_C.csv
Processed BKO_E1_D1_06_Manjanin_C.csv → saved to D:\pyspace\Djembe\2025\mocap_formats\data\tempo\BKO_E1_D1_06_Manjanin_C.csv
Processed BKO_E1_D1_07_Suku_C.csv → saved to D:\pyspace\Djembe\2025\mocap_formats\data\tempo\BKO_E1_D1_07_Suku_C.csv
Processed BKO_E1_D1_08_Suku_C.csv → saved to D:\pyspace\Djembe\2025\mocap_formats\data\tempo\BKO_E1_D1_08_Suku_C.csv
Processed BKO_E1_D2_01_Manjanin_C.cs