# Read an .npy file

Load a NumPy binary file and inspect shape, dtype, and a sample.

In [1]:
import sys
from pathlib import Path

import numpy as np
import re

from dijon.global_config import DERIVED_DIR, RAW_AUDIO_DIR, DERIVED_LOGS_DIR

# Locate the project root: climb from the current working directory until a
# directory containing src/dijon is found, or the filesystem root is reached.
# This makes the cell work from the repo root as well as notebooks/scratch.
# NOTE(review): this runs after the `from dijon...` import above, so it cannot
# help that import resolve - confirm the package is installed or move this up.
project_root = Path.cwd()
while True:
    if (project_root / "src" / "dijon").exists() or project_root.parent == project_root:
        break
    project_root = project_root.parent
sys.path.insert(0, str(project_root))

def _parse_novelty_filename_params(filename: str) -> dict | None:
    """Parse N, H, gamma, M from novelty filename like YTB-005_novelty_spectrum_1024-256-100.0-10.npy."""
    m = re.search(r"_novelty_\w+_([\d.]+)-([\d.]+)-([\d.]+)-([\d.]+)\.npy$", filename)
    if not m:
        return None
    return {"N": int(float(m.group(1))), "H": int(float(m.group(2))), "gamma": float(m.group(3)), "M": int(float(m.group(4)))}


def inspect_novelty(path, head=None, tail=None, full=False):
    """Print a summary of a novelty ``.npy`` file.

    Metadata (shape, dtype, basic statistics, window/hop parsed from the
    filename, and the feature sample rate found in the novelty logs) is always
    printed. Array contents are printed only on request.

    Args:
        path: Path (or string) of the ``.npy`` file to load.
        head: Number of leading entries along the first axis to print.
        tail: Number of trailing entries along the first axis to print.
            Head/tail are printed only when both ``head`` and ``tail`` are given.
        full: Print the whole array; takes precedence over ``head``/``tail``.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    path = Path(path)
    if not path.exists():
        raise FileNotFoundError(path)

    arr = np.load(path, allow_pickle=False)

    print("=" * 80)
    print("******  NOVELTY  ******")
    print(f"File:   {path}")
    print(f"Shape:  {arr.shape}")
    print(f"Dtype:  {arr.dtype}")
    print(f"Dim:    {arr.ndim}")
    print(f"Size:   {arr.size}")
    if arr.size:
        print(f"Max/min/mean/std: {np.max(arr):.2f}/{np.min(arr):.2f}/{np.mean(arr):.2f}/{np.std(arr):.2f}")

    params = _parse_novelty_filename_params(path.name)
    if params:
        print(f"N (window): {params['N']}")
        print(f"H (hop):    {params['H']}")

    log_info = _find_novelty_log_info(path.name)
    fs = log_info["feature_sample_rate_hz"]
    # The log lookup yields either a number or the "NOT FOUND" placeholder.
    fs_str = f"{fs:.3f} Hz" if isinstance(fs, (int, float)) else str(fs)
    print(f"Feature sample rate: {fs_str}")
    if isinstance(fs, (int, float)) and fs > 0:
        print(f"Feature sample period: {1.0 / fs:.6f} s")

    if arr.ndim == 0:
        # A 0-d array cannot be sliced, so it is always printed whole.
        print("Scalar:")
        print(arr)

    elif full:
        print("\n--- FULL ---")
        print(arr)

    elif head is not None and tail is not None:
        # Slicing the first axis works the same way for 1-D and N-D arrays.
        print("\n--- HEAD ---")
        print(arr[:head])
        print("\n--- TAIL ---")
        # arr[-0:] is the same slice as arr[0:], i.e. the whole array, so a
        # tail of 0 must be special-cased to print nothing.
        print(arr[-tail:] if tail else arr[:0])

    return None

def track_name_from_novelty_stem(stem: str) -> str:
    """Return the track-name prefix of a novelty filename stem.

    Everything before the first ``_novelty_`` marker is the track name, e.g.
    ``YTB-001_novelty_spectrum_...`` -> ``YTB-001``. A stem that does not
    contain the marker is returned unchanged.
    """
    track, _marker, _rest = stem.partition("_novelty_")
    return track

def _find_novelty_log_info(novelty_filename: str) -> dict[str, str | float]:
    """Collect marker, region and feature-rate details for a novelty file from the logs.

    ``*_novelty_*.log`` files under ``DERIVED_LOGS_DIR`` are scanned in sorted
    order; the search stops at the first line that mentions both the filename
    and ``markers:``. Every value that could not be recovered is left as the
    string ``"NOT FOUND"``.
    """
    info: dict[str, str | float] = dict.fromkeys(
        ("start_marker", "end_marker", "start_sec", "end_sec", "feature_sample_rate_hz"),
        "NOT FOUND",
    )
    if not DERIVED_LOGS_DIR.exists():
        return info

    region_pattern = re.compile(
        r"markers:\s*([^\s|]+)\s*->\s*([^\s|]+)\s*\|\s*region:\s*([\d.]+)s\s*->\s*([\d.]+)s"
    )
    rate_pattern = re.compile(r"features:\s*\d+\s*@\s*([\d.]+)\s*Hz")

    for log_file in sorted(DERIVED_LOGS_DIR.glob("*_novelty_*.log")):
        try:
            contents = log_file.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            # Unreadable log: skip it and try the next one.
            continue
        if novelty_filename not in contents:
            continue

        hit = next(
            (row for row in contents.splitlines() if novelty_filename in row and "markers:" in row),
            None,
        )
        if hit is None:
            continue

        region = region_pattern.search(hit)
        if region:
            start_marker, end_marker, start_sec, end_sec = region.groups()
            info["start_marker"] = start_marker.strip()
            info["end_marker"] = end_marker.strip()
            info["start_sec"] = float(start_sec)
            info["end_sec"] = float(end_sec)
        rate = rate_pattern.search(hit)
        if rate:
            info["feature_sample_rate_hz"] = float(rate.group(1))
        # The first matching line decides the result, even if it parsed only partially.
        return info
    return info


def inspect_wav(novelty_filename: str) -> None:
    """Print a summary of the source WAV for a novelty file, plus its logged region.

    The WAV is looked up in ``RAW_AUDIO_DIR`` by track name (the part of the
    novelty filename stem before ``_novelty_``). Markers and start/end times
    come from the novelty logs; values that were not found print as the
    placeholder returned by the log lookup.
    """
    import soundfile as sf

    track = track_name_from_novelty_stem(Path(novelty_filename).stem)
    wav_path = RAW_AUDIO_DIR / f"{track}.wav"
    log_info = _find_novelty_log_info(novelty_filename)

    def _is_number(value) -> bool:
        return isinstance(value, (int, float))

    def _seconds(value) -> str:
        # Numbers get a unit suffix; placeholders are printed verbatim.
        return f"{value} s" if _is_number(value) else str(value)

    print("=" * 80)
    print("******  WAVE  ******")
    if not wav_path.exists():
        print(f"WAV not found: {wav_path}")
    else:
        info = sf.info(str(wav_path))
        samples, _ = sf.read(str(wav_path))
        print(f"File:   {wav_path}")
        print(f"Sample rate: {info.samplerate} Hz")
        print(f"Duration: {info.duration:.3f} s")
        print(f"Frames: {info.frames}")
        if samples.size:
            summary = "/".join(f"{stat(samples):.2f}" for stat in (np.max, np.min, np.mean, np.std))
            print(f"Max/min/mean/std: {summary}")

    region_start = log_info["start_sec"]
    region_end = log_info["end_sec"]
    print(f"Markers: {log_info['start_marker']} -> {log_info['end_marker']}")
    print(f"Start: {_seconds(region_start)}")
    print(f"End: {_seconds(region_end)}")

    # The frame count needs the sample rate, which is only known when the WAV exists.
    if _is_number(region_start) and _is_number(region_end) and wav_path.exists():
        region_sec = region_end - region_start
        region_frames = int(region_sec * info.samplerate)
        print(f"Duration: {region_sec:.3f} s = {region_frames} f")
    else:
        print("Duration: NOT FOUND")


def _find_tempogram_for_novelty(novelty_filename: str) -> Path | None:
    """Find the tempogram file that belongs to a novelty file.

    Resolution order:
      1. A tempogram log line of the form
         ``• novelty.npy: success -> tempogram.npy`` whose output file exists
         in the tempogram directory (logs are scanned in sorted order).
      2. Otherwise, the first (sorted) ``<track>_tempogram_*.npy`` for the
         same track.

    Returns:
        The tempogram path, or ``None`` when the tempogram directory is
        missing or nothing matches.
    """
    tempo_dir = DERIVED_DIR / "tempogram"
    if not tempo_dir.exists():
        return None

    # Try log: "• novelty.npy: success -> tempogram.npy"
    success_out_re = re.compile(r"success\s*->\s*(\S+\.npy)")
    for log_path in sorted(DERIVED_LOGS_DIR.glob("*_tempogram_*.log")):
        try:
            text = log_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            continue
        if novelty_filename not in text:
            continue
        for line in text.splitlines():
            if novelty_filename in line and "->" in line:
                m = success_out_re.search(line)
                if m:
                    p = tempo_dir / m.group(1)
                    if p.exists():
                        return p
        # No early return here: a log that mentions the file without a usable
        # output (failed run, deleted file) must not hide later logs or the
        # track-name fallback below.

    # Fallback: first tempogram for this track
    track = track_name_from_novelty_stem(Path(novelty_filename).stem)
    matches = sorted(tempo_dir.glob(f"{track}_tempogram_*.npy"))
    return matches[0] if matches else None


def _find_tempogram_log_info(novelty_filename: str) -> dict[str, str | float]:
    """Read the tempogram run details for a novelty file from the tempogram logs.

    The first log line that mentions the filename together with ``->`` selects
    the entry; the details are parsed from the line that follows it. Fields
    that could not be parsed keep the string ``"NOT FOUND"``.
    """
    info: dict[str, str | float] = dict.fromkeys(
        (
            "num_features",
            "feature_fs_hz",
            "N",
            "H",
            "shape",
            "dtype",
            "tempo_resolution_bpm",
            "tempo_bin_count",
        ),
        "NOT FOUND",
    )
    if not DERIVED_LOGS_DIR.exists():
        return info

    # Pattern: "feat: n=N fs=FHz | win: N=N H=H | arr: shape=(...) dtype=... | tempo: d=D bins=B"
    feat_pattern = re.compile(r"feat:\s*n=(\d+)\s+fs=([\d.]+)Hz")
    win_pattern = re.compile(r"win:\s*N=(\d+)\s+H=(\d+)")
    arr_pattern = re.compile(r"arr:\s*shape=\(([^)]+)\)\s+dtype=(\S+)")
    tempo_pattern = re.compile(r"tempo:\s*d=([\d.]+)\s+bins=(\d+)")

    for log_file in sorted(DERIVED_LOGS_DIR.glob("*_tempogram_*.log")):
        try:
            contents = log_file.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            continue
        if novelty_filename not in contents:
            continue

        rows = contents.splitlines()
        anchor = next(
            (idx for idx, row in enumerate(rows) if novelty_filename in row and "->" in row),
            None,
        )
        if anchor is None:
            continue

        # Detail line is typically the next line (indented)
        detail = rows[anchor + 1].strip() if anchor + 1 < len(rows) else ""

        feat = feat_pattern.search(detail)
        if feat:
            info["num_features"] = int(feat.group(1))
            info["feature_fs_hz"] = float(feat.group(2))
        win = win_pattern.search(detail)
        if win:
            info["N"] = int(win.group(1))
            info["H"] = int(win.group(2))
        arr_desc = arr_pattern.search(detail)
        if arr_desc:
            info["shape"] = arr_desc.group(1)
            info["dtype"] = arr_desc.group(2)
        tempo = tempo_pattern.search(detail)
        if tempo:
            info["tempo_resolution_bpm"] = float(tempo.group(1))
            info["tempo_bin_count"] = int(tempo.group(2))
        return info
    return info


def _parse_tempogram_filename_params(filename: str) -> dict | None:
    """Parse N, H, theta_min, theta_max from tempogram filename."""
    m = re.search(r"_tempogram_\w+_(\d+)-(\d+)-(\d+)-(\d+)\.npy$", filename)
    if not m:
        return None
    return {"N": int(m.group(1)), "H": int(m.group(2)), "theta_min": int(m.group(3)), "theta_max": int(m.group(4))}


def inspect_tempogram(novelty_filename: str) -> None:
    """Print basic info for the tempogram associated with a novelty .npy filename.

    Prints array metadata for the tempogram file (when one is found), a set of
    tempo diagnostics when the filename's theta range matches the array's first
    axis, optional beat/meter based diagnostics, and finally the feature/window
    details recorded in the tempogram log.
    """
    tempo_path = _find_tempogram_for_novelty(novelty_filename)
    log_info = _find_tempogram_log_info(novelty_filename)

    print("=" * 80)
    print("******  TEMPOGRAM  ******")
    if tempo_path is None or not tempo_path.exists():
        print("Tempogram not found")
    else:
        arr = np.load(tempo_path, allow_pickle=False)
        tp = _parse_tempogram_filename_params(tempo_path.name)
        theta_min = tp["theta_min"] if tp else None
        theta_max = tp["theta_max"] if tp else None

        print(f"File:   {tempo_path}")
        print(f"Shape:  {arr.shape}  => (tempo bins, time frames)")
        print(f"Dtype:  {arr.dtype}")
        print(f"Dim:    {arr.ndim}")
        print(f"Size:   {arr.size}")
        if arr.size:
            print(f"Magnitude max/min/mean/std: {np.max(arr):.2f}/{np.min(arr):.2f}/{np.mean(arr):.2f}/{np.std(arr):.2f}")

        if theta_min is not None and theta_max is not None and arr.ndim == 2:
            # Rebuild the tempo axis as one bin per integer BPM in [theta_min, theta_max];
            # the diagnostics below run only if that matches the number of rows.
            Theta = np.arange(theta_min, theta_max + 1, dtype=float)
            if len(Theta) == arr.shape[0]:
                range_str = f"({theta_min}-{theta_max} BPM inclusive)"
                print(f"Tempo bins: {arr.shape[0]} {range_str}")

                # Global estimate: the tempo bin with the largest time-averaged magnitude.
                mean_per_tempo = np.mean(arr, axis=1)
                global_bin = int(np.argmax(mean_per_tempo))
                global_bpm = Theta[global_bin]
                peak_mag = mean_per_tempo[global_bin]

                # Ratio of the largest to the second-largest time-averaged magnitude.
                sorted_vals = np.sort(mean_per_tempo)[::-1]
                peak_ratio = sorted_vals[0] / sorted_vals[1] if len(sorted_vals) > 1 and sorted_vals[1] > 1e-12 else float("nan")

                # Strongest tempo bin in every time frame, and its spread over time.
                dominant_per_frame = Theta[np.argmax(arr, axis=0)]
                tempo_stability = float(np.std(dominant_per_frame))

                # Sharpness: peak magnitude relative to the mean of its adjacent bins
                # (only the neighbours that exist at the edges of the tempo axis).
                neighbor_means = []
                if global_bin > 0:
                    neighbor_means.append(np.mean(arr[global_bin - 1, :]))
                if global_bin < arr.shape[0] - 1:
                    neighbor_means.append(np.mean(arr[global_bin + 1, :]))
                mean_neighbor = np.mean(neighbor_means) if neighbor_means else 0.0
                peak_sharpness = peak_mag / mean_neighbor if mean_neighbor > 1e-12 else float("nan")

                print(f"Tempo max/min/mean/std: {np.max(dominant_per_frame):.2f}/{np.min(dominant_per_frame):.2f}/{np.mean(dominant_per_frame):.2f}/{tempo_stability:.2f} BPM")
                print(f"Global tempo estimate: {global_bpm:.0f} BPM")
                print(f"Global peak ratio (1st/2nd): {peak_ratio:.2f}" if not np.isnan(peak_ratio) else "Global peak ratio (1st/2nd): N/A")
                print(f"Global tempo peak magnitude: {peak_mag:.2f}")
                print(f"Dominant tempo std over time: {tempo_stability:.2f} BPM")

                # Compare the global estimate with the BPM recorded in the beats log, if any.
                beats_info = _find_beats_log_info(novelty_filename)
                implied_bpm = beats_info.get("implied_bpm", "NOT FOUND")
                if isinstance(implied_bpm, (int, float)):
                    print(f"Tempo agreement (|global - beat|): {abs(global_bpm - implied_bpm):.2f} BPM")
                else:
                    print("Tempo agreement (|global - beat|): NOT FOUND")

                print(f"Global tempo peak sharpness: {peak_sharpness:.2f}" if not np.isnan(peak_sharpness) else "Global tempo peak sharpness: N/A")

                # Optional advanced diagnostics
                beats_path = _find_beats_for_novelty(novelty_filename)
                novelty_path = DERIVED_DIR / "novelty" / novelty_filename
                meter_info = _find_meter_log_info(novelty_filename)
                beats_per_bar = meter_info.get("beats_per_bar")
                beats_arr = None
                if beats_path and beats_path.exists():
                    beats_arr = np.load(beats_path, allow_pickle=False).flatten()
                    if len(beats_arr) >= 2 and global_bpm > 0:
                        # Fractional beat position of every beat at the global tempo,
                        # measured from the first beat.
                        # assumes beats_arr holds beat times in seconds — TODO confirm
                        phases = ((beats_arr - beats_arr[0]) * global_bpm / 60.0) % 1.0
                        print(f"Beat phase variance: {float(np.var(phases)):.4f}")
                if novelty_path.exists() and beats_arr is not None and isinstance(beats_per_bar, int) and beats_per_bar >= 2:
                    nov = np.load(novelty_path, allow_pickle=False).flatten()
                    # NOTE(review): the novelty feature rate is hard-coded to 100 Hz here;
                    # log_info["feature_fs_hz"] may carry the actual rate — confirm.
                    fs_nov = 100.0
                    # Every beats_per_bar-th beat, starting at index 0, is treated as a downbeat.
                    downbeat_idx = np.arange(0, len(beats_arr), beats_per_bar)
                    other_idx = np.setdiff1d(np.arange(len(beats_arr)), downbeat_idx)
                    if len(downbeat_idx) > 0 and len(other_idx) > 0:
                        # Map beat values to novelty sample indices, clipped to the valid range.
                        di = np.clip(np.round(beats_arr[downbeat_idx] * fs_nov).astype(int), 0, len(nov) - 1)
                        oi = np.clip(np.round(beats_arr[other_idx] * fs_nov).astype(int), 0, len(nov) - 1)
                        mean_down = float(np.mean(nov[di]))
                        mean_other = float(np.mean(nov[oi]))
                        ratio = mean_down / mean_other if mean_other > 1e-12 else float("nan")
                        print(f"Downbeat strength ratio: {ratio:.2f}" if not np.isnan(ratio) else "Downbeat strength ratio: N/A")

    # Log-derived details are printed whether or not the tempogram file was found.
    nf = log_info["num_features"]
    fs = log_info["feature_fs_hz"]
    n_val = log_info["N"]
    h_val = log_info["H"]
    tempo_res = log_info["tempo_resolution_bpm"]

    print(f"Feat n:   {nf}")
    fs_str = f"{fs} Hz" if isinstance(fs, (int, float)) else str(fs)
    print(f"Feat fs:  {fs_str}")
    print(f"N:        {n_val}")
    print(f"H:        {h_val}")
    if isinstance(tempo_res, (int, float)):
        print(f"Tempo d:  {tempo_res:.2f} bpm")


# Directory under DERIVED_DIR where beats .npy files are looked up.
BEATS_DIR = DERIVED_DIR / "beats"


def _find_beats_for_novelty(novelty_filename: str) -> Path | None:
    """Find the beats file that belongs to a novelty file.

    Resolution order:
      1. A ``*_beats.log`` line that mentions the novelty filename and names an
         output via ``success -> <file>.npy`` that exists in ``BEATS_DIR``
         (logs are scanned in sorted order).
      2. Otherwise, ``<track>_beats.npy`` in ``BEATS_DIR``.

    Returns:
        The beats path, or ``None`` when ``BEATS_DIR`` is missing or nothing
        matches.
    """
    if not BEATS_DIR.exists():
        return None

    success_out_re = re.compile(r"success\s*->\s*(\S+\.npy)")
    for log_path in sorted(DERIVED_LOGS_DIR.glob("*_beats.log")):
        try:
            text = log_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            continue
        if novelty_filename not in text:
            continue
        for line in text.splitlines():
            if novelty_filename in line and "->" in line:
                m = success_out_re.search(line)
                if m:
                    p = BEATS_DIR / m.group(1)
                    if p.exists():
                        return p
        # No early return here: a log that mentions the file without a usable
        # output (failed run, deleted file) must not hide later logs or the
        # track-name fallback below.

    # Fallback: beats file named after the track
    track = track_name_from_novelty_stem(Path(novelty_filename).stem)
    p = BEATS_DIR / f"{track}_beats.npy"
    return p if p.exists() else None


def _find_beats_log_info(novelty_filename: str) -> dict[str, str | float]:
    """Read the beats run details for a novelty file from the ``*_beats.log`` files.

    The first log line that mentions the filename together with ``->`` selects
    the entry; the details are parsed from the line that follows it. Fields
    that could not be parsed keep the string ``"NOT FOUND"``.
    """
    # Result keys in regex-group order, each with the converter for its raw text.
    fields = (
        ("num_beats", int),
        ("implied_bpm", float),
        ("shape", str),
        ("dtype", str),
        ("ibi_min", float),
        ("ibi_max", float),
        ("ibi_mean", float),
        ("ibi_std", float),
        ("t_first", float),
        ("t_last", float),
        ("duration", float),
        ("coverage", float),
    )
    info: dict[str, str | float] = {key: "NOT FOUND" for key, _ in fields}
    if not DERIVED_LOGS_DIR.exists():
        return info

    detail_pattern = re.compile(
        r"beats:\s*n=(\d+)\s*\|\s*bpm=([\d.]+)\s*\|\s*arr:\s*shape=\(([^)]+)\)\s+dtype=(\S+)\s+"
        r"min/max/mean/std=([\d.]+)/([\d.]+)/([\d.]+)/([\d.]+)\s*\|\s*"
        r"t0=([\d.]+)\s+tLast=([\d.]+)\s+dur=([\d.]+)\s*\|\s*coverage=([\d.]+)"
    )

    for log_file in sorted(DERIVED_LOGS_DIR.glob("*_beats.log")):
        try:
            contents = log_file.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            continue
        if novelty_filename not in contents:
            continue

        rows = contents.splitlines()
        anchor = next(
            (idx for idx, row in enumerate(rows) if novelty_filename in row and "->" in row),
            None,
        )
        if anchor is None:
            continue

        # The details sit on the line right after the matching entry.
        detail = rows[anchor + 1].strip() if anchor + 1 < len(rows) else ""
        parsed = detail_pattern.search(detail)
        if parsed:
            for (key, convert), raw in zip(fields, parsed.groups()):
                info[key] = convert(raw)
        return info
    return info


def inspect_beats(novelty_filename: str) -> None:
    """Print a summary of the beats file and beats log entry for a novelty .npy filename.

    Array metadata is printed when a beats file is found; the log-derived
    values are printed either way, with unparsed fields shown as returned by
    the log lookup.
    """
    beats_path = _find_beats_for_novelty(novelty_filename)
    details = _find_beats_log_info(novelty_filename)

    def _is_number(value) -> bool:
        return isinstance(value, (int, float))

    def _fmt(value, spec: str, unit: str = "") -> str:
        # Format numbers with the given spec; print placeholders verbatim.
        return f"{value:{spec}}{unit}" if _is_number(value) else str(value)

    print("=" * 80)
    print("******  BEATS  ******")
    if beats_path is not None and beats_path.exists():
        beats = np.load(beats_path, allow_pickle=False)
        print(f"File:   {beats_path}")
        print(f"Shape:  {beats.shape}")
        print(f"Dtype:  {beats.dtype}")
        print(f"Dim:    {beats.ndim}")
        print(f"Size:   {beats.size}")
        if beats.size:
            summary = "/".join(f"{stat(beats):.2f}" for stat in (np.max, np.min, np.mean, np.std))
            print(f"Max/min/mean/std: {summary}")
    else:
        print("Beats not found")

    print(f"Num beats:   {details['num_beats']}")
    print(f"Implied BPM: {_fmt(details['implied_bpm'], '.1f')}")

    # The inter-beat-interval fields are parsed together, so ibi_min stands in for all four.
    if _is_number(details["ibi_min"]):
        ibi = "/".join(f"{details[key]:.3f}" for key in ("ibi_max", "ibi_min", "ibi_mean", "ibi_std"))
        print(f"IBI max/min/mean/std: {ibi} s")
    else:
        print(f"IBI:         {details['ibi_min']}")

    first_beat = details["t_first"]
    last_beat = details["t_last"]
    if _is_number(first_beat) and _is_number(last_beat):
        print(f"t first:     {first_beat:.3f} s")
        print(f"t last:      {last_beat:.3f} s")
    else:
        print(f"t first:     {first_beat}")
        print(f"t last:      {last_beat}")

    print(f"Duration:    {_fmt(details['duration'], '.3f', ' s')}")
    print(f"Coverage:    {_fmt(details['coverage'], '.2f')}")


# Directory under DERIVED_DIR where meter .npy files are looked up.
METER_DIR = DERIVED_DIR / "meter"


def _find_meter_for_novelty(novelty_filename: str) -> Path | None:
    """Return ``METER_DIR/<track>_meter.npy`` for a novelty file, or None if it is absent.

    The track name is the part of the novelty filename stem before ``_novelty_``.
    """
    if METER_DIR.exists():
        track = track_name_from_novelty_stem(Path(novelty_filename).stem)
        candidate = METER_DIR / f"{track}_meter.npy"
        if candidate.exists():
            return candidate
    return None


def _find_meter_log_info(novelty_filename: str) -> dict[str, str | float]:
    """Read the meter run details for a novelty file's track from the ``*_meter.log`` files.

    The entry is located by the line that mentions ``<track>_meter.npy``
    together with ``output:``; the two lines after it carry the details.
    Fields that could not be parsed keep the string ``"NOT FOUND"``.
    """
    info: dict[str, str | float] = dict.fromkeys(
        (
            "head_in",
            "num_beats",
            "t_first_beat",
            "t_last_beat",
            "beats_per_bar",
            "label_shape",
            "bar_count",
            "beat_counts",
            "head_in_nearest_beat",
            "head_in_offset",
        ),
        "NOT FOUND",
    )
    if not DERIVED_LOGS_DIR.exists():
        return info

    track = track_name_from_novelty_stem(Path(novelty_filename).stem)
    meter_name = f"{track}_meter.npy"
    first_pattern = re.compile(
        r"head_in=([\d.]+)s\s*\|\s*beats:\s*(\d+)\s*\(([\d.]+)s→([\d.]+)s\)\s*\|\s*"
        r"beats_per_bar=(\d+)\s*\|\s*label_shape=\(([^)]+)\)"
    )
    second_pattern = re.compile(
        r"bar_count=(\d+)\s*\|\s*beat_counts=(\{[^}]+\})\s*\|\s*"
        r"head_in_nearest_beat=([\d.]+)s\s*\(offset=([+-]?[\d.]+)s\)"
    )

    for log_file in sorted(DERIVED_LOGS_DIR.glob("*_meter.log")):
        try:
            contents = log_file.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            continue
        if meter_name not in contents:
            continue

        rows = contents.splitlines()
        anchor = next(
            (idx for idx, row in enumerate(rows) if meter_name in row and "output:" in row),
            None,
        )
        if anchor is None:
            continue

        # Take the two lines after the entry, padding with "" near the end of the file.
        following = [row.strip() for row in rows[anchor + 1 : anchor + 3]]
        following += [""] * (2 - len(following))
        first_detail, second_detail = following

        first = first_pattern.search(first_detail)
        if first:
            head_in, num_beats, t_first, t_last, per_bar, label_shape = first.groups()
            info["head_in"] = float(head_in)
            info["num_beats"] = int(num_beats)
            info["t_first_beat"] = float(t_first)
            info["t_last_beat"] = float(t_last)
            info["beats_per_bar"] = int(per_bar)
            info["label_shape"] = label_shape
        second = second_pattern.search(second_detail)
        if second:
            bar_count, beat_counts, nearest, offset = second.groups()
            info["bar_count"] = int(bar_count)
            info["beat_counts"] = beat_counts
            info["head_in_nearest_beat"] = float(nearest)
            info["head_in_offset"] = float(offset)
        return info
    return info


def inspect_meter(novelty_filename: str) -> None:
    """Print a summary of the meter file and meter log entry for a novelty .npy filename.

    Array metadata is printed when a meter file is found; the log-derived
    values are printed either way, with unparsed fields shown as returned by
    the log lookup.
    """
    meter_path = _find_meter_for_novelty(novelty_filename)
    details = _find_meter_log_info(novelty_filename)

    def _is_number(value) -> bool:
        return isinstance(value, (int, float))

    print("=" * 80)
    print("******  METER  ******")
    if meter_path is not None and meter_path.exists():
        labels = np.load(meter_path, allow_pickle=False)
        print(f"File:   {meter_path}")
        print(f"Shape:  {labels.shape}")
        print(f"Dtype:  {labels.dtype}")
        print(f"Dim:    {labels.ndim}")
        print(f"Size:   {labels.size}")
    else:
        print("Meter not found")

    print(f"Head in:     {details['head_in']}")
    print(f"Num beats:   {details['num_beats']}")

    first_beat = details["t_first_beat"]
    last_beat = details["t_last_beat"]
    if _is_number(first_beat) and _is_number(last_beat):
        print(f"t first:     {first_beat:.3f} s")
        print(f"t last:      {last_beat:.3f} s")
    else:
        print(f"t first:     {first_beat}")
        print(f"t last:      {last_beat}")

    print(f"Beats per bar: {details['beats_per_bar']}")
    print(f"Label shape:   {details['label_shape']}")
    print(f"Bar count:    {details['bar_count']}")
    print(f"Beat counts:  {details['beat_counts']}")

    nearest_beat = details["head_in_nearest_beat"]
    head_offset = details["head_in_offset"]
    if _is_number(nearest_beat) and _is_number(head_offset):
        print(f"Head in nearest beat: {nearest_beat:.3f} s (offset={head_offset:+.3f} s)")
    else:
        print(f"Head in nearest: {nearest_beat}")

In [2]:
# Choose the novelty file to inspect. Only inspect_novelty takes the full path;
# the other inspectors look up their files from the novelty filename.
base_dir = DERIVED_DIR
next_dir = "novelty"
file_name = "YTB-005_novelty_spectrum_1024-256-100.0-10.npy"
npy_path = base_dir / next_dir / file_name

# Print one report section per artefact.
inspect_wav(file_name)
inspect_novelty(npy_path)
inspect_tempogram(file_name)
inspect_beats(file_name)
inspect_meter(file_name)


******  WAVE  ******
File:   /Users/petermynett/dev/active/dijon/data/datasets/raw/audio/YTB-005.wav
Sample rate: 22050 Hz
Duration: 175.485 s
Frames: 3869441
Max/min/mean/std: 0.98/-0.94/0.00/0.17
Markers: F1.A1p -> END
Start: 0.28 s
End: 167.262 s
Duration: 166.982 s = 3681953 f
******  NOVELTY  ******
File:   /Users/petermynett/dev/active/dijon/data/derived/novelty/YTB-005_novelty_spectrum_1024-256-100.0-10.npy
Shape:  (16699,)
Dtype:  float64
Dim:    1
Size:   16699
Max/min/mean/std: 0.89/-0.00/0.03/0.06
N (window): 1024
H (hop):    256
Feature sample rate: 100.000 Hz
Feature sample period: 0.010000 s
******  TEMPOGRAM  ******
File:   /Users/petermynett/dev/active/dijon/data/derived/tempogram/YTB-005_tempogram_fourier_512-1-40-320.npy
Shape:  (281, 16700)  => (tempo bins, time frames)
Dtype:  float64
Dim:    2
Size:   4692700
Magnitude max/min/mean/std: 5.69/0.00/0.73/0.66
Tempo bins: 281 (40-320 BPM inclusive)
Tempo max/min/mean/std: 307.00/40.00/243.62/15.54 BPM
Global tempo esti

In [3]:
# Tempogram info is printed above via `inspect_tempogram(file_name)`
