# DJ Loop Pipeline Notebook Walkthrough

This notebook brings the project together in one place to:

- inspect training outputs
- inspect classification outputs
- preview example audio
- visualize divider grids on top of waveforms

In [ ]:
from pathlib import Path
import json

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import display, Audio

# Locations that may hold the project checkout, probed in order. The first
# one that contains the training script becomes the project root.
root_candidates = [
    Path.cwd(),
    Path(r"C:\Users\dunnm\AppData\Roaming\Code\User\globalStorage\github.remotehub\97e68e6861b5eb731c023ae3ca3e3578\changestore\vscode-vfs-github\mikdunn\dj-loop-pipeline"),
    Path(r"vscode-vfs://github/mikdunn/dj-loop-pipeline"),
]

for candidate in root_candidates:
    if (candidate / "train_multilabel_loop_classifier.py").exists():
        ROOT = candidate
        break
else:
    # No candidate matched: fall back to the current working directory.
    ROOT = Path.cwd()

# Directory where the training / analysis scripts write their artifacts.
MODELS = ROOT / 'training' / 'models'

print('Project root:', ROOT)
print('Models dir exists:', MODELS.exists())

## 1) Main training reports

In [ ]:
def read_json(path: Path):
    """Return the decoded JSON content of ``path``.

    When the file does not exist, a ``Missing: <path>`` notice is printed
    and ``None`` is returned instead of raising.
    """
    if path.exists():
        with path.open('r', encoding='utf-8') as handle:
            return json.load(handle)
    print(f'Missing: {path}')
    return None


report_ml = read_json(MODELS / 'loop_multilabel_report_drumbreaks.json')
report_struct = read_json(MODELS / 'loop_structure_classifier_report_smoke.json')

# One-row summary of the multi-label training report.
if report_ml:
    val_metrics = report_ml.get('val_metrics', {})
    test_metrics = report_ml.get('test_metrics', {})
    ml_summary = {
        'n_rows': report_ml.get('n_rows'),
        'split_strategy': report_ml.get('split_strategy', 'n/a'),
        'val_micro_f1_calibrated': val_metrics.get('micro_f1_calibrated'),
        'val_macro_f1_calibrated': val_metrics.get('macro_f1_calibrated'),
        'test_micro_f1_calibrated': test_metrics.get('micro_f1_calibrated'),
        'test_macro_f1_calibrated': test_metrics.get('macro_f1_calibrated'),
    }
    display(pd.DataFrame([ml_summary]))

# One-row summary of the structure-classifier report.
if report_struct:
    cls_report = report_struct.get('report', {})
    struct_summary = {
        'n_rows': report_struct.get('n_rows'),
        'n_train': report_struct.get('n_train'),
        'n_test': report_struct.get('n_test'),
        'weighted_f1': cls_report.get('weighted avg', {}).get('f1-score'),
        'macro_f1': cls_report.get('macro avg', {}).get('f1-score'),
    }
    display(pd.DataFrame([struct_summary]))

## 2) Sweep leaderboard (if available)

In [ ]:
# Show the first ten rows of the sweep leaderboard when the CSV is present.
leaderboard_csv = MODELS / 'loop_multilabel_leaderboard_smoke.csv'

if not leaderboard_csv.exists():
    print('No leaderboard CSV found at', leaderboard_csv)
else:
    lb = pd.read_csv(leaderboard_csv)
    display(lb.head(10))

## 3) Main output tables

In [ ]:
# Output tables to preview, keyed by a short display name.
paths = {
    'multilabel_predictions': MODELS / 'loop_multilabel_predictions_drumbreaks.csv',
    'structure_analysis': MODELS / 'loop_structure_analysis_smoke.csv',
    'structure_predictions': MODELS / 'loop_structure_predictions_smoke.csv',
}

for name, p in paths.items():
    print(f'\n{name}: {p}')
    if not p.exists():
        print('missing')
        continue
    # Report the row count and show the first five rows of each table.
    df = pd.read_csv(p)
    print('rows:', len(df))
    display(df.head(5))

## 4) Example audio files

In [ ]:
# Load the structure analysis and collect the audio paths of its first
# three detail entries as listening examples.
analysis_json = MODELS / 'loop_structure_analysis_smoke.json'
analysis = read_json(analysis_json)

example_files = []
if analysis and analysis.get('details'):
    example_files.extend(Path(row['file']) for row in analysis['details'][:3])

print('Example files:')
for i, f in enumerate(example_files, start=1):
    print(f'{i}. {f}')

In [ ]:
# Play first example full audio
if not example_files or not example_files[0].exists():
    print('No playable example found.')
else:
    # Decode to mono at 22050 Hz, report the duration, then embed a player.
    y, sr = librosa.load(str(example_files[0]), sr=22050, mono=True)
    duration_sec = round(len(y) / sr, 2)
    print('Duration (sec):', duration_sec)
    display(Audio(y, rate=sr))

## 5) Grid visualization (dividers + beats)

In [ ]:
def draw_grid_from_analysis(detail: dict, sr: int = 22050, max_seconds: float = 45.0):
    """Plot one analyzed file's waveform with beat and divider overlays.

    Args:
        detail: One entry of the analysis ``details`` list. The keys read
            are ``file``, ``divider_boundaries_sec``,
            ``divider_drum_predictions``, ``chosen_dividers`` and
            ``structure_pattern``.
        sr: Sample rate handed to ``librosa.load``.
        max_seconds: Only this many leading seconds of audio are plotted.

    Returns:
        None. A notice is printed and nothing is drawn when the audio file
        is missing or yields no samples.
    """
    f = Path(detail['file'])
    if not f.exists():
        print('Missing audio:', f)
        return

    y, fs = librosa.load(str(f), sr=sr, mono=True)
    y = y[:int(max_seconds * fs)]
    if len(y) == 0:
        # np.max(...) and t[-1] below both fail on an empty signal.
        print('Empty audio:', f)
        return

    beat_frames = librosa.beat.beat_track(y=y, sr=fs, units='frames')[1]
    beat_times = librosa.frames_to_time(beat_frames, sr=fs)

    # ``or []`` also covers keys that are present but stored as JSON null.
    divider_boundaries = detail.get('divider_boundaries_sec') or []
    divider_preds = detail.get('divider_drum_predictions') or []

    t = np.arange(len(y)) / fs
    t_end = t[-1]  # time of the last plotted sample; overlays past it are skipped
    y_peak = float(np.max(np.abs(y)) + 1e-6)

    plt.figure(figsize=(16, 4))
    plt.plot(t, y, linewidth=0.7, alpha=0.8)

    # Beat grid (light gray)
    for bt in beat_times:
        if bt <= t_end:
            plt.axvline(bt, color='gray', alpha=0.20, linewidth=0.8)

    # Divider boundaries (red) + divider index labels. The final boundary
    # closes the last segment, so it gets a line but no "D<n>" label.
    last_index = len(divider_boundaries) - 1
    for i, d in enumerate(divider_boundaries):
        if d > t_end:
            continue
        plt.axvline(d, color='red', alpha=0.75, linewidth=2.0)
        if i < last_index:
            plt.text(d, 0.92 * y_peak, f'D{i+1}', color='red', fontsize=9)

    # Overlay predicted drum label and confidence per divider at segment
    # midpoints. zip() stops at whichever runs out first: predictions or
    # (left, right) boundary pairs.
    segments = zip(divider_boundaries, divider_boundaries[1:])
    for pred, (left, right) in zip(divider_preds, segments):
        mid = 0.5 * (left + right)
        if mid > t_end:
            continue
        lbl = pred.get('predicted_label', 'n/a')
        conf = float(pred.get('confidence', 0.0))
        txt = f"{lbl} ({conf:.2f})"
        plt.text(
            mid,
            0.78 * y_peak,
            txt,
            color='darkblue',
            fontsize=8,
            ha='center',
            va='top',
            bbox=dict(boxstyle='round,pad=0.2', facecolor='white', alpha=0.65, edgecolor='none'),
        )

    plt.title(f"{f.name} | dividers={detail.get('chosen_dividers')} | pattern={detail.get('structure_pattern')}")
    plt.xlabel('Time (s)')
    plt.ylabel('Amplitude')
    plt.tight_layout()
    plt.show()


if analysis and analysis.get('details'):
    draw_grid_from_analysis(analysis['details'][0])
else:
    print('No analysis details found.')

## 6) Superimposition diagnostics

In [ ]:
# Show the structure scores of the first analyzed file, followed by its
# per-divider drum predictions.
if analysis and analysis.get('details'):
    d0 = analysis['details'][0]
    cols = ['repetition_score', 'half_similarity', 'superimpose_similarity', 'structure_score', 'chosen_dividers', 'structure_pattern']
    display(pd.DataFrame([{k: d0.get(k) for k in cols}]))

    # Show per-divider drum labels and confidence
    preds = d0.get('divider_drum_predictions') or []
    if preds:
        pred_cols = ['divider_index', 'predicted_label', 'confidence']
        # reindex() yields NaN for a column that no prediction record
        # carries, where plain [[...]] selection would raise KeyError.
        display(pd.DataFrame(preds).reindex(columns=pred_cols))
else:
    print('No structure analysis available.')

## 7) Listen to divider slices

In [ ]:
# Play each divider segment of the first analyzed file on its own.
if analysis and analysis.get('details'):
    d0 = analysis['details'][0]
    f = Path(d0['file'])
    if f.exists():
        y, sr = librosa.load(str(f), sr=22050, mono=True)
        # ``or []`` also covers keys that are present but stored as JSON null.
        bounds = d0.get('divider_boundaries_sec') or []
        divider_preds = d0.get('divider_drum_predictions') or []

        # Consecutive boundary pairs delimit the segments.
        for i, (start_sec, end_sec) in enumerate(zip(bounds, bounds[1:])):
            s = int(start_sec * sr)
            e = int(end_sec * sr)
            clip = y[s:e]

            # There may be fewer predictions than segments (or none at all);
            # fall back to 'n/a' rather than indexing past the end.
            if i < len(divider_preds):
                label = divider_preds[i].get('predicted_label', 'n/a')
            else:
                label = 'n/a'

            print(f'Divider {i+1} | {start_sec:.2f}s to {end_sec:.2f}s | label={label}')

            if len(clip) == 0:
                # Boundaries beyond the loaded audio give nothing to play.
                print('  (segment lies outside the loaded audio; skipped)')
                continue
            display(Audio(clip, rate=sr))
    else:
        print('Audio file missing for divider playback.')
else:
    print('No analysis details available.')

## 8) Optional: rerun latest structure analysis from notebook

In [ ]:
# Uncomment and run to regenerate outputs from notebook
#
# NOTE(review): the script path and both output paths are relative, so the
# command presumably has to be started with the project root as the working
# directory; confirm before running. The '--folder' value is a
# machine-specific location and needs adjusting on other machines.
#
# import subprocess
# cmd = [
#     'python', 'analyze_loop_structure.py',
#     '--folder', r'C:\Users\dunnm\Downloads\Drum Breaks',
#     '--out_json', 'training/models/loop_structure_analysis_notebook.json',
#     '--out_csv', 'training/models/loop_structure_analysis_notebook.csv',
# ]
# subprocess.run(cmd, check=True)
# print('Done')

## 9) Multi-label prediction visuals

This section visualizes multi-label outputs using:

- tag prevalence (count of predicted tags)
- average predicted probability by tag
- top-tag co-occurrence heatmap

In [ ]:
# Build multi-label visual summaries from prediction CSV
import itertools

# Reuse MODELS when an earlier cell defined it; otherwise assume the
# notebook was started from the project root.
models_dir = MODELS if 'MODELS' in globals() else (Path.cwd() / 'training' / 'models')
pred_csv = models_dir / 'loop_multilabel_predictions_drumbreaks.csv'

if not pred_csv.exists():
    print('Missing prediction CSV:', pred_csv)
else:
    pred_df = pd.read_csv(pred_csv)
    print('Loaded:', pred_csv)
    print('Rows:', len(pred_df))

    # Comma-separated tag strings split into one list per row; shared by the
    # prevalence count and the co-occurrence matrix below.
    split_tags = (
        pred_df.get('predicted_tags', pd.Series(dtype=str))
        .fillna('')
        .astype(str)
        .str.split(',')
    )

    # 1) Tag prevalence from predicted tag strings
    tags_series = split_tags.explode().str.strip()
    tags_series = tags_series[tags_series != '']
    tag_counts = tags_series.value_counts().sort_values(ascending=False)

    # 2) Average probability by tag from prob_* columns
    prob_cols = [c for c in pred_df.columns if c.startswith('prob_')]
    avg_probs = None
    if prob_cols:
        avg_probs = pred_df[prob_cols].mean().sort_values(ascending=False)
        avg_probs.index = [c.replace('prob_', '') for c in avg_probs.index]

    # 3) Co-occurrence matrix on top tags
    top_tags = tag_counts.head(10).index.tolist()
    co_mat = pd.DataFrame(0, index=top_tags, columns=top_tags, dtype=float)
    per_row_tags = split_tags.apply(
        lambda xs: sorted({x.strip() for x in xs if x and x.strip()})
    )
    for tag_list in per_row_tags:
        filtered = [t for t in tag_list if t in top_tags]
        # ``filtered`` holds unique tags, so pairing with replacement yields
        # each (t, t) diagonal entry once plus every unordered pair once.
        for a, b in itertools.combinations_with_replacement(filtered, 2):
            if a == b:
                co_mat.loc[a, a] += 1
            else:
                co_mat.loc[a, b] += 1
                co_mat.loc[b, a] += 1

    # Plot figure
    fig = plt.figure(figsize=(20, 5))

    # Panel 1: how often each tag was predicted.
    ax1 = fig.add_subplot(1, 3, 1)
    if not tag_counts.empty:
        tag_counts.head(15).plot(kind='bar', ax=ax1, color='steelblue')
    ax1.set_title('Predicted Tag Prevalence (Top 15)')
    ax1.set_xlabel('Tag')
    ax1.set_ylabel('Count')
    ax1.tick_params(axis='x', rotation=45)

    # Panel 2: mean predicted probability per tag.
    ax2 = fig.add_subplot(1, 3, 2)
    if avg_probs is not None and not avg_probs.empty:
        avg_probs.head(15).plot(kind='bar', ax=ax2, color='seagreen')
    ax2.set_title('Average Predicted Probability by Tag (Top 15)')
    ax2.set_xlabel('Tag')
    ax2.set_ylabel('Mean Probability')
    ax2.tick_params(axis='x', rotation=45)

    # Panel 3: co-occurrence heatmap with the non-zero counts written in.
    ax3 = fig.add_subplot(1, 3, 3)
    if not co_mat.empty:
        n_tags = len(top_tags)
        tick_positions = np.arange(n_tags)
        im = ax3.imshow(co_mat.values, cmap='magma', aspect='auto')
        ax3.set_xticks(tick_positions)
        ax3.set_yticks(tick_positions)
        ax3.set_xticklabels(top_tags, rotation=45, ha='right')
        ax3.set_yticklabels(top_tags)
        ax3.set_title('Tag Co-occurrence (Top 10)')
        for i in range(n_tags):
            for j in range(n_tags):
                val = int(co_mat.iloc[i, j])
                if val > 0:
                    ax3.text(j, i, str(val), ha='center', va='center', color='white', fontsize=7)
        fig.colorbar(im, ax=ax3, fraction=0.046, pad=0.04)

    plt.tight_layout()
    plt.show()

    display(tag_counts.head(20).rename('count').to_frame())