# Bass Matching Demo with Colored Notation

This notebook demonstrates the `percent_bass_pc_match` function from `music_df.harmony.matching`,
with **colored notation** to visually highlight which bass notes match their expected pitch classes.

- **Green** bass notes: match the expected bass PC (e.g., a chord in root position)
- **Red** bass notes: don't match the expected bass PC (e.g., an inverted chord or a non-chord bass note)

In [None]:
import sys
from pathlib import Path
# Put the parent of the current working directory at the front of the import
# search path. Presumably this lets the notebook import the local `music_df`
# package when it is run from a subdirectory of the repository -- confirm
# against the repository layout.
sys.path.insert(0, str(Path.cwd().parent))

In [None]:
import verovio
from IPython.display import SVG, display
import pandas as pd

from music_df.humdrum_export import df2hum
from music_df.harmony.matching import percent_bass_pc_match

In [None]:
def show_notation(humdrum_str: str, width: int = 800, scale: int = 50) -> None:
    """Engrave a Humdrum score with Verovio and display it inline.

    Args:
        humdrum_str: Humdrum-encoded score text handed to the Verovio toolkit.
        width: Value passed as Verovio's ``pageWidth`` option.
        scale: Value passed as Verovio's ``scale`` option.

    Every page the toolkit reports is rendered to SVG and displayed in order.
    """
    options = {
        "pageWidth": width,
        "scale": scale,
        "adjustPageHeight": True,
    }
    toolkit = verovio.toolkit()
    toolkit.setOptions(options)
    toolkit.loadData(humdrum_str)

    # Pages are requested by 1-based number, first through last.
    page_numbers = range(1, toolkit.getPageCount() + 1)
    for svg_markup in map(toolkit.renderToSVG, page_numbers):
        display(SVG(svg_markup))


def create_music_df(notes: list[tuple], bar_length: float = 4.0) -> pd.DataFrame:
    """Create a music_df DataFrame from a list of note tuples.

    Args:
        notes: Note tuples of the form ``(onset, release, pitch, spelling)``.
            Onset, release and pitch are converted to ``float``.
        bar_length: Distance between consecutive bar events. Must be positive.

    Returns:
        A DataFrame with columns ``type``, ``onset``, ``release``, ``pitch``
        and ``spelling``, holding one ``"note"`` row per input tuple plus
        ``"bar"`` rows at ``0, bar_length, 2 * bar_length, ...`` up to and
        including the latest release time. Rows are sorted by onset and then
        by type, so a bar precedes the notes that share its onset.

    Raises:
        ValueError: If ``bar_length`` is zero or negative; the bar loop below
            could never advance past the final release and would not terminate.
    """
    if bar_length <= 0:
        raise ValueError(f"bar_length must be positive, got {bar_length!r}")

    rows = [
        {
            "type": "note",
            "onset": float(onset),
            "release": float(release),
            "pitch": float(pitch),
            "spelling": spelling,
        }
        for onset, release, pitch, spelling in notes
    ]

    # Seed with 0.0 so an empty note list still yields the bar at onset 0.
    max_release = max([0.0, *(row["release"] for row in rows)])

    # Add bar events (use NaN for non-applicable fields)
    nan = float("nan")
    bar_onset = 0.0
    while bar_onset <= max_release:
        rows.append({
            "type": "bar",
            "onset": bar_onset,
            "release": nan,
            "pitch": nan,
            "spelling": nan,
        })
        bar_onset += bar_length

    df = pd.DataFrame(rows)
    # "bar" sorts before "note", which places each bar ahead of notes at the same onset.
    df = df.sort_values(["onset", "type"]).reset_index(drop=True)
    return df


# Hex colors used to paint bass notes in the rendered notation; the example
# cells hand them to `df2hum` through its `color_mapping` argument.
MATCH_COLOR = "#00AA00"  # Green: bass note matches the expected bass pitch class
NO_MATCH_COLOR = "#DD0000"  # Red: bass note does not match the expected bass pitch class

## Example 1: Root Position Chords (100% bass match)

C major (C-E-G) and G major (G-B-D) triads in root position.
Bass notes C and G match their expected PCs, so they appear **green**.

In [None]:
# Note tuples are (onset, release, MIDI pitch, spelling).
df1 = create_music_df(
    # C major triad over beats 0-2; the lowest note, C3 (48), is the bass.
    [(0, 2, pitch, name) for pitch, name in ((48, "C"), (64, "E"), (67, "G"))]
    # G major triad over beats 2-4; the lowest note, G2 (43), is the bass.
    + [(2, 4, pitch, name) for pitch, name in ((43, "G"), (59, "B"), (62, "D"))]
)

# One chord annotation per row: C major, then G major.
chord_df1 = pd.DataFrame(
    [(0.0, 2.0, "047"), (2.0, 4.0, "72B")],
    columns=["onset", "release", "chord_pcs"],
)

print("Notes:")
display(df1.loc[:, ["onset", "release", "pitch", "spelling"]])
print(
    "\nChords:",
    "  047 = C major (expected bass PC = 0 = C)",
    "  72B = G major (expected bass PC = 7 = G)",
    sep="\n",
)
display(chord_df1)

In [None]:
# Compare the notes with the chord annotations; the result is indexed by
# "music_df" (the annotated notes) and "microaverage" (the match rate).
result1 = percent_bass_pc_match(df1, chord_df1)
result_df1 = result1["music_df"]

print(
    f"Bass match rate: {result1['microaverage']:.0%}",
    "\nNotes with bass match column:",
    sep="\n",
)
display(result_df1.loc[:, ["onset", "pitch", "spelling", "is_bass_match"]])

In [None]:
# Translate the match flags into the category labels that `color_mapping` keys on.
result_df1["color"] = result_df1["is_bass_match"].map(
    {True: "match", False: "no_match"}
)
# Only rows with a non-null `is_bass_match` are flagged for coloring.
result_df1["is_bass"] = ~result_df1["is_bass_match"].isna()

humdrum1 = df2hum(
    result_df1,
    color_mapping={"match": MATCH_COLOR, "no_match": NO_MATCH_COLOR},
    color_mask_col="is_bass",
    color_col="color",
)

print("Bass notes are GREEN (both match their expected pitch class):")
show_notation(humdrum1)

## Example 2: Inverted Chords (0% bass match)

Same C major and G major chords, but in first inversion.
Bass notes E and B don't match expected PCs (0 and 7), so they appear **red**.

In [None]:
# Note tuples are (onset, release, MIDI pitch, spelling).
df2 = create_music_df(
    # C major, first inversion, beats 0-2: bass E3 (52) has PC 4, expected PC 0.
    [(0, 2, pitch, name) for pitch, name in ((52, "E"), (60, "C"), (67, "G"))]
    # G major, first inversion, beats 2-4: bass B2 (47) has PC 11, expected PC 7.
    + [(2, 4, pitch, name) for pitch, name in ((47, "B"), (62, "D"), (67, "G"))]
)

# Same chord annotations as the root-position example.
chord_df2 = pd.DataFrame(
    [(0.0, 2.0, "047"), (2.0, 4.0, "72B")],
    columns=["onset", "release", "chord_pcs"],
)

print("Notes (first inversion):")
display(df2.loc[:, ["onset", "release", "pitch", "spelling"]])

In [None]:
# Compare the inverted voicings with the same chord annotations.
result2 = percent_bass_pc_match(df2, chord_df2)
result_df2 = result2["music_df"]

print(
    f"Bass match rate: {result2['microaverage']:.0%}",
    "\nNotes with bass match column:",
    sep="\n",
)
display(result_df2.loc[:, ["onset", "pitch", "spelling", "is_bass_match"]])

In [None]:
# Translate the match flags into the category labels that `color_mapping` keys on.
result_df2["color"] = result_df2["is_bass_match"].map(
    {True: "match", False: "no_match"}
)
# Only rows with a non-null `is_bass_match` are flagged for coloring.
result_df2["is_bass"] = ~result_df2["is_bass_match"].isna()

humdrum2 = df2hum(
    result_df2,
    color_mapping={"match": MATCH_COLOR, "no_match": NO_MATCH_COLOR},
    color_mask_col="is_bass",
    color_col="color",
)

print("Bass notes are RED (neither matches their expected pitch class):")
show_notation(humdrum2)

## Example 3: Mixed (50% bass match)

First chord in root position (bass matches → green),
second chord inverted (bass doesn't match → red).

In [None]:
# Note tuples are (onset, release, MIDI pitch, spelling).
df3 = create_music_df(
    # C major, root position, beats 0-2: bass C3 (48) has PC 0, expected PC 0 -> match.
    [(0, 2, pitch, name) for pitch, name in ((48, "C"), (64, "E"), (67, "G"))]
    # G major, first inversion, beats 2-4: bass B2 (47) has PC 11, expected PC 7 -> no match.
    + [(2, 4, pitch, name) for pitch, name in ((47, "B"), (62, "D"), (67, "G"))]
)

# Same chord annotations as the previous examples: C major, then G major.
chord_df3 = pd.DataFrame(
    [(0.0, 2.0, "047"), (2.0, 4.0, "72B")],
    columns=["onset", "release", "chord_pcs"],
)

In [None]:
# Compare the mixed voicings with the chord annotations.
result3 = percent_bass_pc_match(df3, chord_df3)
result_df3 = result3["music_df"]

print(
    f"Bass match rate: {result3['microaverage']:.0%}",
    "\nNotes with bass match column:",
    sep="\n",
)
display(result_df3.loc[:, ["onset", "pitch", "spelling", "is_bass_match"]])

In [None]:
# Translate the match flags into the category labels that `color_mapping` keys on.
result_df3["color"] = result_df3["is_bass_match"].map(
    {True: "match", False: "no_match"}
)
# Only rows with a non-null `is_bass_match` are flagged for coloring.
result_df3["is_bass"] = ~result_df3["is_bass_match"].isna()

humdrum3 = df2hum(
    result_df3,
    color_mapping={"match": MATCH_COLOR, "no_match": NO_MATCH_COLOR},
    color_mask_col="is_bass",
    color_col="color",
)

print("First bass note is GREEN (C matches), second is RED (B doesn't match G):")
show_notation(humdrum3)

## Example 4: Duration Weighting Demo

Two bass notes within a single chord:
- C (matching) held for 3 beats → green
- D (non-matching) held for 1 beat → red

- With duration weighting: 3/(3+1) = 75%
- Without duration weighting: 1/2 = 50%

In [None]:
# Note tuples are (onset, release, MIDI pitch, spelling).
df4 = create_music_df(
    # C major triad held for beats 0-3; bass C3 (48) matches the annotated chord.
    [(0, 3, pitch, name) for pitch, name in ((48, "C"), (64, "E"), (67, "G"))]
    # D minor triad on beat 3-4; bass D3 (50) does not match the C major annotation.
    + [(3, 4, pitch, name) for pitch, name in ((50, "D"), (65, "F"), (69, "A"))]
)

# A single C major annotation covers all four beats.
chord_df4 = pd.DataFrame(
    [(0.0, 4.0, "047")],
    columns=["onset", "release", "chord_pcs"],
)

print(
    "Single C major chord spanning 4 beats:",
    "  - C bass (beats 0-3): matches PC 0",
    "  - D bass (beats 3-4): doesn't match PC 0",
    sep="\n",
)
display(df4.loc[:, ["onset", "release", "pitch", "spelling"]])

In [None]:
import math

# Score the same notes twice, toggling only `weight_by_duration`, to contrast
# the duration-weighted rate with the plain per-note rate.
result4_weighted = percent_bass_pc_match(df4, chord_df4, weight_by_duration=True)
result4_unweighted = percent_bass_pc_match(df4, chord_df4, weight_by_duration=False)

print(f"With duration weighting:    {result4_weighted['microaverage']:.0%}")
print(f"Without duration weighting: {result4_unweighted['microaverage']:.0%}")

# The rates are computed floats, so compare with a tolerance instead of `==`;
# an exact comparison would break on any change in how the ratio is accumulated.
assert math.isclose(result4_weighted["microaverage"], 0.75)
assert math.isclose(result4_unweighted["microaverage"], 0.5)

In [None]:
# Render from the duration-weighted result's annotated notes.
result_df4 = result4_weighted["music_df"]
# Translate the match flags into the category labels that `color_mapping` keys on.
result_df4["color"] = result_df4["is_bass_match"].map(
    {True: "match", False: "no_match"}
)
# Only rows with a non-null `is_bass_match` are flagged for coloring.
result_df4["is_bass"] = ~result_df4["is_bass_match"].isna()

humdrum4 = df2hum(
    result_df4,
    color_mapping={"match": MATCH_COLOR, "no_match": NO_MATCH_COLOR},
    color_mask_col="is_bass",
    color_col="color",
)

print("C bass (long, green) contributes more to weighted average than D bass (short, red):")
show_notation(humdrum4)

## Summary

The `percent_bass_pc_match` function:
1. Identifies bass notes (lowest pitch at each onset)
2. Checks if each bass note's pitch class matches the expected bass PC (first char of `chord_pcs`)
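3. Returns match rates (such as `microaverage`, optionally duration-weighted via `weight_by_duration`) along with a `music_df` DataFrame that carries an added `is_bass_match` column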

Using `df2hum` with color mapping:
- `color_col`: column containing color category ("match" or "no_match")
- `color_mask_col`: boolean column for which notes to color (True = bass notes)
- `color_mapping`: dict mapping categories to hex colors