## 00 Soundfile Exploration

Exploring Soundfile Exploration with Python

In [None]:
import numpy as np 
import soundfile as sf 
import pandas as pd 
from sf2utils.sf2parse import Sf2File
import os 
from IPython.display import Audio, display

import logging

# Suppress sf2utils warnings about midi start and stops
# Warning (Ironic): Suppresses all warnings
logging.getLogger().setLevel(logging.ERROR)

# Midi Imports
import tempfile
import pretty_midi
import fluidsynth


from dotenv import load_dotenv

dotenv_path = '/home/robbizorg/classes/RT_MusicGen'
load_dotenv(dotenv_path=dotenv_path)

music_path = os.getenv("music_path")
sf_path = os.getenv('sf_path')

def midi_to_note_name(n):
    """Converts MIDI note number → name like C4."""
    names = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]
    octave = (n // 12) - 1
    name = names[n % 12]
    return f"{name}{octave}"

def inspect_soundfont(sf2_path: str):
    # Load SoundFont
    with open(sf2_path, "rb") as f:
        sf2 = Sf2File(f)

    print(f"Loaded SoundFont: {sf2_path}")
    print("=" * 80)

    for preset in sf2.presets:
        # Skip sentinel preset (EOP = End Of Presets)
        if getattr(preset, "name", None) == "EOP":
            continue

        print(f"\nPreset: {preset.name}  (Bank {preset.bank}, Program {preset.preset})")
        print("-" * 80)

        # To avoid printing exact duplicates
        seen = set()

        # Preset-level bags (can include a "global" bag with no instrument)
        for pbag in preset.bags:
            instrument = pbag.instrument
            if instrument is None:
                # Global bag: carries default gens like global key_range, etc.
                continue

            preset_key_range = pbag.key_range  # may be None

            # Instrument-level bags: usually where samples + key ranges live
            for ibag in instrument.bags:
                sample = ibag.sample
                if sample is None:
                    continue  # e.g., global instrument bag

                # Determine effective key range:
                # 1) instrument bag key_range if present
                # 2) else preset bag key_range
                # 3) else full MIDI range
                if ibag.key_range is not None:
                    lo, hi = ibag.key_range
                elif preset_key_range is not None:
                    lo, hi = preset_key_range
                else:
                    lo, hi = 0, 127

                # Determine root key:
                # - bag.base_note (overriding root key) if present
                # - else sample.original_pitch
                base_note = getattr(ibag, "base_note", None)
                if base_note is not None:
                    root_key = base_note
                else:
                    root_key = sample.original_pitch

                sig = (instrument.name, sample.name, root_key, lo, hi)
                if sig in seen:
                    continue
                seen.add(sig)

                print(f"  Instrument: {instrument.name}")
                print(f"    Sample: {sample.name}")
                print(f"      Root key: {root_key} ({midi_to_note_name(root_key)})")
                print(
                    f"      Key range: {lo}–{hi} "
                    f"({midi_to_note_name(lo)} → {midi_to_note_name(hi)})"
                )

# Render a single note a numpy array
def render_note_to_numpy(
    sf2_path: str,
    midi_pitch: int = 60,      # C4
    velocity: int = 100,
    duration_sec: float = 2.0,
    program: int = 0,          # GM program number
    sample_rate: int = 44100,
):
    """
    Render a single MIDI note from a SoundFont to a NumPy array using FluidSynth.
    Returns: np.ndarray shape (num_samples, 2) for stereo audio.
    """
    # Create synthesizer
    fs = fluidsynth.Synth(samplerate=sample_rate)
    sfid = fs.sfload(sf2_path)
    fs.program_select(0, sfid, 0, program)

    # Start note
    fs.noteon(0, midi_pitch, velocity)

    # Render audio into buffer
    num_frames = int(duration_sec * sample_rate)
    audio = fs.get_samples(num_frames)  # returns float32 array interleaved L/R

    # Stop note
    fs.noteoff(0, midi_pitch)

    # Clean up
    fs.delete()

    # Convert interleaved stereo → shape (N, 2)
    audio = np.array(audio, dtype=np.float32)
    audio = audio.reshape(-1, 2)  # stereo

    return audio

def get_soundfont_structure(sf2_path):
    with open(sf2_path, "rb") as f:
        sf2 = Sf2File(f)

    presets = []

    for preset in sf2.presets:
        if preset.name == "EOP":     # Skip sentinel ending preset
            continue
        presets.append({
            "bank": preset.bank,
            "program": preset.preset,
            "name": preset.name,
            "preset_obj": preset,
        })

    return presets

In [19]:
sf_filepath = os.path.join(sf_path, 'Touhou.sf2')
inspect_soundfont(sf_filepath)

presets = get_soundfont_structure(sf_filepath)

for p in presets[:10]:  # show first 10
    print(p["bank"], p["program"], p["name"])

print("Total presets:", len(presets))

Loaded SoundFont: /data/robbizorg/music/soundfiles/Touhou.sf2

Preset: Grand Piano  (Bank 0, Program 0)
--------------------------------------------------------------------------------
  Instrument: Giga Piano
    Sample: piano_Region_002(L)
      Root key: 36 (C2)
      Key range: 0–38 (C-1 → D2)
  Instrument: Giga Piano
    Sample: piano_Region_002(R)
      Root key: 36 (C2)
      Key range: 0–38 (C-1 → D2)
  Instrument: Giga Piano
    Sample: piano_Region_003(L)
      Root key: 39 (D#2)
      Key range: 39–41 (D#2 → F2)
  Instrument: Giga Piano
    Sample: piano_Region_003(R)
      Root key: 39 (D#2)
      Key range: 39–41 (D#2 → F2)
  Instrument: Giga Piano
    Sample: piano_Region_004(L)
      Root key: 42 (F#2)
      Key range: 42–44 (F#2 → G#2)
  Instrument: Giga Piano
    Sample: piano_Region_004(R)
      Root key: 42 (F#2)
      Key range: 42–44 (F#2 → G#2)
  Instrument: Giga Piano
    Sample: piano_Region_005(L)
      Root key: 45 (A2)
      Key range: 45–47 (A2 → B2)
  Instr

In [16]:
sr = 48000

audio = render_note_to_numpy(
    sf2_path=sf_filepath,
    midi_pitch=72,   # C5
    velocity=100,
    duration_sec=4,
    program=130,        # Grand Piano
    sample_rate=sr
)

print(audio.shape)
print(audio[:10])

display(Audio(audio.T, rate=sr))

(192000, 2)
[[ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [-1.  0.]
 [ 0.  0.]]


fluidsynth: error: There is no preset with bank number 0 and preset number 130 in SoundFont 1


In [23]:
pitch_list = [48, 60, 72]  # C3, C4, C5 for example
velocities = [20, 90, 120]

for p in presets[-5:]:
    bank = p["bank"]
    program = p["program"]
    name = p["name"]

    for pitch in pitch_list:
        for vel in velocities:
            audio = render_note_to_numpy(
                sf2_path=sf_filepath,
                midi_pitch=pitch,
                velocity=vel,
                duration_sec=2.0,
                program=program,
            )

            print(
                f"Preset={name:20s}  "
                f"Prog={program:3d}  Pitch={pitch:3d}  Vel={vel:3d}  "
                f"AudioShape={audio.shape}"
            )

            display(Audio(audio.T, rate=sr))

    break 


Preset=ORCHESTRA             Prog= 48  Pitch= 48  Vel= 20  AudioShape=(88200, 2)


Preset=ORCHESTRA             Prog= 48  Pitch= 48  Vel= 90  AudioShape=(88200, 2)


Preset=ORCHESTRA             Prog= 48  Pitch= 48  Vel=120  AudioShape=(88200, 2)


Preset=ORCHESTRA             Prog= 48  Pitch= 60  Vel= 20  AudioShape=(88200, 2)


Preset=ORCHESTRA             Prog= 48  Pitch= 60  Vel= 90  AudioShape=(88200, 2)


Preset=ORCHESTRA             Prog= 48  Pitch= 60  Vel=120  AudioShape=(88200, 2)


Preset=ORCHESTRA             Prog= 48  Pitch= 72  Vel= 20  AudioShape=(88200, 2)


Preset=ORCHESTRA             Prog= 48  Pitch= 72  Vel= 90  AudioShape=(88200, 2)


Preset=ORCHESTRA             Prog= 48  Pitch= 72  Vel=120  AudioShape=(88200, 2)
