### Snippet to MIDI

In [13]:
import sys
from pathlib import Path

import numpy as np
from music21 import stream, note

# Make modules that live under src/ importable from this notebook.
sys.path.append("src")

# Location of the processed snippet archive.
DATA_PATH = Path("../data/processed/snippets.npz")

# Open the archive and pull out the three arrays used by the cells below.
data = np.load(DATA_PATH)
intervals_arr, durations_arr, song_ids = (
    data[key] for key in ("intervals", "durations", "song_ids")
)

# Display both shapes; the recorded run shows (N, L) for each array.
(intervals_arr.shape, durations_arr.shape)


((34513, 32), (34513, 32))

In [14]:
from music21 import pitch

STEPS_PER_QUARTER = 4  # duration steps per quarter note; must match your preprocessing setting

def snippet_to_stream(interval_seq, duration_seq, base_midi_pitch=60):
    """
    Build a music21 Stream from one snippet.

    The melody is reconstructed with a running MIDI pitch that starts at
    base_midi_pitch (60 = middle C). For every (interval, duration) pair the
    interval is added to the running pitch *before* the note is created, and
    the duration (in steps) is divided by STEPS_PER_QUARTER to obtain the
    note's quarterLength. Pairs are produced by zip, so the shorter of the two
    sequences determines how many notes are emitted.

    Returns the Stream with the notes appended in order.
    """
    melody = stream.Stream()
    running_midi = base_midi_pitch

    for step, length_in_steps in zip(interval_seq, duration_seq):
        # Move the running pitch by this step's interval first.
        running_midi += int(step)

        tone = pitch.Pitch()
        tone.midi = running_midi

        # Steps -> quarter-note units.
        new_note = note.Note(tone)
        new_note.quarterLength = float(length_in_steps) / STEPS_PER_QUARTER

        melody.append(new_note)

    return melody


In [15]:
from music21 import midi

# Output folder for exported snippets; created up front (parents included) if missing.
SNIPPET_MIDI_DIR = Path("../data/processed/snippet_midis")
SNIPPET_MIDI_DIR.mkdir(parents=True, exist_ok=True)

def save_snippet_as_midi(snippet_index, base_midi_pitch=60):
    """
    Export one snippet to a MIDI file and return the file's path.

    snippet_index selects a row of the module-level intervals_arr /
    durations_arr; base_midi_pitch is forwarded to snippet_to_stream. The
    file is written into SNIPPET_MIDI_DIR and its name embeds both the
    snippet index and the song id read from song_ids.

    Raises ValueError when snippet_index is outside the rows of intervals_arr.
    """
    total = intervals_arr.shape[0]
    if snippet_index >= total or snippet_index < 0:
        raise ValueError(f"snippet_index {snippet_index} out of range [0, {total-1}]")

    # Rebuild the melody for the selected row.
    melody = snippet_to_stream(
        intervals_arr[snippet_index],
        durations_arr[snippet_index],
        base_midi_pitch=base_midi_pitch,
    )

    song_id = song_ids[snippet_index]
    out_path = SNIPPET_MIDI_DIR / f"snippet_{snippet_index}_song{song_id}.mid"
    melody.write('midi', fp=str(out_path))
    print(f"Saved snippet {snippet_index} (song_id={song_id}) to {out_path}")
    return out_path


In [27]:
# Export one snippet, chosen by its row index (18106 here), and keep the returned path.
midi_path = save_snippet_as_midi(18106)

# Bare expression so the notebook displays the path of the written file.
midi_path

Saved snippet 18106 (song_id=494) to ../data/processed/snippet_midis/snippet_18106_song494.mid


PosixPath('../data/processed/snippet_midis/snippet_18106_song494.mid')

### Find which snippets were generated from a given song ID

In [17]:
import numpy as np
from pathlib import Path

# SECURITY NOTE: allow_pickle=True lets np.load unpickle object arrays, and
# unpickling can execute arbitrary code. Only open archives written by this
# project's own preprocessing; never point this at an untrusted .npz file.
data = np.load("../data/processed/snippets.npz", allow_pickle=True)
intervals = data["intervals"]
durations = data["durations"]
song_ids = data["song_ids"]
# "midi_filenames" may or may not exist in the archive. Check the archive's
# member list rather than calling data.get(): NpzFile only exposes the Mapping
# API (including .get) on newer NumPy releases, while .files is always there.
midi_filenames = data["midi_filenames"] if "midi_filenames" in data.files else None

def snippet_indices_for_song_id(song_id: int):
    """
    Look up every snippet that belongs to one song.

    Compares the module-level song_ids array against song_id, prints how many
    entries matched, and returns the matching indices as a numpy array.
    """
    belongs_to_song = song_ids == song_id
    matching = np.nonzero(belongs_to_song)[0]
    print(f"Song id {song_id} has {len(matching)} snippets.")
    return matching

# Example:
idxs = snippet_indices_for_song_id(5)
idxs[:]  # full slice: displays every matching index, not just the first few


Song id 5 has 73 snippets.


array([214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226,
       227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
       240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252,
       253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265,
       266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278,
       279, 280, 281, 282, 283, 284, 285, 286])

### Search by Song Name to find which snippets it generated

In [24]:
midi_filenames = data["midi_filenames"]  # shape: (num_songs,)

def snippet_indices_for_filename(filename: str):
    """
    Look up the snippets generated from a MIDI file, given its file name.

    filename must equal an entry of midi_filenames exactly, e.g.
    'fur_elise.mid'. The position of the match inside midi_filenames is used
    as the song id, and the indices of all entries of song_ids equal to that
    id are returned as a numpy array. A one-line summary is printed.

    Raises ValueError when no entry of midi_filenames equals filename.

    NOTE(review): only the first match is used. If midi_filenames can contain
    the same name more than once, the later songs are silently ignored —
    confirm names are unique in the archive.
    """
    # The position in midi_filenames doubles as the song id.
    hits = np.nonzero(midi_filenames == filename)[0]
    if not len(hits):
        raise ValueError(f"Filename {filename} not found in midi_filenames.")
    song_id = int(hits[0])

    snippet_rows = np.nonzero(song_ids == song_id)[0]
    print(f"File {filename} (song_id={song_id}) has {len(snippet_rows)} snippets.")
    return snippet_rows

# Example:
snippet_idxs = snippet_indices_for_filename("Axel_F_1.mid")
snippet_idxs[:]


File Axel_F_1.mid (song_id=4) has 13 snippets.


array([201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213])

### To get which song this snippet was generated from:

In [24]:
import numpy as np

# Re-open the processed archive so this section can run on its own.
# allow_pickle=True unpickles object arrays; only use it on trusted files.
data = np.load("../data/processed/snippets.npz", allow_pickle=True)
intervals, durations, song_ids, midi_filenames = (
    data[key] for key in ("intervals", "durations", "song_ids", "midi_filenames")
)

def describe_snippet(snippet_index):
    """
    Report which song a snippet came from.

    Reads the snippet's song id from the module-level song_ids array, uses it
    to index midi_filenames, prints a one-line summary, and returns the
    file name.
    """
    sid = int(song_ids[snippet_index])
    source_file = midi_filenames[sid]
    print(f"Snippet {snippet_index} comes from song_id={sid}, file={source_file}")
    return source_file

describe_snippet(28122)


Snippet 28122 comes from song_id=727, file=Blame It on the Boogie.mid


'Blame It on the Boogie.mid'

In [11]:
from music21 import converter, instrument, note, chord, stream
from pathlib import Path

# Parse one source MIDI file into a music21 score for inspection.
# Note: this rebinds midi_path, which an earlier cell used for an exported snippet path.
midi_path = Path("../data/mini_dataset/classic/Axel_F_1.mid")
score = converter.parse(str(midi_path))

# In the recorded run this prints only the Score's repr, not a detailed summary.
print(score)  # basic summary


<music21.stream.Score 0x7fef33ad3510>


In [2]:
for i, p in enumerate(score.parts):
    # Identify the part.
    print(f"\n=== PART {i} ===")
    print("repr:", p)
    print("id:", p.id)
    print("partName:", p.partName)

    # Instruments declared in this part, or a placeholder line when there are none.
    insts = list(p.getInstruments())
    if insts:
        for inst in insts:
            print("  Instrument:",
                  f"name='{inst.instrumentName}'",
                  f"bestName='{inst.bestName()}'",
                  f"midiProgram={inst.midiProgram}")
    else:
        print("  Instruments: (none explicitly listed)")

    # Quick stats: count the Note/Chord objects, then average their pitch.
    notes_chords = [
        elem for elem in p.recurse().notes
        if isinstance(elem, (note.Note, chord.Chord))
    ]
    print("  #notes:", len(notes_chords))
    if not notes_chords:
        continue

    # A single note contributes its own MIDI pitch; a chord contributes its highest one.
    pitches = [
        n.pitch.midi if isinstance(n, note.Note)
        else max(nn.pitch.midi for nn in n.notes)
        for n in notes_chords
    ]
    avg_pitch = sum(pitches) / len(pitches)
    print("  avg_pitch:", avg_pitch)



=== PART 0 ===
repr: <music21.stream.Part 0x7f9f15b93a10>
id: 140321240988176
partName: SYNBASS 1
  Instrument: name='Electric Bass' bestName='SYNBASS 1' midiProgram=38
  #notes: 442
  avg_pitch: 36.418552036199095

=== PART 1 ===
repr: <music21.stream.Part 0x7f9f1588e990>
id: 140321237821840
partName: MUTED GTR
  Instrument: name='Electric Guitar' bestName='MUTED GTR' midiProgram=28
  #notes: 99
  avg_pitch: 82.9090909090909

=== PART 2 ===
repr: <music21.stream.Part 0x7f9f1576f0d0>
id: 140321236644048
partName: MARIMBA
  Instrument: name='MARIMBA' bestName='MARIMBA' midiProgram=12
  Instrument: name='Marimba' bestName='Marimba' midiProgram=12
  #notes: 123
  avg_pitch: 79.64227642276423

=== PART 3 ===
repr: <music21.stream.Part 0x7f9f1524bc10>
id: 140321231256592
partName: WOODBLOCK
  Instrument: name='WOODBLOCK' bestName='WOODBLOCK' midiProgram=115
  Instrument: name='Woodblock' bestName='Woodblock' midiProgram=115
  #notes: 856
  avg_pitch: 98.15303738317758

=== PART 4 ===
repr:

In [34]:
# Pick the part of `score` to treat as the melody.
# NOTE(review): the part listing recorded earlier in this notebook shows part 0 as
# "SYNBASS 1" — confirm that index 0 really is the melody for the loaded score.
melody_part_index = 0  # whichever index you decide is correct
melody = score.parts[melody_part_index]

# Optionally transpose the melody the same way as in your pipeline
from music21 import interval, pitch

def detect_key_and_transpose(melody_part):
    """
    Transpose a part so that its estimated tonic becomes C or A.

    The key is estimated with melody_part.analyze('key'). When the estimated
    mode is 'major' the target tonic is C; for any other mode it is A. The
    interval from the estimated tonic to the target is applied to the part
    and the result of melody_part.transpose(...) is returned.
    """
    estimated_key = melody_part.analyze('key')
    target_name = 'C' if estimated_key.mode == 'major' else 'A'
    shift = interval.Interval(estimated_key.tonic, pitch.Pitch(target_name))
    return melody_part.transpose(shift)

# Transpose the selected part with the helper defined above.
melody_transposed = detect_key_and_transpose(melody)

# Save just this part as MIDI
# NOTE(review): the output name says "pirates", but the `score` assigned in the visible
# cells is parsed from Axel_F_1.mid — confirm which song this debug file actually holds.
from pathlib import Path
out_path = Path("../data/processed/debug_pirates_melody.mid")
melody_transposed.write("midi", fp=str(out_path))
print("Wrote:", out_path)


Wrote: ../data/processed/debug_pirates_melody.mid


In [9]:
from music21 import converter, instrument, note, chord

# Parse an untouched source file and list the instruments declared in each part.
orig = converter.parse("../data/mini_dataset/classic/Abba _ Mamma Mia L 1.mid")

for i, p in enumerate(orig.parts):
    print(f"\n=== ORIGINAL PART {i} ===")
    print("partName:", p.partName)

    insts = list(p.getInstruments())
    if not insts:
        # Nothing declared for this part: print the placeholder and move on.
        print("  Instrument: (none explicitly)")
        continue

    for inst in insts:
        print("  Instrument:",
              f"name='{inst.instrumentName}'",
              f"bestName='{inst.bestName()}'",
              f"midiProgram={inst.midiProgram}",
              f"midiChannel={inst.midiChannel}")



=== ORIGINAL PART 0 ===
partName: Mama Mia
  Instrument: name='None' bestName='Mama Mia' midiProgram=None midiChannel=0
  Instrument: name='Piano' bestName='Piano' midiProgram=0 midiChannel=1
  Instrument: name='Electric Bass' bestName='Electric Bass' midiProgram=33 midiChannel=2
  Instrument: name='StringInstrument' bestName='StringInstrument' midiProgram=48 midiChannel=3
  Instrument: name='Electric Piano' bestName='Electric Piano' midiProgram=2 midiChannel=4
  Instrument: name='Flute' bestName='Flute' midiProgram=73 midiChannel=5
  Instrument: name='Marimba' bestName='Marimba' midiProgram=12 midiChannel=6
  Instrument: name='Choir' bestName='Choir' midiProgram=52 midiChannel=7
  Instrument: name='Percussion' bestName='Percussion' midiProgram=127 midiChannel=9
  Instrument: name='Piano' bestName='Piano' midiProgram=3 midiChannel=10
  Instrument: name='Electric Guitar' bestName='Electric Guitar' midiProgram=30 midiChannel=11
  Instrument: name='Sampler' bestName='Sampler' midiProgram

In [13]:
# Parse a file written by the transposition step and list the instruments per part.
trans = converter.parse("../data/processed/transposed_midis/transposed_Smoothcr.mid")

for i, p in enumerate(trans.parts):
    print(f"\n=== TRANSPOSED PART {i} ===")
    print("partName:", p.partName)

    insts = list(p.getInstruments())
    if not insts:
        # Nothing declared for this part: print the placeholder and move on.
        print("  Instrument: (none explicitly)")
        continue

    for inst in insts:
        print("  Instrument:",
              f"name='{inst.instrumentName}'",
              f"bestName='{inst.bestName()}'",
              f"midiProgram={inst.midiProgram}",
              f"midiChannel={inst.midiChannel}")



=== TRANSPOSED PART 0 ===
partName: Piano
  Instrument: name='Piano' bestName='Piano' midiProgram=0 midiChannel=0
  Instrument: name='Piano' bestName='Piano' midiProgram=0 midiChannel=0
  Instrument: name='Piano' bestName='Piano' midiProgram=0 midiChannel=0
  Instrument: name='Piano' bestName='Piano' midiProgram=0 midiChannel=0
  Instrument: name='Fretless Bass' bestName='Fretless Bass' midiProgram=35 midiChannel=0
  Instrument: name='Electric Organ' bestName='Electric Organ' midiProgram=18 midiChannel=0
  Instrument: name='Alto Saxophone' bestName='Alto Saxophone' midiProgram=65 midiChannel=0
  Instrument: name='Electric Piano' bestName='Electric Piano' midiProgram=5 midiChannel=0
  Instrument: name='Electric Guitar' bestName='Electric Guitar' midiProgram=30 midiChannel=0
  Instrument: name='Electric Guitar' bestName='Electric Guitar' midiProgram=28 midiChannel=0
  Instrument: name='Piano' bestName='Piano' midiProgram=0 midiChannel=0
  Instrument: name='Sampler' bestName='Sampler' mi

In [14]:
from pathlib import Path

ROOT_DIR = Path("../data/mini_dataset")  # change this

# Walk ROOT_DIR recursively and keep every path ending in .mid.
all_mid_paths = [*ROOT_DIR.rglob("*.mid")]

print(f"Total .mid files found (including duplicates by name): {len(all_mid_paths)}")

# ---- unique by filename only ----
# Files that share a name but live in different folders collapse to one entry.
unique_by_name = set(path.name for path in all_mid_paths)
print(f"Unique .mid files by filename: {len(unique_by_name)}")


Total .mid files found (including duplicates by name): 908
Unique .mid files by filename: 528
