### Snippet to MIDI

In [1]:
import sys
from pathlib import Path

import numpy as np
from music21 import stream, note

# Make project modules under src/ importable. The path is relative, so it is
# resolved against the kernel's working directory.
# NOTE(review): the data paths below start with "../" — confirm that "src"
# (rather than "../src") is the intended location.
sys.path.append("src")

# Archive of preprocessed snippets (relative to the working directory).
DATA_PATH = Path("../data/processed/snippets.npz")

# Load processed snippets; row i of each array describes snippet i.
data = np.load(DATA_PATH)
intervals_arr = data["intervals"]   # pitch steps per snippet, shape (N, L)
durations_arr = data["durations"]   # note lengths in grid steps, shape (N, L)
song_ids = data["song_ids"]         # song id of each snippet (presumably shape (N,) — confirm)

# Display both shapes as a quick sanity check that they agree.
intervals_arr.shape, durations_arr.shape


((2608, 32), (2608, 32))

In [2]:
from music21 import pitch

STEPS_PER_QUARTER = 4  # must match your preprocessing setting

def snippet_to_stream(interval_seq, duration_seq, base_midi_pitch=60, steps_per_quarter=None):
    """
    Convert one snippet (intervals, durations) into a music21 Stream.

    Args:
        interval_seq: pitch steps in MIDI semitones. Each step is added to the
            running pitch *before* its note is created, so the first note
            sounds at base_midi_pitch + interval_seq[0].
        duration_seq: note lengths in grid steps, paired element-wise with
            interval_seq. Pairing uses zip, so extra items in the longer
            sequence are ignored.
        base_midi_pitch: starting pitch (60 = middle C).
        steps_per_quarter: number of grid steps in one quarter note. None (the
            default) uses the module-level STEPS_PER_QUARTER, looked up at
            call time, so existing callers behave exactly as before.

    Returns:
        A music21 Stream holding one Note per (interval, duration) pair, in order.
    """
    if steps_per_quarter is None:
        steps_per_quarter = STEPS_PER_QUARTER

    s = stream.Stream()
    current_pitch = base_midi_pitch

    for interval_val, dur_steps in zip(interval_seq, duration_seq):
        # Intervals are relative: accumulate them into an absolute MIDI pitch.
        current_pitch += int(interval_val)
        p = pitch.Pitch()
        p.midi = current_pitch

        n = note.Note(p)
        # Convert grid steps back to music21's quarterLength unit.
        n.quarterLength = float(dur_steps) / steps_per_quarter
        s.append(n)

    return s


In [3]:
from music21 import midi

# Folder that receives exported snippet MIDI files. It is created right away,
# including missing parent folders; an already existing folder is not an error.
SNIPPET_MIDI_DIR = Path("../data/processed/snippet_midis")
SNIPPET_MIDI_DIR.mkdir(parents=True, exist_ok=True)

def save_snippet_as_midi(snippet_index, base_midi_pitch=60):
    """
    Export one stored snippet to a MIDI file.

    Row `snippet_index` of intervals_arr / durations_arr is rendered with
    snippet_to_stream (starting from base_midi_pitch) and written under
    SNIPPET_MIDI_DIR. The file name carries the snippet index and its song id.

    Raises ValueError when snippet_index is outside the stored snippets.
    Returns the path to the MIDI file.
    """
    n_snippets = intervals_arr.shape[0]
    if snippet_index >= n_snippets or snippet_index < 0:
        last_index = n_snippets - 1
        raise ValueError(f"snippet_index {snippet_index} out of range [0, {last_index}]")

    # Render the selected row first, then derive the target file name.
    rendered = snippet_to_stream(
        intervals_arr[snippet_index],
        durations_arr[snippet_index],
        base_midi_pitch=base_midi_pitch,
    )

    source_song = song_ids[snippet_index]
    out_path = SNIPPET_MIDI_DIR / f"snippet_{snippet_index}_song{source_song}.mid"

    rendered.write('midi', fp=str(out_path))
    print(f"Saved snippet {snippet_index} (song_id={source_song}) to {out_path}")
    return out_path


In [15]:
# pick any valid snippet index (1759 here) and export it to MIDI
midi_path = save_snippet_as_midi(1759)

# show the returned path as the cell output
midi_path

Saved snippet 1759 (song_id=20) to ../data/processed/snippet_midis/snippet_1759_song20.mid


PosixPath('../data/processed/snippet_midis/snippet_1759_song20.mid')

### Find which snippets were generated from a given song

In [5]:
import numpy as np
from pathlib import Path

# Reload the snippet archive, this time with allow_pickle=True.
# NOTE(review): presumably required because midi_filenames is stored as an
# object array — confirm. Unpickling can execute arbitrary code, so only load
# archives you trust.
data = np.load("../data/processed/snippets.npz", allow_pickle=True)
intervals = data["intervals"]
durations = data["durations"]
song_ids = data["song_ids"]
midi_filenames = data.get("midi_filenames", None)  # None when the archive has no such entry

def snippet_indices_for_song_id(song_id: int):
    """
    Find every snippet that belongs to the song with the given id.

    Compares the module-level song_ids array against song_id, prints how many
    snippets matched, and returns their indices as a numpy array.
    """
    belongs_to_song = song_ids == song_id
    matching = np.nonzero(belongs_to_song)[0]
    print(f"Song id {song_id} has {len(matching)} snippets.")
    return matching

# Example: look up the snippets that belong to song id 26
idxs = snippet_indices_for_song_id(26)
idxs[:10]  # display only the first ten indices


Song id 26 has 73 snippets.


array([2141, 2142, 2143, 2144, 2145, 2146, 2147, 2148, 2149, 2150])

### Search by MIDI filename to find which snippets were generated from it

In [7]:
# Strict lookup: unlike the .get() used when the archive was loaded, this
# raises KeyError if the archive has no "midi_filenames" entry.
# NOTE(review): assumed to hold one filename per song, indexed by song_id — confirm.
midi_filenames = data["midi_filenames"]  # shape: (num_songs,)

def snippet_indices_for_filename(filename: str):
    """
    Look up the snippets that came from one MIDI file.

    `filename` must equal an entry of midi_filenames exactly, e.g. 'fur_elise.mid'.
    The position of the first equal entry is taken as the song id, and the
    snippets whose song_ids value equals that id are selected.

    Prints a one-line summary and returns the snippet indices as a numpy array.
    Raises ValueError when no entry of midi_filenames equals `filename`.
    """
    # Position in midi_filenames doubles as the song id.
    positions = np.nonzero(midi_filenames == filename)[0]
    if len(positions) < 1:
        raise ValueError(f"Filename {filename} not found in midi_filenames.")

    song_id = int(positions[0])
    snippet_positions = np.nonzero(song_ids == song_id)[0]
    print(f"File {filename} (song_id={song_id}) has {len(snippet_positions)} snippets.")
    return snippet_positions

# Example: every snippet that came from one specific file
snippet_idxs = snippet_indices_for_filename("Pirates of the Caribbean - He's a Pirate (3).mid")
snippet_idxs[:]  # full slice, so every index is displayed


File Pirates of the Caribbean - He's a Pirate (3).mid (song_id=0) has 16 snippets.


array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

### Find which song a given snippet was generated from

In [8]:
import numpy as np

# Reloads the same archive as the earlier cells and rebinds the same names.
# allow_pickle=True can execute arbitrary code on load; only use trusted archives.
data = np.load("../data/processed/snippets.npz", allow_pickle=True)
intervals = data["intervals"]
durations = data["durations"]
song_ids = data["song_ids"]  # song id of each snippet
midi_filenames = data["midi_filenames"]  # shape: (num_songs,)

def describe_snippet(snippet_index):
    """
    Report which source file a snippet came from.

    Reads the snippet's song id from song_ids, prints a one-line summary, and
    returns the entry of midi_filenames at that song id.
    """
    sid = int(song_ids[snippet_index])
    source_file = midi_filenames[sid]
    print(f"Snippet {snippet_index} comes from song_id={sid}, file={source_file}")
    return source_file

# Example: which file did snippet 10 come from?
describe_snippet(10)


Snippet 10 comes from song_id=0, file=Pirates of the Caribbean - He's a Pirate (3).mid


np.str_("Pirates of the Caribbean - He's a Pirate (3).mid")

In [9]:
from music21 import converter, instrument, note, chord, stream
from pathlib import Path

# Parse the raw source MIDI file with music21.
# NOTE: this rebinds midi_path, which an earlier cell set to the exported snippet path.
midi_path = Path("../data/raw_midi/Pirates of the Caribbean - He's a Pirate (3).mid")
score = converter.parse(str(midi_path))

print(score)  # prints only the object's repr


<music21.stream.Score 0x7480fa3c7890>


In [10]:
# Per-part report for the parsed score: identity, declared instruments,
# number of notes/chords, and average MIDI pitch.
# NOTE(review): presumably used to decide which part carries the melody — confirm.
for i, p in enumerate(score.parts):
    print(f"\n=== PART {i} ===")
    print("repr:", p)
    print("id:", p.id)
    print("partName:", p.partName)

    # list instruments declared in this part
    insts = list(p.getInstruments())
    if not insts:
        print("  Instruments: (none explicitly listed)")
    else:
        for inst in insts:
            print("  Instrument:",
                  f"name='{inst.instrumentName}'",
                  f"bestName='{inst.bestName()}'",
                  f"midiProgram={inst.midiProgram}")
    
    # quick stats: note count and average pitch
    # (a chord counts as one entry in the note count)
    notes_chords = [n for n in p.recurse().notes if isinstance(n, (note.Note, chord.Chord))]
    print("  #notes:", len(notes_chords))
    if notes_chords:  # skip the average for an empty part (avoids dividing by zero)
        pitches = []
        for n in notes_chords:
            if isinstance(n, note.Note):
                pitches.append(n.pitch.midi)
            elif isinstance(n, chord.Chord):
                # a chord contributes only its highest member pitch
                pitches.append(max(nn.pitch.midi for nn in n.notes))
        avg_pitch = sum(pitches) / len(pitches)
        print("  avg_pitch:", avg_pitch)



=== PART 0 ===
repr: <music21.stream.Part 0x7480fa3c7110>
id: 128097302901008
partName: Right Hand
  Instrument: name='Piano' bestName='Right Hand' midiProgram=0
  Instrument: name='Piano' bestName='Right Hand' midiProgram=0
  #notes: 274
  avg_pitch: 70.43430656934306

=== PART 1 ===
repr: <music21.stream.Part 0x7480fa3c6ad0>
id: 128097302899408
partName: Left Hand
  Instrument: name='Piano' bestName='Left Hand' midiProgram=0
  Instrument: name='Piano' bestName='Left Hand' midiProgram=0
  #notes: 350
  avg_pitch: 47.31428571428572


In [34]:
# Part 0 is the "Right Hand" part in the report above (the higher average pitch).
melody_part_index = 0  # whichever index you decide is correct
melody = score.parts[melody_part_index]

# Optionally transpose the melody the same way as in your pipeline
from music21 import interval, pitch

def detect_key_and_transpose(melody_part):
    """
    Transpose a part so its estimated tonic lands on a fixed target pitch.

    The key is estimated with melody_part.analyze('key'). A key whose mode is
    'major' is moved to C; any other mode is moved to A. The shift is the
    interval from the estimated tonic to that target.

    Returns whatever melody_part.transpose(...) returns for that interval.
    """
    estimated_key = melody_part.analyze('key')
    target = pitch.Pitch('C' if estimated_key.mode == 'major' else 'A')
    shift = interval.Interval(estimated_key.tonic, target)
    return melody_part.transpose(shift)

# Apply the key normalisation to the selected melody part.
melody_transposed = detect_key_and_transpose(melody)

# Save just this part as MIDI
# (Path is also imported in earlier cells.)
from pathlib import Path
out_path = Path("../data/processed/debug_pirates_melody.mid")
melody_transposed.write("midi", fp=str(out_path))
print("Wrote:", out_path)


Wrote: ../data/processed/debug_pirates_melody.mid
