# In [None]:
import glob
import pickle
import numpy as np
from music21 import converter, instrument, note, chord

def load_midi_files(pattern="data/**/*.mid", output_path="data/notes.pkl"):
    """Extract note/chord tokens from MIDI files and pickle the result.

    Every file matching ``pattern`` is parsed with music21. When
    ``instrument.partitionByInstrument`` yields a truthy result, the first
    part is walked recursively; otherwise the flattened notes of the whole
    score are used. Each ``note.Note`` becomes ``str(element.pitch)`` and
    each ``chord.Chord`` becomes its ``normalOrder`` values joined by ``.``.
    Other element types are ignored.

    Args:
        pattern: Glob pattern (evaluated with ``recursive=True``) selecting
            the MIDI files to read.
        output_path: Where the token list is pickled. Missing parent
            directories are created.

    Returns:
        The list of token strings, in sorted-file-path order.
    """
    import os  # local import: ``os`` is not part of the file's import block

    notes = []
    # glob returns matches in arbitrary, filesystem-dependent order. Sorting
    # makes the token sequence (and everything trained on it) reproducible.
    for file in sorted(glob.glob(pattern, recursive=True)):
        midi = converter.parse(file)
        parts = instrument.partitionByInstrument(midi)
        elements = parts.parts[0].recurse() if parts else midi.flat.notes

        for element in elements:
            if isinstance(element, note.Note):
                notes.append(str(element.pitch))
            elif isinstance(element, chord.Chord):
                notes.append('.'.join(str(n) for n in element.normalOrder))

    # Make sure the destination exists so a long parse is not lost to a
    # FileNotFoundError at the very last step.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with open(output_path, "wb") as f:
        pickle.dump(notes, f)

    return notes

def prepare_sequences(notes, seq_length=100):
    """Turn a token list into sliding-window training tensors.

    The distinct tokens are sorted and numbered from 0. Every run of
    ``seq_length`` consecutive tokens becomes one input row, and the token
    immediately after the run becomes its target.

    Args:
        notes: Sequence of hashable, sortable tokens (e.g. note strings).
        seq_length: Number of tokens per input window.

    Returns:
        A 4-tuple ``(input_tensor, output_tensor, note_to_int, pitchnames)``:
        ``input_tensor`` has shape ``(n_patterns, seq_length, 1)`` and holds
        the integer codes divided by the vocabulary size; ``output_tensor``
        is a 1-D array of the integer code following each window;
        ``note_to_int`` maps token -> code; ``pitchnames`` is the sorted
        vocabulary.
    """
    pitchnames = sorted(set(notes))
    note_to_int = dict(zip(pitchnames, range(len(pitchnames))))

    # Encode the corpus once; windows are then plain slices of integers.
    encoded = [note_to_int[token] for token in notes]
    starts = range(len(encoded) - seq_length)

    windows = [encoded[start:start + seq_length] for start in starts]
    targets = [encoded[start + seq_length] for start in starts]

    n_patterns = len(windows)
    vocab_size = float(len(pitchnames))

    # Trailing axis of size 1 = one feature per time step; dividing by the
    # vocabulary size scales the codes into [0, 1).
    stacked = np.asarray(windows).reshape(n_patterns, seq_length, 1)
    input_tensor = stacked / vocab_size
    output_tensor = np.array(targets)

    return input_tensor, output_tensor, note_to_int, pitchnames

if __name__ == "__main__":
    # Script entry point: parse the MIDI corpus, write the pickled token
    # list as a side effect of load_midi_files(), and report how many
    # tokens were collected.
    notes = load_midi_files()
    print(f"Collected {len(notes)} notes")
