In [None]:
from music21 import *
import os
import numpy as np
import pickle

In [None]:
def load_score(filepath):
    """Parse the file at ``filepath`` via ``converter.parse`` and return the result.

    ``converter`` is expected to be in scope from the notebook's
    ``from music21 import *`` import cell.
    """
    parsed_score = converter.parse(filepath)
    return parsed_score

def discretize_to_16th(score):
    """Flatten the first part of ``score`` into a list with one entry per 16th-note step.

    Each ``note.Note`` contributes its MIDI pitch number and each ``note.Rest``
    contributes ``-1``, repeated once per 16th-note step the element covers.
    Anything else yielded by ``notesAndRests`` (e.g. chord symbols) fails the
    ``isinstance`` filter and is skipped entirely, so its duration does not
    appear in the output.

    Durations are quantized by rounding the *cumulative* elapsed time to the
    nearest 16th step instead of truncating every duration on its own.
    Per-element truncation shortened tuplets (a triplet eighth is 1.33 steps)
    and dropped anything shorter than a 16th, so the grid drifted further from
    the score with each such element. Elements whose lengths are whole
    multiples of a 16th produce exactly the same output as before.

    Args:
        score: Object exposing ``parts``; only ``score.parts[0]`` is read.

    Returns:
        list: MIDI numbers and ``-1`` rest markers, one per 16th-note step.
    """
    melody_part = score.parts[0]

    # Keep only plain notes and rests; everything else is dropped here.
    notes_and_rests = [elem for elem in melody_part.flatten().notesAndRests
                       if isinstance(elem, (note.Note, note.Rest))]

    processed = []
    elapsed_quarters = 0  # running total of quarterLength consumed so far

    for element in notes_and_rests:
        elapsed_quarters += element.duration.quarterLength

        # Grid index where this element ends (round half up). len(processed)
        # is the index where it starts, so the difference is its step count
        # and rounding errors never accumulate.
        end_step = int(elapsed_quarters * 4 + 0.5)
        step_count = end_step - len(processed)

        # After the filter above, anything that is not a Note is a Rest.
        value = element.pitch.midi if isinstance(element, note.Note) else -1
        processed.extend([value] * step_count)

    return processed

In [None]:
# Process every MusicXML file in the data directory
data_dir = './data'
processed_sequences = []

# os.listdir() returns entries in arbitrary order; sorting keeps the order of
# processed_sequences (and therefore the pickle and processed_sequences[0])
# reproducible from one run to the next.
for filename in sorted(os.listdir(data_dir)):
    if filename.endswith(('.xml', '.musicxml')):
        filepath = os.path.join(data_dir, filename)
        score = load_score(filepath)
        sequence = discretize_to_16th(score)
        processed_sequences.append(sequence)
        print(f'Processed {filename}: {len(sequence)} events')

# Persist the discretized sequences to disk.
with open('processed_sequences.pkl', 'wb') as f:
    pickle.dump(processed_sequences, f)

In [None]:
def sequence_to_stream(sequence):
    """Convert a sequence of MIDI numbers (and -1 for rests) back to a music21 stream.

    Consecutive identical values are merged into a single element whose
    ``quarterLength`` is ``run_length / 4`` (each entry is one 16th-note step).
    A run of ``-1`` becomes a ``note.Rest``; any other value is passed to
    ``note.Note`` as-is.

    Args:
        sequence: Iterable of values, one per 16th-note step. It may be empty,
            in which case an empty stream is returned (indexing the first
            element used to raise ``IndexError``).

    Returns:
        The ``stream.Stream`` holding the reconstructed notes and rests.
    """
    output_stream = stream.Stream()

    def append_run(value, length):
        # Turn one run of identical values into a single rest or note.
        duration = length / 4.0  # Convert 16th-note steps back to quarter notes
        if value == -1:
            output_stream.append(note.Rest(quarterLength=duration))
        else:
            output_stream.append(note.Note(value, quarterLength=duration))

    run_value = None
    run_length = 0  # 0 means no run is open yet

    for value in sequence:
        if run_length and value == run_value:
            run_length += 1
            continue
        # Value changed (or this is the first item): flush the finished run.
        if run_length:
            append_run(run_value, run_length)
        run_value = value
        run_length = 1

    # Flush the trailing run, if the sequence was not empty.
    if run_length:
        append_run(run_value, run_length)

    return output_stream

# Smoke test for the round trip: use the first processed sequence when one
# exists, otherwise fall back to a short hard-coded melody.
if processed_sequences:
    test_sequence = processed_sequences[0]
else:
    test_sequence = [60, 60, 60, 60, -1, -1, 62, 62]

reconstructed = sequence_to_stream(test_sequence)
reconstructed.show('midi')  # Play the reconstructed melody