In [16]:
import os
import mido
from pathlib import Path
from mido import MidiFile, MidiTrack, merge_tracks

In [17]:
mid_file = 'Banjo-Kazooie_N64_Banjo-Tooie_Cauldron Keep.mid'

## COMBINE MIDI TRACKS

In [65]:
def combine_midi_tracks(mid):
    if not isinstance(mid, MidiFile):
        raise ValueError("Expected a MidiFile object as input")
    
    combined_midi = MidiFile(ticks_per_beat=mid.ticks_per_beat)

    combined_midi.tracks = [merge_tracks(mid.tracks)]

    return combined_midi

# Save midi

In [68]:
def save_midi(mid, output_path):
    """
    Save MIDI file.
    
    Parameters:
        mid: MidiFile, MIDI file to save.
        output_path (str): Path to save the MIDI file.
    """
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    mid.save(output_path)
    print(f"MIDI file saved to {output_path}")

## Split by bars

In [20]:
BARS_TO_EXTRACT = 4

In [42]:
def save_bar_messages(message, bar_number, ticks_per_beat, filename, output_path="../data/processed/"):
    bar_midi = MidiFile(ticks_per_beat=ticks_per_beat)
    bar_track = MidiTrack()
    bar_midi.tracks.append(bar_track)
    for msg in message:
        msg.time = int(msg.time) # int(msg.time)
        bar_track.append(msg)
    output_file = os.path.join(output_path, f"{filename}_{bar_number}.mid")
    save_midi(bar_midi, output_file)
    print(f"Bar {bar_number} saved to {output_file}")

In [22]:
# BUENO
def split_midi_by_bar(mid, bars_to_extract, filename=None):
    ticks_per_beat = mid.ticks_per_beat
    numerator_time_signature = [msg.numerator for msg in mid if msg.type == 'time_signature'][0]
    ticks_per_bar = ticks_per_beat * numerator_time_signature
    ticks_to_extract = ticks_per_bar * bars_to_extract

    current_bar = 0
    current_ticks = 0
    metadata_info = [msg for msg in mid if msg.is_meta]
    bar_messages = []
    chord_messages = []

    for track in mid.tracks:
        for msg in track:
            if msg.time == 0:
                chord_messages.append(msg)
            else: 
                chord_messages.append(msg)
                # CASE 1: the chord is of the next section
                if current_ticks + msg.time >= ticks_to_extract:
                    current_bar += 1
                    current_ticks = 0
                    bar_messages.append(metadata_info[-1])
                    save_bar_messages(bar_messages, current_bar, ticks_per_beat, filename)
                    
                    bar_messages = metadata_info[:-1]
                    bar_messages.extend(chord_messages)
                    chord_messages = []
                # CASE 2: the chord is of the current section
                else:
                    current_ticks += msg.time
                    bar_messages.extend(chord_messages)
                    chord_messages = []

In [69]:
# IMPROVED VERSION
# def split_midi_by_bar(mid, bars_to_extract = 4, path="data/processed"):
#     filename = Path(mid.filename).stem

#     ticks_per_beat = mid.ticks_per_beat
#     numerator_time_signature = next((msg.numerator for msg in mid if msg.type == 'time_signature'), 4)
#     ticks_per_bar = ticks_per_beat * numerator_time_signature
#     ticks_to_extract = ticks_per_bar * bars_to_extract

#     current_bar = 0
#     current_ticks = 0
#     metadata_info = [msg for msg in mid if msg.is_meta]
#     bar_messages = []
#     chord_messages = []

#     for track in mid.tracks:
#         for msg in track:
#             chord_messages.append(msg)

#             if msg.time > 0: 
#                 current_ticks += msg.time

#                 if current_ticks >= ticks_to_extract:
#                     current_bar += 1
#                     current_ticks -= ticks_to_extract

#                     # Save the current bar messages
#                     bar_messages.extend(metadata_info)
#                     bar_messages.append(metadata_info[-1])
#                     save_bar_messages(bar_messages, current_bar, ticks_per_beat, filename)

#                     # Prepare for the next bar
#                     bar_messages = metadata_info[:-1]
#                     chord_messages.clear()

#     # Save remaining messages if any
#     if chord_messages:
#         current_bar += 1
#         bar_messages.extend(chord_messages)
#         save_bar_messages(bar_messages, current_bar, ticks_per_beat, filename)

# Pitches analysis

In [24]:
import os
import mido
from collections import defaultdict

In [25]:
def count_pitches(midi_raw_path):
    pitch_counts = defaultdict(int)
    min_pitch = 127
    max_pitch = 0

    for midi in midi_raw_path:
        mid = mido.MidiFile(midi)
        for track in mid.tracks:
            for msg in track:
                if msg.type in ['note_on', 'note_off']:
                    pitch_counts[msg.note] += 1
                    min_pitch = min(min_pitch, msg.note)
                    max_pitch = max(max_pitch, msg.note)

    return pitch_counts, min_pitch, max_pitch

midi_raw_path = [os.path.join("../data/raw/midi/", midi) for midi in os.listdir("../data/raw/midi/")]
pitch_counts, min_pitch, max_pitch = count_pitches(midi_raw_path)

# Imprimir los resultados
print(f"Min pitch: {min_pitch}")
print(f"Max pitch: {max_pitch}")
print("Pitch counts:")
for pitch in range(128):
    print(f"Pitch {pitch}: {pitch_counts[pitch]} times")

Min pitch: 0
Max pitch: 109
Pitch counts:
Pitch 0: 2 times
Pitch 1: 0 times
Pitch 2: 0 times
Pitch 3: 0 times
Pitch 4: 0 times
Pitch 5: 0 times
Pitch 6: 0 times
Pitch 7: 0 times
Pitch 8: 0 times
Pitch 9: 0 times
Pitch 10: 0 times
Pitch 11: 0 times
Pitch 12: 2 times
Pitch 13: 0 times
Pitch 14: 2 times
Pitch 15: 0 times
Pitch 16: 0 times
Pitch 17: 0 times
Pitch 18: 0 times
Pitch 19: 0 times
Pitch 20: 0 times
Pitch 21: 6 times
Pitch 22: 48 times
Pitch 23: 30 times
Pitch 24: 1010 times
Pitch 25: 448 times
Pitch 26: 1296 times
Pitch 27: 978 times
Pitch 28: 1799 times
Pitch 29: 2414 times
Pitch 30: 1226 times
Pitch 31: 4392 times
Pitch 32: 1600 times
Pitch 33: 4357 times
Pitch 34: 3390 times
Pitch 35: 3028 times
Pitch 36: 6858 times
Pitch 37: 3688 times
Pitch 38: 9959 times
Pitch 39: 4883 times
Pitch 40: 10113 times
Pitch 41: 12272 times
Pitch 42: 6932 times
Pitch 43: 17150 times
Pitch 44: 8653 times
Pitch 45: 16109 times
Pitch 46: 12253 times
Pitch 47: 11756 times
Pitch 48: 20651 times
Pitc

## Establecemos el rango de pitch de 20-110

# TIME SIGNATURE ANALYSYS

In [26]:
def count_time_signatures(midi_raw_path):
    time_signature_counts = defaultdict(int)
    files_with_multiple_time_signatures = 0

    for midi in midi_raw_path:
        mid = mido.MidiFile(midi)
        time_signatures_in_file = set()
        for track in mid.tracks:
            for msg in track:
                if msg.type == 'time_signature':
                    time_signature_counts[(msg.numerator, msg.denominator)] += 1
                    time_signatures_in_file.add((msg.numerator, msg.denominator))
        
        if len(time_signatures_in_file) > 1:
            files_with_multiple_time_signatures += 1

    return time_signature_counts, files_with_multiple_time_signatures

midi_raw_path = [os.path.join("../data/raw/midi/", midi) for midi in os.listdir("../data/raw/midi/")]
time_signature_counts, files_with_multiple_time_signatures = count_time_signatures(midi_raw_path)

# Imprimir los resultados
print("Time signature counts:")
for (numerator, denominator), count in time_signature_counts.items():
    print(f"Time signature {numerator}/{denominator}: {count} times")

print(f"\nNumber of files with more than one time signature: {files_with_multiple_time_signatures}")

Time signature counts:
Time signature 4/4: 1373 times
Time signature 1/4: 1 times
Time signature 2/4: 26 times
Time signature 3/4: 76 times
Time signature 5/8: 7 times
Time signature 7/8: 11 times
Time signature 6/8: 12 times
Time signature 1/8: 1 times
Time signature 5/4: 79 times
Time signature 2/2: 14 times
Time signature 6/4: 21 times
Time signature 7/4: 5 times
Time signature 9/8: 7 times
Time signature 11/8: 4 times
Time signature 19/16: 1 times
Time signature 17/16: 1 times
Time signature 11/16: 1 times
Time signature 1/32: 1 times
Time signature 12/8: 1 times

Number of files with more than one time signature: 36


# TICKS PER BIT ANALYSIS

In [27]:
def count_ticks_per_beat(midi_raw_path):
    tpb_counts = defaultdict(int)
    files_with_multiple_tpb = 0

    for midi in midi_raw_path:
        mid = mido.MidiFile(midi)
        tpb_in_file = set()
        tpb_in_file.add(mid.ticks_per_beat)
        tpb_counts[mid.ticks_per_beat] += 1
        
        if len(tpb_in_file) > 1:
            files_with_multiple_tpb += 1

    return tpb_counts, files_with_multiple_tpb

midi_raw_path = [os.path.join("../data/raw/midi/", midi) for midi in os.listdir("../data/raw/midi/")]
tpb_counts, files_with_multiple_tpb = count_ticks_per_beat(midi_raw_path)

# Imprimir los resultados
print("Ticks per beat counts:")
for tpb, count in tpb_counts.items():
    print(f"Ticks per beat {tpb}: {count} times")

print(f"\nNumber of files with more than one ticks per beat value: {files_with_multiple_tpb}")

Ticks per beat counts:
Ticks per beat 1024: 59 times
Ticks per beat 48: 12 times
Ticks per beat 480: 14 times
Ticks per beat 96: 3 times
Ticks per beat 192: 70 times
Ticks per beat 960: 8 times
Ticks per beat 120: 18 times
Ticks per beat 256: 19 times
Ticks per beat 384: 1072 times

Number of files with more than one ticks per beat value: 0


# MIDI-TO-REMI

In [28]:
def extract_notes_and_chords(mid):
    events = []
    note_start_times = {}
    current_time = 0
    time_signature = []

    for track in mid.tracks:
        current_time = 0 # Ojo aquí si las pistas comienzan en tiempos distintos
        # events.append(mid.tracks.index(track))  
        for msg in track:
            if msg.type == 'time_signature':
                time_signature = [msg.numerator, msg.denominator]

            if msg.type in ['note_on', 'note_off']:
                current_time += msg.time
            
            if msg.type == 'note_on' and msg.velocity > 0:
                note_start_times[msg.note] = current_time
            elif msg.type == 'note_off' or (msg.type == 'note_on' and msg.velocity == 0):
                if msg.note in note_start_times:
                    duration = current_time - note_start_times[msg.note]
                    # print(f"Duration of note {msg.note}: {duration}, current time: {current_time} and note start time: {note_start_times[msg.note]}")
                    events.append({
                        'time': note_start_times[msg.note],
                        'note': msg.note,
                        'velocity': msg.velocity,
                        'duration': duration,
                        # 'track': mid.tracks.index(track)
                    })
                    del note_start_times[msg.note]

    return sorted(events, key=lambda x: x['time']), time_signature

In [29]:
def quantize_to_grid(ticks_time, ticks_duration, ticks_per_beat, grids_per_bar=32):
    ticks_per_grid = (ticks_per_beat * 4) // grids_per_bar
    ticks_per_bar = ticks_per_beat * 4

    # Quantize the start time
    quantized_time = round(ticks_time / ticks_per_grid) * ticks_per_grid

    # Quantize the duration
    quantized_duration = round(ticks_duration / ticks_per_grid) * ticks_per_grid # 1024

    # Calculate the bar and position in the bar of the quantized time
    bar = quantized_time // ticks_per_bar
    position = int((ticks_time % ticks_per_bar) / ticks_per_grid)

    # Calcute the duration in grids
    duration_position = int((quantized_duration % ticks_per_bar) / ticks_per_grid)

    return bar, position, duration_position

In [31]:
mid = mido.MidiFile("../data/raw/midi/" + "Banjo-Kazooie_N64_Banjo-Kazooie_Boggys Igloo Happy.mid")

In [32]:
events, time_signature = extract_notes_and_chords(mid)
events[:5], time_signature

([{'time': 5, 'note': 66, 'velocity': 0, 'duration': 1043},
  {'time': 5, 'note': 42, 'velocity': 0, 'duration': 505},
  {'time': 1049, 'note': 70, 'velocity': 0, 'duration': 752},
  {'time': 1049, 'note': 37, 'velocity': 0, 'duration': 474},
  {'time': 1802, 'note': 73, 'velocity': 0, 'duration': 246}],
 [4, 4])

In [33]:
for event in events[:5]:
    bar, position, duration = quantize_to_grid(event['time'], event['duration'], mid.ticks_per_beat)
    print(f"Bar {bar}, Position {position}, Note {event['note']}, Duration {duration}")

Bar 0, Position 0, Note 66, Duration 8
Bar 0, Position 0, Note 42, Duration 4
Bar 0, Position 8, Note 70, Duration 6
Bar 0, Position 8, Note 37, Duration 4
Bar 0, Position 14, Note 73, Duration 2


In [34]:
TIMESIGN={'[2, 2]': 0, '[2, 4]': 1, '[3, 4]': 2, '[4, 4]': 3, '[5, 4]': 4, '[6, 4]': 5, '[5, 8]': 6, '[6, 8]': 7, '[7, 8]': 8, '[9, 8]': 9}
TPB = {48: 0, 96: 1, 120: 2, 192: 3, 256: 4, 384: 5, 480: 6, 960: 7, 1024: 8}
def midi_to_REMI(mid):
    """
    Extracts the notes and chords from a MIDI file and returns a list of tokens in the REMI format.
    Format: 
        - 0: Bar
        - 1-32: Position, 32 values
        - 33-122: Pitch, 90 values [20-109]
        - 123-154: Duration, 32 values
        - 155-164: Time signature, 10 values
        - 165-173: Ticks per beat, 9 values
    """
    ticks_per_beat = mid.ticks_per_beat

    events, time_signature = extract_notes_and_chords(mid)

    tokens = []
    position_set = set()
    duration_set = set()
    current_bar = None

    # Add ticks per beat token
    ticks_per_beat_token = TPB[ticks_per_beat] + 165
    tokens.append(ticks_per_beat_token)

    # Add time signature token
    time_signature_token = TIMESIGN[str(time_signature)] + 155
    tokens.append(time_signature_token)

    for event in events:
        bar, position, duration = quantize_to_grid(event['time'], event['duration'], ticks_per_beat)
        if current_bar is None or bar > current_bar:
            tokens.append(0)
            current_bar = bar

        position_token = position + 1
        pitch_token = event['note'] - 20 + 33 # 20 is minimum pitch, 33 is the offset
        duration_token = duration + 123
        try: 
            tokens.append(position_token)
            tokens.append(pitch_token)
            tokens.append(duration_token)
            position_set.add(position)
            duration_set.add(event['duration'])
        except:
            print(f"Error with {Path(mid.filename).stem} - {event['duration']}")
            
    return tokens
