In [24]:
import os
import random
from pathlib import Path
from random import shuffle

import mido
from miditok import REMI, TokenizerConfig
from miditok.data_augmentation import augment_dataset
from miditok.utils import split_files_for_training

In [4]:
# Project root as a path relative to the current working directory.
# NOTE(review): a later cell rebinds BASE_DIR to an absolute Path, so the value
# in effect depends on which cells have been executed.
BASE_DIR = '../'

In [5]:
def load_midi(midi_path):
    """
    Read a MIDI file from disk with mido.

    Any exception raised while loading is reported on stdout and swallowed,
    so callers must be prepared to receive None.

    Input:
        midi_path: str, path to the midi file

    Output:
        mido.MidiFile on success, None if loading failed
    """
    try:
        loaded = mido.MidiFile(midi_path)
    except Exception as err:
        # Best-effort loader: report the problem and signal failure with None.
        print(f"Error loading {midi_path}: {err}")
        return None
    else:
        return loaded

In [6]:
def midi_to_remi(mid):
    """
    Convert a loaded MIDI file into a flat list of REMI-like string tokens.

    For every track a "Track <name>" token is emitted, followed by tokens for
    the messages of that track:
        - note_on with velocity > 0 -> "Note-On: n", "Velocity: v", "Position: p"
        - note_off, or note_on with velocity 0 -> "Note-Off: n"
        - set_tempo meta message -> "Tempo: bpm"
    After each message, a "Bar" token is added when the running time of the
    track falls exactly on a beat boundary.

    Input:
        mid: object with `ticks_per_beat` and `tracks` (e.g. mido.MidiFile)

    Output:
        remi_tokens: list of str
    """
    # Number of grid steps a beat is divided into for the "Position" token.
    positions_per_beat = 16
    ticks_per_beat = mid.ticks_per_beat
    remi_tokens = []

    for track in mid.tracks:
        remi_tokens.append(f"Track {track.name}")

        # Delta times are relative to the previous message of the SAME track,
        # so the running time has to restart for every track.
        current_time = 0

        for msg in track:
            # Meta messages carry a delta time too; skipping it would shift
            # every later note of the track.
            current_time += msg.time

            if msg.is_meta:
                if msg.type == 'set_tempo':
                    tempo_bpm = mido.tempo2bpm(msg.tempo)
                    remi_tokens.append(f"Tempo: {tempo_bpm}")
            elif msg.type == 'note_on' and msg.velocity > 0:
                # Scale the offset inside the beat onto the 0..15 grid. Scaling
                # (instead of dividing by ticks_per_beat // 16) stays in range
                # for any resolution and cannot divide by zero when
                # ticks_per_beat < 16.
                offset_in_beat = current_time % ticks_per_beat
                position = (offset_in_beat * positions_per_beat) // ticks_per_beat
                remi_tokens.append(f"Note-On: {msg.note}")
                remi_tokens.append(f"Velocity: {msg.velocity}")
                remi_tokens.append(f"Position: {position}")
            elif msg.type == 'note_off' or (msg.type == 'note_on' and msg.velocity == 0):
                remi_tokens.append(f"Note-Off: {msg.note}")

            # NOTE(review): this fires on every beat boundary (not only on bar
            # lines) and once per message located there; kept as in the
            # original tokenization.
            if current_time % ticks_per_beat == 0:
                remi_tokens.append("Bar")

    return remi_tokens

In [7]:
def build_vocab(remi_tokens):
    """
    Build a token -> integer id mapping.

    Ids are assigned consecutively from 0 to the distinct tokens taken in
    sorted order.
    """
    distinct_tokens = sorted(set(remi_tokens))
    return dict(zip(distinct_tokens, range(len(distinct_tokens))))

def tokenize_sequence(remi_tokens, vocab):
    """
    Translate a sequence of tokens into their ids using `vocab`.

    A token missing from `vocab` raises KeyError.
    """
    token_ids = []
    for token in remi_tokens:
        token_ids.append(vocab[token])
    return token_ids

In [8]:
# Run the hand-written tokenization pipeline on a single example file.
mid_file = os.path.join(BASE_DIR, 'data', 'raw', "Donkey Kong_SNES_Donkey Kong Country 2 Diddy's Kong Quest_Hot Head Bop.mid")
mid = load_midi(mid_file)

# NOTE(review): load_midi returns None when the file cannot be loaded, in which
# case midi_to_remi(mid) fails on the attribute access.
remi_tokens = midi_to_remi(mid)
# The vocabulary is built from this one file only.
vocab = build_vocab(remi_tokens)
tokenized_sequence = tokenize_sequence(remi_tokens, vocab)
# Prints the full id sequence (one id per token).
print(tokenized_sequence)

[83, 0, 0, 0, 82, 0, 82, 0, 0, 85, 0, 0, 0, 0, 0, 0, 48, 97, 73, 12, 52, 92, 81, 16, 51, 93, 73, 15, 54, 92, 81, 18, 51, 95, 73, 15, 52, 92, 81, 16, 49, 93, 73, 13, 50, 92, 81, 14, 48, 97, 73, 12, 52, 92, 81, 16, 51, 93, 73, 15, 54, 92, 81, 18, 51, 95, 73, 15, 52, 92, 81, 16, 49, 93, 73, 13, 50, 92, 81, 14, 48, 97, 73, 12, 52, 92, 81, 16, 51, 93, 73, 15, 54, 92, 81, 18, 51, 95, 73, 15, 52, 92, 81, 16, 49, 93, 73, 13, 50, 92, 81, 14, 48, 97, 73, 12, 52, 92, 81, 16, 51, 93, 73, 15, 54, 92, 81, 18, 51, 95, 73, 15, 52, 92, 81, 16, 49, 93, 73, 13, 50, 92, 81, 14, 58, 92, 81, 22, 57, 93, 73, 21, 58, 92, 81, 22, 57, 92, 76, 21, 56, 92, 79, 20, 58, 92, 81, 22, 57, 93, 73, 21, 58, 92, 81, 22, 59, 92, 76, 23, 58, 92, 81, 22, 57, 93, 73, 21, 58, 92, 81, 22, 57, 97, 73, 21, 56, 92, 81, 20, 56, 93, 73, 20, 54, 92, 81, 18, 56, 92, 76, 20, 58, 92, 81, 22, 57, 93, 73, 21, 58, 92, 81, 22, 57, 92, 76, 21, 56, 92, 79, 20, 56, 92, 81, 20, 59, 97, 73, 23, 58, 93, 73, 22, 57, 95, 73, 21, 58, 93, 73, 22, 57,

In [13]:
# Keyword arguments forwarded to TokenizerConfig for the REMI tokenizer below.
TOKENIZER_PARAMS = {
    "pitch_range": (21, 109),
    "beat_res": {(0, 4): 8, (4, 12): 4},
    "num_velocities": 32,
    "special_tokens": ["PAD", "BOS", "EOS", "MASK"],
    "use_chords": True,
    "use_rests": False,
    "use_tempos": True,
    "use_time_signatures": False,
    "use_programs": False,
    "num_tempos": 32,  # number of tempo bins
    "tempo_range": (40, 250),  # (min, max)
}
# Build the configuration object from the settings above.
config = TokenizerConfig(**TOKENIZER_PARAMS)

# REMI tokenizer using that configuration (rebinds the notebook-level `tokenizer`).
tokenizer = REMI(config)

In [17]:
# Tokenize the example file with the current `tokenizer` and show the result.
tokens = tokenizer(mid_file)
print(tokens)

[TokSequence(tokens=['Bar_None', 'Position_0', 'Tempo_107.74', 'Bar_None', 'Bar_None', 'Bar_None', 'Bar_None', 'Bar_None', 'Bar_None', 'Bar_None', 'Bar_None', 'Position_0', 'Pitch_57', 'Velocity_71', 'Duration_0.4.8', 'Position_4', 'Pitch_64', 'Velocity_63', 'Duration_0.3.8', 'Position_8', 'Pitch_62', 'Velocity_67', 'Duration_0.3.8', 'Position_12', 'Pitch_67', 'Velocity_63', 'Duration_0.4.8', 'Position_16', 'Pitch_62', 'Velocity_67', 'Duration_0.4.8', 'Position_20', 'Pitch_64', 'Velocity_63', 'Duration_0.3.8', 'Position_24', 'Pitch_59', 'Velocity_67', 'Duration_0.3.8', 'Position_28', 'Pitch_60', 'Velocity_63', 'Duration_0.3.8', 'Bar_None', 'Position_0', 'Pitch_57', 'Velocity_71', 'Duration_0.4.8', 'Position_4', 'Pitch_64', 'Velocity_63', 'Duration_0.3.8', 'Position_8', 'Pitch_62', 'Velocity_67', 'Duration_0.3.8', 'Position_12', 'Pitch_67', 'Velocity_63', 'Duration_0.4.8', 'Position_16', 'Pitch_62', 'Velocity_67', 'Duration_0.4.8', 'Position_20', 'Pitch_64', 'Velocity_63', 'Duration_0.3

In [None]:
def add_emotion_metadata(midi_file, emotion_level):
    """
    Add an emotion label to a MIDI file as a text meta event.

    The event 'Emotion:<emotion_level>' is inserted at the very beginning of
    the first track with a delta time of 0, so it does not shift any existing
    event and stays ahead of the track's closing events. The file on disk is
    not modified; the in-memory object is returned.

    NOTE(review): a later cell defines a function with this same name but a
    different signature (it also saves the result); whichever cell runs last
    wins.

    :param midi_file: Path to the MIDI file.
    :param emotion_level: Emotion level (1-4).
    :return: Modified mido.MidiFile object.
    """
    mid = mido.MidiFile(midi_file)

    # Text meta event that carries the emotion label.
    emotion_event = mido.MetaMessage(
        type='text', text=f'Emotion:{emotion_level}', time=0
    )

    # A file without tracks has nowhere to store the event: create one.
    if not mid.tracks:
        mid.add_track()

    # Insert at the front rather than appending: appending would place the
    # event after everything already in the track, including its end marker.
    mid.tracks[0].insert(0, emotion_event)

    return mid

### Prepare a dataset before training

In [2]:
from miditok import REMI
from pathlib import Path

# Create the tokenizer and list the MIDI file paths under ../data/raw (recursive)
tokenizer = REMI()  # using default parameters (constants.py)
midi_paths = list(Path("..", "data", "raw").glob("**/*.mid"))
print(len(midi_paths), type(midi_paths))

# Build the vocabulary with BPE from the listed files (target vocabulary size: 30000)
tokenizer.train(vocab_size=30000, files_paths=midi_paths)

1275 <class 'list'>


In [41]:
# Project root. Set the KEYEMOTIONS_BASE_DIR environment variable to run the
# notebook on another machine; when it is unset, the original location is used.
BASE_DIR = Path(
    os.environ.get('KEYEMOTIONS_BASE_DIR', 'C:/Users/luiss/Documents/VIU/TFM/KeyEmotions')
)

# Directory that holds the raw MIDI files.
raw_data_dir = BASE_DIR / 'data' / 'raw'

In [44]:
# Share of the files held out for validation and for test, and the seed that
# makes the shuffle (and therefore the split) identical on every run.
VALID_RATIO = 0.15
TEST_RATIO = 0.15
SPLIT_SEED = 42

# Sort first: glob order depends on the filesystem, so a seeded shuffle is only
# reproducible when it starts from a deterministic order.
midi_paths = sorted(raw_data_dir.glob("**/*.mid"))
total_num_files = len(midi_paths)
num_files_valid = round(total_num_files * VALID_RATIO)
num_files_test = round(total_num_files * TEST_RATIO)
random.Random(SPLIT_SEED).shuffle(midi_paths)

# Consecutive, non-overlapping slices of the shuffled list.
midi_paths_valid = midi_paths[:num_files_valid]
midi_paths_test = midi_paths[num_files_valid:num_files_valid + num_files_test]
midi_paths_train = midi_paths[num_files_valid + num_files_test:]

# Split each subset into training-sized chunks under data/splits/dataset_<subset>.
for files_paths, subset in (
    (midi_paths_train, "train"),
    (midi_paths_valid, "valid"),
    (midi_paths_test, "test"),
):
    subset_chunks_dir = Path(BASE_DIR, 'data', 'splits', f'dataset_{subset}')
    subset_chunks_dir.mkdir(parents=True, exist_ok=True)
    split_files_for_training(
        files_paths=files_paths,
        tokenizer=tokenizer,
        save_dir=subset_chunks_dir,
        max_seq_len=1024,
        num_overlap_bars=2
    )

Splitting music files (C:\Users\luiss\Documents\VIU\TFM\KeyEmotions\data\splits\dataset_train): 100%|██████████| 893/893 [00:00<00:00, 1339.18it/s]
Splitting music files (C:\Users\luiss\Documents\VIU\TFM\KeyEmotions\data\splits\dataset_valid): 100%|██████████| 191/191 [00:00<00:00, 1233.72it/s]
Splitting music files (C:\Users\luiss\Documents\VIU\TFM\KeyEmotions\data\splits\dataset_test): 100%|██████████| 191/191 [00:00<00:00, 1261.82it/s]


### Create a Dataset and collator for training

In [None]:
from miditok.pytorch_data import DatasetMIDI, DataCollator
from torch.utils.data import DataLoader

# NOTE(review): this rebinds `tokenizer` to an untrained default REMI and reads
# the raw files rather than the chunks written under data/splits; confirm that
# this is intended before training.
tokenizer = REMI()  # using default parameters (constants.py)
midi_paths = list(Path(BASE_DIR, 'data', 'raw').glob("**/*.mid"))
dataset = DatasetMIDI(
    files_paths=midi_paths,
    tokenizer=tokenizer,
    max_seq_len=1024,
    # Sequences must be delimited by the real BOS/EOS tokens; the previous
    # values used the PAD id as BOS and the BOS id as EOS.
    bos_token_id=tokenizer["BOS_None"],
    eos_token_id=tokenizer["EOS_None"],
)
# Batches are padded with the tokenizer's PAD id.
collator = DataCollator(tokenizer.pad_token_id)
data_loader = DataLoader(dataset=dataset, collate_fn=collator)

# Using the data loader in the training loop
for batch in data_loader:
    print("Train your model on this batch...")

### Tokenize a dataset

In [48]:
# from miditok.data_augmentation import augment_midi_dataset
from pathlib import Path

# Create the tokenizer and point at the directory of raw files
tokenizer = REMI()  # using default parameters (constants.py)
data_path = Path(BASE_DIR, 'data', 'raw')


# A validation method to discard MIDIs we do not want
# It can also be used for custom pre-processing, for instance if you want to merge
# some tracks before tokenizing a MIDI file
def midi_valid(midi) -> bool:
    """
    Return True when every time signature of the file has a numerator of 4.

    The time signatures are read from `midi.time_signatures` when that
    attribute exists, and from `midi.time_signature_changes` otherwise, so the
    check works with either kind of loaded-file object. A file with no time
    signature entry is accepted.
    """
    time_signatures = getattr(midi, "time_signatures", None)
    if time_signatures is None:
        time_signatures = getattr(midi, "time_signature_changes", ())
    # Reject anything that is not 4/*, i.e. not 4 beats per bar.
    return all(ts.numerator == 4 for ts in time_signatures)


# Performs data augmentation on one pitch octave (up and down), velocities and
# durations
# midi_aug_path = Path("to", "new", "location", "augmented")
# augment_midi_dataset(
#     data_path,
#     pitch_offsets=[-12, 12],
#     velocity_offsets=[-4, 5],
#     duration_offsets=[-0.5, 1],
#     out_path=midi_aug_path,
# )

# Pass the filter by keyword: the third positional parameter of
# tokenize_dataset is `overwrite_mode`, so a positional `midi_valid` was taken
# as a truthy flag and the filter was never applied.
tokenizer.tokenize_dataset(
    data_path,
    Path(BASE_DIR, 'data'),
    validation_fn=midi_valid,
)

Tokenizing music files (KeyEmotions/data): 100%|██████████| 1275/1275 [00:03<00:00, 321.93it/s]


In [None]:
def add_emotion_metadata(file_path, emotion_level, output_path):
    """
    Write a copy of a MIDI file that carries an emotion label (1-4).

    The label is stored as a text meta event 'Emotion:<emotion_level>' placed
    at the start of the first track; a confirmation message is printed once
    the file has been saved.

    :param file_path: Path to the original MIDI file.
    :param emotion_level: Emotion level (1-4).
    :param output_path: Path where the modified MIDI file is saved.
    """
    score = mido.MidiFile(file_path)

    # The label goes in front of everything else in the first track.
    first_track = score.tracks[0]
    first_track.insert(
        0,
        mido.MetaMessage('text', text=f'Emotion:{emotion_level}', time=0),
    )

    # Persist the modified file and report where it went.
    score.save(output_path)
    print(f"Archivo MIDI guardado con metadato de sentimiento en: {output_path}")

In [10]:
# def remi_to_midi(remi_tokens, output_file):
#     midi = mido.MidiFile()
#     track = mido.MidiTrack()
#     midi.tracks.append(track)
    
#     current_time = 0
#     ticks_per_beat = midi.ticks_per_beat  # Asume la resolución original
    
#     # Procesar tokens
#     for token in remi_tokens:
#         if token.startswith("Note-On"):
#             note = int(token.split(":")[1])
#             track.append(mido.Message('note_on', note=note, velocity=64, time=current_time))
#             current_time = 0  # Reinicia el tiempo acumulativo
#         elif token.startswith("Note-Off"):
#             note = int(token.split(":")[1])
#             track.append(mido.Message('note_off', note=note, velocity=0, time=current_time))
#             current_time = 0
#         elif token.startswith("Tempo"):
#             bpm = int(token.split(":")[1])
#             tempo = int(60000000 / bpm)
#             track.append(mido.MetaMessage('set_tempo', tempo=tempo, time=current_time))
#             current_time = 0
#         elif token.startswith("Bar"):
#             current_time += ticks_per_beat  # Avanzar al siguiente compás
#         elif token.startswith("Position"):
#             position = int(token.split(":")[1])
#             current_time += position * (ticks_per_beat // 16)  # Ajustar tiempo a la posición
    
#     midi.save(output_file)