### What is happening in the preprocessing

# preprocess_midi Function:

- Takes a file path as input.
- Loads the MIDI file using `pyd.PrettyMIDI`.
- Extracts notes from all instruments in the MIDI file.
- Rounds the start and end times of each note to two decimal places.
- Sorts the notes based on their start times.
- Uses `MidiEventProcessor` to encode the notes into a representation sequence.
- Updates a global variable `total` by adding the length of the representation sequence.
- Returns the representation sequence.

# preprocess_pop909 Function:

- Takes the root folder of the POP909 dataset (`midi_root`) and a directory to save processed data (`save_dir`) as inputs.
- Iterates through MIDI files in the `midi_root` directory.
- Calls `preprocess_midi` for each MIDI file to obtain the representation sequence.
- Collects the representation sequences into a NumPy array (`save_py`).
- Saves the NumPy array as a file named "pop909-event-token.npy" using `np.save`.

# Data Processing:

- The representation sequence appears to be a sequence of events or tokens derived from the notes in the MIDI files. The exact nature of these events depends on the implementation of `MidiEventProcessor`.
- The total length of the representation sequences is accumulated in the global variable `total`.

# Execution:

- The code is executed with the POP909 dataset located in the "../pop909" folder, and the processed data is saved in the "midi_data/" directory.


ref: https://github.com/music-x-lab/POP909-Dataset/blob/master/data_process/data_process.ipynb

In [14]:
#  '''
# This is the data processing script for POP909:A Pop song Dataset for Music Arrangement Generation
# ============
# It will allow you to quickly process the POP909 Files (Midi) into the Google Magenta's music representation 
#     as like [Music Transformer](https://magenta.tensorflow.org/music-transformer) 
#             [Performance RNN](https://magenta.tensorflow.org/performance-rnn).

# '''


In [11]:
import os
import pickle
import numpy as np
import pretty_midi as pyd
from music21 import converter, metadata, environment

total = 0

def preprocess_midi(path):
    global total
    data = pyd.PrettyMIDI(path)
    main_notes = []
    acc_notes = []
    for ins in data.instruments:
        acc_notes.extend(ins.notes)
    for i in range(len(main_notes)):
        main_notes[i].start = round(main_notes[i].start, 2)
        main_notes[i].end = round(main_notes[i].end, 2)
    for i in range(len(acc_notes)):
        acc_notes[i].start = round(acc_notes[i].start, 2)
        acc_notes[i].end = round(acc_notes[i].end, 2)
    main_notes.sort(key=lambda x: x.start)
    acc_notes.sort(key=lambda x: x.start)
    
    # Replace the following lines with your own processing logic
    main_events = [(note.start, note.end, note.pitch) for note in main_notes]
    acc_events = [(note.start, note.end, note.pitch) for note in acc_notes]
    
    total += len(main_events) + len(acc_events)
    return main_events, acc_events

def is_midi_file(filename):
    return filename.lower().endswith(".mid")

def preprocess_pop909(midi_root, save_dir):
    save_py = []

    try:
        # Create the "midi_data/" directory if it doesn't exist
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        # Traverse subdirectories in "POP909_v3"
        for subdir, dirs, files in os.walk(midi_root):
            for filename in files:
                file_path = os.path.join(subdir, filename)
                if is_midi_file(file_path):
                    try:
                        main_events, acc_events = preprocess_midi(file_path)
                        save_py.append((main_events, acc_events))
                    except KeyboardInterrupt:
                        print(' Abort')
                        return
                    except Exception as e:
                        print(f'Error processing file {file_path}: {e}')

        save_py = np.array(save_py)
        print(save_py.size)
        np.save(os.path.join(save_dir, "pop909-event-token.npy"), save_py)

    except Exception as e:
        print(f'An error occurred: {e}')

# Specify the paths
midi_root = "POP909_v3"
save_dir = "midi_data/"

# Call the preprocessing function
preprocess_pop909(midi_root, save_dir)


  save_py = np.array(save_py)


1756


In [1]:
import os
import pickle
import numpy as np
import pretty_midi as pyd

total = 0

def preprocess_midi(path):
    global total
    data = pyd.PrettyMIDI(path)
    main_notes = []
    acc_notes = []
    
    # Extract all notes from all instruments
    for ins in data.instruments:
        acc_notes.extend(ins.notes)
    
    # Quantize notes in each bar
    for i in range(len(acc_notes)):
        acc_notes[i].start = round(acc_notes[i].start, 2)
        acc_notes[i].end = round(acc_notes[i].end, 2)
        # Quantize to the nearest 16th note
        acc_notes[i].start = round(acc_notes[i].start * 4) / 4
        acc_notes[i].end = round(acc_notes[i].end * 4) / 4
    
    acc_notes.sort(key=lambda x: x.start)
    
    # Replace the following lines with your own processing logic
    main_events = [(note.start, note.end, note.pitch, note.duration, note.velocity) for note in main_notes]
    acc_events = [(note.start, note.end, note.pitch, note.duration, note.velocity) for note in acc_notes]
    
    total += len(main_events) + len(acc_events)
    return main_events, acc_events

def is_midi_file(filename):
    return filename.lower().endswith(".mid")

def preprocess_pop909(midi_root, save_dir):
    save_py = []

    try:
        # Create the "midi_data/" directory if it doesn't exist
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        # Traverse subdirectories in "POP909_v3"
        for subdir, dirs, files in os.walk(midi_root):
            for filename in files:
                file_path = os.path.join(subdir, filename)
                if is_midi_file(file_path):
                    try:
                        main_events, acc_events = preprocess_midi(file_path)
                        save_py.append((main_events, acc_events))
                    except KeyboardInterrupt:
                        print(' Abort')
                        return
                    except Exception as e:
                        print(f'Error processing file {file_path}: {e}')

        save_py = np.array(save_py)
        print(save_py.size)
        np.save(os.path.join(save_dir, "pop909-event-token.npy"), save_py)

    except Exception as e:
        print(f'An error occurred: {e}')

# Specify the paths
midi_root = "POP909_v3"
save_dir = "midi_data/"

# Call the preprocessing function
preprocess_pop909(midi_root, save_dir)


  save_py = np.array(save_py)


1756


In [2]:
import os
import numpy as np
import pretty_midi as pyd

total = 0

def preprocess_midi(path):
    global total
    data = pyd.PrettyMIDI(path)
    main_notes = []
    acc_notes = []
    
    # Extract all notes from all instruments
    for ins in data.instruments:
        acc_notes.extend(ins.notes)
    
    # Quantize notes in each bar
    for i in range(len(acc_notes)):
        acc_notes[i].start = round(acc_notes[i].start, 2)
        acc_notes[i].end = round(acc_notes[i].end, 2)
        # Quantize to the nearest 16th note
        acc_notes[i].start = round(acc_notes[i].start * 4) / 4
        acc_notes[i].end = round(acc_notes[i].end * 4) / 4
    
    acc_notes.sort(key=lambda x: x.start)
    
    # Replace the following lines with your own processing logic
    main_events = [(note.start, note.end, note.pitch, note.end - note.start, note.velocity) for note in main_notes]
    acc_events = [(note.start, note.end, note.pitch, note.end - note.start, note.velocity) for note in acc_notes]
    
    total += len(main_events) + len(acc_events)
    return main_events, acc_events


def is_midi_file(filename):
    return filename.lower().endswith(".mid")

def preprocess_pop909(midi_root, save_dir):
    save_py = []

    try:
        # Create the "midi_data_v2/" directory if it doesn't exist
        save_dir_v2 = save_dir + "_v2"
        if not os.path.exists(save_dir_v2):
            os.makedirs(save_dir_v2)

        # Traverse subdirectories in "POP909_v3"
        for subdir, dirs, files in os.walk(midi_root):
            for filename in files:
                file_path = os.path.join(subdir, filename)
                if is_midi_file(file_path):
                    try:
                        main_events, acc_events = preprocess_midi(file_path)
                        save_py.append((main_events, acc_events))
                    except KeyboardInterrupt:
                        print(' Abort')
                        return
                    except Exception as e:
                        print(f'Error processing file {file_path}: {e}')

        save_py = np.array(save_py)
        print(save_py.size)
        np.save(os.path.join(save_dir_v2, "pop909-event-token.npy"), save_py)

    except Exception as e:
        print(f'An error occurred: {e}')

# Specify the paths
midi_root = "POP909_v3"
save_dir = "midi_data"

# Call the preprocessing function
preprocess_pop909(midi_root, save_dir)


  save_py = np.array(save_py)


1756


In [None]:
miditok.MIDITokenizer

import os
import numpy as np
import pretty_midi as pyd
from miditok import REMI, TokenizerConfig
from pathlib import Path

def is_midi_file(filename):
    return filename.lower().endswith(".mid")

def tokenize_midi(midi_path, tokenizer):
    data = pyd.PrettyMIDI(midi_path)

    # Extract tempo information
    tempo_changes = data.get_tempo_changes()
    tempo_tokens = tokenizer.tokenize_tempo(tempo_changes[0], tempo_changes[1])

    # Extract note information
    notes = []
    for instrument in data.instruments:
        notes.extend(instrument.notes)

    note_tokens = tokenizer.tokenize_notes(notes)

    # Extract chord, pitch, duration, and velocity information
    chord_tokens = tokenizer.tokenize_chords(data)
    pitch_tokens, duration_tokens, velocity_tokens = tokenizer.tokenize_notes_details(notes)

    return tempo_tokens, note_tokens, chord_tokens, pitch_tokens, duration_tokens, velocity_tokens

def preprocess_pop909(midi_root, save_dir):
    save_tokens = []

    try:
        # Create the "midi_data_v2/" directory if it doesn't exist
        save_dir_v2 = save_dir + "_v2"
        if not os.path.exists(save_dir_v2):
            os.makedirs(save_dir_v2)

        # Creating a multitrack tokenizer configuration
        config = TokenizerConfig(num_velocities=16, use_chords=True, use_programs=True)
        tokenizer = REMI(config)

        # Traverse subdirectories in "POP909_v3"
        for subdir, dirs, files in os.walk(midi_root):
            for filename in files:
                file_path = os.path.join(subdir, filename)
                if is_midi_file(file_path):
                    try:
                        tempo_tokens, note_tokens, chord_tokens, pitch_tokens, duration_tokens, velocity_tokens = tokenize_midi(file_path, tokenizer)
                        save_tokens.append((tempo_tokens, note_tokens, chord_tokens, pitch_tokens, duration_tokens, velocity_tokens))
                    except KeyboardInterrupt:
                        print(' Abort')
                        return
                    except Exception as e:
                        print(f'Error processing file {file_path}: {e}')

        save_tokens = np.array(save_tokens)
        np.save(os.path.join(save_dir_v2, "pop909-tokenized.npy"), save_tokens)

        # Tokenize a whole dataset and save it at JSON files
        midi_paths = list(Path(midi_root).rglob("*.mid"))
        data_augmentation_offsets = [2, 1, 1]
        tokenizer.tokenize_midi_dataset(midi_paths, Path(save_dir_v2, "tokens_noBPE"), data_augment_offsets=data_augmentation_offsets)

        # Constructs the vocabulary with BPE, from the token files
        tokenizer.learn_bpe(
            vocab_size=10000,
            tokens_paths=list(Path(save_dir_v2, "tokens_noBPE").rglob("**/*.json")),
            start_from_empty_voc=False,
        )

        # Saving the tokenizer configuration
        tokenizer.save_params(Path(save_dir_v2, "tokenizer.json"))

    except Exception as e:
        print(f'An error occurred: {e}')

# Specify the paths
midi_root = "POP909_v3"
save_dir = "midi_data"

# Call the preprocessing function
preprocess_pop909(midi_root, save_dir)


Error processing file POP909_v3\001\001.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\002\002.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\003\003.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\004\004.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\005\005.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\006\006.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\007\007.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\008\008.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\009\009.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\010\010.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\011\011.mid: 'REMI' object has no attr

Error processing file POP909_v3\099\099.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\100\100.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\101\101.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\103\103.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\104\104.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\105\105.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\106\106.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\107\107.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\108\108.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\109\109.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\110\110.mid: 'REMI' object has no attr

Error processing file POP909_v3\190\190.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\191\191.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\192\192.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\193\193.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\194\194.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\195\195.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\196\196.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\197\197.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\198\198.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\199\199.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\200\200.mid: 'REMI' object has no attr

Error processing file POP909_v3\282\282.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\283\283.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\284\284.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\285\285.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\286\286.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\287\287.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\288\288.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\289\289.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\290\290.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\291\291.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\292\292.mid: 'REMI' object has no attr

Error processing file POP909_v3\373\373.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\374\374.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\375\375.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\376\376.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\377\377.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\378\378.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\379\379.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\380\380.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\381\381.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\382\382.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\383\383.mid: 'REMI' object has no attr

Error processing file POP909_v3\465\465.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\466\466.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\467\467.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\468\468.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\469\469.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\470\470.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\471\471.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\472\472.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\473\473.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\474\474.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\475\475.mid: 'REMI' object has no attr

Error processing file POP909_v3\562\562.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\564\564.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\565\565.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\566\566.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\567\567.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\568\568.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\569\569.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\570\570.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\571\571.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\572\572.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\573\573.mid: 'REMI' object has no attr

Error processing file POP909_v3\656\656.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\657\657.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\658\658.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\659\659.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\660\660.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\661\661.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\662\662.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\663\663.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\664\664.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\665\665.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\666\666.mid: 'REMI' object has no attr

Error processing file POP909_v3\763\763.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\764\764.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\765\765.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\766\766.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\767\767.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\768\768.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\769\769.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\770\770.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\771\771.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\772\772.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\773\773.mid: 'REMI' object has no attr

Error processing file POP909_v3\853\853.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\854\854.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\855\855.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\856\856.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\857\857.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\858\858.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\859\859.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\860\860.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\861\861.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\862\862.mid: 'REMI' object has no attribute 'tokenize_tempo'
Error processing file POP909_v3\863\863.mid: 'REMI' object has no attr

Tokenizing MIDIs (midi_data_v2/tokens_noBPE): 100%|██████████████████████████████████| 878/878 [01:24<00:00, 10.38it/s]
Performing data augmentation: 100%|██████████████████████████████████████████████████| 878/878 [00:39<00:00, 22.21it/s]
Loading token files: 100%|████████████████████████████████████████████████████████| 6129/6129 [00:40<00:00, 153.04it/s]


mmm-lmd
https://colab.research.google.com/drive/1KLbe-ZnIyvpPypVqYapBRs-o5Q1E7a9R?usp=sharing#scrollTo=ex9Lt0yWw5Ud

In [None]:
from pathlib import Path
from copy import deepcopy
from math import ceil

from miditoolkit import MidiFile
from tqdm import tqdm

MAX_NB_BAR = 8
MIN_NB_NOTES = 20
dataset = "POP909"  # Change to your dataset name

merged_out_dir = Path("path/to/output/directory", f"{dataset}-chunked")
merged_out_dir.mkdir(parents=True, exist_ok=True)

# Adjust the base path to your dataset
midi_paths = list(Path("path/to/POP909/dataset", dataset).glob("**/*.mid")) + 
                list(Path("path/to/POP909/dataset", dataset).glob("**/*.midi"))

for i, midi_path in enumerate(tqdm(midi_paths, desc="CHUNKING MIDIS")):
    try:
        # Determine the output directory for this file
        relative_path = midi_path.relative_to(Path("path/to/POP909/dataset", dataset))
        output_dir = merged_out_dir / relative_path.parent
        output_dir.mkdir(parents=True, exist_ok=True)

        # Check if chunks already exist
        chunk_paths = list(output_dir.glob(f"{midi_path.stem}_*.mid"))
        if len(chunk_paths) > 0:
            print(f"Chunks for {midi_path} already exist, skipping...")
            continue

        # Loads MIDI, merges, and saves it
        midi = MidiFile(midi_path)
        ticks_per_cut = MAX_NB_BAR * midi.ticks_per_beat * 4
        nb_cuts = ceil(midi.max_tick / ticks_per_cut)
        if nb_cuts < 2:
            continue

        print(f"Processing {midi_path}")
        midis = [deepcopy(midi) for _ in range(nb_cuts)]

        for j, track in enumerate(midi.instruments):  # sort notes as they are not always sorted right
            track.notes.sort(key=lambda x: x.start)
            for midi_short in midis:  # clears notes from shorten MIDIs
                midi_short.instruments[j].notes = []
            for note in track.notes:
                cut_id = note.start // ticks_per_cut
                note_copy = deepcopy(note)
                note_copy.start -= cut_id * ticks_per_cut
                note_copy.end -= cut_id * ticks_per_cut
                midis[cut_id].instruments[j].notes.append(note_copy)

        # Saving MIDIs
        for j, midi_short in enumerate(midis):
            if sum(len(track.notes) for track in midi_short.instruments) < MIN_NB_NOTES:
                continue
            midi_short.dump(output_dir / f"{midi_path.stem}_{j}.mid")

    except Exception as e:
        print(f"An error occurred while processing {midi_path}: {e}")
