In [1]:
import pypianoroll
import os
import numpy as np
import matplotlib.pyplot as plt
from copy import deepcopy
import numpy as np
import torch
from torch.utils.data import Dataset
import io
import symusic

In [2]:
class LiveMelCATDataset(Dataset):
    """Dataset that builds melody / chroma / text / accompaniment items on the fly.

    Every item is produced from one file listed in ``midis_folder``: the file is
    read as a pianoroll, a random segment is cut out and randomly transposed,
    and the first track of that segment is turned into

    * ``'chroma'``: a binary (time, 12) pitch-class matrix,
    * ``'melody'`` / ``'accomp'``: the segment split by
      ``split_melody_accompaniment`` and re-parsed through ``make_midi_bytes``,
    * ``'text'``: the file name.

    Each of the three optional tokenizers is applied to its modality when it is
    not ``None``; otherwise the raw object is returned.

    Parameters
    ----------
    midis_folder : str
        Folder whose entries (as returned by ``os.listdir``) are the dataset items.
    chroma_tokenizer, midi_tokenizer, text_tokenizer : callable or None
        Callables applied to the chroma matrix, to each MIDI object and to the
        file name respectively.
    segment_size : int
        Segment length; multiplied by the piece resolution to obtain the
        segment length in pianoroll time steps.
    resolution : int
        Resolution passed to ``pypianoroll.read``.
    """

    def __init__(self, midis_folder, chroma_tokenizer=None, midi_tokenizer=None, text_tokenizer=None, segment_size=64, resolution=24):
        self.midis_folder = midis_folder
        # NOTE(review): os.listdir order is arbitrary and every directory entry
        # is kept -- presumably the folder contains only MIDI files; confirm.
        self.midis_list = os.listdir(midis_folder)
        self.segment_size = segment_size
        self.resolution = resolution
        self.chroma_tokenizer = chroma_tokenizer
        self.midi_tokenizer = midi_tokenizer
        self.text_tokenizer = text_tokenizer
    # end init

    def __len__(self):
        """Return the number of entries found in ``midis_folder``."""
        return len(self.midis_list)
    # end len

    def __getitem__(self, idx):
        """Build one randomly cropped and transposed training item.

        Returns a dict with the keys ``'melody'``, ``'chroma'``, ``'text'`` and
        ``'accomp'``. The result is random: both the crop position and the
        transposition are drawn from ``np.random``.
        """
        print(self.midis_list[idx])
        # load a midi file in pianoroll; the object is local to this call, so it
        # can be trimmed / transposed in place without a defensive copy
        new_piece = pypianoroll.read(os.path.join(self.midis_folder, self.midis_list[idx]), resolution=self.resolution)
        # get random start_idx and end_idx, segment_size apart
        piece_size = new_piece.downbeat.shape[0]
        segment_steps = self.segment_size*new_piece.resolution
        max_start = piece_size - segment_steps
        if max_start > 0:
            start_idx = np.random.randint(max_start)
            end_idx = start_idx + segment_steps
        else:
            # the piece is not longer than one segment: np.random.randint would
            # raise on a non-positive upper bound, so use the whole piece
            start_idx = 0
            end_idx = piece_size
        # trim to start_idx - end_idx
        new_piece.trim(start_idx, end_idx)
        # randomly transpose; randint excludes the upper bound, so the shift is
        # one of the 12 values -6..5
        new_piece.transpose( np.random.randint(-6,6) )
        # get binary chroma 12D of the segment (first track only)
        chroma = self._compute_chroma(new_piece.tracks[0].pianoroll)
        if self.chroma_tokenizer is None:
            tokenized_chroma = chroma
        else:
            tokenized_chroma = self.chroma_tokenizer(chroma)
        # split melody - accompaniment
        melody_piece, accomp_piece = self.split_melody_accompaniment( new_piece )
        # make ghost files of melody and accomp pieces
        melody_file = self.make_midi_bytes(melody_piece)
        accomp_file = self.make_midi_bytes(accomp_piece)
        # tokenize melody and accompaniment midi
        if self.midi_tokenizer is None:
            tokenized_melody = melody_file
            tokenized_accomp = accomp_file
        else:
            tokenized_melody = self.midi_tokenizer(melody_file)
            tokenized_accomp = self.midi_tokenizer(accomp_file)
        # get text from title
        text_description = self.midis_list[idx]
        # tokenize text
        if self.text_tokenizer is None:
            tokenized_text = text_description
        else:
            tokenized_text = self.text_tokenizer(text_description)
        return {
            'melody': tokenized_melody,
            'chroma': tokenized_chroma,
            'text': tokenized_text,
            'accomp': tokenized_accomp
        }
    # end getitem

    @staticmethod
    def _compute_chroma(pianoroll):
        """Fold a (time, pitch) pianoroll into a boolean (time, 12) chroma matrix.

        Column ``c`` of the result is True at a time step when any pitch ``p``
        with ``p % 12 == c`` has a non-zero value at that step. Every pitch
        column is folded, including a trailing incomplete octave.
        """
        active = np.asarray(pianoroll) > 0
        chroma = np.zeros((active.shape[0], 12), dtype=bool)
        for pitch_class in range(12):
            # a stride of 12 visits this pitch class in every octave
            chroma[:, pitch_class] = active[:, pitch_class::12].any(axis=1)
        return chroma
    # end _compute_chroma

    def split_melody_accompaniment(self, pypianoroll_structure):
        """Split the first track into a melody copy and an accompaniment copy.

        The input is deep-copied twice and is not modified. The pianoroll of
        track 0 is scanned time step by time step; the highest sounding pitch
        is followed as the "running melody". Along the way lower pitches are
        zeroed in the melody copy and the melody pitch is zeroed in the
        accompaniment copy. Only track 0 is edited; any other tracks stay as
        copied.

        Returns
        -------
        tuple
            ``(melody_piece, accomp_piece)``.
        """
        melody_piece = deepcopy( pypianoroll_structure )
        accomp_piece = deepcopy( pypianoroll_structure )

        mel_pr = melody_piece.tracks[0].pianoroll
        acc_pr = accomp_piece.tracks[0].pianoroll

        # np.array copies: pr stays an untouched snapshot used for decisions
        # while mel_pr / acc_pr are edited in place
        pr = np.array(melody_piece.tracks[0].pianoroll)
        running_melody = -1
        i = 0
        # for i in range( pr.shape[0] ):
        while i < pr.shape[0]:
            # check if any note
            if np.sum(pr[i,:]) > 0:
                # get running max
                running_max = np.max( np.nonzero( pr[i,:] ) )
                # check if there exists a running melody
                if running_melody > -1:
                    # check if running melody is continued
                    if running_melody == running_max:
                        # remove all lower pitches from melody
                        mel_pr[i, :running_max] = 0
                        # remove higher pitch from accomp
                        acc_pr[i, running_max] = 0
                    else:
                        # running melody may need to change
                        # check if new highest pitch just started
                        if running_max > running_melody:
                            # a new higher note has started
                            # finish previous note that was highest until now
                            j = 0
                            while j+i < mel_pr.shape[0] and mel_pr[i+j, running_melody] > 0 and running_max > running_melody:
                                mel_pr[i+j, :running_melody] = 0
                                mel_pr[i+j, running_melody+1:running_max] = 0
                                acc_pr[i+j, running_melody] = 0
                                acc_pr[i+j, running_max] = 0
                                if np.sum( pr[i+j,:] ) > 0:
                                    running_max = np.max( np.nonzero( pr[i+j,:] ) )
                                else:
                                    running_melody = -1
                                    break
                                j += 1
                            # start new running melody
                            # (i advances past the j rows handled above; the
                            # i += 1 at the end of the outer loop adds the last step)
                            i += j-1
                            running_melody = running_max
                        else:
                            # i should be > 0 since we have that running_melody > -1
                            # a lower note has come
                            # if has begun earlier, it should be ignored
                            if pr[i-1, running_max] > 0:
                                # its continuing an existing note - not part of melody
                                mel_pr[i, :] = 0
                                # running max should not be canceled, it remains as ghost max
                                # until a new higher max or a fresh lower max starts
                            else:
                                # a new fresh lower max starts that shouldn't be ignored
                                # start new running melody
                                running_melody = running_max
                                # remove all lower pitches from melody
                                mel_pr[i, :running_max] = 0
                                # remove higher pitch from accomp
                                acc_pr[i, running_max] = 0
                else:
                    # no running melody, check max conditions
                    # new note started - make it the running melody
                    running_melody = running_max
                    # remove all lower pitches from melody
                    mel_pr[i, :running_max] = 0
                    # remove higher pitch from accomp
                    acc_pr[i, running_max] = 0
                # end if
            else:
                # there is a gap
                running_melody = -1
            # end if
            i += 1
        # end for
        return melody_piece, accomp_piece
    # end split_melody_accompaniment

    def make_midi_bytes(self, pianoroll_structure):
        """Convert a pianoroll object to a ``symusic`` score without touching disk.

        The object is written as MIDI into an in-memory buffer and the buffer
        content is parsed with ``symusic.Score.from_midi``. The buffer is closed
        even if writing or parsing fails.
        """
        with io.BytesIO() as b_handle:
            # write midi data to the in-memory handle
            pianoroll_structure.write(b_handle)
            # getvalue() returns the whole buffer regardless of the stream position
            score = symusic.Score.from_midi(b_handle.getvalue())
        return score
    # end make_midi_bytes

In [3]:
from BinaryTokenizer import BinaryTokenizer
from miditok import REMI, TokenizerConfig
from transformers import RobertaTokenizer, RobertaModel

In [4]:
# Tokenizer and encoder are loaded from the same pretrained checkpoint so that
# the token ids produced by one are the ids expected by the other.
ROBERTA_CHECKPOINT = 'roberta-base'
text_tokenizer = RobertaTokenizer.from_pretrained(ROBERTA_CHECKPOINT)
text_encoder = RobertaModel.from_pretrained(ROBERTA_CHECKPOINT)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
# Quick check of the text pipeline: tokenize a short string and run it through
# the encoder.
text = "One two three"
encoded_input = text_tokenizer(text, return_tensors='pt')
# This is an inference-only forward pass, so autograd tracking is disabled to
# avoid building (and keeping alive) a gradient graph for the output.
with torch.no_grad():
    output = text_encoder(**encoded_input)

In [6]:
# Shape of the per-token encoder output for the test sentence.
print(output.last_hidden_state.shape)

torch.Size([1, 5, 768])


In [7]:
# --- MIDI tokenizer configuration -------------------------------------------
PITCH_RANGE = (21, 109)
BEAT_RES = {(0, 1): 8, (1, 2): 4, (2, 4): 2, (4, 8): 1}
NB_VELOCITIES = 24
SPECIAL_TOKENS = ["PAD", "MASK", "BOS", "EOS"]

# Optional token families: all switched off.
USE_CHORDS = False
USE_RESTS = False
USE_TEMPOS = False
USE_TIME_SIGNATURE = False
USE_PROGRAMS = False

# Tempo settings are still forwarded to the config although USE_TEMPOS is False.
NB_TEMPOS = 32
TEMPO_RANGE = (50, 200)  # (min_tempo, max_tempo)

# Keyword arguments for TokenizerConfig, collected in one mapping.
TOKENIZER_PARAMS = dict(
    pitch_range=PITCH_RANGE,
    beat_res=BEAT_RES,
    num_velocities=NB_VELOCITIES,
    special_tokens=SPECIAL_TOKENS,
    use_chords=USE_CHORDS,
    use_rests=USE_RESTS,
    use_tempos=USE_TEMPOS,
    use_time_signatures=USE_TIME_SIGNATURE,
    use_programs=USE_PROGRAMS,
    num_tempos=NB_TEMPOS,
    tempo_range=TEMPO_RANGE,
)
config = TokenizerConfig(**TOKENIZER_PARAMS)
midi_tokenizer = REMI(config)

# Tokenizer for the 12-column binary chroma matrix.
binary_tokenizer = BinaryTokenizer(num_digits=12)

  config = TokenizerConfig(**TOKENIZER_PARAMS)


In [8]:
# Build the dataset over the small GiantMIDI folder with one tokenizer per modality.
midifolder = '../data/giantmidi_small/'
d = LiveMelCATDataset(
    midifolder,
    chroma_tokenizer=binary_tokenizer,
    midi_tokenizer=midi_tokenizer.encode,
    text_tokenizer=text_tokenizer,
)

In [9]:
# Fetch the first item. This runs the full on-the-fly pipeline once (random
# crop, random transposition, melody/accompaniment split, tokenization) and
# prints the name of the file that was used.
d0 = d[0]

A., Jag, Je t'aime Juliette, OXC7Fd0ZN8o.mid


In [13]:
# Show every field of the sampled item, one after another.
for field_name in ('melody', 'chroma', 'text', 'accomp'):
    print(d0[field_name])

[TokSequence(tokens=['Bar_None', 'Position_0', 'Pitch_86', 'Velocity_84', 'Duration_4.0.1', 'Position_15', 'Pitch_87', 'Velocity_89', 'Duration_1.0.4', 'Position_24', 'Pitch_87', 'Velocity_95', 'Duration_1.1.4', 'Bar_None', 'Position_1', 'Pitch_87', 'Velocity_95', 'Duration_1.1.4', 'Position_10', 'Pitch_87', 'Velocity_89', 'Duration_1.3.4', 'Position_22', 'Pitch_89', 'Velocity_84', 'Duration_1.1.4', 'Bar_None', 'Position_0', 'Pitch_89', 'Velocity_89', 'Duration_1.1.4', 'Position_9', 'Pitch_89', 'Velocity_89', 'Duration_1.0.4', 'Position_17', 'Pitch_89', 'Velocity_84', 'Duration_6.0.1', 'Bar_None', 'Position_29', 'Pitch_89', 'Velocity_79', 'Duration_1.0.4', 'Bar_None', 'Position_6', 'Pitch_89', 'Velocity_84', 'Duration_1.0.4', 'Position_15', 'Pitch_89', 'Velocity_84', 'Duration_1.1.4', 'Position_24', 'Pitch_89', 'Velocity_84', 'Duration_1.1.4', 'Bar_None', 'Position_9', 'Pitch_87', 'Velocity_89', 'Duration_1.0.4', 'Position_12', 'Pitch_96', 'Velocity_74', 'Duration_0.1.8', 'Position_18'