# Please install the packages below to run the code

numpy == 1.23.5

miditoolkit

transformers

torch

pygame (only needed for "7. Play Music")

# Midi tutorial

## Import packages

In [132]:
import numpy as np
import miditoolkit
import copy
import pickle

## Some functions

In [133]:
# parameters for input
# velocity bin edges: 33 evenly spaced integers 0, 4, ..., 128
DEFAULT_VELOCITY_BINS = np.linspace(0, 128, num=32 + 1, dtype=int)
# number of positions each bar is divided into
DEFAULT_FRACTION = 16
# duration bins in ticks: 60, 120, ..., 3840
DEFAULT_DURATION_BINS = np.arange(60, 3840 + 1, 60, dtype=int)
# BPM ranges used as the slow / mid / fast tempo classes
DEFAULT_TEMPO_INTERVALS = [range(low, low + 60) for low in (30, 90, 150)]

# parameters for output
# ticks per beat of the written MIDI file
DEFAULT_RESOLUTION = 480

# generic container for the timed elements (notes, tempos, chords) of a piece
class Item:
    """A timed musical element.

    ``name`` tells the kind of element ('Note', 'Tempo', 'Chord').  For
    non-note items ``pitch`` carries the element's value (e.g. the BPM of a
    tempo item) and ``end`` / ``velocity`` may be None.
    """

    def __init__(self, name, start, end, velocity, pitch):
        self.name, self.start, self.end = name, start, end
        self.velocity, self.pitch = velocity, pitch

    def __repr__(self):
        return (
            f'Item(name={self.name}, start={self.start}, end={self.end}, '
            f'velocity={self.velocity}, pitch={self.pitch})'
        )

# read notes and tempo changes from midi (assume there is only one track)
def read_items(file_path):
    """Read the notes and a per-beat tempo track from a MIDI file.

    Only the first instrument of the file is read.

    Args:
        file_path: path of the MIDI file, parsed with miditoolkit.

    Returns:
        A ``(note_items, tempo_items)`` tuple of Item lists.
        ``note_items`` is ordered by (start, pitch).  ``tempo_items`` holds
        one 'Tempo' item every DEFAULT_RESOLUTION ticks, from tick 0 up to
        the last tempo change, with the BPM (truncated to int) stored in
        ``pitch``.  It is empty when the file has no tempo change.

    Raises:
        IndexError: if the file contains no instrument.
    """
    midi_obj = miditoolkit.midi.parser.MidiFile(file_path)
    # note
    notes = sorted(
        midi_obj.instruments[0].notes,
        key=lambda x: (x.start, x.pitch))
    note_items = [
        Item(
            name='Note',
            start=note.start,
            end=note.end,
            velocity=note.velocity,
            pitch=note.pitch)
        for note in notes]
    # tempo
    tempo_changes = sorted(midi_obj.tempo_changes, key=lambda x: x.time)
    if not tempo_changes:
        # nothing to expand; avoids indexing into an empty list below
        return note_items, []
    existing_ticks = {tempo.time: int(tempo.tempo) for tempo in tempo_changes}
    # expand to all beat
    # NOTE: only tempo changes lying exactly on the beat grid are picked up;
    # every other beat repeats the most recent value seen on the grid.
    max_tick = tempo_changes[-1].time
    # beats before the first on-grid tempo change inherit the first tempo
    # of the file instead of failing on an empty "previous" value
    current_bpm = int(tempo_changes[0].tempo)
    tempo_items = []
    for tick in np.arange(0, max_tick+1, DEFAULT_RESOLUTION).tolist():
        current_bpm = existing_ticks.get(tick, current_bpm)
        tempo_items.append(Item(
            name='Tempo',
            start=tick,
            end=None,
            velocity=None,
            pitch=current_bpm))
    return note_items, tempo_items

# quantize items
def quantize_items(items, ticks=120):
    """Snap every item's start to the nearest multiple of ``ticks``, in place.

    ``end`` is shifted by the same amount, so durations are preserved.  When
    a start lies exactly between two grid points the lower one is chosen.

    Args:
        items: objects with numeric ``start`` and ``end`` attributes.
        ticks: grid spacing in ticks.

    Returns:
        The same ``items`` list (its elements are modified in place).
    """
    if not items:
        return items
    # grid: go one step past the latest start so that late items can round
    # up, and so that the grid is never empty (np.argmin raises on an empty
    # array, which happened whenever the last start was 0)
    last_start = max(item.start for item in items)
    grids = np.arange(0, last_start + ticks, ticks, dtype=int)
    # process
    for item in items:
        index = np.argmin(abs(grids - item.start))
        shift = int(grids[index]) - item.start
        item.start += shift
        item.end += shift
    return items

# extract chord
def chord_extract(midi_path, max_time):
    """Placeholder for chord extraction; currently returns an empty list.

    The MIDI file is parsed and a one-beat window length is computed for a
    future implementation.  ``max_time`` is not used yet.
    """
    parsed = miditoolkit.midi.parser.MidiFile(midi_path)
    interval = 1 * parsed.ticks_per_beat  # one beat, in ticks (unused so far)
    ###
    # implement your chord extraction here
    # it's fine if you don't use chord items
    ###
    chord_items = []
    return chord_items

# group items
def group_items(items, max_time, ticks_per_bar=DEFAULT_RESOLUTION*4):
    """Split items into consecutive bars according to their start tick.

    Args:
        items: list of objects with a ``start`` attribute.  The list is
            sorted in place by ``start``.
        max_time: tick up to which bars are generated.
        ticks_per_bar: length of one bar in ticks.

    Returns:
        One list per bar, shaped ``[bar_start, item, ..., bar_end]``, holding
        the items with ``bar_start <= item.start < bar_end``.  Items that
        start outside every generated bar are left out.
    """
    items.sort(key=lambda x: x.start)
    downbeats = np.arange(0, max_time+ticks_per_bar, ticks_per_bar)
    # The items are sorted, so each bar is one contiguous slice.  Locating the
    # slice boundaries by binary search avoids rescanning all items per bar.
    starts = [item.start for item in items]
    bounds = np.searchsorted(starts, downbeats, side='left')
    return [
        [db1] + items[lo:hi] + [db2]
        for db1, db2, lo, hi in zip(
            downbeats[:-1], downbeats[1:], bounds[:-1], bounds[1:])
    ]

# token-level record produced from items (and rebuilt from words)
class Event:
    """A single event: its kind (``name``), tick (``time``), ``value`` and
    a free-form ``text`` annotation."""

    def __init__(self, name, time, value, text):
        self.name, self.time = name, time
        self.value, self.text = value, text

    def __repr__(self):
        return (
            f'Event(name={self.name}, time={self.time}, '
            f'value={self.value}, text={self.text})'
        )

# map a BPM value to its tempo class and the offset inside that class
def _classify_tempo(tempo):
    """Return ``(tempo_class, offset)`` for an integer BPM value.

    Values below the slowest interval clamp to ('slow', 0); values at or
    above the end of the fastest interval clamp to the last 'fast' offset.
    The three DEFAULT_TEMPO_INTERVALS are assumed to be contiguous.
    """
    slow, mid, fast = DEFAULT_TEMPO_INTERVALS
    for label, interval in (('slow', slow), ('mid', mid), ('fast', fast)):
        if tempo in interval:
            return label, tempo - interval.start
    if tempo < slow.start:
        return 'slow', 0
    # range.stop is exclusive, so a tempo equal to fast.stop must clamp too;
    # comparing with a strict '>' left that value without any class.
    return 'fast', len(fast) - 1


# item to event
def item2event(groups):
    """Convert bar groups into a flat list of Event objects.

    Args:
        groups: lists shaped ``[bar_start, item, ..., bar_end]`` as built by
            group_items.

    Returns:
        A list of Event.  Each bar that contains at least one 'Note' item
        contributes a 'Bar' event followed, per item, by a 'Position' event
        and the item-specific events ('Note Velocity' / 'Note On' /
        'Note Duration', 'Chord', or 'Tempo Class' / 'Tempo Value').  Bars
        without notes are skipped and do not advance the bar counter.
    """
    events = []
    n_downbeat = 0
    for group in groups:
        bar_st, bar_et = group[0], group[-1]
        bar_items = group[1:-1]
        if all(item.name != 'Note' for item in bar_items):
            continue
        n_downbeat += 1
        events.append(Event(
            name='Bar',
            time=None,
            value=None,
            text='{}'.format(n_downbeat)))
        # the position grid only depends on the bar, not on the item
        flags = np.linspace(bar_st, bar_et, DEFAULT_FRACTION, endpoint=False)
        for item in bar_items:
            # position
            index = np.argmin(abs(flags-item.start))
            events.append(Event(
                name='Position',
                time=item.start,
                value='{}/{}'.format(index+1, DEFAULT_FRACTION),
                text='{}'.format(item.start)))
            if item.name == 'Note':
                # velocity
                velocity_index = np.searchsorted(
                    DEFAULT_VELOCITY_BINS,
                    item.velocity,
                    side='right') - 1
                events.append(Event(
                    name='Note Velocity',
                    time=item.start,
                    value=velocity_index,
                    text='{}/{}'.format(
                        item.velocity, DEFAULT_VELOCITY_BINS[velocity_index])))
                # pitch
                events.append(Event(
                    name='Note On',
                    time=item.start,
                    value=item.pitch,
                    text='{}'.format(item.pitch)))
                # duration: nearest duration bin
                duration = item.end - item.start
                duration_index = np.argmin(abs(DEFAULT_DURATION_BINS-duration))
                events.append(Event(
                    name='Note Duration',
                    time=item.start,
                    value=duration_index,
                    text='{}/{}'.format(
                        duration, DEFAULT_DURATION_BINS[duration_index])))
            elif item.name == 'Chord':
                events.append(Event(
                    name='Chord',
                    time=item.start,
                    value=item.pitch,
                    text='{}'.format(item.pitch)))
            elif item.name == 'Tempo':
                tempo_class, tempo_offset = _classify_tempo(item.pitch)
                events.append(
                    Event('Tempo Class', item.start, tempo_class, None))
                events.append(
                    Event('Tempo Value', item.start, tempo_offset, None))
    return events

#############################################################################################
# WRITE MIDI
#############################################################################################
def word_to_event(words, word2event):
    """Translate word ids back into Event objects.

    Args:
        words: iterable of word ids.
        word2event: mapping from word id to a '<name>_<value>' string.

    Returns:
        A list of Event whose ``time`` and ``text`` are None and whose
        ``value`` is the string found after the first underscore.

    Raises:
        KeyError: if a word is missing from ``word2event``.
    """
    events = []
    for word in words:
        token = word2event.get(word)
        if token is None:
            # fail with the offending word instead of an AttributeError on None
            raise KeyError(
                'word {!r} is not in the word2event dictionary'.format(word))
        # Event names used in this file contain no underscore, so split once
        # and keep any underscore that belongs to the value.
        event_name, event_value = token.split('_', 1)
        events.append(Event(event_name, None, event_value, None))
    return events

# zero-based position inside the bar encoded by a Position event ('k/N')
def _position_index(event):
    """Return ``k - 1`` for a Position event whose value is 'k/N'."""
    return int(event.value.split('/')[0]) - 1


# turn bar-relative entries into absolute ticks
def _to_absolute_ticks(entries, ticks_per_bar):
    """Resolve ``[position, *payload]`` entries to ``[tick, *payload]``.

    ``entries`` is a list in which the string 'Bar' marks the start of the
    next bar; every other element is a list starting with a bar position.
    """
    resolved = []
    current_bar = 0
    for entry in entries:
        if entry == 'Bar':
            current_bar += 1
            continue
        position, *payload = entry
        current_bar_st = current_bar * ticks_per_bar
        current_bar_et = (current_bar + 1) * ticks_per_bar
        flags = np.linspace(
            current_bar_st, current_bar_et, DEFAULT_FRACTION,
            endpoint=False, dtype=int)
        # plain int so that no numpy scalar ends up in the MIDI objects
        resolved.append([int(flags[position]), *payload])
    return resolved


def write_midi(words, word2event, output_path, prompt_path=None):
    """Decode word ids into notes, tempos and chords and write a MIDI file.

    Args:
        words: iterable of word ids.
        word2event: mapping from word id to '<name>_<value>' string.
        output_path: where the MIDI file is written.
        prompt_path: optional MIDI file to continue.  When given, the
            decoded material is shifted by DEFAULT_RESOLUTION * 4 * 4 ticks
            and appended to the prompt's first instrument; prompt tempo
            changes at or after that tick are replaced by the decoded ones.

    Raises:
        KeyError: if a word is missing from ``word2event``.
        ValueError: if a 'Tempo Class' event carries an unknown class.
    """
    events = word_to_event(words, word2event)
    tempo_bases = {
        'slow': DEFAULT_TEMPO_INTERVALS[0].start,
        'mid': DEFAULT_TEMPO_INTERVALS[1].start,
        'fast': DEFAULT_TEMPO_INTERVALS[2].start,
    }
    # get downbeat and note (no time)
    temp_notes = []
    temp_chords = []
    temp_tempos = []
    # Every event is inspected: stopping three events early (to make room
    # for the four-event note pattern) silently dropped chord and tempo
    # groups located at the very end of the sequence.
    for i, event in enumerate(events):
        if event.name == 'Bar':
            # the first event opens bar 0, so it does not advance the counter
            if i > 0:
                temp_notes.append('Bar')
                temp_chords.append('Bar')
                temp_tempos.append('Bar')
            continue
        if event.name != 'Position':
            continue
        following = [e.name for e in events[i+1:i+4]]
        if following == ['Note Velocity', 'Note On', 'Note Duration']:
            velocity = int(DEFAULT_VELOCITY_BINS[int(events[i+1].value)])
            pitch = int(events[i+2].value)
            duration = int(DEFAULT_DURATION_BINS[int(events[i+3].value)])
            temp_notes.append(
                [_position_index(event), velocity, pitch, duration])
        elif following[:1] == ['Chord']:
            temp_chords.append([_position_index(event), events[i+1].value])
        elif following[:2] == ['Tempo Class', 'Tempo Value']:
            tempo_class = events[i+1].value
            if tempo_class not in tempo_bases:
                # previously this reused the last tempo (or hit an unbound
                # local); fail loudly instead
                raise ValueError(
                    'unknown tempo class: {!r}'.format(tempo_class))
            tempo = tempo_bases[tempo_class] + int(events[i+2].value)
            temp_tempos.append([_position_index(event), tempo])
    # get specific time for notes, chords and tempos
    ticks_per_bar = DEFAULT_RESOLUTION * 4 # assume 4/4
    notes = [
        miditoolkit.Note(velocity, pitch, st, st + duration)
        for st, velocity, pitch, duration
        in _to_absolute_ticks(temp_notes, ticks_per_bar)]
    chords = _to_absolute_ticks(temp_chords, ticks_per_bar)
    tempos = _to_absolute_ticks(temp_tempos, ticks_per_bar)
    # write
    if prompt_path:
        midi = miditoolkit.midi.parser.MidiFile(prompt_path)
        # decoded material starts after the first four 4/4 bars of the prompt
        offset = DEFAULT_RESOLUTION * 4 * 4
        # note shift
        for note in notes:
            note.start += offset
            note.end += offset
        midi.instruments[0].notes.extend(notes)
        # keep the prompt's tempo changes up to the offset
        tempo_changes = []
        for tempo in midi.tempo_changes:
            if tempo.time < offset:
                tempo_changes.append(tempo)
            else:
                break
    else:
        midi = miditoolkit.midi.parser.MidiFile()
        midi.ticks_per_beat = DEFAULT_RESOLUTION
        offset = 0
        # write instrument
        inst = miditoolkit.midi.containers.Instrument(0, is_drum=False)
        inst.notes = notes
        midi.instruments.append(inst)
        tempo_changes = []
    # write tempo
    for st, bpm in tempos:
        tempo_changes.append(
            miditoolkit.midi.containers.TempoChange(bpm, st + offset))
    midi.tempo_changes = tempo_changes
    # write chord into marker
    for st, text in chords:
        midi.markers.append(
            miditoolkit.midi.containers.Marker(text=text, time=st + offset))
    # write
    midi.dump(output_path)


## Convert MIDI to REMI tokens

### 0. Take a look at the MIDI file

In [134]:
# change your midi file path here
path = '../test.mid'
# parse the file and print the parsed object
midi = miditoolkit.midi.parser.MidiFile(path)
print(midi)

ticks per beat: 8
max tick: 1142
tempo changes: 1
time sig: 1
key sig: 0
markers: 0
lyrics: False
instruments: 1


In [135]:
# notes: print each instrument followed by its first ten notes
print('Notes')
for inst in midi.instruments:
    print(inst) 
    print(*inst.notes[0:10], sep='\n')
# tempos: print the first ten tempo changes
print('\nTempos')
print(*midi.tempo_changes[0:10], sep='\n')

Notes
Instrument(program=0, is_drum=False, name=Acoustic Grand Piano) - 277 notes
Note(velocity=47, pitch=59, start=32, end=40)
Note(velocity=47, pitch=54, start=32, end=48)
Note(velocity=55, pitch=47, start=32, end=49)
Note(velocity=63, pitch=66, start=32, end=49)
Note(velocity=47, pitch=57, start=32, end=49)
Note(velocity=47, pitch=62, start=32, end=49)
Note(velocity=55, pitch=49, start=48, end=64)
Note(velocity=51, pitch=61, start=48, end=64)
Note(velocity=51, pitch=56, start=48, end=65)
Note(velocity=63, pitch=68, start=48, end=66)

Tempos
120.0 BPM at 0 ticks


### 1. Read midi and convert to Item object

In [136]:
# read the first track's notes and the per-beat tempo items
note_items, tempo_items = read_items(path)

# show the first ten items of each list
print('Note items')
print(*note_items[0:10], sep='\n')
print('\nTempo items')
print(*tempo_items[0:10], sep='\n')

Note items
Item(name=Note, start=32, end=49, velocity=55, pitch=47)
Item(name=Note, start=32, end=48, velocity=47, pitch=54)
Item(name=Note, start=32, end=49, velocity=47, pitch=57)
Item(name=Note, start=32, end=40, velocity=47, pitch=59)
Item(name=Note, start=32, end=49, velocity=47, pitch=62)
Item(name=Note, start=32, end=49, velocity=63, pitch=66)
Item(name=Note, start=48, end=64, velocity=55, pitch=49)
Item(name=Note, start=48, end=65, velocity=51, pitch=56)
Item(name=Note, start=48, end=79, velocity=47, pitch=59)
Item(name=Note, start=48, end=64, velocity=51, pitch=61)

Tempo items
Item(name=Tempo, start=0, end=None, velocity=None, pitch=120)


### 2. Quantize note items

In [137]:
# snap note starts to the default grid (ticks=120); items are modified in place
# NOTE(review): 120 ticks is one bar position at DEFAULT_RESOLUTION (480*4/16),
# while the file printed above reports 8 ticks per beat -- presumably the grid
# should be scaled to the file's resolution; TODO confirm
note_items = quantize_items(note_items)

print('Quantized note items')
print(*note_items[:10], sep='\n')

Quantized note items
Item(name=Note, start=0, end=17, velocity=55, pitch=47)
Item(name=Note, start=0, end=16, velocity=47, pitch=54)
Item(name=Note, start=0, end=17, velocity=47, pitch=57)
Item(name=Note, start=0, end=8, velocity=47, pitch=59)
Item(name=Note, start=0, end=17, velocity=47, pitch=62)
Item(name=Note, start=0, end=17, velocity=63, pitch=66)
Item(name=Note, start=0, end=16, velocity=55, pitch=49)
Item(name=Note, start=0, end=17, velocity=51, pitch=56)
Item(name=Note, start=0, end=31, velocity=47, pitch=59)
Item(name=Note, start=0, end=16, velocity=51, pitch=61)


### 3. Extract chords (if needed)

In [138]:
# chord_extract is a stub in this notebook, so nothing is printed after the header
# until it is implemented
chord_items = chord_extract(path, note_items[-1].end)

print('Chord items')
print(*chord_items[:10], sep='\n')

Chord items



### 4. Group items

In [139]:
# if using chord items
# items = chord_items + tempo_items + note_items

# if not using chord items
items = tempo_items + note_items

# upper bound for the bars: end tick of the last note item in the list
# NOTE(review): this is the end of the last item, not necessarily the latest
# end among all notes -- confirm this is the intended bound
max_time = note_items[-1].end
groups = group_items(items, max_time)

# each group prints as: bar start tick, the bar's items, bar end tick
for g in groups:
    print(*g, sep='\n')
    print()

0
Item(name=Tempo, start=0, end=None, velocity=None, pitch=120)
Item(name=Note, start=0, end=17, velocity=55, pitch=47)
Item(name=Note, start=0, end=16, velocity=47, pitch=54)
Item(name=Note, start=0, end=17, velocity=47, pitch=57)
Item(name=Note, start=0, end=8, velocity=47, pitch=59)
Item(name=Note, start=0, end=17, velocity=47, pitch=62)
Item(name=Note, start=0, end=17, velocity=63, pitch=66)
Item(name=Note, start=0, end=16, velocity=55, pitch=49)
Item(name=Note, start=0, end=17, velocity=51, pitch=56)
Item(name=Note, start=0, end=31, velocity=47, pitch=59)
Item(name=Note, start=0, end=16, velocity=51, pitch=61)
Item(name=Note, start=0, end=40, velocity=63, pitch=64)
Item(name=Note, start=0, end=18, velocity=63, pitch=68)
Item(name=Note, start=120, end=136, velocity=47, pitch=49)
Item(name=Note, start=120, end=136, velocity=47, pitch=57)
Item(name=Note, start=120, end=136, velocity=47, pitch=61)
Item(name=Note, start=120, end=136, velocity=63, pitch=66)
Item(name=Note, start=120, en

### 5. Items 2 Events

In [140]:
# flatten the bar groups into the event sequence
events = item2event(groups)

# show the first ten events
print('Events')
print(*events[:10], sep='\n')

Events
Event(name=Bar, time=None, value=None, text=1)
Event(name=Position, time=0, value=1/16, text=0)
Event(name=Tempo Class, time=0, value=mid, text=None)
Event(name=Tempo Value, time=0, value=30, text=None)
Event(name=Position, time=0, value=1/16, text=0)
Event(name=Note Velocity, time=0, value=13, text=55/52)
Event(name=Note On, time=0, value=47, text=47)
Event(name=Note Duration, time=0, value=0, text=17/60)
Event(name=Position, time=0, value=1/16, text=0)
Event(name=Note Velocity, time=0, value=11, text=47/44)


### 6. Events 2 Tokens

In [141]:
# read your dictionary
# NOTE: unpickling can execute arbitrary code -- only load dictionary files
# that come from a source you trust.
# The `with` block closes the file handle once the dictionary is loaded.
with open('./basic_event_dictionary.pkl', 'rb') as dictionary_file:
    event2word, word2event = pickle.load(dictionary_file)

# map every event to its word id; the key format is '<name>_<value>'
words = []
for event in events:
    e = '{}_{}'.format(event.name, event.value)
    if e in event2word:
        words.append(event2word[e])
    elif event.name == 'Note Velocity':
        # OOV velocity:
        # replace with max velocity based on our training data
        words.append(event2word['Note Velocity_21'])
    else:
        # something is wrong
        # you should handle it for your own purpose
        # (the event is reported and left out of `words`)
        print('something is wrong! {}'.format(e))

print('Tokens')
print(words[:10])

Tokens
[0, 1, 18, 50, 1, 90, 124, 185, 1, 88]


### 7. Play Music

In [142]:
import pygame

def play_music(midi_filename):
    """Play ``midi_filename`` through pygame's mixer and block until it ends."""
    ticker = pygame.time.Clock()
    music = pygame.mixer.music
    music.load(midi_filename)
    music.play()
    # poll the mixer, with the loop paced by the clock (30 ticks per second)
    while music.get_busy():
        ticker.tick(30)
    
# play the MIDI file selected in `path` above
midi_filename = path

# mixer config
freq = 44100  # audio CD quality
bitsize = -16   # 16 bit samples; a negative size selects signed samples
channels = 2  # 1 is mono, 2 is stereo
buffer = 1024   # number of samples
pygame.mixer.init(freq, bitsize, channels, buffer)

# optional volume 0 to 1.0
pygame.mixer.music.set_volume(0.8)

# listen for interruptions
try:
  # blocks until playback of midi_filename has finished
  play_music(midi_filename)
except KeyboardInterrupt:
  # if user hits Ctrl/C then exit
  # (works only in console mode)
  pygame.mixer.music.fadeout(1000)
  pygame.mixer.music.stop()
  raise SystemExit

AttributeError: 'tuple' object has no attribute 'tb_frame'

# Hugging-Face tutorial

## Import packages

In [128]:
from transformers import TransfoXLConfig, TransfoXLModel
import torch

## Call the API

In [129]:
# you can change the parameters of the config
# (here the defaults are used; the model is built from the configuration,
# no `from_pretrained` call is involved)
configuration = TransfoXLConfig()
model = TransfoXLModel(configuration)

In [130]:
# bare expression: the notebook displays the configuration as the cell output
configuration

TransfoXLConfig {
  "adaptive": true,
  "attn_type": 0,
  "clamp_len": 1000,
  "cutoffs": [
    20000,
    40000,
    200000
  ],
  "d_embed": 1024,
  "d_head": 64,
  "d_inner": 4096,
  "d_model": 1024,
  "div_val": 4,
  "dropatt": 0.0,
  "dropout": 0.1,
  "eos_token_id": 0,
  "init": "normal",
  "init_range": 0.01,
  "init_std": 0.02,
  "layer_norm_epsilon": 1e-05,
  "mem_len": 1600,
  "model_type": "transfo-xl",
  "n_head": 16,
  "n_layer": 18,
  "pre_lnorm": false,
  "proj_init_std": 0.01,
  "same_length": true,
  "sample_softmax": -1,
  "tie_projs": [
    false,
    true,
    true,
    true
  ],
  "transformers_version": "4.26.0",
  "untie_r": true,
  "vocab_size": 267735
}

In [131]:
# one batch of 512 random token ids drawn from [0, vocab_size)
x = torch.randint(0, configuration.vocab_size, (1, 512))
y = model(x)
# NOTE(review): the last axis is presumably the model's hidden size
# (d_embed and d_model are both 1024 in the config shown above) -- TODO confirm
output = y['last_hidden_state'] # outputshape = (batch, x_len, d_embed)