In [None]:
!git clone https://github.com/music-x-lab/POP909-Dataset.git
!git clone https://github.com/Dsqvival/hierarchical-structure-analysis.git
!pip install miditoolkit

Cloning into 'POP909-Dataset'...
remote: Enumerating objects: 9265, done.[K
remote: Counting objects: 100% (19/19), done.[K
remote: Compressing objects: 100% (11/11), done.[K
remote: Total 9265 (delta 6), reused 17 (delta 6), pack-reused 9246[K
Receiving objects: 100% (9265/9265), 45.75 MiB | 25.05 MiB/s, done.
Resolving deltas: 100% (12/12), done.
Cloning into 'hierarchical-structure-analysis'...
remote: Enumerating objects: 6890, done.[K
remote: Counting objects: 100% (6890/6890), done.[K
remote: Compressing objects: 100% (4170/4170), done.[K
remote: Total 6890 (delta 1336), reused 6170 (delta 629), pack-reused 0[K
Receiving objects: 100% (6890/6890), 1.61 MiB | 9.87 MiB/s, done.
Resolving deltas: 100% (1336/1336), done.
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting miditoolkit
  Downloading miditoolkit-0.1.16-py3-none-any.whl (20 kB)
Collecting mido>=1.1.16
  Downloading mido-1.2.10-py2.py3-none-any.whl (51 kB)


In [None]:
import math
import miditoolkit

# Settings

In [None]:
# Piece to convert. The trailing `#@param` annotation (it looks like a Colab form field)
# must stay on the assignment line.
piece_id = 1  #@param {type:"integer"}
# Reject ids outside 1..909 before any path is derived from piece_id.
assert 1 <= piece_id <= 909, "piece_id must be 1–909"

In [None]:
# Zero-padded three-digit id (1 -> "001"); used as both the folder name and the MIDI file stem.
piece_title = f"{piece_id:03}"

# Files from the POP909-Dataset checkout: the MIDI file and its beat annotation.
midi_path = f"POP909-Dataset/POP909/{piece_title}/{piece_title}.mid"
beat_annotation_path = f"POP909-Dataset/POP909/{piece_title}/beat_audio.txt"

# Files from the hierarchical-structure-analysis checkout: melody, chord and phrase annotations.
melody_annotation_path = f"hierarchical-structure-analysis/POP909/{piece_title}/melody.txt"
chord_annotation_path = f"hierarchical-structure-analysis/POP909/{piece_title}/finalized_chord.txt"
phrase_annotation_path = f"hierarchical-structure-analysis/POP909/{piece_title}/human_label1.txt"

In [None]:
resolution = 16  # Grid steps per bar (one step = a 16th note in 4/4)
beat = 4         # Beats per bar; only 4/4 pieces are handled

# REMI

## Check the time signature

In [None]:
# Read the beat annotation as a list of lines; each line holds two tab-separated fields,
# and the second one (the beat index) is what the time-signature check below uses.
with open(beat_annotation_path) as f:
    beat_annotation = f.read().splitlines()

In [None]:
# Preview only the first entries instead of dumping the whole annotation.
print(beat_annotation[:10])

['2.66\t1.0', '3.33\t2.0', '4.0\t3.0', '4.66\t4.0', '5.33\t1.0', '5.99\t2.0', '6.66\t3.0', '7.33\t4.0', '7.99\t1.0', '8.66\t2.0', '9.33\t3.0', '9.99\t4.0', '10.66\t1.0', '11.33\t2.0', '12.0\t3.0', '12.66\t4.0', '13.33\t1.0', '14.0\t2.0', '14.66\t3.0', '15.33\t4.0', '16.0\t1.0', '16.66\t2.0', '17.33\t3.0', '18.0\t4.0', '18.66\t1.0', '19.33\t2.0', '20.0\t3.0', '20.66\t4.0', '21.33\t1.0', '22.0\t2.0', '22.66\t3.0', '23.32\t4.0', '24.0\t1.0', '24.66\t2.0', '25.32\t3.0', '25.99\t4.0', '26.66\t1.0', '27.33\t2.0', '28.0\t3.0', '28.66\t4.0', '29.33\t1.0', '29.99\t2.0', '30.66\t3.0', '31.33\t4.0', '32.0\t1.0', '32.66\t2.0', '33.32\t3.0', '33.99\t4.0', '34.66\t1.0', '35.33\t2.0', '35.99\t3.0', '36.66\t4.0', '37.33\t1.0', '37.99\t2.0', '38.66\t3.0', '39.33\t4.0', '40.0\t1.0', '40.66\t2.0', '41.33\t3.0', '41.99\t4.0', '42.66\t1.0', '43.33\t2.0', '44.0\t3.0', '44.66\t4.0', '45.33\t1.0', '46.0\t2.0', '46.66\t3.0', '47.33\t4.0', '48.0\t1.0', '48.66\t2.0', '49.33\t3.0', '50.0\t4.0', '50.66\t1.0', '51.

In [None]:
# Only 4/4 pieces are supported: the largest beat index found in the annotation must
# equal `beat` (a 3/4 piece, for instance, never reaches beat 4).
if max(float(x.split('\t')[1]) for x in beat_annotation) != beat:
    raise ValueError(f"Only pieces with {beat}/4 time signatures can be used.")

## Load MIDI

In [None]:
# Parse the selected piece's MIDI file with miditoolkit.
midi_obj = miditoolkit.midi.parser.MidiFile(midi_path)

In [None]:
# Summary of the parsed file (ticks per beat, tempo / time-signature changes, instruments).
print(midi_obj)

ticks per beat: 480
max tick: 141124
tempo changes: 1
time sig: 1
key sig: 0
markers: 0
lyrics: False
instruments: 3


In [None]:
# First 10 notes of the first instrument (treated as the melody track further down), one per line.
print(*midi_obj.instruments[0].notes[:10], sep='\n')

Note(start=9160, end=9229, pitch=61, velocity=115)
Note(start=9280, end=9325, pitch=63, velocity=109)
Note(start=9400, end=9449, pitch=66, velocity=118)
Note(start=9520, end=9580, pitch=68, velocity=112)
Note(start=9640, end=9719, pitch=70, velocity=112)
Note(start=9880, end=10028, pitch=66, velocity=112)
Note(start=10120, end=10293, pitch=63, velocity=118)
Note(start=10360, end=11071, pitch=68, velocity=115)
Note(start=11560, end=11724, pitch=68, velocity=112)
Note(start=11800, end=11938, pitch=65, velocity=115)


## Quantize and convert to Item

In [None]:
class Item(object):
    """One musical element (note, bar, chord, phrase, ...) placed on the step grid.

    ``start`` is snapped to a whole number of steps. ``duration`` is snapped to
    half steps and is never shorter than half a step.
    """

    def __init__(self, name, start, duration, value):
        self.name = name
        self.start = self.quantize(start, 1)
        snapped_duration = self.quantize(duration, 0.5)
        # Clamp so that even a very short element keeps a non-zero length.
        self.duration = snapped_duration if snapped_duration > 0.5 else 0.5
        self.value = value

    def quantize(self, x, step):
        """Round ``x`` to the nearest multiple of ``step``; ties go to the lower multiple."""
        n_steps = math.ceil(x / step - 0.5)
        return n_steps * step

    def __repr__(self):
        return f"Item(name={self.name}, start={self.start}, duration={self.duration}, value={self.value})"

#### Note items

In [None]:
# Length of one grid step in MIDI ticks (ticks per bar divided by steps per bar).
resolution_ticks = midi_obj.ticks_per_beat * beat // resolution


def notes_to_items(notes):
    """Convert tick-based MIDI notes into 'Note' items measured in grid steps.

    Args:
        notes: iterable of objects exposing ``start`` and ``end`` (in ticks) and ``pitch``.

    Returns:
        A list with one ``Item`` per note, in the same order as ``notes``.
    """
    return [
        Item(
            name='Note',
            start=note.start / resolution_ticks,
            duration=(note.end - note.start) / resolution_ticks,
            value=note.pitch,
        )
        for note in notes
    ]


# NOTE(review): the track order melody / bridge / piano is assumed from the variable
# names used here — confirm against the dataset description.
melody_note_items = notes_to_items(midi_obj.instruments[0].notes)
bridge_note_items = notes_to_items(midi_obj.instruments[1].notes)
piano_note_items = notes_to_items(midi_obj.instruments[2].notes)

# Merge all tracks and order the notes by onset, then by pitch.
note_items = melody_note_items + bridge_note_items + piano_note_items
note_items.sort(key=lambda x: (x.start, x.value))

In [None]:
# First 10 note items (all tracks merged, sorted by start then pitch), one per line.
print(*note_items[:10], sep='\n')

Item(name=Note, start=14, duration=1.5, value=66)
Item(name=Note, start=16, duration=5.5, value=47)
Item(name=Note, start=16, duration=2.0, value=75)
Item(name=Note, start=17, duration=4.5, value=54)
Item(name=Note, start=18, duration=3.0, value=59)
Item(name=Note, start=18, duration=1.0, value=73)
Item(name=Note, start=19, duration=5.0, value=66)
Item(name=Note, start=20, duration=1.0, value=71)
Item(name=Note, start=22, duration=1.0, value=80)
Item(name=Note, start=23, duration=1.0, value=82)


In the annotations, the piece is manually shifted so that the downbeats align with the barlines.    
Thus, we compute the offset between the melody's first note in the melody annotation and in the POP909 MIDI, and shift all notes by that offset.   
We also delay everything by one bar to leave room for an anacrusis (pickup, Auftakt).

In [None]:
# Read the melody annotation as a list of lines; each line holds two space-separated integers.
with open(melody_annotation_path) as f:
    melody_annotation = f.read().splitlines()

In [None]:
# Preview only the first entries instead of dumping the whole annotation.
print(melody_annotation[:20])

['0 60', '61 1', '63 1', '66 1', '68 1', '70 1', '0 1', '66 1', '0 1', '63 1', '0 1', '68 6', '0 4', '68 1', '0 1', '65 1', '0 1', '61 1', '0 1', '66 5', '0 1', '61 1', '63 1', '66 1', '68 1', '70 1', '0 1', '66 1', '0 1', '63 1', '0 1', '68 3', '0 1', '61 1', '0 1', '68 1', '0 1', '66 7', '0 11', '66 2', '0 2', '66 2', '0 2', '65 1', '0 1', '66 1', '65 1', '0 4', '65 1', '0 1', '66 1', '65 1', '0 1', '61 1', '0 1', '63 4', '0 3', '63 1', '65 1', '66 3', '0 1', '66 2', '0 2', '65 1', '0 1', '66 1', '65 1', '0 1', '63 1', '0 1', '61 8', '0 9', '66 2', '0 2', '66 3', '0 1', '68 1', '0 1', '70 1', '68 1', '0 4', '68 1', '0 1', '70 1', '68 1', '0 1', '65 1', '0 1', '66 5', '0 2', '61 1', '63 1', '66 1', '0 1', '70 1', '68 1', '0 1', '66 1', '0 2', '68 1', '0 1', '70 1', '68 1', '0 1', '66 1', '0 2', '66 6', '0 6', '61 1', '63 1', '66 1', '68 1', '70 1', '0 1', '66 1', '0 1', '63 1', '0 1', '68 6', '0 4', '68 1', '0 1', '65 1', '0 1', '61 1', '0 1', '66 5', '0 1', '61 1', '63 1', '66 1', '6

In [None]:
# First annotation entry: "<note number> <length in grid steps>". A note number of 0 is
# treated below as a leading gap before the melody's first note.
note_number, duration = map(int, melody_annotation[0].split())

melody_start = resolution  # Shift for an anacrusis
if note_number == 0:
    melody_start += duration  # Shift for offset of the melody's first note
# Offset that moves the first melody note of the MIDI onto its annotated position.
shift = melody_start - melody_note_items[0].start

# note_items shares its Item objects with melody_note_items, so once the notes have been
# shifted the offset above evaluates to 0 and re-running this cell moves nothing.
for note_item in note_items:
    note_item.start += shift

# Don't use pieces whose position of the notes seems incorrect: the very first note has
# to fall within the first two bars. The bound is derived from `resolution` instead of
# the hard-coded 32 so it stays correct if the grid setting is changed.
if note_items[0].start < 0 or note_items[0].start >= 2 * resolution:
    raise ValueError("Invalid start time.")

In [None]:
# First 10 note items again, now after the shift applied in the previous cell.
print(*note_items[:10], sep='\n')

Item(name=Note, start=14, duration=1.5, value=66)
Item(name=Note, start=16, duration=5.5, value=47)
Item(name=Note, start=16, duration=2.0, value=75)
Item(name=Note, start=17, duration=4.5, value=54)
Item(name=Note, start=18, duration=3.0, value=59)
Item(name=Note, start=18, duration=1.0, value=73)
Item(name=Note, start=19, duration=5.0, value=66)
Item(name=Note, start=20, duration=1.0, value=71)
Item(name=Note, start=22, duration=1.0, value=80)
Item(name=Note, start=23, duration=1.0, value=82)


#### Bar items

In [None]:
# Number of bars needed to hold every note, measured up to the latest note end.
latest_note_end = max(item.start + item.duration for item in note_items)
n_bars = math.ceil(latest_note_end / resolution)

# One 'Bar' item per bar start; n_bars + 1 of them are created in total.
bar_items = []
for bar_index in range(n_bars + 1):
    bar_items.append(Item(name='Bar', start=bar_index * resolution, duration=resolution, value=None))

In [None]:
# First 10 bar items, one per line.
print(*bar_items[:10], sep='\n')

Item(name=Bar, start=0, duration=16.0, value=None)
Item(name=Bar, start=16, duration=16.0, value=None)
Item(name=Bar, start=32, duration=16.0, value=None)
Item(name=Bar, start=48, duration=16.0, value=None)
Item(name=Bar, start=64, duration=16.0, value=None)
Item(name=Bar, start=80, duration=16.0, value=None)
Item(name=Bar, start=96, duration=16.0, value=None)
Item(name=Bar, start=112, duration=16.0, value=None)
Item(name=Bar, start=128, duration=16.0, value=None)
Item(name=Bar, start=144, duration=16.0, value=None)


#### Chord items

In [None]:
# Read the chord annotation as a list of lines; the chord cell below uses the first
# whitespace-separated field (chord name) and the last one (length in beats) of each line.
with open(chord_annotation_path) as f:
    chord_annotation = f.read().splitlines()

In [None]:
# Preview only the first entries instead of dumping the whole annotation.
print(chord_annotation[:10])

['B:maj [3, 6, 11] 11 2 ', 'C#:maj [1, 5, 8] 1 2 ', 'Bb:min [1, 5, 10] 10 2 ', 'Eb:min [3, 6, 10] 3 2 ', 'B:maj [3, 6, 11] 11 2 ', 'C#:maj [1, 5, 8] 1 2 ', 'F#:maj [1, 6, 10] 6 4 ', 'B:maj [3, 6, 11] 11 1 ', 'B:maj7 [3, 6, 9, 11] 11 1 ', 'C#:maj [1, 5, 8] 1 2 ', 'Bb:min [1, 5, 10] 10 2 ', 'Eb:min [3, 6, 10] 3 2 ', 'B:maj [3, 6, 11] 11 1 ', 'B:maj7 [3, 6, 9, 11] 11 1 ', 'C#:maj [1, 5, 8] 1 2 ', 'F#:maj [1, 6, 10] 6 4 ', 'B:maj [3, 6, 11] 11 2 ', 'C#:maj [1, 5, 8] 1 2 ', 'Bb:min [1, 5, 10] 10 2 ', 'Eb:min [3, 6, 10] 3 2 ', 'B:maj [3, 6, 11] 11 2 ', 'C#:maj [1, 5, 8] 1 2 ', 'F#:maj [1, 6, 10] 6 4 ', 'B:maj [3, 6, 11] 11 2 ', 'C#:maj [1, 5, 8] 1 2 ', 'Bb:min [1, 5, 10] 10 2 ', 'Eb:min [3, 6, 10] 3 2 ', 'B:maj [3, 6, 11] 11 2 ', 'C#:maj [1, 5, 8] 1 2 ', 'F#:maj [1, 6, 10] 6 4 ', 'B:maj [3, 6, 11] 11 1 ', 'B:maj7 [3, 6, 9, 11] 11 1 ', 'C#:maj [1, 5, 8] 1 2 ', 'Bb:min [1, 5, 10] 10 2 ', 'Eb:min [3, 6, 10] 3 2 ', 'B:maj [3, 6, 11] 11 1 ', 'B:maj7 [3, 6, 9, 11] 11 1 ', 'C#:maj [1, 5, 8] 1 2 ', 

All flat root notes are rewritten as their enharmonic sharps (e.g. Bb → A#).   
The chord qualities are reduced to six types (maj, min, dim, aug, sus4, sus2); any other quality (e.g. 7, 9) is treated as maj.

In [None]:
# Flat roots mapped to their sharp spellings.
root_integration_table = {"Db": "C#", "Eb": "D#", "Gb": "F#", "Ab": "G#", "Bb": "A#"}

# Seed the list with one bar of "no chord" starting at step 0.
chord_items = [Item(name='Chord', start=0, duration=resolution, value='N:N')]
for element in chord_annotation:
    # Only the first field (chord name) and the last field (length in beats) are used.
    chord, *_, beat_duration = element.split()
    duration = (resolution // beat) * int(beat_duration)

    if not chord.startswith('N'):
        root, symbol = chord.split(':')
        # The first quality found in the symbol wins; anything else (7, 9, ...) becomes 'maj'.
        symbol = next(
            (quality for quality in ('min', 'maj', 'dim', 'aug', 'sus4', 'sus2') if quality in symbol),
            'maj',
        )
        root = root_integration_table.get(root, root)
        chord = f'{root}:{symbol}'
    else:
        chord = 'N:N'

    previous = chord_items[-1]
    if previous.value != chord:
        # A different chord starts right where the previous one ends.
        start = previous.start + previous.duration
        chord_items.append(Item(name='Chord', start=start, duration=duration, value=chord))
    else:
        # Same chord as before: extend it instead of adding a new item.
        previous.duration += duration

In [None]:
# First 10 chord items, one per line.
print(*chord_items[:10], sep='\n')

Item(name=Chord, start=0, duration=16.0, value=N:N)
Item(name=Chord, start=16, duration=8.0, value=B:maj)
Item(name=Chord, start=24, duration=8.0, value=C#:maj)
Item(name=Chord, start=32, duration=8.0, value=A#:min)
Item(name=Chord, start=40, duration=8.0, value=D#:min)
Item(name=Chord, start=48, duration=8.0, value=B:maj)
Item(name=Chord, start=56, duration=8.0, value=C#:maj)
Item(name=Chord, start=64, duration=16.0, value=F#:maj)
Item(name=Chord, start=80, duration=8.0, value=B:maj)
Item(name=Chord, start=88, duration=8.0, value=C#:maj)


#### Phrase items

In [None]:
# Only the first line of the phrase annotation is used, stripped of surrounding whitespace.
with open(phrase_annotation_path) as f:
    phrase_annotation = f.readline().strip()

In [None]:
# Raw phrase string: label characters, each followed by a number.
print(phrase_annotation)

i4A4B8A4A4b4B8A4A4b4b4A4A4b4A4o3


In [None]:
# The configuration always opens with a one-bar 'Start' phrase.
phrase_configuration = [('Start', 1)]
index = 0
while index < len(phrase_annotation):
    # One label character, followed by the run of digits that gives its length in bars.
    label = phrase_annotation[index]
    index += 1
    digits_begin = index
    while index < len(phrase_annotation) and phrase_annotation[index].isdigit():
        index += 1
    length = phrase_annotation[digits_begin:index]
    phrase_configuration.append((label, int(length)))

In [None]:
# Parsed (label, length) pairs, before padding and before 'End' is appended.
print(phrase_configuration)

[('Start', 1), ('i', 4), ('A', 4), ('B', 8), ('A', 4), ('A', 4), ('b', 4), ('B', 8), ('A', 4), ('A', 4), ('b', 4), ('b', 4), ('A', 4), ('A', 4), ('b', 4), ('A', 4), ('o', 3)]


In [None]:
# Guard against re-running this cell: once 'End' has been appended the configuration is
# complete, and running the block again would append a second 'End' and then raise.
# Parsed labels are single characters, so ('End', 1) can only come from this cell.
if phrase_configuration[-1] != ('End', 1):
    # Bars covered by the notes but not by the annotation (the sum includes the 'Start' bar).
    n_bars_lack = n_bars - sum(length for _, length in phrase_configuration)

    if n_bars_lack > 0:
        # Stretch the last phrase so that the phrases span exactly n_bars bars.
        last_label, last_length = phrase_configuration[-1]
        phrase_configuration[-1] = (last_label, last_length + n_bars_lack)
    elif n_bars_lack < 0:
        # The annotation describes more bars than the notes occupy.
        raise ValueError("Invalid phrase annotation.")

    phrase_configuration.append(('End', 1))

In [None]:
# Configuration after padding the last phrase and appending the one-bar 'End' phrase.
print(phrase_configuration)

[('Start', 1), ('i', 4), ('A', 4), ('B', 8), ('A', 4), ('A', 4), ('b', 4), ('B', 8), ('A', 4), ('A', 4), ('b', 4), ('b', 4), ('A', 4), ('A', 4), ('b', 4), ('A', 4), ('o', 4), ('End', 1)]


In [None]:
phrase_items = []
start = 0
for label, length in phrase_configuration:
    # Every bar of a phrase gets the phrase label plus the number of bars left in it
    # (counting the current bar), so the countdown runs from `length` down to 1.
    for bars_left in range(length, 0, -1):
        phrase_items.extend([
            Item(name='Phrase', start=start, duration=resolution, value=label),
            Item(name='Bar Countdown', start=start, duration=resolution, value=bars_left),
        ])
        start += resolution

In [None]:
# First 10 phrase / bar-countdown items, one per line.
print(*phrase_items[:10], sep='\n')

Item(name=Phrase, start=0, duration=16.0, value=Start)
Item(name=Bar Countdown, start=0, duration=16.0, value=1)
Item(name=Phrase, start=16, duration=16.0, value=i)
Item(name=Bar Countdown, start=16, duration=16.0, value=4)
Item(name=Phrase, start=32, duration=16.0, value=i)
Item(name=Bar Countdown, start=32, duration=16.0, value=3)
Item(name=Phrase, start=48, duration=16.0, value=i)
Item(name=Bar Countdown, start=48, duration=16.0, value=2)
Item(name=Phrase, start=64, duration=16.0, value=i)
Item(name=Bar Countdown, start=64, duration=16.0, value=1)


#### Items

In [None]:
# Concatenation order matters: the sort is stable, so items sharing a start keep the
# order bar -> phrase / bar countdown -> chord -> note.
items = sorted(
    [*bar_items, *phrase_items, *chord_items, *note_items],
    key=lambda item: item.start,
)

In [None]:
# First 10 items of the merged, start-ordered sequence, one per line.
print(*items[:10], sep='\n')

Item(name=Bar, start=0, duration=16.0, value=None)
Item(name=Phrase, start=0, duration=16.0, value=Start)
Item(name=Bar Countdown, start=0, duration=16.0, value=1)
Item(name=Chord, start=0, duration=16.0, value=N:N)
Item(name=Note, start=14, duration=1.5, value=66)
Item(name=Bar, start=16, duration=16.0, value=None)
Item(name=Phrase, start=16, duration=16.0, value=i)
Item(name=Bar Countdown, start=16, duration=16.0, value=4)
Item(name=Chord, start=16, duration=8.0, value=B:maj)
Item(name=Note, start=16, duration=5.5, value=47)


## Items → REMI

In [None]:
class Event(object):
    """A single token: an event name together with its value."""

    def __init__(self, name, value):
        self.name, self.value = name, value

    def __repr__(self):
        # Unambiguous form, e.g. Event(name=Beat, value=1/16).
        template = 'Event(name={}, value={})'
        return template.format(self.name, self.value)

    def __str__(self):
        # Compact form used when printing sequences, e.g. "Beat (1/16)".
        return f'{self.name} ({self.value})'

In [None]:
REMI = []
for item in items:
    if item.name not in ['Bar', 'Phrase', 'Bar Countdown']:
        # Positioned items are announced by their 1-based position within the bar.
        REMI.append(Event(name='Beat', value=f'{item.start % resolution + 1}/{resolution}'))
        if item.name == 'Chord':
            REMI.append(Event(name=item.name, value=item.value))
        elif item.name == 'Note':
            REMI += [
                Event(name='Note On', value=item.value),
                Event(name='Note Duration', value=item.duration),
            ]
    else:
        # Structural items are copied over as a single event.
        REMI.append(Event(name=item.name, value=item.value))

In [None]:
# Show the first 80 events: each bar starts on a fresh line after a blank line, and an
# event that opens a group is followed by a tab so its companions stay on the same line.
for event in REMI[:80]:
    if event.name == 'Bar':
        print(f'\n{event}', end='\t')
    elif event.name in ['Beat', 'Note On', 'Phrase']:
        print(event, end='\t')
    else:
        print(event)


Bar (None)	Phrase (Start)	Bar Countdown (1)
Beat (1/16)	Chord (N:N)
Beat (15/16)	Note On (66)	Note Duration (1.5)

Bar (None)	Phrase (i)	Bar Countdown (4)
Beat (1/16)	Chord (B:maj)
Beat (1/16)	Note On (47)	Note Duration (5.5)
Beat (1/16)	Note On (75)	Note Duration (2.0)
Beat (2/16)	Note On (54)	Note Duration (4.5)
Beat (3/16)	Note On (59)	Note Duration (3.0)
Beat (3/16)	Note On (73)	Note Duration (1.0)
Beat (4/16)	Note On (66)	Note Duration (5.0)
Beat (5/16)	Note On (71)	Note Duration (1.0)
Beat (7/16)	Note On (80)	Note Duration (1.0)
Beat (8/16)	Note On (82)	Note Duration (1.0)
Beat (9/16)	Chord (C#:maj)
Beat (9/16)	Note On (49)	Note Duration (5.0)
Beat (9/16)	Note On (80)	Note Duration (5.0)
Beat (10/16)	Note On (56)	Note Duration (3.5)
Beat (11/16)	Note On (61)	Note Duration (3.5)
Beat (12/16)	Note On (65)	Note Duration (2.5)
Beat (15/16)	Note On (66)	Note Duration (1.5)

Bar (None)	Phrase (i)	Bar Countdown (3)
Beat (1/16)	Chord (A#:min)
Beat (1/16)	Note On (46)	Note Duration (6.5)

# CP

## REMI → CP

In [None]:
CP = []
n_events = len(REMI)
for i in range(n_events):
    # Names of the next two events, or None past the end of the list. Looking ahead this
    # way lets the loop examine every event, including the last two, without an
    # out-of-range index; stopping two events early could drop a trailing position or note.
    next_name = REMI[i + 1].name if i + 1 < n_events else None
    next_next_name = REMI[i + 2].name if i + 2 < n_events else None

    if REMI[i].name == 'Bar' and next_name == 'Phrase' and next_next_name == 'Bar Countdown':
        # New bar: forget the last position and remember the bar's phrase context.
        current_beat = ''
        event = {'Family': 'Bar', 'Beat': '', 'Chord': '', 'Note On': '', 'Note Duration': '', 'Phrase': '', 'Bar Countdown': ''}
        current_phrase = REMI[i + 1].value
        current_barcountdown = REMI[i + 2].value
    elif REMI[i].name == 'Beat' and REMI[i].value != current_beat:
        # First event at a new position: emit one 'Pos' token. The chord is 'CONTI'
        # unless a Chord event follows immediately.
        current_beat = REMI[i].value
        event = {'Family': 'Pos', 'Beat': current_beat, 'Chord': 'CONTI', 'Note On': '', 'Note Duration': '', 'Phrase': '', 'Bar Countdown': ''}
        if next_name == 'Chord':
            event['Chord'] = REMI[i + 1].value
    elif REMI[i].name == 'Note On' and next_name == 'Note Duration':
        # A pitch and its duration collapse into a single 'Note' token.
        event = {'Family': 'Note', 'Beat': '', 'Chord': '', 'Note On': REMI[i].value, 'Note Duration': REMI[i + 1].value, 'Phrase': '', 'Bar Countdown': ''}
    else:
        # Events already consumed by a look-ahead, or repeated positions, add no token.
        continue
    # Every token carries the phrase label and bar countdown of the bar it belongs to.
    event['Phrase'] = current_phrase
    event['Bar Countdown'] = current_barcountdown
    CP.append(event)

In [None]:
keys = ['Family', 'Beat', 'Chord', 'Note On', 'Note Duration', 'Phrase', 'Bar Countdown']
# Header row: every column is centred in a 15-character field, followed by a rule.
print(''.join(f'{key:^15}' for key in keys), end='')
print('\n' + '-' * 15 * 7, end='')
# One row per token; a row containing 'Bar' is preceded by a line break.
for event in CP[:40]:
    for key in keys:
        if event[key] == 'Bar':
            print()
        print(f'{event[key]:^15}', end='')
    print()

    Family          Beat           Chord         Note On     Note Duration     Phrase      Bar Countdown 
---------------------------------------------------------------------------------------------------------
      Bar                                                                       Start            1       
      Pos           1/16            N:N                                         Start            1       
      Pos           15/16          CONTI                                        Start            1       
     Note                                          66             1.5           Start            1       

      Bar                                                                         i              4       
      Pos           1/16           B:maj                                          i              4       
     Note                                          47             5.5             i              4       
     Note                                    