In [1]:
from aria.data.midi import MidiDict
from aria.tokenizer import AbsTokenizer

tokenizer = AbsTokenizer()
midi_file_path = "/import/c4dm-datasets/maestro-v3.0.0/2008/MIDI-Unprocessed_07_R2_2008_01-05_ORIG_MID--AUDIO_07_R2_2008_wav--2.midi" # midi_files[1]
print(midi_file_path)
mid = MidiDict.from_midi(midi_file_path)
tokenized_sequence = tokenizer.tokenize(mid)
print(tokenized_sequence)

/import/c4dm-datasets/maestro-v3.0.0/2008/MIDI-Unprocessed_07_R2_2008_01-05_ORIG_MID--AUDIO_07_R2_2008_wav--2.midi
[('prefix', 'instrument', 'piano'), '<S>', ('piano', 63, 75), ('onset', 0), ('dur', 920), ('piano', 64, 75), ('onset', 860), ('dur', 740), ('piano', 63, 75), ('onset', 1320), ('dur', 280), ('piano', 68, 60), ('onset', 1470), ('dur', 820), ('piano', 71, 75), ('onset', 1480), ('dur', 830), ('piano', 62, 75), ('onset', 1490), ('dur', 790), ('piano', 70, 75), ('onset', 2180), ('dur', 1090), ('piano', 61, 60), ('onset', 2180), ('dur', 1090), ('piano', 67, 60), ('onset', 2180), ('dur', 1080), ('piano', 39, 45), ('onset', 2180), ('dur', 1080), ('piano', 68, 60), ('onset', 3040), ('dur', 3280), ('piano', 44, 45), ('onset', 3060), ('dur', 3260), ('piano', 51, 30), ('onset', 4100), ('dur', 2220), ('piano', 56, 30), ('onset', 4120), ('dur', 2200), ('piano', 59, 30), ('onset', 4130), ('dur', 2200), ('piano', 63, 30), ('onset', 4130), ('dur', 2180), '<T>', ('piano', 70, 45), ('onset', 

In [2]:
# Define a function to flatten the tokenized sequence
def flatten(sequence, add_special_tokens=True):
    flattened_sequence = []
    note_info = []
    for i in range(len(sequence)):
        if add_special_tokens:
            if sequence[i] == "<T>" or sequence[i] == "<D>":
                flattened_sequence.append(sequence[i])
        if sequence[i][0] == "piano":
            note_info.append(sequence[i][1])
            note_info.append(sequence[i][2])
        elif sequence[i][0] == "onset":
            note_info.append(sequence[i][1])
        elif sequence[i][0] == "dur":
            note_info.append(sequence[i][1])
            flattened_sequence.append(note_info) 
            note_info = []

    return flattened_sequence

flattened_sequence = flatten(tokenized_sequence)
print(flattened_sequence)

[[63, 75, 0, 920], [64, 75, 860, 740], [63, 75, 1320, 280], [68, 60, 1470, 820], [71, 75, 1480, 830], [62, 75, 1490, 790], [70, 75, 2180, 1090], [61, 60, 2180, 1090], [67, 60, 2180, 1080], [39, 45, 2180, 1080], [68, 60, 3040, 3280], [44, 45, 3060, 3260], [51, 30, 4100, 2220], [56, 30, 4120, 2200], [59, 30, 4130, 2200], [63, 30, 4130, 2180], '<T>', [70, 45, 460, 860], [71, 60, 840, 480], [73, 75, 1110, 300], [75, 75, 1360, 620], [78, 75, 1840, 180], [76, 60, 1940, 780], [75, 60, 2570, 890], [80, 60, 2950, 510], [75, 60, 3470, 2070], [68, 45, 4300, 1240], [71, 45, 4330, 1210], [63, 30, 4340, 1200], [56, 30, 4340, 1200], [75, 75, 4920, 620], '<T>', [73, 75, 370, 180], [76, 75, 500, 720], [70, 60, 500, 140], [61, 45, 520, 700], [49, 60, 530, 700], [68, 45, 530, 690], [75, 75, 1130, 660], [68, 45, 1150, 640], [71, 45, 1160, 150], [56, 45, 1160, 630], [44, 45, 1170, 620], [73, 60, 1630, 160], [71, 60, 1740, 600], [70, 60, 2220, 220], [73, 60, 2390, 810], [68, 45, 2410, 790], [64, 45, 2410, 7

In [11]:
# Skyline function for separating melody and harmony from the tokenized sequence
def skyline(sequence: list, diff_threshold=50, static_velocity=True, pitch_threshold=None):
    
    if pitch_threshold is None:
        pitch_threshold = 0
    
    melody = []
    harmony = []
    pointer_pitch = sequence[0][0]
    pointer_velocity = sequence[0][1]
    pointer_onset = sequence[0][2]
    pointer_duration = sequence[0][3]
    i = 0

    for i in range(1, len(sequence)):
        if type(sequence[i]) != str:
            current_pitch = sequence[i][0]
            current_velocity = sequence[i][1]
            current_onset = sequence[i][2]
            current_duration = sequence[i][3]

            if type(sequence[i-1]) == str and type(sequence[i-2]) == str:
                diff_curr_prev_onset = 5000
            elif type(sequence[i-1]) == str and type(sequence[i-2]) != str:
                diff_curr_prev_onset = abs(current_onset - sequence[i-2][2])
            else:
                diff_curr_prev_onset = abs(current_onset - sequence[i-1][2])
            
            # Check if the difference between the current onset and the previous onset is greater than the threshold and the pitch is greater than the threshold
            if diff_curr_prev_onset > diff_threshold:

                if pointer_pitch > pitch_threshold:
                    # Append the previous note
                    if static_velocity:
                        melody.append([pointer_pitch, 90, pointer_onset, pointer_duration])                        
                    else:
                        melody.append([pointer_pitch, pointer_velocity, pointer_onset, pointer_duration])
                
                # Update the pointer
                pointer_pitch = current_pitch
                pointer_velocity = current_velocity
                pointer_onset = current_onset
                pointer_duration = current_duration            
            else:
                if current_pitch > pointer_pitch:
                    # Append the previous note
                    harmony.append(("piano", pointer_pitch, pointer_velocity))
                    harmony.append(("onset", pointer_onset))
                    harmony.append(("dur", pointer_duration))
                    # Append <t> based on condition
                    if current_onset < pointer_onset:
                        harmony.append("<T>")
                    # Update the pointer
                    pointer_pitch = current_pitch
                    pointer_velocity = current_velocity
                    pointer_onset = current_onset
                    pointer_duration = current_duration
                else:
                    # Append the previous note
                    harmony.append(("piano", current_pitch, current_velocity))
                    harmony.append(("onset", current_onset))
                    harmony.append(("dur", current_duration))
                    # Append <t> based on condition
                    if current_onset < pointer_onset:
                        harmony.append("<T>")
                    continue

            # Append the last note
            if i == len(sequence) - 1: 
                if diff_curr_prev_onset > diff_threshold:
                    if pointer_pitch > pitch_threshold:
                        if static_velocity:
                            melody.append([pointer_pitch, 90, pointer_onset, pointer_duration])
                        else:
                            melody.append([pointer_pitch, pointer_velocity, pointer_onset, pointer_duration])
                else:
                    if current_pitch > pointer_pitch:
                        if current_pitch > pitch_threshold:
                            if static_velocity:
                                melody.append(["piano", current_pitch, 90, current_onset, current_duration])
                            else:
                                melody.append(["piano", current_pitch, current_velocity, current_onset, current_duration])
                    else:
                        harmony.append(("piano", current_pitch, current_velocity))
                        harmony.append(("onset", current_onset))
                        harmony.append(("dur", current_duration))

        if sequence[i-1] == "<T>":
            melody.append("<T>")
        
        if sequence[i] == "<D>":
            melody.append("<D>")

    return melody, harmony

melody, harmony = skyline(flattened_sequence)
print(melody)

[[63, 90, 0, 920], [64, 90, 860, 740], [63, 90, 1320, 280], [71, 90, 1480, 830], [70, 90, 2180, 1090], [68, 90, 3040, 3280], [63, 90, 4130, 2180], '<T>', [70, 90, 460, 860], [71, 90, 840, 480], [73, 90, 1110, 300], [75, 90, 1360, 620], [78, 90, 1840, 180], [76, 90, 1940, 780], [75, 90, 2570, 890], [80, 90, 2950, 510], [75, 90, 3470, 2070], [71, 90, 4330, 1210], [75, 90, 4920, 620], '<T>', [73, 90, 370, 180], [76, 90, 500, 720], [75, 90, 1130, 660], [73, 90, 1630, 160], [71, 90, 1740, 600], [70, 90, 2220, 220], [73, 90, 2390, 810], [71, 90, 3120, 1500], [64, 90, 3840, 780], [63, 90, 4310, 310], [71, 90, 4500, 870], '<T>', [70, 90, 250, 1030], [68, 90, 1110, 2710], [63, 90, 2040, 1780], [71, 90, 2620, 1200], [75, 90, 3220, 590], [73, 90, 3650, 190], [76, 90, 3780, 760], [68, 90, 3920, 620], [75, 90, 4410, 770], [73, 90, 4900, 280], '<T>', [71, 90, 10, 500], [70, 90, 440, 250], [73, 90, 640, 800], [71, 90, 1340, 890], [64, 90, 1900, 320], [66, 90, 1990, 230], [64, 90, 2070, 900], [63, 90,

In [16]:
# Reverse the flattened function
def unflatten(sequence, static_velocity=False):
    unflattened_sequence = []
    for i in range(len(sequence)):
        if sequence[i] == "<T>":
            unflattened_sequence.append("<T>")
            continue
        elif sequence[i] == "<D>":
            unflattened_sequence.append("<D>")
            continue
        elif type(sequence[i]) == tuple:
            unflattened_sequence.append(sequence[i])
        else:
            note_info = ("onset", sequence[i][2])
            unflattened_sequence.append(note_info)
            note_info = ("dur", sequence[i][3])
            unflattened_sequence.append(note_info)
            if static_velocity:
                note_info = ("piano", sequence[i][0], 90)
            else:
                note_info = ("piano", sequence[i][0], sequence[i][1])
            unflattened_sequence.append(note_info)
            note_info = []

    return unflattened_sequence

unflattened_sequence = unflatten(melody)
print(unflattened_sequence)

[('onset', 0), ('dur', 920), ('piano', 63, 90), ('onset', 860), ('dur', 740), ('piano', 64, 90), ('onset', 1320), ('dur', 280), ('piano', 63, 90), ('onset', 1480), ('dur', 830), ('piano', 71, 90), ('onset', 2180), ('dur', 1090), ('piano', 70, 90), ('onset', 3040), ('dur', 3280), ('piano', 68, 90), ('onset', 4130), ('dur', 2180), ('piano', 63, 90), '<T>', ('onset', 460), ('dur', 860), ('piano', 70, 90), ('onset', 840), ('dur', 480), ('piano', 71, 90), ('onset', 1110), ('dur', 300), ('piano', 73, 90), ('onset', 1360), ('dur', 620), ('piano', 75, 90), ('onset', 1840), ('dur', 180), ('piano', 78, 90), ('onset', 1940), ('dur', 780), ('piano', 76, 90), ('onset', 2570), ('dur', 890), ('piano', 75, 90), ('onset', 2950), ('dur', 510), ('piano', 80, 90), ('onset', 3470), ('dur', 2070), ('piano', 75, 90), ('onset', 4330), ('dur', 1210), ('piano', 71, 90), ('onset', 4920), ('dur', 620), ('piano', 75, 90), '<T>', ('onset', 370), ('dur', 180), ('piano', 73, 90), ('onset', 500), ('dur', 720), ('piano

In [4]:
# melody_sequence = tokenized_sequence[0:2] + melody + tokenized_sequence[-1:]
# # Write the generated sequences to a MIDI file
# mid_dict = tokenizer.detokenize(melody_sequence)
# mid = mid_dict.to_midi()
# mid.save("test.mid")

In [17]:
print(len([t for t in flattened_sequence if t == "<T>"]))
print(len([t for t in flattened_sequence if t == "<D>"]))
print(len([t for t in melody if t == "<T>"]))
print(len([t for t in melody if t == "<D>"]))
print(len([t for t in unflattened_sequence if t == "<T>"]))
print(len([t for t in unflattened_sequence if t == "<D>"]))

18
1
18
1
18
1


In [3]:
# Separate the list of lists based on the <T> token
def separate_list(sequence):
    separated_list = []
    sublist = []
    for i in range(len(sequence)):
        if sequence[i] == "<T>":
            separated_list.append(sublist)
            sublist = []
        elif type(sequence[i]) == list:
            sublist.append(sequence[i])
    if sublist:
        separated_list.append(sublist)
    return separated_list

separated_list = separate_list(flattened_sequence)
separated_list

[[[63, 75, 0, 920],
  [64, 75, 860, 740],
  [63, 75, 1320, 280],
  [68, 60, 1470, 820],
  [71, 75, 1480, 830],
  [62, 75, 1490, 790],
  [70, 75, 2180, 1090],
  [61, 60, 2180, 1090],
  [67, 60, 2180, 1080],
  [39, 45, 2180, 1080],
  [68, 60, 3040, 3280],
  [44, 45, 3060, 3260],
  [51, 30, 4100, 2220],
  [56, 30, 4120, 2200],
  [59, 30, 4130, 2200],
  [63, 30, 4130, 2180]],
 [[70, 45, 460, 860],
  [71, 60, 840, 480],
  [73, 75, 1110, 300],
  [75, 75, 1360, 620],
  [78, 75, 1840, 180],
  [76, 60, 1940, 780],
  [75, 60, 2570, 890],
  [80, 60, 2950, 510],
  [75, 60, 3470, 2070],
  [68, 45, 4300, 1240],
  [71, 45, 4330, 1210],
  [63, 30, 4340, 1200],
  [56, 30, 4340, 1200],
  [75, 75, 4920, 620]],
 [[73, 75, 370, 180],
  [76, 75, 500, 720],
  [70, 60, 500, 140],
  [61, 45, 520, 700],
  [49, 60, 530, 700],
  [68, 45, 530, 690],
  [75, 75, 1130, 660],
  [68, 45, 1150, 640],
  [71, 45, 1160, 150],
  [56, 45, 1160, 630],
  [44, 45, 1170, 620],
  [73, 60, 1630, 160],
  [71, 60, 1740, 600],
  [70,

In [10]:
import pandas as pd
import numpy as np

# Define a function to round a value to the nearest 05
def round_to_nearest_n(input_value, round_to=0.05):
    rounded_value = round(round(input_value / round_to) * round_to, 2)
    return rounded_value

def get_chord_info(chunk):
    if not chunk:
        return 0, 0, pd.DataFrame()
    
    df = pd.DataFrame(chunk, columns=["pitch", "velocity", "onset", "duration"])
    df['previous_onset'] = df['onset'].shift(1).fillna(0).astype(int)
    df['next_onset'] = df['onset'].shift(-1).fillna(0).astype(int)
    df['same_onset_previous'] = np.where((abs(df['onset'] - df['previous_onset']) <= 30), 1, 0)
    df['same_onset_next'] = np.where((abs(df['onset'] - df['next_onset']) <= 30), 1, 0)
    df['same_onset'] = np.where((df['same_onset_previous'] == 0) & (df['same_onset_next'] == 1), 1, 0)

    counter = 0
    group = 0
    new_column = []

    for value in df['same_onset']:
        if value == 1:
            counter += 1
            group = counter
        new_column.append(group)

    df['new_same_onset'] = np.where((df['same_onset_previous'] == 0) & (df['same_onset_next'] == 0), 0, new_column)

    len_df = len(df)
    df_filtered = df.loc[df['new_same_onset']!=0]

    cfr = len(df_filtered) / len_df
    cd = df_filtered['new_same_onset'].mean()
    cd = 8 if cd > 8 else cd

    return round_to_nearest_n(cfr, round_to=0.05), round_to_nearest_n(cd, 0.25), df

def get_conditions(separated_list):
    cfr_list = []
    cd_list = []
    for i in range(len(separated_list)):
        cfr, cd, df = get_chord_info(separated_list[i])
        cfr_list.append(("cfr", cfr))
        cd_list.append(("cd", cd))
    return cfr_list, cd_list

cfr_list, cd_list = get_conditions(separated_list)
print("Chord frequency ratio: ", cfr_list)
print("Chord density: ", cd_list)
    

Chord frequency ratio:  [('cfr', 0.8), ('cfr', 0.3), ('cfr', 0.8), ('cfr', 0.85), ('cfr', 0.75), ('cfr', 1.0), ('cfr', 0.85), ('cfr', 0.7), ('cfr', 0.8), ('cfr', 1.0), ('cfr', 0.8), ('cfr', 0.95), ('cfr', 0.95), ('cfr', 0.85), ('cfr', 0.25), ('cfr', 0.65), ('cfr', 0.85), ('cfr', 0.8), ('cfr', 1.0)]
Chord density:  [('cd', 2.5), ('cd', 1.0), ('cd', 2.75), ('cd', 3.75), ('cd', 2.75), ('cd', 6.0), ('cd', 4.75), ('cd', 3.75), ('cd', 5.0), ('cd', 6.5), ('cd', 4.25), ('cd', 7.25), ('cd', 3.25), ('cd', 2.5), ('cd', 1.0), ('cd', 2.75), ('cd', 3.25), ('cd', 2.75), ('cd', 1.5)]


In [5]:
print(len(cfr_list))
print(len(cd_list))
print(len([t for t in flattened_sequence if t == "<T>"]))

19
19
18


In [6]:
def interleave_conditions(flattened_sequence, cfr_list, cd_list):
    conditioned_flattened_sequence = []
    for n, i in enumerate(flattened_sequence):
        if n == 0:
            cfr_condition = cfr_list.pop(0)
            cd_condition = cd_list.pop(0)
            conditioned_flattened_sequence.append(cfr_condition)
            conditioned_flattened_sequence.append(cd_condition)
            conditioned_flattened_sequence.append(i)
        elif i == "<T>":
            conditioned_flattened_sequence.append(i)
            if len(cfr_list) > 0:
                cfr_condition = cfr_list.pop(0)
                cd_condition = cd_list.pop(0)
                conditioned_flattened_sequence.append(cfr_condition)
                conditioned_flattened_sequence.append(cd_condition)
        else:
            conditioned_flattened_sequence.append(i)

    if len(cfr_list) > 0:
        conditioned_flattened_sequence.append(cfr_list.pop(0))
        conditioned_flattened_sequence.append(cd_list.pop(0))

    return conditioned_flattened_sequence

conditioned_flattened_sequence = interleave_conditions(flattened_sequence, cfr_list, cd_list)

print(conditioned_flattened_sequence)


[('cfr', 0.8), ('cd', 2.5), [63, 75, 0, 920], [64, 75, 860, 740], [63, 75, 1320, 280], [68, 60, 1470, 820], [71, 75, 1480, 830], [62, 75, 1490, 790], [70, 75, 2180, 1090], [61, 60, 2180, 1090], [67, 60, 2180, 1080], [39, 45, 2180, 1080], [68, 60, 3040, 3280], [44, 45, 3060, 3260], [51, 30, 4100, 2220], [56, 30, 4120, 2200], [59, 30, 4130, 2200], [63, 30, 4130, 2180], '<T>', ('cfr', 0.3), ('cd', 1.0), [70, 45, 460, 860], [71, 60, 840, 480], [73, 75, 1110, 300], [75, 75, 1360, 620], [78, 75, 1840, 180], [76, 60, 1940, 780], [75, 60, 2570, 890], [80, 60, 2950, 510], [75, 60, 3470, 2070], [68, 45, 4300, 1240], [71, 45, 4330, 1210], [63, 30, 4340, 1200], [56, 30, 4340, 1200], [75, 75, 4920, 620], '<T>', ('cfr', 0.8), ('cd', 2.75), [73, 75, 370, 180], [76, 75, 500, 720], [70, 60, 500, 140], [61, 45, 520, 700], [49, 60, 530, 700], [68, 45, 530, 690], [75, 75, 1130, 660], [68, 45, 1150, 640], [71, 45, 1160, 150], [56, 45, 1160, 630], [44, 45, 1170, 620], [73, 60, 1630, 160], [71, 60, 1740, 600

In [8]:
# Reverse the flattened function
def unflatten(sequence, static_velocity=False):
    unflattened_sequence = []
    for i in range(len(sequence)):
        if sequence[i] == "<T>":
            unflattened_sequence.append("<T>")
            continue
        elif sequence[i] == "<D>":
            unflattened_sequence.append("<D>")
            continue
        elif type(sequence[i]) == tuple:
            unflattened_sequence.append(sequence[i])
        else:
            if static_velocity:
                note_info = ("piano", sequence[i][0], 90)
            else:
                note_info = ("piano", sequence[i][0], sequence[i][1])
            unflattened_sequence.append(note_info)
            note_info = ("onset", sequence[i][2])
            unflattened_sequence.append(note_info)
            note_info = ("dur", sequence[i][3])
            unflattened_sequence.append(note_info)
            note_info = []
            
            if i < len(sequence)-1:
                if sequence[i+1] == "<T>":
                    continue
                elif sequence[i+1] == "<D>":
                    continue
                else: 
                    if ((sequence[i][2] + sequence[i][3]) >= 5000 and (sequence[i+1][2] + sequence[i+1][3]) < 5000) or (sequence[i+1][2] < sequence[i][2]):
                        unflattened_sequence.append("<T>")

    return unflattened_sequence

unflattened_sequence = unflatten(conditioned_flattened_sequence)
print(unflattened_sequence)

[('cfr', 0.8), ('cd', 2.5), ('piano', 63, 75), ('onset', 0), ('dur', 920), ('piano', 64, 75), ('onset', 860), ('dur', 740), ('piano', 63, 75), ('onset', 1320), ('dur', 280), ('piano', 68, 60), ('onset', 1470), ('dur', 820), ('piano', 71, 75), ('onset', 1480), ('dur', 830), ('piano', 62, 75), ('onset', 1490), ('dur', 790), ('piano', 70, 75), ('onset', 2180), ('dur', 1090), ('piano', 61, 60), ('onset', 2180), ('dur', 1090), ('piano', 67, 60), ('onset', 2180), ('dur', 1080), ('piano', 39, 45), ('onset', 2180), ('dur', 1080), ('piano', 68, 60), ('onset', 3040), ('dur', 3280), ('piano', 44, 45), ('onset', 3060), ('dur', 3260), ('piano', 51, 30), ('onset', 4100), ('dur', 2220), ('piano', 56, 30), ('onset', 4120), ('dur', 2200), ('piano', 59, 30), ('onset', 4130), ('dur', 2200), ('piano', 63, 30), ('onset', 4130), ('dur', 2180), '<T>', ('cfr', 0.3), ('cd', 1.0), ('piano', 70, 45), ('onset', 460), ('dur', 860), ('piano', 71, 60), ('onset', 840), ('dur', 480), ('piano', 73, 75), ('onset', 1110)