In [114]:
import pandas as pd

dataset_dir = "Datasets/asap-dataset"
# read in json file as pandas dataframe
annotations=pd.read_json(f"{dataset_dir}/asap_annotations.json").transpose()
# add column for score_filename to annotations that converts row name to score_filename
annotations['score_filename'] = annotations.index.map(lambda x: f"{'/'.join(x.split('/')[:-1])}/midi_score.mid")
# rename index to performance_filename
annotations.index=annotations.index.rename('performance_filename')
annotations.reset_index(inplace=True)

annotations.head(2)

# only keep rows of annotations where score_and_performance_aligned is True	
annotations = annotations[annotations['score_and_performance_aligned'] == True]
annotations.head(2)

# unique score_filename values
score_filenames = annotations['score_filename'].unique()

performance_filenames = annotations['performance_filename'].unique()
annotations.head(4)

Unnamed: 0,performance_filename,performance_beats,performance_downbeats,performance_beats_type,perf_time_signatures,perf_key_signatures,midi_score_beats,midi_score_downbeats,midi_score_beats_type,midi_score_time_signatures,midi_score_key_signatures,downbeats_score_map,score_and_performance_aligned,score_filename
0,Bach/Fugue/bwv_846/Shi05M.mid,"[1.095052, 2.364583, 3.66276, 4.924479, 6.2109...","[4.924479, 9.818359, 14.721354999999999, 19.56...","{'1.095052': 'b', '2.364583': 'b', '3.66276': ...","{'4.924479': ['4/4', 4]}","{'1.095052': [0, 0]}","[0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, ...","[2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 1...","{'0.5': 'b', '1.0': 'b', '1.5': 'b', '2.0': 'd...","{'2.0': ['4/4', 4]}","{'0.5': [0, 0]}","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",True,Bach/Fugue/bwv_846/midi_score.mid
1,Bach/Fugue/bwv_848/Denisova06M.mid,"[0.825521, 1.341146, 1.9335939999999998, 2.467...","[1.9335939999999998, 4.113281, 6.186198, 8.280...","{'0.825521': 'b', '1.341146': 'b', '1.933594':...","{'1.933594': ['4/4', 4]}","{'0.825521': [1, 7]}","[1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, ...","[2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 1...","{'1.0': 'b', '1.5': 'b', '2.0': 'db', '2.5': '...","{'2.0': ['4/4', 4]}","{'1.0': [1, 7]}","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",True,Bach/Fugue/bwv_848/midi_score.mid
2,Bach/Fugue/bwv_848/Lee01M.mid,"[0.8463539999999999, 1.4375, 2.085938, 2.70052...","[2.085938, 4.597656, 6.9069009999999995, 9.206...","{'0.846354': 'b', '1.4375': 'b', '2.085938': '...","{'2.085938': ['4/4', 4]}","{'0.846354': [1, 7]}","[1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, ...","[2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 1...","{'1.0': 'b', '1.5': 'b', '2.0': 'db', '2.5': '...","{'2.0': ['4/4', 4]}","{'1.0': [1, 7]}","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",True,Bach/Fugue/bwv_848/midi_score.mid
3,Bach/Fugue/bwv_848/LeeSH01M.mid,"[0.855208, 1.520833, 2.229167, 2.928125, 3.642...","[2.229167, 5.088542, 7.7276039999999995, 10.4,...","{'0.855208': 'b', '1.520833': 'b', '2.229167':...","{'2.229167': ['4/4', 4]}","{'0.855208': [1, 7]}","[1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, ...","[2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 1...","{'1.0': 'b', '1.5': 'b', '2.0': 'db', '2.5': '...","{'2.0': ['4/4', 4]}","{'1.0': [1, 7]}","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",True,Bach/Fugue/bwv_848/midi_score.mid


In [118]:
from random import randrange
import numpy as np
from miditoolkit.midi import parser as mid_parser  
from miditoolkit.midi import containers as ct
import miditoolkit as mtk
from tqdm import tqdm

def read_mid(filename):
    """
    Reads a midi file and returns a midi object.
    """
    full_filename=f"{dataset_dir}/{filename}"
    midi_obj=mtk.MidiFile(full_filename)
    return(midi_obj)

def getNotes(midi_obj):
    notes=[]
    for instrument in midi_obj.instruments:
        notes.extend(instrument.notes)
    notes=sorted(notes,key=lambda x:x.start)
    return notes

def getBeats(filename,annotations,type="score"):
    if type=="score":
        beats=annotations[annotations['score_filename']==filename]['midi_score_beats'].iloc[0]
    elif type=="performance":
        beats=annotations[annotations['performance_filename']==filename]['performance_beats'].iloc[0]
    else:
        return None
    beats=[0]+beats
    return beats

In [145]:
# Input : Note pitch, beat number, notes in that beat for performance
# Output : Note in Score

def identifyNote(perf_note,score_notes,perf_notes):
    """
    Returns note in score_notes corresponding to perf_note
    """
    radius=5
    perf_pitches=[note.pitch for note in perf_notes]
    score_pitches=[note.pitch for note in score_notes]
    
    # Assert that perf_note is in perf_notes
    
    try: 
        assert checkNoteInNotes(perf_note,perf_notes)
    except:
        print(perf_note.pitch)
        print(perf_pitches)
        print(score_pitches)


    assert perf_note.pitch in score_pitches

    # Get the set of notes in perf_notes that are near perf_note
    perf_pitches_near=pitchesNear(perf_note,perf_notes,radius=radius)

    # Find position of perf_pitch in score_pitches with maximum similarity
    sim_list=[]
    for i in range(len(score_pitches)):
        score_pitch=score_pitches[i]
        if score_pitch==perf_note.pitch:
            score_pitch_near=pitchesNear(score_notes[i],score_notes,radius=radius)
            sim=similarity(perf_pitches_near,score_pitch_near)
            
            sim_list.append((i,sim))

    # Get the index of the note in score_notes with maximum similarity
    max_sim=max(sim_list,key=lambda x:x[1])
    max_sim_index=max_sim[0]

    prob_dist=getDistribution(sim_list)

    # Return the note in score_notes corresponding to max_sim_index
    score_note=score_notes[max_sim_index]
    
    return score_note,prob_dist

def getDistribution(sim_list):
    """
    Returns the probability distribution of sim_list
    """
    sim_list=np.array(sim_list)
    sim_list[:,1]=sim_list[:,1]/sum(sim_list[:,1])
    # Convert to a list of tuples
    sim_list=[tuple(x) for x in sim_list]
    # Sort the list by similarity
    sim_list=sorted(sim_list,key=lambda x:x[1],reverse=True)
    return sim_list
    

def similarity(set1,set2):
    """
    Returns the similarity between two sets
    """
    return len(set1.intersection(set2))/len(set1.union(set2))


def notesNear(note,notes,radius=5):
    """
    Returns the notes in notes that are near note
    """
    note_index=notes.index(note)
    start_index=max(0,note_index-radius)
    end_index=min(len(notes),note_index+radius)
    return notes[start_index:end_index]


def pitchesNear(note,notes,radius=5):
    """
    Returns the pitches of notes in notes that are near note
    """
    notes_near=notesNear(note,notes,radius)
    return set([note.pitch for note in notes_near])
    

def checkNoteInNotes(note,notes):
    """
    Returns True if note is in notes
    """
    for n in notes:
        if note.pitch==n.pitch and note.start==n.start and note.end==n.end:
            return True
    return False

def perfNote2ScoreNote(perf_note,score,performance,score_beats,performance_beats):
    """
    Score, Performance are a list of notes
    """
    # Get the beats in between which perf_note is present
    start_beat=getStartBeat(perf_note,performance_beats,performance)
    
    end_beat=min(start_beat+3,len(score_beats)-1)
    start_beat=max(0,start_beat-3)

    # Get the start and end time of the beat for performance
    perf_start_time=performance_beats[start_beat]
    perf_end_time=performance_beats[end_beat]
    
    # Get the notes in that beat for performance
    perf_notes=notesBetween(perf_start_time,perf_end_time,performance)

    # Get the start and end time of the beat for score
    score_start_time=score_beats[start_beat]
    score_end_time=score_beats[end_beat]

    # Get the notes in that beat for score
    score_notes=notesBetween(score_start_time,score_end_time,score)

    # Get the note in score_notes corresponding to perf_note
    score_note,prob_dist=identifyNote(perf_note,score_notes,perf_notes)

    return(score_note,prob_dist)


def getStartBeat(note,beats,midi_obj):
    t2tmap=midi_obj.get_tick_to_time_mapping()
    
    start_ticks=note.start

    start_time=t2tmap[start_ticks]

    for i in range(len(beats)-1):
        if beats[i]<start_time and beats[i+1]>start_time:
            return i


def notesBetween(start_time,end_time, midi_obj : mtk.MidiFile):
    """
    Returns notes between start_time and end_time
    """
    notes=getNotes(midi_obj)
    t2tmap=midi_obj.get_tick_to_time_mapping()
    notes_in_range=[]
    for note in notes:
        note_start_time=t2tmap[note.start]
        note_end_time=t2tmap[note.end]
        if note_start_time>=start_time and note_end_time<=end_time:
            notes_in_range.append(note)
    return notes_in_range


In [147]:
# set random seed
import random
score_filename=score_filenames[0]
performance_filename=performance_filenames[0]

# Read in score and performance
score_obj=read_mid(score_filename)
performance_obj=read_mid(performance_filename)

# Get beats
score_beats = getBeats(score_filename,annotations,"score")
performance_beats = getBeats(performance_filename,annotations,"performance")

# Get notes
score_notes=getNotes(score_obj)
performance_notes=getNotes(performance_obj)
for index in range(len(performance_notes)):
    # Get a random note from performance
    index=randrange(len(performance_notes))
    perf_note=performance_notes[index]

    # convert start time of perf_note to seconds
    perf_note_start=performance_obj.get_tick_to_time_mapping()[perf_note.start]

    # Get the corresponding note in score
    score_note,sim_list=perfNote2ScoreNote(perf_note,score_obj,performance_obj,score_beats,performance_beats)
    # print(sim_list)

0
1
2
3
4
5
6
7
8
83
[59, 67, 69, 71, 72, 74, 76, 77, 67, 69, 72, 74, 76, 77, 79, 81, 77, 74]
[62, 59, 67, 69, 71, 72, 74, 76, 77, 67, 69, 72, 74, 76, 77, 79, 81, 77, 74, 83]


ValueError: Note(start=107373, end=108283, pitch=83, velocity=65) is not in list