# Score Melody Dataset

### Imports

In [1]:
import pandas as pd
from utils import read_annotations, read_mid, getNotes, getBeats,save_mid
from random import randrange
import numpy as np
from miditoolkit.midi import parser as mid_parser  
from miditoolkit.midi import containers as ct
import miditoolkit as mtk
from tqdm import tqdm
from melody_extraction import extractMelody,split2midi
import random
random.seed(420)

### Read annotations

In [2]:
# Root folder of the dataset; the annotations JSON lives directly inside it.
# NOTE(review): the function defaults further down spell this path
# "Sample_Data/..." (capital D) -- confirm which spelling matches the disk.
dataset_dir = "Sample_data/asap-dataset"
annotations_file = dataset_dir + "/asap_annotations.json"

# Load the annotations with process=False.
annotations = read_annotations(annotations_file, process=False)

# Distinct score / performance file names referenced by the annotations.
score_filenames = annotations['score_filename'].unique()
performance_filenames = annotations['performance_filename'].unique()

### Helper functions

In [3]:
def perfNote2ScoreNote(perf_note,score,performance,score_beats,performance_beats):
    """
    Find the score note that corresponds to a performance note.

    The beat containing ``perf_note`` is located in the performance, a window
    of up to 3 beats on either side is taken, and the notes inside that window
    are collected from both the performance and the score. The same beat
    indices are used for both beat lists, so the two lists are assumed to be
    aligned index-by-index.

    perf_note         : note taken from the performance
    score             : MIDI object of the score (passed to notesBetween)
    performance       : MIDI object of the performance (its tick-to-time
                        mapping is used to place perf_note)
    score_beats       : beat times of the score
    performance_beats : beat times of the performance

    Returns (score_note, prob_dist) exactly as produced by identifyNote.

    Raises ValueError if no beat can be found for perf_note or if either beat
    list is empty.
    """
    # Highest beat index that is valid for BOTH beat lists; the window indices
    # below are used to index into each of them.
    last_beat = min(len(score_beats), len(performance_beats)) - 1
    if last_beat < 0:
        raise ValueError("score_beats and performance_beats must not be empty")

    # Get the beat in which perf_note starts
    beat_index = getStartBeat(perf_note, performance_beats, performance)
    if beat_index is None:
        # Fail with a clear message instead of a TypeError on `None + 3`.
        raise ValueError(f"could not locate a beat for perf_note {perf_note}")
    beat_index = min(beat_index, last_beat)

    # Window of beats around the note, clamped to the valid index range
    end_beat = min(beat_index + 3, last_beat)
    start_beat = max(0, beat_index - 3)

    # Notes of the performance that lie inside the window
    perf_notes = notesBetween(
        performance_beats[start_beat], performance_beats[end_beat], performance
    )

    # Notes of the score that lie inside the same window of beats
    score_notes = notesBetween(
        score_beats[start_beat], score_beats[end_beat], score
    )

    # Get the note in score_notes corresponding to perf_note
    score_note, prob_dist = identifyNote(perf_note, score_notes, perf_notes)

    return (score_note, prob_dist)

def identifyNote(perf_note,score_notes,perf_notes):
    """
    Returns the note in score_notes corresponding to perf_note.

    Every score note with the same pitch as perf_note is a candidate. Each
    candidate is scored by the similarity between the set of pitches around
    it in score_notes and the set of pitches around perf_note in perf_notes;
    the first candidate with the highest score wins.

    Returns (score_note, prob_dist) where prob_dist is the output of
    getDistribution for the candidate scores.

    Raises Exception if perf_note is not contained in perf_notes, and
    AssertionError if no score note has the pitch of perf_note.
    """
    radius = 5
    perf_pitches = [note.pitch for note in perf_notes]
    score_pitches = [note.pitch for note in score_notes]

    # perf_note must be part of perf_notes, otherwise its neighbourhood
    # cannot be computed.
    if not noteInNotes(perf_note, perf_notes):
        print(perf_note.pitch)
        print(perf_pitches)
        print(score_pitches)
        exception_str = f"perf_note {perf_note} not in perf_notes"
        raise Exception(exception_str)

    # Explicit check instead of `assert`: asserts are stripped under -O, which
    # would let an empty candidate list reach max() below. The exception type
    # is kept as AssertionError so existing behaviour is preserved.
    if perf_note.pitch not in score_pitches:
        raise AssertionError(
            f"pitch {perf_note.pitch} of perf_note not found in score pitches {score_pitches}"
        )

    # Get the set of pitches in perf_notes that are near perf_note
    perf_pitches_near = pitchesNear(perf_note, perf_notes, radius=radius)

    # Score every position in score_notes that has the right pitch
    sim_list = []
    for i, candidate in enumerate(score_notes):
        if candidate.pitch == perf_note.pitch:
            score_pitch_near = pitchesNear(candidate, score_notes, radius=radius)
            sim_list.append((i, similarity(perf_pitches_near, score_pitch_near)))

    # Index of the candidate with maximum similarity (first one on ties)
    max_sim_index = max(sim_list, key=lambda x: x[1])[0]

    prob_dist = getDistribution(sim_list)

    # Return the note in score_notes corresponding to max_sim_index
    score_note = score_notes[max_sim_index]

    return score_note, prob_dist

def getDistribution(sim_list):
    """
    Returns the probability distribution of sim_list.

    sim_list is a list of (index, similarity) pairs. The similarities are
    divided by their sum so that they add up to 1, and the pairs are returned
    sorted by probability, highest first (ties keep their input order).

    Edge cases:
      * an empty list returns an empty list;
      * if all similarities are 0 the distribution is uniform (dividing by
        the zero sum would only produce NaN).

    The index of each pair is passed through unchanged.
    """
    pairs = list(sim_list)
    if not pairs:
        return []

    total = sum(sim for _, sim in pairs)
    if total == 0:
        # No candidate carries any evidence: fall back to equal probabilities.
        uniform = 1.0 / len(pairs)
        distribution = [(index, uniform) for index, _ in pairs]
    else:
        distribution = [(index, sim / total) for index, sim in pairs]

    # Sort the list by probability, most likely candidate first
    return sorted(distribution, key=lambda pair: pair[1], reverse=True)
    

def similarity(set1,set2):
    """
    Returns the similarity between two sets.

    The value is the size of the intersection divided by the size of the
    union (Jaccard index), i.e. a float between 0.0 and 1.0.
    Two empty sets are treated as identical and give 1.0 instead of raising
    ZeroDivisionError.
    """
    union_size = len(set1.union(set2))
    if union_size == 0:
        return 1.0
    return len(set1.intersection(set2)) / union_size


def notesNear(note,notes,radius=5):
    """
    Returns the notes in notes that are near note.

    "Near" means at most `radius` positions before or after the position of
    `note` in `notes`; the note itself is included. The window is cut off at
    the ends of the list.

    Raises ValueError if note is not found in notes.
    """
    note_index = findNote(note, notes)
    if note_index is None:
        # Clear error instead of a TypeError on `None - radius`.
        raise ValueError(f"note {note} not found in notes")

    start_index = max(0, note_index - radius)
    # +1 because the slice end is exclusive: without it the window would hold
    # `radius` notes before the note but only `radius - 1` after it.
    end_index = min(len(notes), note_index + radius + 1)
    return notes[start_index:end_index]

def findNote(note,notes):
    """
    Returns the position of the first entry of notes that has the same
    pitch, start and end as note, or None when there is no such entry.
    """
    matching_positions = (
        position
        for position, candidate in enumerate(notes)
        if candidate.pitch == note.pitch
        and candidate.start == note.start
        and candidate.end == note.end
    )
    # First match wins; None signals "not found".
    return next(matching_positions, None)

def pitchesNear(note,notes,radius=5):
    """
    Returns the set of pitches of the notes that notesNear selects around
    note in notes (duplicates collapse because the result is a set).
    """
    neighbours = notesNear(note, notes, radius)
    return {neighbour.pitch for neighbour in neighbours}
    

def noteInNotes(note,notes):
    """
    Returns True if notes contains an entry with the same pitch, start and
    end as note, otherwise False.
    """
    return any(
        candidate.pitch == note.pitch
        and candidate.start == note.start
        and candidate.end == note.end
        for candidate in notes
    )


def getStartBeat(note,beats,midi_obj):
    '''
    Returns the index of the beat in which note starts.

    The start tick of note is converted to a time with the tick-to-time
    mapping of midi_obj, and the index i with
    beats[i] <= start_time < beats[i+1] is returned, so a note that starts
    exactly on a beat belongs to that beat.

    Notes starting before the first beat map to index 0 and notes starting on
    or after the last beat map to the last index. Returns None only when
    beats is empty.

    beats is assumed to be sorted in ascending order and expressed in the
    same time unit as the tick-to-time mapping.
    '''
    t2tmap = midi_obj.get_tick_to_time_mapping()
    start_time = t2tmap[note.start]

    if len(beats) == 0:
        return None

    # Pickup notes that sound before the first annotated beat
    if start_time < beats[0]:
        return 0

    for i in range(len(beats) - 1):
        # Inclusive lower bound: strict "<" on both sides would leave notes
        # that start exactly on a beat without any matching interval.
        if beats[i] <= start_time < beats[i + 1]:
            return i

    # On or after the last beat
    return len(beats) - 1


def notesBetween(start_time,end_time, midi_obj : mtk.MidiFile):
    """
    Returns the notes of midi_obj that lie completely inside the interval
    [start_time, end_time].

    Note start/end ticks are converted with the tick-to-time mapping of
    midi_obj before they are compared with the interval bounds, so both
    bounds must be in the unit of that mapping. A note that starts before
    start_time or ends after end_time is left out.
    """
    all_notes = getNotes(midi_obj)
    tick_to_time = midi_obj.get_tick_to_time_mapping()

    def _inside_window(candidate):
        # Both ends of the note must fall within the window.
        onset = tick_to_time[candidate.start]
        offset = tick_to_time[candidate.end]
        return onset >= start_time and offset <= end_time

    return [candidate for candidate in all_notes if _inside_window(candidate)]


In [4]:
import os
def extractScoreMelody(score_filename,performance_filename,annotations,dataset_dir="Sample_Data/asap-dataset"):
    """
    Transfer the melody extracted from a performance onto its score.

    The melody of the performance is obtained with extractMelody; every
    melody note is then mapped to a score note with perfNote2ScoreNote. The
    mapped notes form the score melody, all remaining score notes are the
    "normal" notes, and split2midi combines the two groups.

    score_filename, performance_filename : paths relative to dataset_dir
    annotations : annotations passed on to getBeats
    dataset_dir : folder that contains the MIDI files
                  NOTE(review): this default is spelled "Sample_Data" while
                  the notebook's module-level dataset_dir uses "Sample_data"
                  -- confirm which one is correct.

    Returns (score_w_melody, performance_w_melody).
    """
    # Locate and load both MIDI files
    score_path = os.path.join(dataset_dir, score_filename)
    performance_path = os.path.join(dataset_dir, performance_filename)
    score_obj = read_mid(score_path)
    performance_obj = read_mid(performance_path)

    # Notes of both files (the performance notes are not used further below)
    score_notes = getNotes(score_obj)
    performance_notes = getNotes(performance_obj)

    # Beat times of both files
    score_beats = getBeats(score_filename, annotations, "score")
    performance_beats = getBeats(performance_filename, annotations, "performance")

    # Melody of the performance; its notes are read from the first instrument
    performance_w_melody = extractMelody(performance_path, save=False)
    perf_melody_notes = performance_w_melody.instruments[0].notes

    # Map every melody note of the performance onto a note of the score
    matched_score_notes = set()
    for perf_note in tqdm(perf_melody_notes):
        score_note, _ = perfNote2ScoreNote(
            perf_note, score_obj, performance_obj, score_beats, performance_beats
        )
        matched_score_notes.add(score_note)

    # Every score note that was not matched is a normal (non-melody) note
    score_normal_notes = list(set(score_notes) - matched_score_notes)
    score_melody_notes = list(matched_score_notes)

    score_w_melody = split2midi(score_normal_notes, score_melody_notes)
    return score_w_melody, performance_w_melody

### Extract melody for the score

In [7]:
from utils import score2PerfFileMap

def genScore2MelodyDataset(annotations,dataset_dir="Sample_Data/asap-dataset",store_dir="Store/Score2Melody",num_scores=1):
    """
    Extract the melody for scores and their performances and save the results.

    For each selected score, every performance listed for it by
    score2PerfFileMap is run through extractScoreMelody, and the resulting
    score and performance are saved under store_dir using the same relative
    file names as in the dataset. Missing output folders are created.

    annotations : annotations used to build the score -> performances map
    dataset_dir : folder that contains the input MIDI files
    store_dir   : folder that receives the output files
    num_scores  : how many scores (in map order) to process; the default of 1
                  keeps the previous behaviour of handling only the first
                  score, None processes all of them

    NOTE(review): the score output path depends only on the score file name,
    so each performance of the same score overwrites the previously saved
    score file -- confirm that this is intended.
    """
    file_map = score2PerfFileMap(annotations)
    score_filenames = list(file_map.keys())

    for score_filename in score_filenames[:num_scores]:
        for performance_filename in file_map[score_filename]:
            score_w_melody, performance_w_melody = extractScoreMelody(
                score_filename, performance_filename, annotations, dataset_dir
            )

            score_output_path = os.path.join(store_dir, score_filename)
            performance_output_path = os.path.join(store_dir, performance_filename)

            # The file names may contain sub-folders, so make sure the target
            # folders exist before saving.
            for output_path in (score_output_path, performance_output_path):
                parent_dir = os.path.dirname(output_path)
                if parent_dir:
                    os.makedirs(parent_dir, exist_ok=True)

            save_mid(score_w_melody, score_output_path)
            save_mid(performance_w_melody, performance_output_path)

### Test for a single pair, saved in Store/Score2Melody
TODO : Create folder if it doesn't exist

In [None]:
# Pick the first score and its first performance from the file map
file_map = score2PerfFileMap(annotations)

score_filenames = list(file_map.keys())

score_filename = score_filenames[0]
performance_filename = file_map[score_filename][0]

score_w_melody, performance_w_melody = extractScoreMelody(score_filename, performance_filename, annotations, dataset_dir)

store_dir = "Store/Score2Melody"

# Create the output folder if it doesn't exist yet
os.makedirs(store_dir, exist_ok=True)

save_mid(score_w_melody, os.path.join(store_dir, "score.mid"))
save_mid(performance_w_melody, os.path.join(store_dir, "performance.mid"))