# Filtering Melody Notes 

### Helper Functions

In [1]:
from random import randrange
import numpy as np
from miditoolkit.midi import parser as mid_parser  
from miditoolkit.midi import containers as ct
import miditoolkit as mtk


def filterMelodyNotes(notes):
    """
    Split notes into those rejected and those accepted by isMelodyNote.

    Each note is judged against a window of its neighbours in `notes`:
    up to 10 notes before it, the note itself, and up to 9 notes after it.

    Returns a tuple of (normal notes, flagged notes), where the flagged
    notes are the ones for which isMelodyNote returned a truthy value.
    """
    normal_notes=[]
    ghost_notes=[]
    for i,note in enumerate(notes):
        # Clamp the window start at 0: a negative start would be interpreted
        # as an index from the end of the list, giving the first 10 notes an
        # empty (or unrelated) context.
        context=notes[max(0,i-10):i+10]
        if isMelodyNote(note,context):
            ghost_notes.append(note)
        else:
            normal_notes.append(note)
    return (normal_notes,ghost_notes)

def isMelodyNote(note,context_notes,heuristic="velocity",params=None):
    """
    Run the heuristic registered under `heuristic` in the module-level
    `heuristics` table on the note and return that heuristic's verdict.
    """
    return heuristics[heuristic](note,context_notes,params)

def velocityThreshold(test_note,context_notes,params):
    '''
    Detect whether test_note's velocity is an outlier among its context.

    params: optional dict; its "method" entry (default 1 when params is None
    or has no such entry) is passed on to getOutliers.

    Returns True if test_note.velocity is among the outliers reported by
    getOutliers for the context velocities plus test_note's own, else False.
    '''
    method=params.get("method",1) if params is not None else 1
    all_velocities=[note.velocity for note in context_notes]
    all_velocities.append(test_note.velocity)

    # Always return a real bool rather than falling through to None.
    return test_note.velocity in getOutliers(all_velocities,method)

def getOutliers(data,method=1,z_threshold=2,iqr_factor=1.5):
    """
    Return the values of `data` that are outliers.

    method 1: values whose score (y - median) / std exceeds z_threshold.
              Only values above the median can qualify.
    method 2: values below q1 - iqr_factor * (q3 - q1), returned in
              ascending order. Only low values can qualify.

    Returns a list (empty for empty input, or for method 1 when all values
    are identical). Raises ValueError for any other `method`.
    """
    if method not in (1,2):
        raise ValueError(f"Unknown outlier method: {method!r}")
    if len(data)==0:
        return []

    if method==1:
        median=np.median(data)
        std=np.std(data)
        # Zero spread means no value can stand out; returning early also
        # avoids a divide-by-zero (NaN scores and a RuntimeWarning).
        if std==0:
            return []
        return [y for y in data if (y-median)/std>z_threshold]

    q1,q3=np.percentile(data,[25,75])
    lower_bound=q1-(iqr_factor*(q3-q1))
    return [y for y in sorted(data) if y<lower_bound]

def split2midi(normal_notes,melody_notes):
    """
    Build a midi object with two instruments from two lists of notes.

    The first instrument ("Melody Notes", program 0) holds melody_notes and
    the second ("Normal Notes", program 1) holds normal_notes. Notes whose
    velocity is 0 are dropped from both lists.

    Returns the mid_parser.MidiFile object; nothing is written to disk here.
    """
    normal_notes = [note for note in normal_notes if note.velocity != 0]
    melody_notes = [note for note in melody_notes if note.velocity != 0]
    mido_obj = mid_parser.MidiFile()

    # create instruments
    melody_instrument = mid_parser.Instrument(program=0)
    melody_instrument.name= "Melody Notes"
    melody_instrument.notes = melody_notes

    normal_instrument = mid_parser.Instrument(program=1)
    normal_instrument.name= "Normal Notes"
    normal_instrument.notes = normal_notes

    # Order matters: melody is instrument 0, normal notes are instrument 1.
    mido_obj.instruments.append(melody_instrument)
    mido_obj.instruments.append(normal_instrument)

    return mido_obj

def extractMelody(midi_file,output_filename="melody.mid"):
    """
    Load a midi file, pool the notes of all its instruments in order of start,
    split them with filterMelodyNotes and write the two-instrument result
    built by split2midi to output_filename.
    """
    source=mtk.MidiFile(midi_file)
    pooled=[note for instrument in source.instruments for note in instrument.notes]
    pooled.sort(key=lambda n:n.start)
    normal_notes,melody_notes=filterMelodyNotes(pooled)
    split2midi(normal_notes,melody_notes).dump(output_filename)

    
# Registry of heuristic name -> function, looked up by isMelodyNote.
heuristics={"velocity":velocityThreshold}

### Read annotations

In [2]:
import pandas as pd
from utils import read_annotations

# Root folder of the dataset and the annotations JSON file inside it.
ASAP="Sample_Data/asap-dataset"
annotations_path=f"{ASAP}/asap_annotations.json"

# NOTE(review): read_annotations comes from the local utils module; the result
# is presumably a pandas DataFrame (it is indexed by column name below) — confirm.
annotations=read_annotations(annotations_path,process=False)

# unique score_filename values
score_filenames = annotations['score_filename'].unique()

# unique performance_filename values
performance_filenames = annotations['performance_filename'].unique()

### Extract melody for composers

In [3]:
from tqdm import tqdm
import miditoolkit as mtk
import os

# At most `samples` performances are processed for each listed composer.
samples=2
output_folder="Store/Filter"
composers=["Bach","Chopin","Mozart"]

ASAP_DIR="Sample_Data/asap-dataset"

# exist_ok avoids the check-then-create race and is a no-op if the folder exists.
os.makedirs(output_folder,exist_ok=True)

# counts[k] is the number of performances already processed for composers[k].
counts=[0]*len(composers)

for filename in tqdm(performance_filenames):
    # The composer is the first path component of the performance filename.
    composer=filename.split("/")[0]
    if composer not in composers:
        continue
    composer_idx=composers.index(composer)
    if counts[composer_idx]>=samples:
        continue
    counts[composer_idx]+=1

    full_filename=f"{ASAP_DIR}/{filename}"
    output_filename=f"{output_folder}/{composer}_{filename.split('/')[-1]}"

    extractMelody(full_filename,output_filename)

100%|██████████| 48/48 [00:03<00:00, 14.29it/s]
