In [1]:
import os,sys
import numpy as np

sys.path.append('models')
from large_vocab_adt_dafx2018.transcribe import transcribe, PERC_VOICE_SET
import large_vocab_adt_dafx2018.model as model

  from ._conv import register_converters as _register_converters
  from . import h5a, h5d, h5ds, h5f, h5fd, h5g, h5r, h5s, h5t, h5p, h5z
  from .. import h5g, h5i, h5o, h5r, h5t, h5l, h5p
Using TensorFlow backend.
  from .tslib import iNaT, NaT, Timestamp, Timedelta, OutOfBoundsDatetime
  from pandas._libs import (hashtable as _hashtable,
  from pandas._libs import algos, lib
  from pandas._libs import hashing, tslib
  from pandas._libs import (lib, index as libindex, tslib as libts,
  import pandas._libs.tslibs.offsets as liboffsets
  from pandas._libs import algos as libalgos, ops as libops
  from pandas._libs.interval import (
  from pandas._libs import internals as libinternals
  import pandas._libs.sparse as splib
  import pandas._libs.window as _window
  from pandas._libs import (lib, reduction,
  from pandas._libs import algos as _algos, reshape as _reshape
  import pandas._libs.parsers as parsers
  from pandas._libs import algos, lib, writers as libwriters


In [2]:
# Load the Groove dataset index and keep only the fully described performances.
import pandas as pd

# Root folder of the dataset; info.csv is read from here and the audio/MIDI
# paths it lists are joined onto this folder further down the notebook.
groove_path = 'groove/'

# Build the index path from groove_path so that moving the dataset only
# requires changing one variable.
magenta_ds = pd.read_csv(os.path.join(groove_path, 'info.csv'))

# Remove rows without wav file. A bare dropna() drops every row that holds at
# least one missing value (same result as how='any'); passing how= and thresh=
# together, as was done before, is rejected by recent pandas releases.
# NOTE(review): this also drops rows whose NaN is in a column other than
# 'audio_filename' -- confirm that is intended.
magenta_ds = magenta_ds.dropna()

magenta_ds.head()

Unnamed: 0,drummer,session,id,style,bpm,beat_type,time_signature,midi_filename,audio_filename,duration,split
0,drummer1,drummer1/eval_session,drummer1/eval_session/1,funk/groove1,138,beat,4-4,drummer1/eval_session/1_funk-groove1_138_beat_...,drummer1/eval_session/1_funk-groove1_138_beat_...,27.872308,test
1,drummer1,drummer1/eval_session,drummer1/eval_session/10,soul/groove10,102,beat,4-4,drummer1/eval_session/10_soul-groove10_102_bea...,drummer1/eval_session/10_soul-groove10_102_bea...,37.691158,test
2,drummer1,drummer1/eval_session,drummer1/eval_session/2,funk/groove2,105,beat,4-4,drummer1/eval_session/2_funk-groove2_105_beat_...,drummer1/eval_session/2_funk-groove2_105_beat_...,36.351218,test
3,drummer1,drummer1/eval_session,drummer1/eval_session/3,soul/groove3,86,beat,4-4,drummer1/eval_session/3_soul-groove3_86_beat_4...,drummer1/eval_session/3_soul-groove3_86_beat_4...,44.716543,test
4,drummer1,drummer1/eval_session,drummer1/eval_session/4,soul/groove4,80,beat,4-4,drummer1/eval_session/4_soul-groove4_80_beat_4...,drummer1/eval_session/4_soul-groove4_80_beat_4...,47.9875,test


In [3]:
import mir_eval

def compute_scores(magenta_transcription, out_8v_thres, instrument_index,
                   window=0.5, frame_period=0.01):
    """
    Score the estimated onsets of one instrument against the reference onsets.

    Both inputs are frame-wise activation matrices (one row per frame, one
    column per instrument). Every non-zero entry of the selected column is
    treated as an onset located at ``row_index * frame_period`` seconds.

    Parameters
    ----------
    magenta_transcription : array-like, 2-D
        Reference (ground-truth) activations.
    out_8v_thres : array-like, 2-D
        Estimated activations, expected to be already thresholded to 0/1.
    instrument_index : int
        Column to evaluate in both matrices.
    window : float, optional
        Matching tolerance in seconds handed to ``mir_eval.onset.f_measure``.
        NOTE(review): the notebook has always used 0.5 here, while the
        accompanying note says the mir_eval default / paper value is 0.05 s.
        The choice of window is crucial for the scores -- confirm which one is
        intended. The default is left at 0.5 so existing results do not change.
    frame_period : float, optional
        Duration of one frame in seconds (0.01 by default).

    Returns
    -------
    The value returned by ``mir_eval.onset.f_measure``; callers in this
    notebook unpack it as ``(f_measure, precision, recall)``.
    """
    reference_column = np.asarray(magenta_transcription)[:, instrument_index]
    estimated_column = np.asarray(out_8v_thres)[:, instrument_index]

    # flatnonzero returns the active frame indices in increasing order, so no
    # sort is needed. Unlike multiplying by arange() and filtering on != 0, it
    # keeps an onset that falls on the very first frame (time 0.0).
    onsets_eval_magenta = np.flatnonzero(reference_column) * frame_period
    onsets_eval_model = np.flatnonzero(estimated_column) * frame_period

    # mir_eval expects 1-dimensional arrays of onset times in seconds, in
    # increasing order.
    mir_eval.onset.validate(onsets_eval_magenta, onsets_eval_model)
    scores = mir_eval.onset.f_measure(onsets_eval_magenta, onsets_eval_model,
                                      window=window)

    return scores

In [4]:
def get_model_onsets(input_audio_file, model_num=303, eps=0):
    """
    Transcribe an audio file and reduce the model's voices to 8 voices.

    Parameters
    ----------
    input_audio_file : str
        Path of the audio file handed to ``transcribe``.
    model_num : int, optional
        Identifier of the trained model; it selects the
        ``<model_num>_s0_def.json`` / ``<model_num>_s0_weights.h5`` pair and is
        also passed on as the model configuration id.
    eps : float, optional
        Activations strictly below this value are set to 0 before the voices
        are merged. The default of 0 only removes negative activations.

    Returns
    -------
    numpy.ndarray of shape (n_frames, 8)
        For every frame, the maximum activation over the source voices merged
        into each of the 8 target voices.
    """
    models_path = 'models/large_vocab_adt_dafx2018/trained_models/'

    # locate the model definition and weights
    model_definition_path = os.path.join(models_path, str(model_num) + '_s0_def.json')
    model_weights_path = os.path.join(models_path, str(model_num) + '_s0_weights.h5')
    model_configuration_id = model_num
    sample_audio_files = dict((v, os.path.join('models/audio/', '{}.wav'.format(v)))
                              for v in PERC_VOICE_SET)

    # compute the model's onset activations (the '14v' entry is used below)
    output = transcribe(model_definition_path,
                        model_weights_path,
                        input_audio_file,
                        model_configuration_id,
                        sample_audio_files,
                        peak_params=None,
                        output_sample_rate=44100)

    out_14v = np.asarray(output['14v']['onset_activations'])

    # target voice -> source columns of out_14v that are merged into it
    reduced_mapping = {
        0: [0],      # kick
        1: [1, 2],   # snare + snare rim
        2: [3],      # crash
        3: [4, 12],  # ride + bell
        4: [5],      # open hh
        5: [6],      # closed hh
        6: [7, 8],   # low+mid tom
        7: [9]       # high tom
    }

    # np.where builds a new array, so the activations returned by transcribe()
    # are not modified in place.
    clipped = np.where(out_14v < eps, 0, out_14v)

    # Column-wise maximum over each group of source voices; this replaces the
    # frame-by-frame Python loop with one reduction per target voice.
    out_8v = np.zeros([clipped.shape[0], len(reduced_mapping)])
    for target_voice, source_columns in reduced_mapping.items():
        out_8v[:, target_voice] = clipped[:, source_columns].max(axis=1)

    return out_8v

In [5]:
import pretty_midi

def get_magenta_onsets(input_midi_file):
    """
    Build a frame-wise 8-voice onset matrix from a drum MIDI file.

    The notes of the first instrument in the file are read; each note start is
    rounded to a 10 ms frame and its MIDI pitch is mapped to one of 8 voices.
    Notes whose pitch is not in the mapping are ignored.

    Parameters
    ----------
    input_midi_file : str
        Path of the MIDI file, opened with ``pretty_midi.PrettyMIDI``.

    Returns
    -------
    numpy.ndarray of shape (n_frames, 8)
        1 where a voice has an onset on that frame, 0 elsewhere. ``n_frames``
        is ``int(end_time / 0.01)``, extended when necessary so that the last
        mapped onset still fits in the matrix.
    """
    frame_period = 0.01  # seconds per frame

    # read with prettyMIDI
    midi_data = pretty_midi.PrettyMIDI(input_midi_file)
    length_in_samples = int(midi_data.get_end_time() / frame_period)

    # magenta pitch to model pitch (map to 8 voices)
    magenta_mapping = {  # name: [voice index, [pitches]]
        "kick": [0, [36]],
        "snare": [1, [38, 40, 37]],
        "crash": [2, [49, 55, 57, 52]],
        "ride": [3, [51, 59, 53]],
        "open_hh": [4, [46, 26]],
        "closed_hh": [5, [42, 22, 44]],
        "low_mid_tom": [6, [45, 47]],
        "high_tom": [7, [48, 50]]
        }
    # Inverted view (pitch -> voice index) so every note is resolved with one
    # dictionary lookup instead of scanning all voices.
    pitch_to_voice = {}
    for voice_index, pitches in magenta_mapping.values():
        for pitch in pitches:
            pitch_to_voice[pitch] = voice_index

    # A file without any instrument track has no onsets at all.
    if not midi_data.instruments:
        return np.zeros([length_in_samples, 8])

    # get pretty_midi Note objects of the first instrument
    drums_notes = midi_data.instruments[0].notes

    # note starts: seconds rounded to 10 ms, then converted to frame indices
    drums_onsets = np.round([note.start for note in drums_notes], 2)
    drums_onsets = np.round(drums_onsets / frame_period, 0).astype(int)

    # keep only the notes whose pitch belongs to one of the 8 voices
    mapped_onsets = [(int(onset), pitch_to_voice[note.pitch])
                     for onset, note in zip(drums_onsets, drums_notes)
                     if note.pitch in pitch_to_voice]

    # Onsets are rounded to the nearest frame while the length is truncated,
    # so a note starting close to the end of the file can land on frame index
    # == length_in_samples; grow the matrix instead of failing on it.
    if mapped_onsets:
        last_frame = max(onset for onset, _voice in mapped_onsets)
        length_in_samples = max(length_in_samples, last_frame + 1)

    # initiate the transcription matrix with zeros and mark the onsets
    magenta_transcription = np.zeros([length_in_samples, 8])
    for onset, voice_index in mapped_onsets:
        magenta_transcription[onset, voice_index] = 1

    return magenta_transcription
    

# Everything up to this point is already in the notebook

## best threshold
TODO: should the best threshold be computed separately for each model?

In [6]:
# Activation thresholds tried during the sweep: steps of 0.01 starting at
# 0.01, with 0.3 as the (excluded) upper bound.
threshold_candidates = np.arange(0.01, 0.3, 0.01)

# Folder that receives one CSV per performance evaluated in the sweep.
threshold_eval_dir = 'threshold_eval/'

if os.path.exists(threshold_eval_dir):
    pass  # folder is already there, nothing to create
else:
    os.mkdir(threshold_eval_dir)

In [7]:
run = False # change to true to run

# Randomly select up to 20 *distinct* files from the beat performances.
# replace=False is required: np.random.choice samples with replacement by
# default, which could return the same performance several times.
# NOTE(review): no random seed is set, so the selection differs between runs.
beat_audio_files = np.array(magenta_ds[magenta_ds['beat_type']=='beat']['audio_filename'])
audio_files_list = np.random.choice(beat_audio_files,
                                    min(20, len(beat_audio_files)),
                                    replace=False)

num_performances = len(audio_files_list)
num_instruments = 8

for performance_idx in range(num_performances):

    # the sweep is expensive, so it only runs when explicitly enabled
    if not run:
        break

    # print percentage done
    percentage = (performance_idx+1) * 100  / num_performances
    if percentage % 10 == 0:
        print(str(percentage) + '%')

    # Path of the performance relative to the dataset root, without extension.
    # splitext only strips the final extension, so names containing extra
    # dots are handled correctly.
    relative_stem = os.path.splitext(audio_files_list[performance_idx])[0]

    # get audio and midi file path
    audio_file = os.path.join(groove_path, audio_files_list[performance_idx])
    midi_file = os.path.join(groove_path, relative_stem + '.mid')

    # csv_path: the relative path flattened into a single file name
    csv_path = os.path.join(threshold_eval_dir, relative_stem.replace('/', '-') + '.csv')

    # if csv file already exists, jump to next file
    if os.path.isfile(csv_path):
        continue

    # get magenta onsets
    magenta_onsets = get_magenta_onsets(midi_file)
    # get model onsets
    model_onsets = get_model_onsets(audio_file)

    # Compute the F-measure for each threshold candidate and instrument.
    # Rows are collected in a list and turned into a DataFrame once, instead
    # of growing a DataFrame row by row.
    records = []
    for thres in threshold_candidates:

        # binarise the activations: 0 below the threshold, 1 otherwise
        out_8v_thres = np.array(model_onsets)
        out_8v_thres[out_8v_thres < thres] = 0
        out_8v_thres[out_8v_thres > 0] = 1

        for instrument_idx in range(num_instruments):
            f_measure = compute_scores(magenta_onsets, out_8v_thres, instrument_idx)[0]

            records.append({'threshold': thres,
                            'f_measure': f_measure,
                            'instrument_idx': instrument_idx})

    df = pd.DataFrame(records, columns=['threshold','f_measure','instrument_idx'])

    # Remove rows with instruments that do not appear (F-measure of 0).
    # Only the f_measure column is tested: replacing every 0 in the frame with
    # NaN also wiped instrument_idx == 0, so the kick was always discarded.
    df = df[df['f_measure'] > 0]

    # keep, for every instrument, the threshold(s) reaching its best F-measure
    is_best = df.groupby(['instrument_idx'])['f_measure'].transform('max') == df['f_measure']
    df = df[is_best].set_index('instrument_idx')

    # store data to csv
    df.to_csv(csv_path, index = True)


In [77]:
# best value by instrument
# Only the CSV files written by the threshold sweep are read; other directory
# entries (checkpoints, hidden files, ...) would make read_csv fail.
csv_files = sorted(name for name in os.listdir(threshold_eval_dir)
                   if name.endswith('.csv'))

per_file_best = []

for f in csv_files:
    df = pd.read_csv(os.path.join(threshold_eval_dir, f))

    # skip files that do not carry the expected columns
    if 'instrument_idx' not in df.columns or 'threshold' not in df.columns:
        continue

    # if several threshold values gave the same f_measure, take the max
    is_max_threshold = df.groupby(['instrument_idx'])['threshold'].transform('max') == df['threshold']
    per_file_best.append(df.loc[is_max_threshold, ['instrument_idx', 'threshold']])

# Concatenate once instead of appending inside the loop; fall back to an
# empty frame when no CSV has been produced yet.
if per_file_best:
    df_total = pd.concat(per_file_best, ignore_index=True)
else:
    df_total = pd.DataFrame(columns=['instrument_idx', 'threshold'])

# best threshold? -> mean, over all files, of the per-file best threshold
df_total = df_total.groupby(['instrument_idx']).mean()

display(df_total)

Unnamed: 0_level_0,threshold
instrument_idx,Unnamed: 1_level_1
1.0,0.267778
2.0,0.1325
3.0,0.08
4.0,0.246429
5.0,0.214375
6.0,0.199091
7.0,0.117273


In [85]:
# Threshold per voice index (0-7); 0 marks "not optimised yet".
threshold = {voice: 0 for voice in range(8)}

# Copy the optimised values, rounded to two decimals, from the summary table.
for position, voice in enumerate(list(df_total.index.values)):
    optimised_value = df_total.iloc[position]['threshold']
    threshold[voice] = np.round(optimised_value, 2)

# Voices still at 0 were never optimised: they get the lowest optimised threshold.
for voice, current_value in list(threshold.items()):
    if current_value == 0:
        threshold[voice] = np.round(df_total.min()['threshold'], 2)

print(threshold)


{0: 0.08, 1: 0.27, 2: 0.13, 3: 0.08, 4: 0.25, 5: 0.21, 6: 0.2, 7: 0.12}


## save evaluation for each file

In [86]:
# Models under evaluation and the folder holding their stored results.
selected_models = [291, 295]
results_dir = 'results'
num_instruments = 8

# Folder that will receive the evaluation output; create it on first use.
eval_model_dir = 'eval_model'
if os.path.exists(eval_model_dir) is False:
    os.mkdir(eval_model_dir)

# The result files of the first selected model define the list of performances.
computed_files = os.listdir(os.path.join(results_dir, str(selected_models[0])))
num_performances = len(computed_files)
print(num_performances)

139


In [114]:
import json
import pandas as pd

selected_models = [291, 295]

for f in computed_files:

    # file names are '<model>_<performance>'; strip the model prefix
    performance_name = '_'.join(f.split('_')[1:])

    # `model_id` rather than `model`: the first cell imports a module under
    # the name `model`, which a loop variable of that name would overwrite.
    for model_id in selected_models:

        # path for storing model results
        model_dir = os.path.join(eval_model_dir, str(model_id))

        if not os.path.exists(model_dir):
            os.mkdir(model_dir)

        file_name = str(model_id) + '_' + performance_name
        file_path = os.path.join(results_dir, str(model_id), file_name)

        # A missing result for one model must not prevent the remaining
        # models from being evaluated, hence `continue` and not `break`.
        if not os.path.isfile(file_path):
            print('NotExist', file_path)
            continue

        with open(file_path) as json_file:
            result = json.load(json_file)

        magenta_onsets = np.array(result['magenta_onsets'])
        model_onsets = np.array(result['model_onsets'])

        csv_eval_path = os.path.join(model_dir, os.path.splitext(file_name)[0] + '.csv')

        # Binarised activations, filled one column at a time below because
        # every instrument has its own threshold.
        out_8v_thres = np.zeros(model_onsets.shape)

        # one row per instrument, collected first and converted once
        records = []

        for instrument_idx in range(num_instruments):

            # 1 where the activation is positive and not below the
            # instrument's threshold, 0 elsewhere
            activations = model_onsets[:, instrument_idx]
            out_8v_thres[:, instrument_idx] = ((activations >= threshold[instrument_idx])
                                               & (activations > 0))

            # compute evaluation scores
            f_measure, precision, recall = compute_scores(magenta_onsets, out_8v_thres,
                                                          instrument_idx)

            # number of occurrences in ground truth
            num_of_occ = int(np.count_nonzero(magenta_onsets[:, instrument_idx] == 1))

            records.append({'threshold': threshold[instrument_idx],
                            'f_measure': f_measure,
                            'precision': precision,
                            'recall': recall,
                            'instrument_idx': instrument_idx,
                            'num_of_occ': num_of_occ})

        # Instruments that never occur are kept here (num_of_occ == 0); they
        # are needed as zero weights when the per-file results are averaged.
        df = pd.DataFrame(records, columns=['threshold','f_measure','precision','recall',
                                            'instrument_idx','num_of_occ'])
        df = df.astype({'instrument_idx':int, 'num_of_occ':int})

        # store data to csv
        df.to_csv(csv_eval_path, index = True)

## general eval stats

In [115]:

# Summary table of every model, keyed by model id. `results` is still left
# pointing at the table of the last model processed.
results_by_model = {}

# `model_id` rather than `model`: the first cell imports a module under the
# name `model`, which a loop variable of that name would overwrite.
for model_id in selected_models:

    model_dir = os.path.join(eval_model_dir, str(model_id))
    # only the per-file evaluation CSVs, in a stable order
    results_list = sorted(name for name in os.listdir(model_dir) if name.endswith('.csv'))
    print(model_id, 'files: ', len(results_list))

    # nothing to summarise for this model
    if not results_list:
        continue

    # read every per-file table and concatenate them once
    df = pd.concat([pd.read_csv(os.path.join(model_dir, name)) for name in results_list],
                   ignore_index=True)

    # drop the index column written by to_csv, when present
    if 'Unnamed: 0' in df.columns:
        df = df.drop(columns=['Unnamed: 0'])

    # Per-instrument average of each metric over the files, weighted by the
    # number of ground-truth onsets in each file: sum(w * x) / sum(w).
    # Written as explicit sums so an instrument with no occurrence at all
    # yields NaN instead of the error np.average raises for zero weights.
    metric_columns = ['f_measure', 'precision', 'recall']
    occurrence_sum = df.groupby('instrument_idx')['num_of_occ'].sum()
    weighted_sums = (df[metric_columns]
                     .multiply(df['num_of_occ'], axis=0)
                     .groupby(df['instrument_idx'])
                     .sum())

    results = weighted_sums.divide(occurrence_sum, axis=0)
    results['occurrence'] = occurrence_sum

    results_by_model[model_id] = results
    display(results)

(291, 'files: ', 139)


Unnamed: 0_level_0,f_measure,occurrence,precision,recall
instrument_idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0.804476,18769,0.810558,0.844872
1,0.837049,30437,0.933705,0.77565
2,0.073587,1100,0.498023,0.045682
3,0.699623,12372,0.824256,0.667403
4,0.185851,2768,0.681483,0.119744
5,0.826909,27035,0.744486,0.952098
6,0.006366,1198,0.070673,0.003339
7,0.036436,2930,0.58938,0.020822


(295, 'files: ', 139)


Unnamed: 0_level_0,f_measure,occurrence,precision,recall
instrument_idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0.733247,18769,0.674915,0.877381
1,0.789087,30437,0.950326,0.692477
2,0.259014,1100,0.623581,0.182727
3,0.718481,12372,0.80141,0.700922
4,0.012744,2768,0.298952,0.006867
5,0.807298,27035,0.862163,0.782503
6,0.176685,1198,0.519783,0.127713
7,0.046277,2930,0.612876,0.026629


In [116]:
# Show again the summary table left in `results` by the loop of the previous
# cell; that loop reassigns `results` on every iteration, so this is the table
# of the last model it processed.
display(results)

Unnamed: 0_level_0,f_measure,occurrence,precision,recall
instrument_idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0.733247,18769,0.674915,0.877381
1,0.789087,30437,0.950326,0.692477
2,0.259014,1100,0.623581,0.182727
3,0.718481,12372,0.80141,0.700922
4,0.012744,2768,0.298952,0.006867
5,0.807298,27035,0.862163,0.782503
6,0.176685,1198,0.519783,0.127713
7,0.046277,2930,0.612876,0.026629


In [57]:
# Scratch check of how ground-truth onsets are counted: print frames 200-203
# of column 0 of `magenta_onsets`, then the number of entries equal to 1 in
# that slice.
# NOTE(review): relies on `magenta_onsets` left over from an earlier cell, so
# the output depends on which file was processed last.
#print(np.where(magenta_onsets[:,0]==1))
print(magenta_onsets[200:204,0])
print(len(np.where(magenta_onsets[200:204,0]==1)[0]))

[0. 0. 1. 0.]
1


In [92]:
# Print the (row indices, column indices) of every entry equal to 1 in
# `magenta_onsets`.
# NOTE(review): relies on `magenta_onsets` left over from an earlier cell.
print(np.where(magenta_onsets==1))

(array([  151,   166,   170, ..., 22535, 22548, 22559]), array([1, 1, 1, ..., 1, 7, 7]))
