In [64]:
import os,sys
import numpy as np

sys.path.append('models')
from large_vocab_adt_dafx2018.transcribe import transcribe, PERC_VOICE_SET
import large_vocab_adt_dafx2018.model as model

  from ._conv import register_converters as _register_converters
  from . import h5a, h5d, h5ds, h5f, h5fd, h5g, h5r, h5s, h5t, h5p, h5z
  from .. import h5g, h5i, h5o, h5r, h5t, h5l, h5p
Using TensorFlow backend.


In [65]:
# files in dataset
import pandas as pd

groove_path = 'groove/'

# load info from dataset
magenta_ds = pd.read_csv('groove/info.csv')

# remove rows without wav file
magenta_ds = magenta_ds.dropna(axis=0, how='any', thresh=None, subset=None, inplace=False) 

magenta_ds.head()

Unnamed: 0,drummer,session,id,style,bpm,beat_type,time_signature,midi_filename,audio_filename,duration,split
0,drummer1,drummer1/eval_session,drummer1/eval_session/1,funk/groove1,138,beat,4-4,drummer1/eval_session/1_funk-groove1_138_beat_...,drummer1/eval_session/1_funk-groove1_138_beat_...,27.872308,test
1,drummer1,drummer1/eval_session,drummer1/eval_session/10,soul/groove10,102,beat,4-4,drummer1/eval_session/10_soul-groove10_102_bea...,drummer1/eval_session/10_soul-groove10_102_bea...,37.691158,test
2,drummer1,drummer1/eval_session,drummer1/eval_session/2,funk/groove2,105,beat,4-4,drummer1/eval_session/2_funk-groove2_105_beat_...,drummer1/eval_session/2_funk-groove2_105_beat_...,36.351218,test
3,drummer1,drummer1/eval_session,drummer1/eval_session/3,soul/groove3,86,beat,4-4,drummer1/eval_session/3_soul-groove3_86_beat_4...,drummer1/eval_session/3_soul-groove3_86_beat_4...,44.716543,test
4,drummer1,drummer1/eval_session,drummer1/eval_session/4,soul/groove4,80,beat,4-4,drummer1/eval_session/4_soul-groove4_80_beat_4...,drummer1/eval_session/4_soul-groove4_80_beat_4...,47.9875,test


In [66]:
import mir_eval

def compute_scores(magenta_transcription, out_8v_thres,instrument_index):

    onsets_eval_magenta = np.arange(magenta_transcription.shape[0])*magenta_transcription[:,instrument_index]*0.01 
    onsets_eval_magenta = np.sort(onsets_eval_magenta[onsets_eval_magenta!=0])

    onsets_eval_model = np.arange(out_8v_thres.shape[0])*out_8v_thres[:,instrument_index]*0.01
    onsets_eval_model = np.sort(onsets_eval_model[onsets_eval_model!=0])


    #print(onsets_eval_magenta[0:5])
    #print(np.round(onsets_eval_model[0:5],2))
    #print(len(onsets_eval_model),len(onsets_eval_magenta))

    # mir_eval: 
    # Onsets should be provided in the form of a 1-dimensional array of onset 
    # times in seconds in increasing order.
    # default window is 0.05 seconds !! CHOICE OF WINDOW IS CRUCIAL
    # same value as in paper
    mir_eval.onset.validate(onsets_eval_magenta,onsets_eval_model)
    scores = mir_eval.onset.f_measure(onsets_eval_magenta,onsets_eval_model, window=0.5)
    
    return scores

In [67]:
def get_model_onsets(input_audio_file, model_num=303):
    
    models_path = 'models/large_vocab_adt_dafx2018/trained_models/'

    # load model
    model_definition_path = os.path.join(models_path,str(model_num)+'_s0_def.json')
    model_weights_path = os.path.join(models_path,str(model_num)+'_s0_weights.h5')
    model_configuration_id = model_num
    sample_audio_files = dict([(v, os.path.join('models/audio/', '{}.wav'.format(v))) for v in PERC_VOICE_SET])


    # compute model's onsets >> outputs 14 voices
    output = transcribe(model_definition_path,
                    model_weights_path,
                    input_audio_file,
                    model_configuration_id,
                    sample_audio_files,
                    peak_params=None,
                    output_sample_rate=44100)

    # save into out_14v
    out_14v = output['14v']['onset_activations']
    
    # reduce 14 voices to the 8 present in the magenta dataset
    length_in_samples = out_14v.shape[0]
    out_8v = np.zeros([length_in_samples,8])

    reduced_mapping = {
    0: [0],     # kick
    1: [1,2],   # snare + snare rim 
    2: [3],     # crash
    3: [4,12],  # ride + bell
    4: [5],     # open hh
    5: [6],     # closed hh
    6: [7,8],   # low+mid tom
    7: [9]      # high tom
    }


    eps = 0 # threshold ?

    for t in range(length_in_samples):
        t_roll = out_14v[t,:]
        t_roll[t_roll<eps] = 0
        for i in reduced_mapping.keys():
            out_8v[t,i] = np.max(t_roll[reduced_mapping[i]])
    
    return out_8v

In [68]:
import pretty_midi

def get_magenta_onsets(input_midi_file):
    # read with prettyMIDI
    midi_data = pretty_midi.PrettyMIDI(input_midi_file)
        
    length_in_samples = int(midi_data.get_end_time()/0.01)

    # extract onsets from midi file with midi_data.get_onsets()
    onsets = midi_data.get_onsets()
    
    # get pretty_midi Notes objects in a list
    drums_notes = midi_data.instruments[0].notes[:]
    
    # initiate drum_onsets and instruments array
    drums_onsets = []
    drums_instrument = []
    
    # fill with info from prettymidi notes objects
    for note in drums_notes:
        drums_onsets.append(note.start)    # onsets list
        drums_instrument.append(note.pitch)   # instrument  
    
    # transform onsets to seconds
    drums_onsets = np.round(drums_onsets,2) # in seconds
    drums_onsets = np.round(drums_onsets/0.01,0) # in samples

    
    # magenta pitch to model pitch (map to 8 voices)
    magenta_mapping = { # [key, [pitches]]
        "kick":[0,[36]],
        "snare":[1,[38,40,37]],
        "crash":[2,[49,55,57,52]],
        "ride":[3,[51,59,53]],
        "open_hh":[4,[46,26]],
        "closed_hh":[5,[42,22,44]],
        "low_mid_tom":[6,[45,47]],
        "high_tom":[7,[48,50]]
        }
    
    # initiate magenta_transcription matrix with zeros
    magenta_transcription = np.zeros([length_in_samples,8])

    # fill magenta_transcription with onsets info
    for onset,inst in zip(drums_onsets,drums_instrument):
        onset = int(onset)
    
        # map pretty_midi pitch to magenta pitch value
        for magenta_inst in magenta_mapping.keys():
        
            if inst in magenta_mapping[magenta_inst][1]:
                magenta_transcription[onset][magenta_mapping[magenta_inst][0]] = 1
    
    return magenta_transcription
    

## best threshold

In [69]:
threshold_eval_dir = 'threshold_eval/'

if not os.path.exists(threshold_eval_dir):
    os.mkdir(threshold_eval_dir)
    
threshold_candidates = np.arange(0.01, 0.3, 0.01)

In [70]:
run = True # change to true to run

# randomly select 20 files from beat performances
audio_files_list = np.array(magenta_ds[magenta_ds['beat_type']=='beat']['audio_filename'])
audio_files_list = np.random.choice(audio_files_list, 20)

num_performances = len(audio_files_list)
num_instruments = 8

for performance_idx in range(num_performances):
    
    if run==False:
        break
        
    # print percentage done
    percentage = (performance_idx+1) * 100  / num_performances
    if percentage % 10 == 0:
        print(str(percentage) + '%')
    
    # get audio and midi file path
    audio_file = os.path.join(groove_path, audio_files_list[performance_idx])
    midi_file = os.path.join(groove_path, audio_files_list[performance_idx].split('.')[-2] + '.mid')

    
    # csv_path
    csv_path = os.path.join(threshold_eval_dir,'-'.join(audio_file.split('.')[-2].split('/')[1:])+'.csv')
    
    # if csv file already exists, jump to next file
    if os.path.isfile(csv_path):
        continue
    
    # get magenta onsets 
    magenta_onsets = get_magenta_onsets(midi_file)
    # get model onsets
    model_onsets = get_model_onsets(audio_file)
    
    
    # compute scores for each threshold_candidate and instrument and store them in dataframe
    df = pd.DataFrame(columns=['threshold','f_measure','instrument_idx'])

    
    for thres in threshold_candidates:
        
        out_8v_thres = np.array(model_onsets)
        out_8v_thres[out_8v_thres < thres] = 0
        out_8v_thres[out_8v_thres > 0] = 1
        
        for instrument_idx in range(num_instruments):
            f_measure,_,_ = compute_scores(magenta_onsets, out_8v_thres,instrument_idx)
            
            df = df.append({'threshold':thres,
                        'f_measure':f_measure,
                        'instrument_idx':instrument_idx
                           }, ignore_index=True)     

    # remove rows with instruments that do not appear
    df = df.replace(0, np.nan)
    df = df.dropna()
    idx = df.groupby(['instrument_idx'])['f_measure'].transform(max) == df['f_measure']
    df = df[idx].set_index('instrument_idx')
    
    # store data to csv
    df.to_csv(csv_path, index = True)


  out = out_full[ind]


10%
20%
30%
40%
50%
60%
70%
80%
90%
100%


In [71]:
# best value by instrument 
csv_files = os.listdir(threshold_eval_dir)

df_total = pd.DataFrame()

for idx,f in enumerate(csv_files):
    df = pd.DataFrame()
    
    file_path = os.path.join(threshold_eval_dir,f)    
    df = df.append(pd.read_csv(file_path),ignore_index=True)

    # if threshold values gave same f_measure, take min
    if 'instrument_idx' in df.columns:
        idx = df.groupby(['instrument_idx'])['threshold'].transform(min) == df['threshold']
        df = df[idx]
    
    # remove nan column
    if 'Unnamed: 0' in df.columns:
        df = df.drop(columns=['Unnamed: 0'])
        
    df_total = df_total.append(df)
    
df = df.astype({'instrument_idx':int})
df = df.drop(columns=['f_measure'])
df = df.groupby(['instrument_idx']).mean()

display(df)

Unnamed: 0_level_0,threshold
instrument_idx,Unnamed: 1_level_1
1,0.29
2,0.13
3,0.26
4,0.29
5,0.08
6,0.27
7,0.22


In [73]:
threshold = {
    0: 0,
    1: 0,
    2: 0,
    3: 0,
    4: 0,
    5: 0,
    6: 0,
    7: 0
}

for idx,instrument in enumerate(list(df.index.values)):
    threshold[instrument] = np.round(df.iloc[idx]['threshold'],2)

#instruments with no threshold optimized get the lowest threshold
for instrument in threshold.keys():
    if threshold[instrument] == 0:
        threshold[instrument] = np.round(df.min()['threshold'],2)

print(threshold)


{0: 0.08, 1: 0.29, 2: 0.13, 3: 0.26, 4: 0.29, 5: 0.08, 6: 0.27, 7: 0.22}


## model eval

In [18]:
eval_model_dir = 'eval_model'
if not os.path.exists(eval_model_dir):
    os.mkdir(eval_model_dir)

selected_models = [291, 295]
results_dir = 'results'

# list files in results folders
computed_files = os.listdir(os.path.join(results_dir, str(selected_models[0]) ))
print(computed_files)

num_instruments = 8
print(num_performances)

results/291
['291_drummer5-eval_session-5_funk-groove5_84_beat_4-4.json', '291_drummer7-session1-18_hiphop_100_beat_4-4.json', '291_drummer7-session1-14_jazz_100_beat_4-4.json', '291_drummer3-session2-12_rock_100_beat_4-4.json', '291_drummer8-session1-13_latin_118_beat_4-4.json', '291_drummer7-session3-22_pop-soft_83_beat_4-4.json', '291_drummer8-session1-14_hiphop_94_beat_4-4.json', '291_drummer8-session1-6_funk_80_beat_4-4.json', '291_drummer5-session1-10_latin-brazilian-sambareggae_96_beat_4-4.json', '291_drummer7-session2-97_pop_142_beat_4-4.json', '291_drummer5-session1-8_latin-venezuelan-merengue_162_beat_5-8.json', '291_drummer7-session3-67_neworleans-funk_93_beat_4-4.json', '291_drummer1-session3-6_dance-disco_120_beat_4-4.json']
444


In [28]:
f = os.path.join(os.path.join(results_dir, str(selected_models[0]) ),
                 '291_drummer5-eval_session-5_funk-groove5_84_beat_4-4.json')

performance_name = '_'.join(f.split('_')[1:])
print(performance_name)


drummer5-eval_session-5_funk-groove5_84_beat_4-4.json


In [47]:
import json
import pandas as pd
selected_models = [291, 295]
thres = {0: 0.04, 1: 0.08, 2: 0.04, 3: 0.04, 4: 0.04, 5: 0.05, 6: 0.04, 7: 0.04}
    
for model in selected_models:
    
    # path for storing model results
    model_dir = os.path.join(eval_model_dir, str(model))
    
    if not os.path.exists(model_dir):
        os.mkdir(model_dir)
        
    file_name = str(model) + '_' + performance_name
    file_path = f
    result_path = os.path.join(model_dir,file_name)
    
    with open(file_path) as json_file:
        result = json.load(json_file)
    
    magenta_onsets = result['magenta_onsets']
    model_onsets = result['model_onsets']
    
    
    csv_eval_path =  os.path.join(model_dir,
                                      file_name.split('.')[-2]+'.csv')
    
    # compute scores for each threshold_candidate and instrument and store them in dataframe
    df = pd.DataFrame(columns=['threshold','f_measure','precision','recall','instrument_idx'])
    
    for instrument_idx in range(num_instruments):  
            
            # 0 if < threshold, 1 if > threshold
            out_8v_thres = np.array(model_onsets)
            out_8v_thres[out_8v_thres < thres[instrument_idx]] = 0
            out_8v_thres[out_8v_thres > 0] = 1
            
            # compute evaluation scores
            f_measure,precision,recall = compute_scores(magenta_onsets, out_8v_thres,instrument_idx)
            
            #save in datatrame
            df = df.append({'threshold':thres,
                            'f_measure':f_measure,
                            'precision':precision,
                            'recall':recall,
                            'instrument_idx':instrument_idx
                           }, ignore_index=True)   

    # remove rows with instruments that do not appear
    df = df.replace(0, np.nan)
    df = df.dropna()
    idx = df.groupby(['instrument_idx'])['f_measure'].transform(max) == df['f_measure']
    df = df[idx].set_index('instrument_idx')
    
    # store data to csv
    df.to_csv(csv_path, index = True)
    

AttributeError: 'list' object has no attribute 'shape'

In [45]:
selected_models = [291, 295]
    
for model in selected_models:
    
    # path for storing model results
    model_dir = os.path.join(eval_model_dir, str(model))
    
    if not os.path.exists(model_dir):
        os.mkdir(model_dir)
        
    file_name = str(model) + '_' + f
    
    print(file_name)
    
    
    for performance_idx in range(num_performances):
    
        """
        # dev
        #if performance_idx>0:
        #    break
        
        # print percentage done
        percentage = (performance_idx+1) * 100  / num_performances
        if percentage % 10 == 0:
            print(str(percentage) + '%')
    
        # get audio and midi file path
        audio_file = os.path.join(groove_path, audio_files_list[performance_idx])
        midi_file = os.path.join(groove_path, audio_files_list[performance_idx].split('.')[-2] + '.mid')

    
        # model_results_json_path
        results_json_path = os.path.join(model_dir,'-'.join(audio_file.split('.')[-2].split('/')[1:])+'.json')
    
        # if csv file already exists do not compute again
        if not os.path.isfile(results_json_path):
            # get magenta onsets 
            magenta_onsets = get_magenta_onsets(midi_file)
            # get model onsets
            model_onsets = get_model_onsets(audio_file,model=model)
            results = {'magenta_onsets': magenta_onsets,
                       'model_onsets': model_onsets}
        
            with open(results_json_path, "w") as outfile:  
                json.dump(results, outfile,default=default) 
        """
        
        csv_eval_path =  os.path.join(eval_model_dir,
                                      'eval-'+'-'.join(audio_file.split('.')[-2].split('/')[1:])+'.json')
            
        # compute scores for each threshold_candidate and instrument and store them in dataframe
        df = pd.DataFrame(columns=['threshold','f_measure','precision','recall','instrument_idx'])
        
        for instrument_idx in range(num_instruments):       
            out_8v_thres = np.array(model_onsets)
            out_8v_thres[out_8v_thres < thres[instrument_idx]] = 0
            out_8v_thres[out_8v_thres > 0] = 1
            f_measure,precision,recall = compute_scores(magenta_transcription, out_8v_thres,instrument_idx)
            
            df = df.append({'threshold':thres,
                            'f_measure':f_measure,
                            'precision':precision,
                            'recall':recall,
                            'instrument_idx':instrument_idx
                           }, ignore_index=True)   

        # remove rows with instruments that do not appear
        df = df.replace(0, np.nan)
        df = df.dropna()
        idx = df.groupby(['instrument_idx'])['f_measure'].transform(max) == df['f_measure']
        df = df[idx].set_index('instrument_idx')
    
        # store data to csv
        df.to_csv(csv_path, index = True)

291_results/291/291_drummer5-eval_session-5_funk-groove5_84_beat_4-4.json


NameError: name 'audio_file' is not defined