In [6]:
import pandas as pd

In [7]:
def symbol_basic_eval(df, tokenizer_name='ChordSymbolTokenizer'):
    """Compute token-level accuracy ratios for a symbol-based chord tokenizer.

    Each row of ``df`` holds one piece as a space-separated label string and
    a space-separated prediction string. Tokens are compared position by
    position, driven by the label sequence: only positions where the *label*
    is a bar / position / chord token are counted.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``'labels'`` and ``'predictions'``.
        A cell that is not a string (e.g. the NaN that ``pd.read_csv``
        produces for an empty field) is treated as an empty token sequence.
    tokenizer_name : str
        ``'ChordSymbolTokenizer'``: chord tokens are those containing ``':'``
        and the root is the text before the first ``':'``.
        ``'GCTSymbolTokenizer'``: chord tokens are those containing ``'['``
        and the root is the text after the first character, up to the first
        ``'x'``.

    Returns
    -------
    dict
        Ratios in ``[0, 1]`` under the keys ``'correct_bar_predictions'``,
        ``'correct_new_chord_predictions'``, ``'correct_position_predictions'``,
        ``'correct_chord_predictions'`` and ``'correct_root_predictions'``.
        A ratio is ``nan`` when the labels contain no token of that kind.

    Raises
    ------
    ValueError
        If ``tokenizer_name`` is not one of the two supported names.
    """
    # Pick the chord-token marker and the root extractor once, up front,
    # instead of re-testing the tokenizer name for every token.
    if tokenizer_name == 'ChordSymbolTokenizer':
        chord_marker = ':'

        def root_of(token):
            return token.split(':')[0]
    elif tokenizer_name == 'GCTSymbolTokenizer':
        chord_marker = '['

        def root_of(token):
            return token[1:].split('x')[0]
    else:
        raise ValueError(
            'symbol_basic_eval does not support tokenizer: ' + repr(tokenizer_name)
        )

    def split_tokens(text):
        # Non-string cells (NaN / None) carry no tokens.
        return text.split(' ') if isinstance(text, str) else []

    def ratio(correct, total):
        # An accuracy over zero examples is undefined; report NaN rather
        # than raising ZeroDivisionError.
        return correct / total if total else float('nan')

    correct_bars = 0
    total_bars = 0

    # "New chord" and "exact position" share one denominator: both are
    # counted at every label token containing 'position_'.
    correct_new_chords = 0
    correct_positions = 0
    total_positions = 0

    correct_chords = 0
    correct_roots = 0
    total_chords = 0

    for label_text, prediction_text in zip(df['labels'], df['predictions']):
        label_tokens = split_tokens(label_text)
        predicted_tokens = split_tokens(prediction_text)
        for i, label in enumerate(label_tokens):
            # A prediction shorter than its label simply misses the
            # remaining tokens; None never matches anything below.
            predicted = predicted_tokens[i] if i < len(predicted_tokens) else None

            # how many bars were correctly predicted
            if label == '<bar>':
                total_bars += 1
                if predicted == '<bar>':
                    correct_bars += 1

            if 'position_' in label:
                total_positions += 1
                # a new chord was predicted at all (any position token)
                if predicted is not None and 'position_' in predicted:
                    correct_new_chords += 1
                # the exact position was predicted
                if predicted == label:
                    correct_positions += 1

            # how many exact chords and roots were predicted
            if chord_marker in label:
                total_chords += 1
                if predicted == label:
                    correct_chords += 1
                if predicted is not None and root_of(predicted) == root_of(label):
                    correct_roots += 1

    results = {
        'correct_bar_predictions': ratio(correct_bars, total_bars),
        'correct_new_chord_predictions': ratio(correct_new_chords, total_positions),
        'correct_position_predictions': ratio(correct_positions, total_positions),
        'correct_chord_predictions': ratio(correct_chords, total_chords),
        'correct_root_predictions': ratio(correct_roots, total_chords)
    }
    return results
# end symbol_basic_eval

In [8]:
# Registry mapping each tokenizer name to its evaluation function.
# Every name starts out as None (no evaluator); the symbol-based
# tokenizers are then wired to symbol_basic_eval. Key order is the
# order of this list.
tokenizers = dict.fromkeys([
    'ChordSymbolTokenizer',
    'RootTypeTokenizer',
    'PitchClassTokenizer',
    'RootPCTokenizer',
    'GCTRootPCTokenizer',
    'GCTSymbolTokenizer',
    'GCTRootTypeTokenizer',
])
tokenizers['ChordSymbolTokenizer'] = symbol_basic_eval
tokenizers['GCTSymbolTokenizer'] = symbol_basic_eval

# Folder prefix for the per-tokenizer CSV files. It is concatenated
# directly with the file name, so the trailing slash is required.
tokenized_folder = 'tokenized/gen/'

# Filled in by the evaluation loop: tokenizer name -> metrics dict.
results = {}

In [None]:
# Evaluate every tokenizer that has an evaluation function registered;
# entries mapped to None are skipped.
for tokenizer_name, eval_fn in tokenizers.items():
    if eval_fn is None:
        continue
    csv_path = tokenized_folder + tokenizer_name + '.csv'
    df = pd.read_csv(csv_path)
    results[tokenizer_name] = eval_fn(df, tokenizer_name=tokenizer_name)

In [12]:
# Show the metrics of the two symbol-based tokenizers, one dict per line.
print(
    results['ChordSymbolTokenizer'],
    results['GCTSymbolTokenizer'],
    sep='\n',
)

{'correct_bar_predictions': 0.9731942575145806, 'correct_new_chord_predictions': 0.7508786103817411, 'correct_position_predictions': 0.7477681276509796, 'correct_chord_predictions': 0.11949101191678449, 'correct_root_predictions': 0.19495051504746516}
{'correct_bar_predictions': 0.9344997756841633, 'correct_new_chord_predictions': 0.7916380529186023, 'correct_position_predictions': 0.7857402544940416, 'correct_chord_predictions': 0.16068453051114615, 'correct_root_predictions': 0.335014636343166}


In [10]:
# Dump the whole nested results dict (tokenizer name -> metric name -> ratio).
print(results)

{'ChordSymbolTokenizer': {'correct_bar_predictions': 0.9731942575145806, 'correct_new_chord_predictions': 0.7508786103817411, 'correct_position_predictions': 0.7477681276509796, 'correct_chord_predictions': 0.11949101191678449, 'correct_root_predictions': 0.19495051504746516}, 'GCTSymbolTokenizer': {'correct_bar_predictions': 0.9344997756841633, 'correct_new_chord_predictions': 0.7916380529186023, 'correct_position_predictions': 0.7857402544940416, 'correct_chord_predictions': 0.16068453051114615, 'correct_root_predictions': 0.335014636343166}}
