# Converter Test: validation of the Converters module

In [41]:
import os
import sys
import glob

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from pathlib import Path
from statistics import mean

# Folder that holds the manually checked conversion sheets (relative to the CWD).
RESULTS_PATH = Path('../choco/converters/validation/results')

# Expose the sibling 'choco' directory of the current working directory's parent
# to the import system.
sys.path.append(str(Path.cwd().parent / 'choco'))

In [42]:
def open_results(results_path: str) -> None:
    """Unimplemented stub: accepts a path and does nothing.

    Args:
        results_path: Currently ignored.

    Returns:
        None.
    """
    return None

### Process results folder

In [43]:
# Maps a partition name (the prefix of a results-sheet filename, before the
# first underscore) to the label stored in the 'converter' column.
converters_group = {
    'wikifonia': 'leadsheet',
    'ireal-pro': 'leadsheet',
    'weimar': 'leadsheet',
    'nottingham': 'leadsheet',
    'when-in-rome': 'roman',
    'rock-corpus': 'roman',
    'jazz-corpus': 'leadsheet',
    'band-in-a-box': 'prettify-harte',
    'mozart-piano-sonatas': 'roman',
}

# Sheets under s2-01/s2-02 form the biased set, those under s2-03/s2-04 the
# unbiased set; the search is recursive below RESULTS_PATH.
biased_paths = list(RESULTS_PATH.rglob('s2-0[1-2]/**/*.xlsx'))
unbiased_paths = list(RESULTS_PATH.rglob('s2-0[3-4]/**/*.xlsx'))

# Report how many sheets were found in each set, listing their file stems.
for kind, found in (('biased', biased_paths), ('unbiased', unbiased_paths)):
    stems = [sheet.stem for sheet in found]
    print(f'Found {len(found)} {kind} evaluation files:\n {stems}\n')

Found 20 biased evaluation files:
 ['ireal-pro_116', 'ireal-pro_503', 'rock-corpus_104', 'rock-corpus_30', 'when-in-rome_250', 'when-in-rome_244', 'weimar_222', 'weimar_328', 'wikifonia_4429', 'wikifonia_2286', 'ireal-pro_914', 'ireal-pro_542', 'rock-corpus_4', 'rock-corpus_16', 'when-in-rome_109', 'when-in-rome_406', 'weimar_263', 'weimar_8', 'wikifonia_1047', 'wikifonia_1194']

Found 10 unbiased evaluation files:
 ['ireal-pro_558', 'ireal-pro_1103', 'rock-corpus_34', 'rock-corpus_183', 'when-in-rome_205', 'when-in-rome_61', 'weimar_433', 'weimar_279', 'wikifonia_4327', 'wikifonia_3442']



In [44]:
# Collects one concatenated frame per evaluation set (the biased one is added here).
all_validated = []

# One frame per biased sheet, restricted to rows whose 'correct?' cell is filled in.
biased_validated = []

for biased_file in biased_paths:
    # The filename prefix before the first underscore names the partition.
    partition_name = biased_file.stem.split('_')[0]
    sheet = pd.read_excel(biased_file, engine='openpyxl')
    judged_rows = sheet[sheet['correct?'].notnull()].assign(
        partition=partition_name,
        converter=converters_group[partition_name],
    )
    biased_validated.append(judged_rows)

# Stack all sheets and discard the columns not used in the analysis.
all_biased_validated = pd.concat(biased_validated).drop(
    ['type', 'time', 'duration'], axis=1
)
all_validated.append(all_biased_validated)
all_biased_validated

Unnamed: 0,original,converted,correct?,notes,partition,converter,Unnamed: 7
24,E^7,E:maj7,1.0,,ireal-pro,leadsheet,
25,C7b13,C:9(1b13),0.0,C:9(b13),ireal-pro,leadsheet,
26,F-7,F:min7,1.0,,ireal-pro,leadsheet,
27,Bb-7,Bb:min7,1.0,,ireal-pro,leadsheet,
28,Eb7,Eb:7,1.0,,ireal-pro,leadsheet,
...,...,...,...,...,...,...,...
6,B-7/E-,Bb:7/4,1.0,,wikifonia,leadsheet,
7,E-,Eb:maj,1.0,,wikifonia,leadsheet,
8,E-7,Eb:7,1.0,,wikifonia,leadsheet,
9,A-/E-,Ab:maj/5,1.0,,wikifonia,leadsheet,


In [45]:
# One frame per unbiased sheet; every row of each sheet is kept.
unbiased_validated = []

for unbiased_file in unbiased_paths:
    # The filename prefix before the first underscore names the partition.
    partition_name = unbiased_file.stem.split('_')[0]
    unbiased_validated.append(
        pd.read_excel(unbiased_file, engine='openpyxl').assign(
            partition=partition_name,
            converter=converters_group[partition_name],
        )
    )

# Stack all sheets and register the result next to the other evaluation sets.
all_unbiased_validated = pd.concat(unbiased_validated)
all_validated.append(all_unbiased_validated)
all_unbiased_validated

Unnamed: 0,original,converted,correct?,notes,partition,converter
0,A7,A:7,True,,ireal-pro,leadsheet
1,Eb-7,Eb:min7,True,,ireal-pro,leadsheet
2,Db7,Db:7,True,,ireal-pro,leadsheet
3,G13,G:9(113),False,"G:7(9,11,13)",ireal-pro,leadsheet
4,Db^7,Db:maj7,True,,ireal-pro,leadsheet
...,...,...,...,...,...,...
5,Cm9,C:min9,True,,wikifonia,leadsheet
6,Cm7,C:min7,True,,wikifonia,leadsheet
7,F#dim/G,F#:dim/b9,True,,wikifonia,leadsheet
8,B-7,Bb:7,True,,wikifonia,leadsheet


In [46]:
# Stack every frame collected in all_validated row-wise into a single table.
# Row labels are carried over from the individual frames, so they may repeat.
data = pd.concat(all_validated, axis=0)
data

Unnamed: 0,original,converted,correct?,notes,partition,converter,Unnamed: 7
24,E^7,E:maj7,1.0,,ireal-pro,leadsheet,
25,C7b13,C:9(1b13),0.0,C:9(b13),ireal-pro,leadsheet,
26,F-7,F:min7,1.0,,ireal-pro,leadsheet,
27,Bb-7,Bb:min7,1.0,,ireal-pro,leadsheet,
28,Eb7,Eb:7,1.0,,ireal-pro,leadsheet,
...,...,...,...,...,...,...,...
5,Cm9,C:min9,1.0,,wikifonia,leadsheet,
6,Cm7,C:min7,1.0,,wikifonia,leadsheet,
7,F#dim/G,F#:dim/b9,1.0,,wikifonia,leadsheet,
8,B-7,Bb:7,1.0,,wikifonia,leadsheet,


## Compute accuracy per converter

In [65]:
# Per-converter accuracy (mean of the 'correct?' column) for each evaluation set.
#
# Two fixes compared with the naive form:
# * `',' in series` tests the Series *index*, not its values, so the comma
#   check could never fire; the values are now searched explicitly.
# * Grouping by the plain column name (not a one-element list) keeps the group
#   key a scalar string on every pandas version; a list key yields 1-tuples on
#   pandas >= 2.0, which would change the printed labels and the dict keys.
for subset_label, subset in (
    ('biased ', all_biased_validated),
    ('unbiased ', all_unbiased_validated),
):
    for converter, rows in subset.groupby('converter'):
        # Show the original chord labels that contain a comma, if any.
        originals = rows['original'].astype(str)
        with_comma = originals[originals.str.contains(',', regex=False)]
        if not with_comma.empty:
            print(with_comma)
        print(subset_label, converter, ' has accuracy: ', rows['correct?'].mean().round(2))

# Accuracy per converter over both evaluation sets, plus the overall mean.
data_accuracy = {}

for converter, rows in data.groupby('converter'):
    accuracy = rows['correct?'].mean().round(3)
    data_accuracy[converter] = accuracy
    print('all data ', converter, ' has accuracy: ', accuracy)

# 'average' is the mean over all rows, not the mean of the per-converter values.
data_accuracy['average'] = data['correct?'].mean().round(3)
data_accuracy


biased  leadsheet  has accuracy:  0.99
biased  roman  has accuracy:  0.99
unbiased  leadsheet  has accuracy:  0.95
unbiased  roman  has accuracy:  1.0
all data  leadsheet  has accuracy:  0.977
all data  roman  has accuracy:  0.992


{'leadsheet': 0.977, 'roman': 0.992, 'average': 0.983}