Compute multif0 metrics for the Benetos and Duan algorithms on the Bach10, Su, and MedleyDB test sets, and save the results to CSV files.

At the end there is a sanity-check plot that overlays the estimated multif0 output on the reference annotation for one track.

In [None]:
import glob
import os
import mir_eval
import pandas

## evaluate

In [None]:
def get_mf0_scores(est_path, ref_path, test_set_name, algorithm_name,
                   save_path='../outputs'):
    """Evaluate multif0 estimates against references and save the scores.

    For every ``*.txt`` file in ``est_path`` a track key is built from the
    first two ``_``/``-`` separated tokens of the file name. The reference
    file in ``ref_path`` whose name starts with that key is loaded, and
    ``mir_eval.multipitch.evaluate`` is run on the reference/estimate pair.
    The per-track scores and their ``DataFrame.describe()`` summary are
    written to ``save_path`` as
    ``<test_set_name>_<algorithm_name>_all_scores.csv`` and
    ``<test_set_name>_<algorithm_name>_score_summary.csv``; the summary is
    also printed.

    Parameters
    ----------
    est_path : str
        Directory containing the estimated multif0 ``.txt`` files.
    ref_path : str
        Directory containing the reference multif0 ``.txt`` files.
    test_set_name : str
        Test set label used in the output file names.
    algorithm_name : str
        Algorithm label used in the output file names.
    save_path : str, optional
        Directory the CSV files are written to. It is created if it does
        not exist. Defaults to ``'../outputs'``.

    Raises
    ------
    ValueError
        If ``est_path`` contains no ``.txt`` files.
    IOError
        If no reference file matches the key of an estimate file.
    """
    # glob order is arbitrary; sorting keeps the CSV row order reproducible
    est_files = sorted(glob.glob(os.path.join(est_path, '*.txt')))
    if not est_files:
        raise ValueError(
            "No estimate files (*.txt) found in '{}'".format(est_path)
        )

    all_scores = []
    for est_file in est_files:
        file_key = "_".join(os.path.basename(est_file).replace('-', '_').split('_')[:2])
        file_key = file_key.split('.')[0]
        print(file_key)

        # sorted so the chosen reference is deterministic when several match
        file_glob = sorted(
            glob.glob(os.path.join(ref_path, '{}*.txt'.format(file_key)))
        )
        if not file_glob:
            raise IOError(
                "No reference file matching '{}*.txt' in '{}' "
                "(estimate file: '{}')".format(file_key, ref_path, est_file)
            )
        if len(file_glob) > 1:
            # ambiguous key: show the candidates; the first one is used
            print(file_glob)
        ref_file = file_glob[0]

        # load ground truth labels
        ref_times, ref_freqs = \
            mir_eval.io.load_ragged_time_series(ref_file)

        # load est labels
        est_times, est_freqs = \
            mir_eval.io.load_ragged_time_series(est_file)

        # get multif0 metrics and append
        scores = mir_eval.multipitch.evaluate(
            ref_times, ref_freqs, est_times, est_freqs)
        scores['track'] = file_key
        all_scores.append(scores)

    # make sure the output directory exists so the results are not lost
    # to a failed write after all tracks have been evaluated
    if not os.path.isdir(save_path):
        os.makedirs(save_path)

    # save scores to data frame
    scores_path = os.path.join(
        save_path, '{}_{}_all_scores.csv'.format(test_set_name, algorithm_name)
    )
    score_summary_path = os.path.join(
        save_path, "{}_{}_score_summary.csv".format(test_set_name, algorithm_name)
    )
    df = pandas.DataFrame(all_scores)
    summary = df.describe()
    df.to_csv(scores_path)
    summary.to_csv(score_summary_path)
    print(summary)

In [None]:
# Benetos on the Bach10 test set
get_mf0_scores(
    est_path='../comparisons/multif0/algorithm_outputs/bach10/benetos/',
    ref_path='../comparisons/multif0/datasets/bach10/gt_F0',
    test_set_name='bach10',
    algorithm_name='benetos',
)

In [None]:
# Duan on the Bach10 test set
get_mf0_scores(
    est_path='../comparisons/multif0/algorithm_outputs/bach10/duan/',
    ref_path='../comparisons/multif0/datasets/bach10/gt_F0',
    test_set_name='bach10',
    algorithm_name='duan',
)

In [None]:
# Benetos on the Su test set
get_mf0_scores(
    est_path='../comparisons/multif0/algorithm_outputs/su/benetos/',
    ref_path='../comparisons/multif0/datasets/su/gt_F0/',
    test_set_name='su',
    algorithm_name='benetos',
)

In [None]:
# Duan on the Su test set
get_mf0_scores(
    est_path='../comparisons/multif0/algorithm_outputs/su/duan/',
    ref_path='../comparisons/multif0/datasets/su/gt_F0/',
    test_set_name='su',
    algorithm_name='duan',
)

In [None]:
# Benetos on the MedleyDB test set
get_mf0_scores(
    est_path='../comparisons/multif0/algorithm_outputs/mdb_test/benetos/',
    ref_path='../comparisons/multif0/datasets/mdb_test/gt_F0/',
    test_set_name='mdb_test',
    algorithm_name='benetos',
)

In [None]:
import numpy as np

# Duan on the MedleyDB test set
get_mf0_scores(
    est_path='../comparisons/multif0/algorithm_outputs/mdb_test/duan/',
    ref_path='../comparisons/multif0/datasets/mdb_test/gt_F0/',
    test_set_name='mdb_test',
    algorithm_name='duan',
)

In [None]:
# One MedleyDB test track: fpath1 is the Benetos estimate, fpath2 the reference
fpath1 = '../comparisons/multif0/algorithm_outputs/mdb_test/benetos/Phoenix_SeanCaughlinsTheScartaglen_multif0_MIX16.wav.txt'
fpath2 = '../comparisons/multif0/datasets/mdb_test/gt_F0/Phoenix_SeanCaughlinsTheScartaglen.txt'

# Load both ragged time series in one pass (estimate first, then reference)
(est_times, est_freqs), (ref_times, ref_freqs) = map(
    mir_eval.io.load_ragged_time_series, (fpath1, fpath2)
)

In [None]:
def get_tf(times, freqs):
    """Flatten a ragged multif0 series into parallel time/frequency lists.

    ``times`` and ``freqs`` are iterated in lockstep; every frequency in a
    frame is paired with that frame's time stamp. Frames without any
    frequency contribute nothing, and iteration stops at the shorter of the
    two inputs.

    Returns a ``(t_array, f_array)`` pair of equally long lists.
    """
    pairs = [
        (stamp, pitch)
        for stamp, frame in zip(times, freqs)
        for pitch in frame
    ]
    flat_times = [stamp for stamp, _ in pairs]
    flat_freqs = [pitch for _, pitch in pairs]
    return flat_times, flat_freqs

In [None]:
est_t, est_f = get_tf(est_times, est_freqs)
ref_t, ref_f = get_tf(ref_times, ref_freqs)

import matplotlib.pyplot as plt
%matplotlib inline
plt.figure(figsize=(15, 7))
plt.plot(est_t, est_f, '.r')
plt.plot(ref_t, ref_f, '.b')
plt.show()