This notebook explores how far we can push the multi-f0 results on the Bach10 dataset by injecting extra information. We constrain the maximum polyphony to 4 and sweep the peak-picking threshold across a range of values.

In [None]:
cd ../deepsalience/

In [None]:
import glob
import os

import matplotlib.pyplot as plt
import mir_eval
import numpy as np
import pandas as pd
import scipy
import scipy.signal
import seaborn as sns

import compute_training_data as C
import evaluate

%matplotlib inline

In [None]:
def pitch_activations_to_mf0_polyphony_constraint(pitch_activation_mat, thresh,
                                                  max_polyphony=4):
    """Convert a pitch activation map to multif0 format with a polyphony cap.

    Relative maxima along axis 0 (the frequency axis) whose activation is at
    least ``thresh`` are kept as candidates. In every time frame only the
    ``max_polyphony`` strongest candidates are reported.

    Parameters
    ----------
    pitch_activation_mat : np.ndarray, shape (n_freqs, n_times)
        Activation map. Row ``i`` is mapped to ``C.get_freq_grid()[i]`` and
        the time stamps come from ``C.get_time_grid(n_times)``.
    thresh : float
        Minimum activation a peak must reach to be kept.
    max_polyphony : int, optional
        Maximum number of frequencies reported per frame (default 4).

    Returns
    -------
    times : array-like
        Time grid returned by ``C.get_time_grid``.
    est_freqs : list of np.ndarray
        One array per frame with the selected frequencies, ordered from the
        strongest to the weakest activation (empty if nothing was selected).

    Raises
    ------
    ValueError
        If ``max_polyphony`` is negative.
    """
    if max_polyphony < 0:
        raise ValueError("max_polyphony must be non-negative")

    freqs = C.get_freq_grid()
    times = C.get_time_grid(pitch_activation_mat.shape[1])

    # Mark the peaks explicitly instead of copying them into a zero matrix:
    # with a zero-filled matrix every non-peak bin passes `>= thresh` as soon
    # as thresh <= 0, which pads frames with arbitrary non-peak frequencies.
    peak_mask = np.zeros(pitch_activation_mat.shape, dtype=bool)
    peak_mask[scipy.signal.argrelmax(pitch_activation_mat, axis=0)] = True
    freq_idx, time_idx = np.where(peak_mask & (pitch_activation_mat >= thresh))

    # Group the candidate frequency indices by time frame.
    est_freq_idx = [[] for _ in range(len(times))]
    for f, t in zip(freq_idx, time_idx):
        est_freq_idx[t].append(f)

    est_freqs = []
    for t, f_list in enumerate(est_freq_idx):
        if f_list:
            amplitudes = pitch_activation_mat[f_list, t]
            # Strongest first, then truncate to the allowed polyphony.
            ordered_idx = np.flip(np.argsort(amplitudes), 0)
            est_freqs.append(
                np.array([freqs[f_list[i]] for i in ordered_idx[:max_polyphony]])
            )
        else:
            est_freqs.append(np.array([]))

    return times, est_freqs

In [None]:
# Filename patterns (relative to the current working directory) for the Bach10
# ground-truth annotations and the saved CNN predictions; `{}` is the track id.
GT_PATTERN = "../comparisons/multif0/datasets/bach10/gt_F0/{}_GTF0s.txt"
PREDICTION_PATTERN = "../comparisons/multif0/experiment11b_output/{}_prediction.npy"

# Sorted so that the per-track score lists have a deterministic order.
fpaths = sorted(glob.glob(GT_PATTERN.format("*")))
# Track id = first two underscore-separated fields of the annotation filename.
trackids = ['_'.join(os.path.basename(f).split('_')[:2]) for f in fpaths]

thresh_vals = np.arange(0, 1, 0.05)

# all_scores[threshold] -> list of mir_eval score dicts, one per track
# (in `trackids` order).
all_scores = {t: [] for t in thresh_vals}
for trackid in trackids:
    cnn_npy = PREDICTION_PATTERN.format(trackid)
    gt_output = GT_PATTERN.format(trackid)

    # Load each track once; only the thresholding depends on `t`.
    Y = np.load(cnn_npy)
    g_times, g_freqs = mir_eval.io.load_ragged_time_series(gt_output)

    for t in thresh_vals:
        c_times, c_freqs = pitch_activations_to_mf0_polyphony_constraint(Y, t)
        scores = mir_eval.multipitch.evaluate(g_times, g_freqs, c_times, c_freqs)
        all_scores[t].append(scores)


In [None]:
# Mean of each metric across tracks, one value per swept threshold.
acc = [np.mean([s['Accuracy'] for s in all_scores[t]]) for t in thresh_vals]
recall = [np.mean([s['Recall'] for s in all_scores[t]]) for t in thresh_vals]
precision = [np.mean([s['Precision'] for s in all_scores[t]]) for t in thresh_vals]
# F-measure of the mean precision/recall; defined as 0 when both are 0 so that
# thresholds yielding no correct estimates do not produce NaN.
fmeasure = [
    2.0*p*r/(p+r) if (p + r) > 0 else 0.0
    for (r, p) in zip(recall, precision)
]

# Threshold marked with the dashed vertical line
# (presumably the operating point used elsewhere -- confirm).
MARKED_THRESH = 0.3
# Upper limit of the accuracy axis.
ACC_YMAX = 0.65

# Apply the seaborn style before the figure is created so the first run and
# every re-run of this cell render identically.
sns.set(font_scale=1.2)
sns.set_style('whitegrid')

fig, ax = plt.subplots(figsize=(5, 2))
ax.plot([MARKED_THRESH, MARKED_THRESH], [0, ACC_YMAX], '--', color='grey')
ax.plot(thresh_vals, acc, 'o-', color='#CF6766')
ax.set_ylabel("Accuracy")
ax.set_xlabel("Threshold")

ax.set_xlim([0, 1])
ax.set_ylim([0, ACC_YMAX])

fig.savefig('../paper-figs/bach10_threshsweep.pdf', format='pdf', bbox_inches='tight')

# plt.subplot(122)
# plt.plot([0.3, 0.3], [0, 1], '--', color='grey')
# plt.plot(thresh_vals, fmeasure, '.')
# plt.ylabel("F1")
# plt.xlabel("Threshold")

# plt.subplot(223)
# plt.plot([0.3, 0.3], [0, 1], '--', color='grey')
# plt.plot(thresh_vals, recall, '.')
# plt.ylabel("Recall")
# plt.xlabel("Threshold")


# plt.subplot(224)
# plt.plot([0.3, 0.3], [0, 1], '--', color='grey')
# plt.plot(thresh_vals, precision, '.')
# plt.ylabel("Precision")
# plt.xlabel("Threshold")


In [None]:
# Summary statistics of the per-track scores at a single threshold.
# The keys of `all_scores` are floats generated by np.arange, so an exact
# literal lookup only works when the literal happens to be bit-identical to
# the grid value (e.g. 0.3 is not). Look up the nearest swept threshold instead.
REPORT_THRESH = 0.1
report_key = min(all_scores, key=lambda t: abs(t - REPORT_THRESH))
score_df = pd.DataFrame(all_scores[report_key])
score_df.describe()