This notebook is intended to test out the `mir_eval` functionality on heuristically-converted MIDI vs. ground truth MIDI.

In [1]:
import pretty_midi
import os
import pickle
import numpy as np
import mir_eval
import matplotlib.pyplot as plt
from scipy.io.wavfile import write as wavwrite

The code below to the "debug" section is intended to be run consistently.

In [3]:
hpath = '/juice/scr/rjcaste/curis/wavegenie/data/CustomViolin_params2midi_dev/heuristic_midis/elvis.p'
hpath = '../data/CustomViolin_16k/midi/val/elvis.mid'
groundtruth_path = '/juice/scr/rjcaste/curis/wavegenie/data/CustomViolin_params2midi_dev/midi/elvis.mid'

In [6]:
gt_midi = pretty_midi.PrettyMIDI(groundtruth_path)
#hmidi = pickle.load(open(hpath, 'rb'))
hmidi = pretty_midi.PrettyMIDI(hpath)

In [10]:
def midi2_ip(notes):
    """
    Converts notes list (from pretty_midi) to intervals and pitches.
    """
    
    onsets = np.array([n.start for n in notes])
    offsets = np.array([n.end for n in notes])
    pitches = np.array([n.pitch for n in notes])
    intervals = np.vstack((onsets, offsets)).T
    
    return intervals, pitches

Get estimated intervals and pitches...

In [5]:
#pitches, onsets, offsets = hmidi['pitches'], hmidi['onsets_sec'], hmidi['offsets_sec']

In [15]:
#est_pitches = np.array(pitches, dtype=int)

In [16]:
#est_intervals = np.vstack((onsets, offsets)).T

In [12]:
est_intervals, est_pitches = midi2_ip(hmidi.instruments[0].notes)

Get ground truth intervals and pitches...

In [13]:
ref_intervals, ref_pitches = midi2_ip(gt_midi.instruments[0].notes)

In [14]:
mir_eval.transcription.evaluate(ref_intervals, ref_pitches, est_intervals, est_pitches, onset_tolerance=0.10)

OrderedDict([('Precision', 0.3125),
             ('Recall', 0.32894736842105265),
             ('F-measure', 0.32051282051282054),
             ('Average_Overlap_Ratio', 0.9017790896226003),
             ('Precision_no_offset', 0.4875),
             ('Recall_no_offset', 0.5131578947368421),
             ('F-measure_no_offset', 0.5),
             ('Average_Overlap_Ratio_no_offset', 0.716148091921245),
             ('Onset_Precision', 0.4875),
             ('Onset_Recall', 0.5131578947368421),
             ('Onset_F-measure', 0.5),
             ('Offset_Precision', 0.6375),
             ('Offset_Recall', 0.6710526315789473),
             ('Offset_F-measure', 0.6538461538461537)])

In [25]:
mir_eval.transcription.evaluate(ref_intervals, ref_pitches, est_intervals, np.array([int(round(p)) for p in est_pitches]))

OrderedDict([('Precision', 0.0),
             ('Recall', 0.0),
             ('F-measure', 0.0),
             ('Average_Overlap_Ratio', 0),
             ('Precision_no_offset', 0.0),
             ('Recall_no_offset', 0.0),
             ('F-measure_no_offset', 0.0),
             ('Average_Overlap_Ratio_no_offset', 0),
             ('Onset_Precision', 0.0),
             ('Onset_Recall', 0.0),
             ('Onset_F-measure', 0.0),
             ('Offset_Precision', 0.0),
             ('Offset_Recall', 0.0),
             ('Offset_F-measure', 0.0)])

## Debugging

In [25]:
# synthesize
midi = pretty_midi.PrettyMIDI()

In [26]:
violin = pretty_midi.Instrument(program=40)

In [27]:
violin.notes = [pretty_midi.Note(velocity=100, pitch=int(round(p)), start=i[0], end=i[1]) for i, p in zip(est_intervals, est_pitches)]

In [28]:
violin.notes = violin.notes[:100]

In [29]:
midi.instruments = [violin]

In [31]:
out = midi.fluidsynth(fs=16000)
out_gt = gt_midi.fluidsynth(fs=16000)

In [33]:
from wavegenie.util import preview_audio, DDSP_DEFAULT_FS_AUDIO
start_time = 20
end_time = 25
start_idx = 16000*start_time
end_idx = 16000*end_time
audio = np.stack((out[start_idx:end_idx], out_gt[start_idx:end_idx])).T
preview_audio(audio)

In [53]:
preview_audio(np.stack((out[16000*start_time:16000*end_time], out[16000*start_time + 100:16000*end_time + 100])).T)

In [119]:
midi.instruments[0].program = 0

In [120]:
est_audio = midi.fluidsynth(fs=16000)

In [97]:
wavwrite('heuristic.wav', 16000, audio)

In [107]:
ref_audio = gt_midi.fluidsynth(fs=16000)

In [110]:
ref_audio.shape

(3034471,)

In [111]:
est_audio.shape

(3036353,)

In [121]:
both = np.stack((est_audio[:3034471], ref_audio)).T.astype('float32')

In [122]:
wavwrite('heuristic.wav', 16000, both)