In [9]:
import librosa
from IPython.display import Audio
import numpy as np
import matplotlib.pyplot as plt
import mir_eval.sonify # to synthesize a signal

In [10]:
# Sampling rate in Hz used for synthesis and playback of the sweep.
sr = 22050

# Synthesize a one-second chirp whose frequency sweeps from C3 (fmin) to E5 (fmax).
y_sweep = librosa.chirp(
    fmin=librosa.note_to_hz("C3"),
    fmax=librosa.note_to_hz("E5"),
    sr=sr,
    duration=1,
)

In [11]:
# Show an inline audio player for the synthesized sweep.
Audio(y_sweep, rate=sr)

In [12]:
# Load librosa's "trumpet" example recording.
# Note: `sr` is rebound here to the sampling rate returned by the loader.
y, sr = librosa.load(librosa.example("trumpet"))

# Show an inline audio player for the recording.
Audio(y, rate=sr)

In [13]:
# f0 is the fundamental frequency: the lowest frequency of a periodic waveform,
# which is perceived as the pitch of the sound.
#
# pYIN returns, per analysis frame:
#   f0           - the estimated fundamental frequency in Hz
#   voiced_flag  - boolean mask, True where the frame is judged voiced
#   voiced_probs - probability that the frame is voiced
# fill_na=None asks pYIN to fill unvoiced frames with a best-guess f0 instead of NaN,
# so f0 can be multiplied by a sign mask later without propagating NaNs.
f0, voiced_flag, voiced_probs = librosa.pyin(
    y,
    sr=sr,
    fmin=librosa.note_to_hz("C2"),
    fmax=librosa.note_to_hz("C7"),
    fill_na=None,
)

# Timestamp (in seconds) of each f0 frame. The sampling rate is passed explicitly
# so the time axis stays aligned with the audio even if it was not loaded at
# librosa's default rate of 22050 Hz.
times = librosa.times_like(f0, sr=sr)

In [14]:
# mir_eval's synthesizer uses negative f0 values to indicate unvoiced regions.
# Build a sign array that is +1 for voiced frames and -1 for unvoiced frames, so that
# f0 * vneg leaves voiced estimates unchanged and negates the frequency of unvoiced frames.
vneg = np.where(voiced_flag, 1, -1)

# Synthesize an audio signal that follows the estimated pitch contour.
y_f0 = mir_eval.sonify.pitch_contour(times, f0 * vneg, sr)

# Show an inline audio player for the sonified pitch track.
Audio(y_f0, rate=sr)