Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] YIN #974

Closed
wants to merge 8 commits into from
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
163 changes: 163 additions & 0 deletions librosa/core/pitch.py
Expand Up @@ -336,3 +336,166 @@ def piptrack(y=None, sr=22050, S=None, n_fft=2048, hop_length=None,
+ dskew[idx[:, 0], idx[:, 1]])

return pitches, mags


def yin(y, sr=22050, frame_length=512, hop_length=None, fmin=40, fmax=None,
cumulative=False, interpolate=True,
threshold_1=0.1, threshold_2=0.2, pad_mode='reflect'):
'''Fundamental frequency (F0) estimation. [1]_


.. [1] De Cheveigné, A., & Kawahara, H. (2002).
"YIN, a fundamental frequency estimator for speech and music."
The Journal of the Acoustical Society of America, 111(4), 1917-1930.

Parameters
----------
y : np.ndarray [shape=(n,)]
audio time series

sr : number > 0 [scalar]
sampling rate of `y` in Hertz

frame_length : int [scalar]
length of the frame in samples.
By default, frame_length=512 corresponds to a time scale of 23 ms at
a sampling rate of 22050 Hz. This is close to the default of the
original publication.
We recommend setting `frame_length` to a power of two for optimizing
the speed of the fast Fourier transform (FFT) algorithm.
lostanlen marked this conversation as resolved.
Show resolved Hide resolved

hop_length : None or int > 0 [scalar]
number of audio samples between adjacent YIN predictions.
If `None`, defaults to `frame_length // 4`.

fmin: number > 0 [scalar]
minimum frequency in Hertz

fmax: None or number > 0 [scalar]
maximum frequency in Hertz. If `None`, defaults to fmax = sr / 4.0

threshold_1: number >= 0 [scalar]
absolute threshold for peak estimation

threshold_2: number >= 0 [scalar]
absolute threshold for aperiodicity estimation
NB: we recommend setting threshold_2 >= threshold_1.

lostanlen marked this conversation as resolved.
Show resolved Hide resolved
pad_mode: string or function
This argument is passed to `np.pad` for centering frames before
computing autocorrelation.
By default (`pad_mode="reflect"`), `y` is padded on both sides with its
own reflection, mirrored around its first and last sample respectively.

Returns
-------
f0: np.ndarray [shape=(n_frames,)]
time series of fundamental frequencies in Hertz.
Null values represent the absence of a perceptible fundamental frequency.

See also
--------
piptrack: sinusoidal peak interpolation

Examples
--------
Computing a fundamental frequency curve from an audio input:

>>> y = librosa.chirp(440, 880, duration=7.0)
>>> yin(y)
array([ 0. , 0. , 222.62295321, ..., 877.12362404,
885.76086659, 886.80491703])
'''
# Set the maximal frequency.
if fmax is None:
fmax = sr / 4
lostanlen marked this conversation as resolved.
Show resolved Hide resolved

# Set the default hop, if it's not already specified.
if hop_length is None:
hop_length = int(frame_length // 4)

# Check that audio is valid.
util.valid_audio(y, mono=True)

# Pad the time series so that frames are centered.
y = np.pad(y, int(frame_length // 2), mode=pad_mode)

# Compute autocorrelation in each frame.
y_frames = util.frame(y, frame_length=frame_length, hop_length=hop_length)
n_frames = y_frames.shape[1]
acf_frames = librosa.autocorrelate(y_frames, axis=0)

# Difference function.
min_period = np.maximum(int(np.floor(sr/fmax)), 2)
max_period = np.minimum(int(np.ceil(sr/fmin)), frame_length-1)
boxcar_window = scipy.signal.windows.boxcar(frame_length)
lostanlen marked this conversation as resolved.
Show resolved Hide resolved
energy = np.convolve(y*y, boxcar_window, mode="same")
lostanlen marked this conversation as resolved.
Show resolved Hide resolved
energy_frames = util.frame(
energy, frame_length=frame_length, hop_length=hop_length)
energy_0 = energy_frames[0, :]
energy_tau = energy_frames[(min_period-1):(max_period+1), :]
acf_tau = acf_frames[(min_period-1):(max_period+1), :]
yin_frames = energy_0 + energy_frames - 2*acf_frames

# Cumulative mean normalized difference function.
# NB: Equation 8 in de Cheveigné and Kawahara JASA 2002 seems to imply that
# the denominator is: cumsum(difference_fames)/tau_range, not
# cumsum(difference_frames/tau_range) as we implement it.
# However, going back to line 48 of the MATLAB code by AdC, we find the line:
# dd= d(2:end) ./ (cumsum(d(2:end)) ./ (1:(p.maxprd)));
# in which the parentheses indicate that the division by tau must happen
# before the cumulative summation, not after.
# Therefore, in this implementation, we purposefully diverge from Equation 8
# and follow the MATLAB reference implementation instead.
if cumulative:
yin_numerator = yin_frames[(min_period-1):(max_period+1), :]
tau_range = np.arange(1, frame_length)[:, np.newaxis]
cumulative_mean = np.cumsum(yin_frames[1:, :]/tau_range, axis=0)
epsilon = np.finfo(y.dtype).eps
yin_denominator = epsilon + cumulative_mean[(min_period-2):max_period, :]
yin_frames = yin_numerator / yin_denominator

# Parabolic interpolation.
# NB: perhaps these operations can be fused to alleviate memory allocation?
if interpolate:
parabola_a =\
(yin_frames[:-2, :]+yin_frames[2:, :]-2*yin_frames[1:-1, :]) / 2
parabola_b = (yin_frames[2:, :]-yin_frames[:-2, :]) / 2
parabolic_shifts = -parabola_b / (2*parabola_a)
parabolic_values =\
yin_frames[1:-1, :] - parabola_b*parabola_b/(4*parabola_a)
is_trough = np.logical_and(
yin_frames[1:-1, :]<yin_frames[:-2, :],
yin_frames[1:-1, :]<yin_frames[2:, :])
yin_frames = yin_frames[1:-1, :]
yin_frames[is_trough] = parabolic_values[is_trough]
lostanlen marked this conversation as resolved.
Show resolved Hide resolved
else:
yin_frames = yin_frames[1:-1, :]

# Absolute threshold
# "The solution we propose is to set an absolute threshold and choose the
# smallest value of tau that gives a minimum of d' deeper than
# this threshold. If none is found, the global minimum is chosen instead."
yin_period = np.argmin(yin_frames, axis=0)
if threshold_1 > 0:
lower_bound = (yin_frames<threshold_1).argmax(axis=0)
upper_bound = np.minimum(2*lower_bound, max_period-min_period-1)
for frame_id in range(n_frames):
if lower_bound[frame_id]>0:
bounded_frame = yin_frames[
lower_bound[frame_id]:upper_bound[frame_id], frame_id]
new_argmin = np.argmin(bounded_frame, axis=0)
yin_period[frame_id] = lower_bound[frame_id] + new_argmin
yin_aperiodicity = yin_frames[yin_period, range(n_frames)]
yin_period = min_period + yin_period

# Refine peak by parabolic interpolation
if interpolate:
yin_period = yin_period + parabolic_shifts[yin_period, range(n_frames)]

# Convert period to fundamental frequency (f0)
f0 = sr / yin_period
f0[yin_aperiodicity > threshold_2] = 0
f0[f0<fmin] = 0
f0[f0>fmax] = 0
return f0