Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] YIN #974

Closed
wants to merge 8 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
154 changes: 154 additions & 0 deletions librosa/core/pitch.py
Expand Up @@ -4,6 +4,7 @@

import warnings
import numpy as np
import scipy.signal
import six

from .spectrum import _spectrogram
Expand Down Expand Up @@ -336,3 +337,156 @@ def piptrack(y=None, sr=22050, S=None, n_fft=2048, hop_length=None,
+ dskew[idx[:, 0], idx[:, 1]])

return pitches, mags


def yin(y, sr=22050, win_length=2048, hop_length=None, window='hann',
pad_mode='reflect', fmin=40, fmax=None, cumulative=False,
peak_threshold=0.1, periodicity_threshold=0.2):
'''Fundamental frequency (F0) estimation. [1]_


.. [1] De Cheveigné, A., & Kawahara, H. (2002).
"YIN, a fundamental frequency estimator for speech and music."
The Journal of the Acoustical Society of America, 111(4), 1917-1930.

Parameters
----------
y : np.ndarray [shape=(n,)]
audio time series

sr : number > 0 [scalar]
sampling rate of `y` in Hertz

win_length : int [scalar]
length of the frame in samples.
By default, frame_length=2048 corresponds to a time scale of 93 ms at
a sampling rate of 22050 Hz. In comparison, the time scale of the
original publication [1]_ is equal to 25 milliseconds.
We recommend setting `frame_length` to a power of two for optimizing
the speed of the fast Fourier transform (FFT) algorithm.
lostanlen marked this conversation as resolved.
Show resolved Hide resolved

hop_length : None or int > 0 [scalar]
number of audio samples between adjacent YIN predictions.
If `None`, defaults to `frame_length // 4`.

fmin: number > 0 [scalar]
minimum frequency in Hertz

fmax: None or number > 0 [scalar]
maximum frequency in Hertz. If `None`, defaults to fmax = sr / 4.0

peak_threshold: number >= 0 [scalar]
absolute threshold for peak estimation

periodicity_threshold: number >= 0 [scalar]
absolute threshold for aperiodicity estimation
NB: we recommend setting threshold_2 >= threshold_1.

lostanlen marked this conversation as resolved.
Show resolved Hide resolved
pad_mode: string or function
This argument is passed to `np.pad` for centering frames before
computing autocorrelation.
By default (`pad_mode="reflect"`), `y` is padded on both sides with its
own reflection, mirrored around its first and last sample respectively.

Returns
-------
f0: np.ndarray [shape=(n_frames,)]
time series of fundamental frequencies in Hertz.
Null values represent the absence of a perceptible fundamental frequency.

See also
--------
piptrack: sinusoidal peak interpolation

Examples
--------
Computing a fundamental frequency curve from an audio input:

>>> y = librosa.chirp(440, 880, duration=7.0)
>>> yin(y)
array([ 0. , 0. , 222.62295321, ..., 877.12362404,
885.76086659, 886.80491703])
'''
# Set the maximal frequency.
if fmax is None:
fmax = sr / 4.0

# Set the default hop, if it's not already specified.
if hop_length is None:
hop_length = int(frame_length // 4)

# Check that audio is valid.
util.valid_audio(y, mono=True)

# Pad the time series so that frames are centered.
y = np.pad(y, int(win_length // 2), mode=pad_mode)

# Compute autocorrelation in each frame.
y_frames = util.frame(y, frame_length=win_length, hop_length=hop_length)
n_frames = y_frames.shape[1]
acf_frames = librosa.autocorrelate(y_frames, axis=0)

# Difference function.
min_period = np.maximum(int(np.floor(sr/fmax)), 2)
max_period = np.minimum(int(np.ceil(sr/fmin)), win_length-1)
boxcar_window = scipy.signal.windows.boxcar(win_length)
energy = scipy.signal.convolve(y*y, boxcar_window, mode="same")
energy_frames = util.frame(
energy, frame_length=win_length, hop_length=hop_length)
energy_0 = energy_frames[0, :]
energy_tau = energy_frames[(min_period-1):(max_period+1), :]
acf_tau = acf_frames[(min_period-1):(max_period+1), :]
yin_frames = energy_0 + energy_frames - 2*acf_frames

# Cumulative mean normalized difference function.
if cumulative:
yin_numerator = yin_frames[(min_period-1):(max_period+1), :]
tau_range = np.arange(1, frame_length)[:, np.newaxis]
cumulative_mean = np.cumsum(yin_frames[1:, :], axis=0) / tau_range
epsilon = np.finfo(y.dtype).eps
yin_denominator = epsilon + cumulative_mean[(min_period-2):max_period, :]
yin_frames = yin_numerator / yin_denominator

# Parabolic interpolation.
if interpolate:
parabola_a =\
(yin_frames[:-2, :]+yin_frames[2:, :]-2*yin_frames[1:-1, :]) / 2
parabola_b = (yin_frames[2:, :]-yin_frames[:-2, :]) / 2
parabolic_shifts = -parabola_b / (2*parabola_a)
parabolic_values =\
yin_frames[1:-1, :] - parabola_b*parabola_b/(4*parabola_a)
is_trough = np.logical_and(
yin_frames[1:-1, :]<yin_frames[:-2, :],
yin_frames[1:-1, :]<yin_frames[2:, :])
yin_frames = yin_frames[1:-1, :]
yin_frames[is_trough] = parabolic_values[is_trough]
lostanlen marked this conversation as resolved.
Show resolved Hide resolved
else:
yin_frames = yin_frames[1:-1, :]

# Absolute threshold
# "The solution we propose is to set an absolute threshold and choose the
# smallest value of tau that gives a minimum of d' deeper than
# this threshold. If none is found, the global minimum is chosen instead."
yin_period = np.argmin(yin_frames, axis=0)
if peak_threshold > 0:
lower_bound = (yin_frames<peak_threshold).argmax(axis=0)
upper_bound = np.minimum(2*lower_bound, max_period-min_period-1)
for frame_id in range(n_frames):
if lower_bound[frame_id]>0:
bounded_frame = yin_frames[
lower_bound[frame_id]:upper_bound[frame_id], frame_id]
new_argmin = np.argmin(bounded_frame, axis=0)
yin_period[frame_id] = lower_bound[frame_id] + new_argmin
yin_aperiodicity = yin_frames[yin_period, range(n_frames)]
yin_period = min_period + yin_period

# Refine peak by parabolic interpolation
if interpolate:
yin_period = yin_period + parabolic_shifts[yin_period, range(n_frames)]

# Convert period to fundamental frequency (f0)
f0 = sr / yin_period
f0[yin_aperiodicity > periodicity_threshold] = 0
f0[f0<fmin] = 0
f0[f0>fmax] = 0
return f0