diff --git a/librosa/core/harmonic.py b/librosa/core/harmonic.py index fc98e1ffb0..6228af090e 100644 --- a/librosa/core/harmonic.py +++ b/librosa/core/harmonic.py @@ -4,9 +4,111 @@ import numpy as np import scipy.interpolate +import scipy.signal from ..util.exceptions import ParameterError -__all__ = ['harmonics'] +__all__ = ['salience', 'harmonics'] + + +def salience(S, freqs, h_range, weights=None, aggregate=None, + filter_peaks=True, kind='linear', axis=0): + """Harmonic salience function. + + Parameters + ---------- + S : np.ndarray [shape=(d, n)] + input time frequency magnitude representation (stft, ifgram, etc). + Must be real-valued and non-negative. + + freqs : np.ndarray, shape=(S.shape[axis]) + The frequency values corresponding to S's elements along the + chosen axis. + + h_range : list-like, non-negative + Harmonics to include in salience computation. The first harmonic (1) + corresponds to `S` itself. Values less than one (e.g., 1/2) correspond + to sub-harmonics. + + weights : list-like + The weight to apply to each harmonic in the summation. (default: + uniform weights). Must be the same length as `harmonics`. + + aggregate : function + aggregation function (default: `np.ma.average`) + If `aggregate=np.average`, then a weighted average is + computed per-harmonic according to the specified weights. + For all other aggregation functions, all harmonics + are treated equally. + + filter_peaks : bool + If true, computes harmonic summation only on frequencies of peak + magnitude. Otherwise computes harmonic summation over the full spectrum. + Defaults to True. + + kind : str + Interpolation type for harmonic estimation. + See `scipy.interpolate.interp1d`. + + axis : int + The axis along which to compute harmonics + + Returns + ------- + S_sal : np.ndarray, shape=(len(h_range), [x.shape]) + `S_sal` will have the same shape as `S`, and measure + the overal harmonic energy at each frequency. + + + See Also + -------- + core.harmonics + + + Examples + -------- + >>> y, sr = librosa.load(librosa.util.example_audio_file(), + ... duration=15, offset=30) + >>> S = np.abs(librosa.stft(y)) + >>> freqs = librosa.core.fft_frequencies(sr) + >>> harms = [1./3, 1./2, 1, 2, 3, 4] + >>> weights = [-0.5, -1.0, 1.0, 0.5, 0.33, 0.25] + >>> S_sal = librosa.salience(S, freqs, harms, weights) + >>> print(S_sal.shape) + (1025, 646) + + >>> import matplotlib.pyplot as plt + >>> plt.figure() + >>> librosa.display.specshow(librosa.logamplitude(S_sal**2, + ... ref_power=S_sal.max()*2), + ... sr=sr, y_axis='log') + >>> plt.tight_layout() + + """ + if aggregate is None or aggregate is np.average: + aggregate = np.ma.average + + if weights is None: + weights = np.ones((len(h_range), )) + else: + weights = np.array(weights, dtype=float) + + S_harm = harmonics(S, freqs, h_range, kind=kind, axis=axis) + + if filter_peaks: + S_peaks = scipy.signal.argrelmax(S, axis=0) + peak_mask = np.ones(S_harm.shape) + peak_mask[:, S_peaks[0], S_peaks[1]] = 0 + else: + peak_mask = np.zeros(S_harm.shape) + + S_mask = np.ma.masked_array(S_harm, mask=peak_mask) + + if aggregate is np.ma.average: + S_sal = aggregate(S_mask, axis=0, weights=weights) + else: + S_sal = aggregate(S_mask, axis=0) + + return S_sal def harmonics(x, freqs, h_range, kind='linear', fill_value=0, axis=0): diff --git a/librosa/core/spectrum.py b/librosa/core/spectrum.py index 90da927c00..ba2cc36a9b 100644 --- a/librosa/core/spectrum.py +++ b/librosa/core/spectrum.py @@ -16,8 +16,7 @@ from ..filters import get_window __all__ = ['stft', 'istft', 'magphase', - 'ifgram', - 'phase_vocoder', + 'ifgram', 'phase_vocoder', 'logamplitude', 'perceptual_weighting', 'fmt'] @@ -54,7 +53,7 @@ def stft(y, n_fft=2048, hop_length=None, win_length=None, window='hann', If unspecified, defaults to ``win_length = n_fft``. window : string, tuple, number, function, or np.ndarray [shape=(n_fft,)] - - a window specification (string, tuple, or number); + - a window specification (string, tuple, or number); see `scipy.signal.get_window` - a window function, such as `scipy.signal.hanning` - a vector or array of length `n_fft` diff --git a/tests/test_core.py b/tests/test_core.py index dba269cf58..9610aa4e44 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -283,6 +283,115 @@ def __test(ref_power, clip): yield tf, ref_power, clip +def test_salience_basecase(): + (y, sr) = librosa.load('data/test1_22050.wav') + S = np.abs(librosa.stft(y)) + freqs = librosa.core.fft_frequencies(sr) + harms = [1] + weights = [1.0] + S_sal = librosa.core.salience( + S, freqs, harms, weights, filter_peaks=False, kind='quadratic' + ) + assert np.allclose(S_sal, S) + +def test_salience_basecase2(): + (y, sr) = librosa.load('data/test1_22050.wav') + S = np.abs(librosa.stft(y)) + freqs = librosa.core.fft_frequencies(sr) + harms = [1, 0.5, 2.0] + weights = [1.0, 0.0, 0.0] + S_sal = librosa.core.salience( + S, freqs, harms, weights, filter_peaks=False, kind='quadratic' + ) + assert np.allclose(S_sal, S) + +def test_salience_defaults(): + S = np.array([ + [0.1, 0.5, 0.0], + [0.2, 1.2, 1.2], + [0.0, 0.7, 0.3], + [1.3, 3.2, 0.8] + ]) + freqs = np.array([50.0, 100.0, 200.0, 400.0]) + harms = [0.5, 1, 2] + actual = librosa.core.salience( + S, freqs, harms, kind='quadratic' + ) + + expected = np.array([ + [0.0, 0.0, 0.0], + [0.3, 2.4, 1.5], + [0.0, 0.0, 0.0], + [0.0, 0.0, 0.0] + ]) / 3.0 + assert np.allclose(expected, actual) + +def test_salience_weights(): + S = np.array([ + [0.1, 0.5, 0.0], + [0.2, 1.2, 1.2], + [0.0, 0.7, 0.3], + [1.3, 3.2, 0.8] + ]) + freqs = np.array([50.0, 100.0, 200.0, 400.0]) + harms = [0.5, 1, 2] + weights = [1.0, 1.0, 1.0] + actual = librosa.core.salience( + S, freqs, harms, weights, kind='quadratic' + ) + + expected = np.array([ + [0.0, 0.0, 0.0], + [0.3, 2.4, 1.5], + [0.0, 0.0, 0.0], + [0.0, 0.0, 0.0] + ]) / 3.0 + assert np.allclose(expected, actual) + +def test_salience_no_peak_filter(): + S = np.array([ + [0.1, 0.5, 0.0], + [0.2, 1.2, 1.2], + [0.0, 0.7, 0.3], + [1.3, 3.2, 0.8] + ]) + freqs = np.array([50.0, 100.0, 200.0, 400.0]) + harms = [0.5, 1, 2] + weights = [1.0, 1.0, 1.0] + actual = librosa.core.salience( + S, freqs, harms, weights, filter_peaks=False, kind='quadratic' + ) + + expected = np.array([ + [0.3, 1.7, 1.2], + [0.3, 2.4, 1.5], + [1.5, 5.1, 2.3], + [1.3, 3.9, 1.1] + ]) / 3.0 + assert np.allclose(expected, actual) + +def test_salience_aggregate(): + S = np.array([ + [0.1, 0.5, 0.0], + [0.2, 1.2, 1.2], + [0.0, 0.7, 0.3], + [1.3, 3.2, 0.8] + ]) + freqs = np.array([50.0, 100.0, 200.0, 400.0]) + harms = [0.5, 1, 2] + weights = [1.0, 1.0, 1.0] + actual = librosa.core.salience( + S, freqs, harms, weights, aggregate=np.ma.max, kind='quadratic' + ) + + expected = np.array([ + [0.0, 0.0, 0.0], + [0.2, 1.2, 1.2], + [0.0, 0.0, 0.0], + [0.0, 0.0, 0.0] + ]) + assert np.allclose(expected, actual) + def test_magphase(): (y, sr) = librosa.load('data/test1_22050.wav')