Merge pull request #427 from rabitt/salience

Salience
librosa · Nov 21, 2016 · 5e54e88 · 5e54e88
2 parents a96da54 + 74269b7
commit 5e54e88
Show file tree

Hide file tree

Showing 3 changed files with 214 additions and 4 deletions.
diff --git a/librosa/core/harmonic.py b/librosa/core/harmonic.py
@@ -4,9 +4,111 @@
 
 import numpy as np
 import scipy.interpolate
+import scipy.signal
 from ..util.exceptions import ParameterError
 
-__all__ = ['harmonics']
+__all__ = ['salience', 'harmonics']
+
+
+def salience(S, freqs, h_range, weights=None, aggregate=None,
+             filter_peaks=True, kind='linear', axis=0):
+    """Harmonic salience function.
+
+    Parameters
+    ----------
+    S : np.ndarray [shape=(d, n)]
+        input time frequency magnitude representation (stft, ifgram, etc).
+        Must be real-valued and non-negative.
+
+    freqs : np.ndarray, shape=(S.shape[axis])
+        The frequency values corresponding to S's elements along the
+        chosen axis.
+
+    h_range : list-like, non-negative
+        Harmonics to include in salience computation.  The first harmonic (1)
+        corresponds to `S` itself. Values less than one (e.g., 1/2) correspond
+        to sub-harmonics.
+
+    weights : list-like
+        The weight to apply to each harmonic in the summation. (default:
+        uniform weights). Must be the same length as `harmonics`.
+
+    aggregate : function
+        aggregation function (default: `np.ma.average`)
+        If `aggregate=np.average`, then a weighted average is
+        computed per-harmonic according to the specified weights.
+        For all other aggregation functions, all harmonics
+        are treated equally.
+
+    filter_peaks : bool
+        If true, computes harmonic summation only on frequencies of peak
+        magnitude. Otherwise computes harmonic summation over the full spectrum.
+        Defaults to True.
+
+    kind : str
+        Interpolation type for harmonic estimation.
+        See `scipy.interpolate.interp1d`.
+
+    axis : int
+        The axis along which to compute harmonics
+
+    Returns
+    -------
+    S_sal : np.ndarray, shape=(len(h_range), [x.shape])
+        `S_sal` will have the same shape as `S`, and measure
+        the overal harmonic energy at each frequency.
+
+
+    See Also
+    --------
+    core.harmonics
+
+
+    Examples
+    --------
+    >>> y, sr = librosa.load(librosa.util.example_audio_file(),
+    ...                      duration=15, offset=30)
+    >>> S = np.abs(librosa.stft(y))
+    >>> freqs = librosa.core.fft_frequencies(sr)
+    >>> harms = [1./3, 1./2, 1, 2, 3, 4]
+    >>> weights = [-0.5, -1.0, 1.0, 0.5, 0.33, 0.25]
+    >>> S_sal = librosa.salience(S, freqs, harms, weights)
+    >>> print(S_sal.shape)
+    (1025, 646)
+
+    >>> import matplotlib.pyplot as plt
+    >>> plt.figure()
+    >>> librosa.display.specshow(librosa.logamplitude(S_sal**2,
+    ...                                               ref_power=S_sal.max()*2),
+    ...                          sr=sr, y_axis='log')
+    >>> plt.tight_layout()
+
+    """
+    if aggregate is None or aggregate is np.average:
+        aggregate = np.ma.average
+
+    if weights is None:
+        weights = np.ones((len(h_range), ))
+    else:
+        weights = np.array(weights, dtype=float)
+
+    S_harm = harmonics(S, freqs, h_range, kind=kind, axis=axis)
+
+    if filter_peaks:
+        S_peaks = scipy.signal.argrelmax(S, axis=0)
+        peak_mask = np.ones(S_harm.shape)
+        peak_mask[:, S_peaks[0], S_peaks[1]] = 0
+    else:
+        peak_mask = np.zeros(S_harm.shape)
+
+    S_mask = np.ma.masked_array(S_harm, mask=peak_mask)
+
+    if aggregate is np.ma.average:
+        S_sal = aggregate(S_mask, axis=0, weights=weights)
+    else:
+        S_sal = aggregate(S_mask, axis=0)
+
+    return S_sal
 
 
 def harmonics(x, freqs, h_range, kind='linear', fill_value=0, axis=0):

diff --git a/librosa/core/spectrum.py b/librosa/core/spectrum.py
@@ -16,8 +16,7 @@
 from ..filters import get_window
 
 __all__ = ['stft', 'istft', 'magphase',
-           'ifgram',
-           'phase_vocoder',
+           'ifgram', 'phase_vocoder',
            'logamplitude', 'perceptual_weighting',
            'fmt']
 
@@ -54,7 +53,7 @@ def stft(y, n_fft=2048, hop_length=None, win_length=None, window='hann',
         If unspecified, defaults to ``win_length = n_fft``.
 
     window : string, tuple, number, function, or np.ndarray [shape=(n_fft,)]
-        - a window specification (string, tuple, or number); 
+        - a window specification (string, tuple, or number);
           see `scipy.signal.get_window`
         - a window function, such as `scipy.signal.hanning`
         - a vector or array of length `n_fft`

diff --git a/tests/test_core.py b/tests/test_core.py
@@ -283,6 +283,115 @@ def __test(ref_power, clip):
             yield tf, ref_power, clip
 
 
+def test_salience_basecase():
+    (y, sr) = librosa.load('data/test1_22050.wav')
+    S = np.abs(librosa.stft(y))
+    freqs = librosa.core.fft_frequencies(sr)
+    harms = [1]
+    weights = [1.0]
+    S_sal = librosa.core.salience(
+        S, freqs, harms, weights, filter_peaks=False, kind='quadratic'
+    )
+    assert np.allclose(S_sal, S)
+
+def test_salience_basecase2():
+    (y, sr) = librosa.load('data/test1_22050.wav')
+    S = np.abs(librosa.stft(y))
+    freqs = librosa.core.fft_frequencies(sr)
+    harms = [1, 0.5, 2.0]
+    weights = [1.0, 0.0, 0.0]
+    S_sal = librosa.core.salience(
+        S, freqs, harms, weights, filter_peaks=False, kind='quadratic'
+    )
+    assert np.allclose(S_sal, S)
+
+def test_salience_defaults():
+    S = np.array([
+        [0.1, 0.5, 0.0],
+        [0.2, 1.2, 1.2],
+        [0.0, 0.7, 0.3],
+        [1.3, 3.2, 0.8]
+    ])
+    freqs = np.array([50.0, 100.0, 200.0, 400.0])
+    harms = [0.5, 1, 2]
+    actual = librosa.core.salience(
+        S, freqs, harms, kind='quadratic'
+    )
+
+    expected = np.array([
+        [0.0, 0.0, 0.0],
+        [0.3, 2.4, 1.5],
+        [0.0, 0.0, 0.0],
+        [0.0, 0.0, 0.0]
+    ]) / 3.0
+    assert np.allclose(expected, actual)
+
+def test_salience_weights():
+    S = np.array([
+        [0.1, 0.5, 0.0],
+        [0.2, 1.2, 1.2],
+        [0.0, 0.7, 0.3],
+        [1.3, 3.2, 0.8]
+    ])
+    freqs = np.array([50.0, 100.0, 200.0, 400.0])
+    harms = [0.5, 1, 2]
+    weights = [1.0, 1.0, 1.0]
+    actual = librosa.core.salience(
+        S, freqs, harms, weights, kind='quadratic'
+    )
+
+    expected = np.array([
+        [0.0, 0.0, 0.0],
+        [0.3, 2.4, 1.5],
+        [0.0, 0.0, 0.0],
+        [0.0, 0.0, 0.0]
+    ]) / 3.0
+    assert np.allclose(expected, actual)
+
+def test_salience_no_peak_filter():
+    S = np.array([
+        [0.1, 0.5, 0.0],
+        [0.2, 1.2, 1.2],
+        [0.0, 0.7, 0.3],
+        [1.3, 3.2, 0.8]
+    ])
+    freqs = np.array([50.0, 100.0, 200.0, 400.0])
+    harms = [0.5, 1, 2]
+    weights = [1.0, 1.0, 1.0]
+    actual = librosa.core.salience(
+        S, freqs, harms, weights, filter_peaks=False, kind='quadratic'
+    )
+
+    expected = np.array([
+        [0.3, 1.7, 1.2],
+        [0.3, 2.4, 1.5],
+        [1.5, 5.1, 2.3],
+        [1.3, 3.9, 1.1]
+    ]) / 3.0
+    assert np.allclose(expected, actual)
+
+def test_salience_aggregate():
+    S = np.array([
+        [0.1, 0.5, 0.0],
+        [0.2, 1.2, 1.2],
+        [0.0, 0.7, 0.3],
+        [1.3, 3.2, 0.8]
+    ])
+    freqs = np.array([50.0, 100.0, 200.0, 400.0])
+    harms = [0.5, 1, 2]
+    weights = [1.0, 1.0, 1.0]
+    actual = librosa.core.salience(
+        S, freqs, harms, weights, aggregate=np.ma.max, kind='quadratic'
+    )
+
+    expected = np.array([
+        [0.0, 0.0, 0.0],
+        [0.2, 1.2, 1.2],
+        [0.0, 0.0, 0.0],
+        [0.0, 0.0, 0.0]
+    ]) 
+    assert np.allclose(expected, actual)
+
 def test_magphase():
 
     (y, sr) = librosa.load('data/test1_22050.wav')