Skip to content

Commit

Permalink
Merge pull request #427 from rabitt/salience
Browse files Browse the repository at this point in the history
Salience
  • Loading branch information
bmcfee committed Nov 21, 2016
2 parents a96da54 + 74269b7 commit 5e54e88
Show file tree
Hide file tree
Showing 3 changed files with 214 additions and 4 deletions.
104 changes: 103 additions & 1 deletion librosa/core/harmonic.py
Expand Up @@ -4,9 +4,111 @@

import numpy as np
import scipy.interpolate
import scipy.signal
from ..util.exceptions import ParameterError

__all__ = ['harmonics']
__all__ = ['salience', 'harmonics']


def salience(S, freqs, h_range, weights=None, aggregate=None,
filter_peaks=True, kind='linear', axis=0):
"""Harmonic salience function.
Parameters
----------
S : np.ndarray [shape=(d, n)]
input time frequency magnitude representation (stft, ifgram, etc).
Must be real-valued and non-negative.
freqs : np.ndarray, shape=(S.shape[axis])
The frequency values corresponding to S's elements along the
chosen axis.
h_range : list-like, non-negative
Harmonics to include in salience computation. The first harmonic (1)
corresponds to `S` itself. Values less than one (e.g., 1/2) correspond
to sub-harmonics.
weights : list-like
The weight to apply to each harmonic in the summation. (default:
uniform weights). Must be the same length as `harmonics`.
aggregate : function
aggregation function (default: `np.ma.average`)
If `aggregate=np.average`, then a weighted average is
computed per-harmonic according to the specified weights.
For all other aggregation functions, all harmonics
are treated equally.
filter_peaks : bool
If true, computes harmonic summation only on frequencies of peak
magnitude. Otherwise computes harmonic summation over the full spectrum.
Defaults to True.
kind : str
Interpolation type for harmonic estimation.
See `scipy.interpolate.interp1d`.
axis : int
The axis along which to compute harmonics
Returns
-------
S_sal : np.ndarray, shape=(len(h_range), [x.shape])
`S_sal` will have the same shape as `S`, and measure
the overal harmonic energy at each frequency.
See Also
--------
core.harmonics
Examples
--------
>>> y, sr = librosa.load(librosa.util.example_audio_file(),
... duration=15, offset=30)
>>> S = np.abs(librosa.stft(y))
>>> freqs = librosa.core.fft_frequencies(sr)
>>> harms = [1./3, 1./2, 1, 2, 3, 4]
>>> weights = [-0.5, -1.0, 1.0, 0.5, 0.33, 0.25]
>>> S_sal = librosa.salience(S, freqs, harms, weights)
>>> print(S_sal.shape)
(1025, 646)
>>> import matplotlib.pyplot as plt
>>> plt.figure()
>>> librosa.display.specshow(librosa.logamplitude(S_sal**2,
... ref_power=S_sal.max()*2),
... sr=sr, y_axis='log')
>>> plt.tight_layout()
"""
if aggregate is None or aggregate is np.average:
aggregate = np.ma.average

if weights is None:
weights = np.ones((len(h_range), ))
else:
weights = np.array(weights, dtype=float)

S_harm = harmonics(S, freqs, h_range, kind=kind, axis=axis)

if filter_peaks:
S_peaks = scipy.signal.argrelmax(S, axis=0)
peak_mask = np.ones(S_harm.shape)
peak_mask[:, S_peaks[0], S_peaks[1]] = 0
else:
peak_mask = np.zeros(S_harm.shape)

S_mask = np.ma.masked_array(S_harm, mask=peak_mask)

if aggregate is np.ma.average:
S_sal = aggregate(S_mask, axis=0, weights=weights)
else:
S_sal = aggregate(S_mask, axis=0)

return S_sal


def harmonics(x, freqs, h_range, kind='linear', fill_value=0, axis=0):
Expand Down
5 changes: 2 additions & 3 deletions librosa/core/spectrum.py
Expand Up @@ -16,8 +16,7 @@
from ..filters import get_window

__all__ = ['stft', 'istft', 'magphase',
'ifgram',
'phase_vocoder',
'ifgram', 'phase_vocoder',
'logamplitude', 'perceptual_weighting',
'fmt']

Expand Down Expand Up @@ -54,7 +53,7 @@ def stft(y, n_fft=2048, hop_length=None, win_length=None, window='hann',
If unspecified, defaults to ``win_length = n_fft``.
window : string, tuple, number, function, or np.ndarray [shape=(n_fft,)]
- a window specification (string, tuple, or number);
- a window specification (string, tuple, or number);
see `scipy.signal.get_window`
- a window function, such as `scipy.signal.hanning`
- a vector or array of length `n_fft`
Expand Down
109 changes: 109 additions & 0 deletions tests/test_core.py
Expand Up @@ -283,6 +283,115 @@ def __test(ref_power, clip):
yield tf, ref_power, clip


def test_salience_basecase():
(y, sr) = librosa.load('data/test1_22050.wav')
S = np.abs(librosa.stft(y))
freqs = librosa.core.fft_frequencies(sr)
harms = [1]
weights = [1.0]
S_sal = librosa.core.salience(
S, freqs, harms, weights, filter_peaks=False, kind='quadratic'
)
assert np.allclose(S_sal, S)

def test_salience_basecase2():
(y, sr) = librosa.load('data/test1_22050.wav')
S = np.abs(librosa.stft(y))
freqs = librosa.core.fft_frequencies(sr)
harms = [1, 0.5, 2.0]
weights = [1.0, 0.0, 0.0]
S_sal = librosa.core.salience(
S, freqs, harms, weights, filter_peaks=False, kind='quadratic'
)
assert np.allclose(S_sal, S)

def test_salience_defaults():
S = np.array([
[0.1, 0.5, 0.0],
[0.2, 1.2, 1.2],
[0.0, 0.7, 0.3],
[1.3, 3.2, 0.8]
])
freqs = np.array([50.0, 100.0, 200.0, 400.0])
harms = [0.5, 1, 2]
actual = librosa.core.salience(
S, freqs, harms, kind='quadratic'
)

expected = np.array([
[0.0, 0.0, 0.0],
[0.3, 2.4, 1.5],
[0.0, 0.0, 0.0],
[0.0, 0.0, 0.0]
]) / 3.0
assert np.allclose(expected, actual)

def test_salience_weights():
S = np.array([
[0.1, 0.5, 0.0],
[0.2, 1.2, 1.2],
[0.0, 0.7, 0.3],
[1.3, 3.2, 0.8]
])
freqs = np.array([50.0, 100.0, 200.0, 400.0])
harms = [0.5, 1, 2]
weights = [1.0, 1.0, 1.0]
actual = librosa.core.salience(
S, freqs, harms, weights, kind='quadratic'
)

expected = np.array([
[0.0, 0.0, 0.0],
[0.3, 2.4, 1.5],
[0.0, 0.0, 0.0],
[0.0, 0.0, 0.0]
]) / 3.0
assert np.allclose(expected, actual)

def test_salience_no_peak_filter():
S = np.array([
[0.1, 0.5, 0.0],
[0.2, 1.2, 1.2],
[0.0, 0.7, 0.3],
[1.3, 3.2, 0.8]
])
freqs = np.array([50.0, 100.0, 200.0, 400.0])
harms = [0.5, 1, 2]
weights = [1.0, 1.0, 1.0]
actual = librosa.core.salience(
S, freqs, harms, weights, filter_peaks=False, kind='quadratic'
)

expected = np.array([
[0.3, 1.7, 1.2],
[0.3, 2.4, 1.5],
[1.5, 5.1, 2.3],
[1.3, 3.9, 1.1]
]) / 3.0
assert np.allclose(expected, actual)

def test_salience_aggregate():
S = np.array([
[0.1, 0.5, 0.0],
[0.2, 1.2, 1.2],
[0.0, 0.7, 0.3],
[1.3, 3.2, 0.8]
])
freqs = np.array([50.0, 100.0, 200.0, 400.0])
harms = [0.5, 1, 2]
weights = [1.0, 1.0, 1.0]
actual = librosa.core.salience(
S, freqs, harms, weights, aggregate=np.ma.max, kind='quadratic'
)

expected = np.array([
[0.0, 0.0, 0.0],
[0.2, 1.2, 1.2],
[0.0, 0.0, 0.0],
[0.0, 0.0, 0.0]
])
assert np.allclose(expected, actual)

def test_magphase():

(y, sr) = librosa.load('data/test1_22050.wav')
Expand Down

0 comments on commit 5e54e88

Please sign in to comment.