Skip to content

Commit

Permalink
Merge 5122a0e into 397c724
Browse files Browse the repository at this point in the history
  • Loading branch information
carlthome committed Sep 21, 2016
2 parents 397c724 + 5122a0e commit 76e6d6b
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 10 deletions.
36 changes: 27 additions & 9 deletions librosa/feature/spectral.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from ..util.exceptions import ParameterError

from ..core.time_frequency import fft_frequencies
from ..core.audio import zero_crossings
from ..core.audio import zero_crossings, to_mono
from ..core.spectrum import logamplitude, _spectrogram
from ..core.constantq import cqt, hybrid_cqt
from ..core.pitch import estimate_tuning
Expand Down Expand Up @@ -496,16 +496,22 @@ def spectral_rolloff(y=None, sr=22050, S=None, n_fft=2048, hop_length=512,


def rmse(y=None, S=None, n_fft=2048, hop_length=512):
'''Compute root-mean-square (RMS) energy for each frame.
'''Compute root-mean-square (RMS) energy for each frame, either from the
audio samples `y` or from a spectrogram `S`.
Computing the energy from audio samples is faster as it doesn't require an
STFT calculation. However, using a spectrogram will give a more accurate
representation of energy over time because its frames can be windowed, so
prefer using `S` if it's already available.
Parameters
----------
y : np.ndarray [shape=(n,)] or None
audio time series
(optional) audio time series. Required if `S` is not input.
S : np.ndarray [shape=(d, t)] or None
(optional) spectrogram magnitude
(optional) spectrogram magnitude. Required if `y` is not input.
n_fft : int > 0 [scalar]
FFT window size
Expand All @@ -521,7 +527,7 @@ def rmse(y=None, S=None, n_fft=2048, hop_length=512):
Examples
--------
--------
>>> y, sr = librosa.load(librosa.util.example_audio_file())
>>> librosa.feature.rmse(y=y)
array([[ 0. , 0.056, ..., 0. , 0. ]], dtype=float32)
Expand All @@ -543,12 +549,24 @@ def rmse(y=None, S=None, n_fft=2048, hop_length=512):
... y_axis='log', x_axis='time')
>>> plt.title('log Power spectrogram')
>>> plt.tight_layout()
Use an STFT window of constant ones and no frame centering to get results
consistent with the RMS energy computed from the audio samples `y`:
>>> S = librosa.magphase(librosa.stft(y, window=np.ones, center=False))[0]
>>> librosa.feature.rmse(S=S)
'''

S, _ = _spectrogram(y=y, S=S, n_fft=n_fft, hop_length=hop_length)

return np.sqrt(np.mean(np.abs(S)**2, axis=0, keepdims=True))
if y is not None and S is not None:
raise ValueError('Either `y` or `S` should be input.')
if y is not None:
x = util.frame(to_mono(y))
elif S is not None:
x, _ = _spectrogram(y=y, S=S, n_fft=n_fft, hop_length=hop_length)
else:
raise ValueError('Either `y` or `S` must be input.')
return np.sqrt(np.mean(np.abs(x)**2, axis=0, keepdims=True))


def poly_features(y=None, sr=22050, S=None, n_fft=2048, hop_length=512,
Expand Down
29 changes: 28 additions & 1 deletion tests/test_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,34 @@ def __test(n):
rmse = librosa.feature.rmse(S=S)

assert np.allclose(rmse, np.ones_like(rmse))


def __test_consistency():
    """Check that sample-based and spectrogram-based RMS envelopes agree.

    The example audio is trimmed to a whole number of frames, an STFT is
    taken with a rectangular (all-ones) window and no centering, and the
    two `rmse` results are compared after each is scaled by its own peak.
    """
    frame_length = 2048

    signal, _ = librosa.load(__EXAMPLE_FILE, sr=None)

    # Trim the signal so its length is an exact multiple of the frame size.
    whole_frames_size = signal.size - signal.size % frame_length
    signal = librosa.util.fix_length(signal, whole_frames_size)
    assert signal.size % frame_length == 0

    # Magnitude spectrogram: constant window, frames not centered.
    stft_matrix = librosa.stft(signal,
                               n_fft=frame_length,
                               window=np.ones,
                               center=False)
    magnitude = librosa.magphase(stft_matrix)[0]

    # RMS computed from the spectrogram and from the raw samples.
    rms_from_spec = librosa.feature.rmse(S=magnitude)
    rms_from_audio = librosa.feature.rmse(y=signal)

    # Scale each envelope by its own maximum so only the shapes are compared.
    rms_from_spec /= rms_from_spec.max()
    rms_from_audio /= rms_from_audio.max()

    # The normalized envelopes must match within 1% relative tolerance.
    np.testing.assert_allclose(rms_from_spec, rms_from_audio, rtol=1e-2)

yield __test_consistency

for n in range(10, 100, 10):
yield __test, n

Expand Down

0 comments on commit 76e6d6b

Please sign in to comment.