Merge fd96b58 into 9837e5d

librosa · Dec 29, 2017 · c80eed2 · c80eed2
2 parents 9837e5d + fd96b58
commit c80eed2
Show file tree

Hide file tree

Showing 2 changed files with 132 additions and 0 deletions.
diff --git a/librosa/feature/spectral.py b/librosa/feature/spectral.py
@@ -21,6 +21,7 @@
            'spectral_bandwidth',
            'spectral_contrast',
            'spectral_rolloff',
+           'spectral_flatness',
            'poly_features',
            'rmse',
            'zero_crossing_rate',
@@ -499,6 +500,94 @@ def spectral_rolloff(y=None, sr=22050, S=None, n_fft=2048, hop_length=512,
     return np.nanmin(ind * freq, axis=0, keepdims=True)
 
 
+def spectral_flatness(y=None, S=None, n_fft=2048, hop_length=512,
+                      amin=1e-10, power=2.0):
+    '''Compute spectral flatness
+
+    Spectral flatness (or tonality coefficient) is a measure to
+    quantify how noise-like a sound is, as opposed to being
+    tone-like [1]_. A high spectral flatness (closer to 1.0)
+    indicates the spectrum is similar to white noise.
+    It is often converted to decibels.
+
+    It is computed along the first axis of the spectrogram as the ratio
+    of the geometric mean to the arithmetic mean of
+    `max(amin, S ** power)`.
+
+    .. [1] Dubnov, Shlomo  "Generalization of spectral flatness
+           measure for non-gaussian linear processes"
+           IEEE Signal Processing Letters, 2004, Vol. 11.
+
+    Parameters
+    ----------
+    y : np.ndarray [shape=(n,)] or None
+        audio time series
+
+    S : np.ndarray [shape=(d, t)] or None
+        (optional) pre-computed spectrogram magnitude
+
+    n_fft : int > 0 [scalar]
+        FFT window size
+
+    hop_length : int > 0 [scalar]
+        hop length for STFT. See `librosa.core.stft` for details.
+
+    amin : float > 0 [scalar]
+        minimum threshold for `S ** power`
+        (=added noise floor for numerical stability)
+
+    power : float > 0 [scalar]
+        Exponent for the magnitude spectrogram.
+        e.g., 1 for energy, 2 for power, etc.
+        Power spectrogram is usually used for computing spectral flatness.
+
+    Returns
+    -------
+    flatness : np.ndarray [shape=(1, t)]
+        spectral flatness for each frame.
+        The returned value is in [0, 1] and often converted to dB scale.
+
+    Raises
+    ------
+    ParameterError
+        if `amin` is not strictly positive, or if the spectrogram is
+        not real-valued or contains negative values
+
+    Examples
+    --------
+    From time-series input
+
+    >>> y, sr = librosa.load(librosa.util.example_audio_file())
+    >>> flatness = librosa.feature.spectral_flatness(y=y)
+    >>> flatness
+    array([[  1.00000e+00,   5.82299e-03,   5.64624e-04, ...,   9.99063e-01,
+          1.00000e+00,   1.00000e+00]], dtype=float32)
+
+    From spectrogram input
+
+    >>> S, phase = librosa.magphase(librosa.stft(y))
+    >>> librosa.feature.spectral_flatness(S=S)
+    array([[  1.00000e+00,   5.82299e-03,   5.64624e-04, ...,   9.99063e-01,
+          1.00000e+00,   1.00000e+00]], dtype=float32)
+
+    From power spectrogram input
+
+    >>> S, phase = librosa.magphase(librosa.stft(y))
+    >>> S_power = S ** 2
+    >>> librosa.feature.spectral_flatness(S=S_power, power=1.0)
+    array([[  1.00000e+00,   5.82299e-03,   5.64624e-04, ...,   9.99063e-01,
+          1.00000e+00,   1.00000e+00]], dtype=float32)
+
+    '''
+    if amin <= 0:
+        raise ParameterError('amin must be strictly positive')
+
+    # Request power=1. here; the `power` exponent is applied explicitly below.
+    S, n_fft = _spectrogram(y=y, S=S, n_fft=n_fft, hop_length=hop_length,
+                            power=1.)
+
+    if not np.isrealobj(S):
+        raise ParameterError('Spectral flatness is only defined '
+                             'with real-valued input')
+    elif np.any(S < 0):
+        raise ParameterError('Spectral flatness is only defined '
+                             'with non-negative energies')
+
+    # Apply the exponent and the noise floor once and share the result
+    # between both means; the strictly positive floor keeps the log finite.
+    S_thresh = np.maximum(amin, S ** power)
+    gmean = np.exp(np.mean(np.log(S_thresh), axis=0, keepdims=True))
+    amean = np.mean(S_thresh, axis=0, keepdims=True)
+    return gmean / amean
+
+
 def rmse(y=None, S=None, frame_length=2048, hop_length=512,
          center=True, pad_mode='reflect'):
     '''Compute root-mean-square (RMS) energy for each frame, either from the

diff --git a/tests/test_features.py b/tests/test_features.py
@@ -362,6 +362,49 @@ def __test(S, freq, fmin, n_bands, quantile):
     yield __test, S, None, 200, 7, 0.02
 
 
+def test_spectral_flatness_synthetic():
+    # Check spectral flatness against known values on synthetic spectrograms.
+
+    n_fft = 2048
+    n_bins = 1 + n_fft // 2    # row count of the synthetic spectrograms
+
+    def __test(y, S, flatness_ref):
+        # Compare the computed flatness to the reference, within tolerance.
+        kwargs = dict(y=y, S=S, n_fft=n_fft, hop_length=512)
+        assert np.allclose(librosa.feature.spectral_flatness(**kwargs),
+                           flatness_ref)
+
+    # Hand-computed reference for a tiny 3x2 spectrogram
+    manual_S = np.array([[1, 3], [2, 1], [1, 2]])
+    manual_ref = np.array([[0.7937005259, 0.7075558390]])
+    yield __test, None, manual_S, manual_ref
+
+    # Constant spectrograms (all ones, then all zeros) are both expected
+    # to yield a flatness of exactly one in every frame.
+    flat_ref = np.ones((1, 10))
+    for make_spec in (np.ones, np.zeros):
+        yield __test, None, make_spec((n_bins, 10)), flat_ref
+
+
+def test_spectral_flatness_errors():
+    # A non-positive `amin` is expected to raise a ParameterError.
+
+    @raises(librosa.ParameterError)
+    def __test(S, amin):
+        librosa.feature.spectral_flatness(S=S, amin=amin)
+
+    spec = np.ones((1025, 10))
+
+    # first a zero amin, then a negative one
+    for bad_amin in (0, -1):
+        yield __test, spec, bad_amin
+
+
 def test_rmse():
 
     def __test(n):