Chroma and tonnetz updates (#1183)

* fixed #1170 and #1176
* fixed docstring for chroma_cqt
* updated chroma notebook to reflect new defaults
* added safety check and test for chroma_cqt
librosa · Jun 25, 2020 · 6f609e6 · 6f609e6
1 parent ad96805
commit 6f609e6
Show file tree

Hide file tree

Showing 4 changed files with 33 additions and 39 deletions.
diff --git a/docs/examples/plot_chroma.py b/docs/examples/plot_chroma.py
@@ -16,12 +16,11 @@
 # Beyond the default parameter settings of librosa's chroma functions, we apply the following 
 # enhancements:
 #
-#    1. Over-sampling the frequency axis to reduce sensitivity to tuning deviations
-#    2. Harmonic-percussive-residual source separation to eliminate transients.
-#    3. Nearest-neighbor smoothing to eliminate passing tones and sparse noise.  This is inspired by the
+#    1. Harmonic-percussive-residual source separation to eliminate transients.
+#    2. Nearest-neighbor smoothing to eliminate passing tones and sparse noise.  This is inspired by the
 #       recurrence-based smoothing technique of
 #       `Cho and Bello, 2011 <http://ismir2011.ismir.net/papers/OS8-4.pdf>`_.
-#    4. Local median filtering to suppress remaining discontinuities.
+#    3. Local median filtering to suppress remaining discontinuities.
 
 # Code source: Brian McFee
 # License: ISC
@@ -65,44 +64,22 @@
 plt.tight_layout()
 
 
-###########################################################
-# We can correct for minor tuning deviations by using 3 CQT
-# bins per semi-tone, instead of one
-chroma_os = librosa.feature.chroma_cqt(y=y, sr=sr, bins_per_octave=12*3)
-
-
-plt.figure(figsize=(12, 4))
-
-plt.subplot(2, 1, 1)
-librosa.display.specshow(chroma_orig[idx], y_axis='chroma')
-plt.colorbar()
-plt.ylabel('Original')
-
-
-plt.subplot(2, 1, 2)
-librosa.display.specshow(chroma_os[idx], y_axis='chroma', x_axis='time')
-plt.colorbar()
-plt.ylabel('3x-over')
-plt.tight_layout()
-
-
 ########################################################
-# That cleaned up some rough edges, but we can do better
-# by isolating the harmonic component.
+# We can do better by isolating the harmonic component of the audio signal
 # We'll use a large margin for separating harmonics from percussives
 y_harm = librosa.effects.harmonic(y=y, margin=8)
-chroma_os_harm = librosa.feature.chroma_cqt(y=y_harm, sr=sr, bins_per_octave=12*3)
+chroma_harm = librosa.feature.chroma_cqt(y=y_harm, sr=sr)
 
 
 plt.figure(figsize=(12, 4))
 
 plt.subplot(2, 1, 1)
-librosa.display.specshow(chroma_os[idx], y_axis='chroma')
+librosa.display.specshow(chroma_orig[idx], y_axis='chroma')
 plt.colorbar()
 plt.ylabel('3x-over')
 
 plt.subplot(2, 1, 2)
-librosa.display.specshow(chroma_os_harm[idx], y_axis='chroma', x_axis='time')
+librosa.display.specshow(chroma_harm[idx], y_axis='chroma', x_axis='time')
 plt.colorbar()
 plt.ylabel('Harmonic')
 plt.tight_layout()
@@ -112,16 +89,16 @@
 # There's still some noise in there though.
 # We can clean it up using non-local filtering.
 # This effectively removes any sparse additive noise from the features.
-chroma_filter = np.minimum(chroma_os_harm,
-                           librosa.decompose.nn_filter(chroma_os_harm,
+chroma_filter = np.minimum(chroma_harm,
+                           librosa.decompose.nn_filter(chroma_harm,
                                                        aggregate=np.median,
                                                        metric='cosine'))
 
 
 plt.figure(figsize=(12, 4))
 
 plt.subplot(2, 1, 1)
-librosa.display.specshow(chroma_os_harm[idx], y_axis='chroma')
+librosa.display.specshow(chroma_harm[idx], y_axis='chroma')
 plt.colorbar()
 plt.ylabel('Harmonic')
 

diff --git a/librosa/feature/spectral.py b/librosa/feature/spectral.py
@@ -1206,7 +1206,7 @@ def chroma_stft(y=None, sr=22050, S=None, norm=np.inf, n_fft=2048,
 
 def chroma_cqt(y=None, sr=22050, C=None, hop_length=512, fmin=None,
                norm=np.inf, threshold=0.0, tuning=None, n_chroma=12,
-               n_octaves=7, window=None, bins_per_octave=None, cqt_mode='full'):
+               n_octaves=7, window=None, bins_per_octave=36, cqt_mode='full'):
     r'''Constant-Q chromagram
 
     Parameters
@@ -1246,9 +1246,13 @@ def chroma_cqt(y=None, sr=22050, C=None, hop_length=512, fmin=None,
     window : None or np.ndarray
         Optional window parameter to `filters.cq_to_chroma`
 
-    bins_per_octave : int > 0
+    bins_per_octave : int > 0, optional
         Number of bins per octave in the CQT.
-        Default: matches `n_chroma`
+        Must be an integer multiple of `n_chroma`.
+        Default: 36 (3 bins per semitone)
+
+        If `None`, it will match `n_chroma`.
+
 
     cqt_mode : ['full', 'hybrid']
         Constant-Q transform mode
@@ -1294,6 +1298,9 @@ def chroma_cqt(y=None, sr=22050, C=None, hop_length=512, fmin=None,
 
     if bins_per_octave is None:
         bins_per_octave = n_chroma
+    elif np.remainder(bins_per_octave, n_chroma) != 0:
+        raise ParameterError('bins_per_octave={} must be an integer '
+                             'multiple of n_chroma={}'.format(bins_per_octave, n_chroma))
 
     # Build the CQT if we don't have one already
     if C is None:
@@ -1468,7 +1475,7 @@ def chroma_cens(y=None, sr=22050, C=None, hop_length=512, fmin=None,
     return util.normalize(cens, norm=norm, axis=0)
 
 
-def tonnetz(y=None, sr=22050, chroma=None):
+def tonnetz(y=None, sr=22050, chroma=None, **kwargs):
     '''Computes the tonal centroid features (tonnetz), following the method of
     [1]_.
 
@@ -1490,6 +1497,10 @@ def tonnetz(y=None, sr=22050, chroma=None):
 
         If `None`, a cqt chromagram is performed.
 
+    kwargs
+        Additional keyword arguments to `chroma_cqt`, if `chroma` is not
+        pre-computed.
+
     Returns
     -------
     tonnetz : np.ndarray [shape(6, t)]
@@ -1547,7 +1558,7 @@ def tonnetz(y=None, sr=22050, chroma=None):
                              'passed as an argument.')
 
     if chroma is None:
-        chroma = chroma_cqt(y=y, sr=sr)
+        chroma = chroma_cqt(y=y, sr=sr, **kwargs)
 
     # Generate Transformation matrix
     dim_map = np.linspace(0, 12, num=chroma.shape[0], endpoint=False)

diff --git a/tests/baseline_images/test_display/test_tonnetz.png b/tests/baseline_images/test_display/test_tonnetz.png
diff --git a/tests/test_features.py b/tests/test_features.py
@@ -428,9 +428,15 @@ def test_tonnetz_audio(y_ex):
     assert tonnetz.shape[0] == 6
 
 
+@pytest.mark.xfail(raises=librosa.ParameterError)
+def test_chroma_cqt_badcombo(y_ex):
+    y, sr = y_ex
+    librosa.feature.chroma_cqt(y=y, sr=sr, n_chroma=24, bins_per_octave=36)
+
+
 def test_tonnetz_cqt(y_ex):
     y, sr = y_ex
-    chroma_cqt = librosa.feature.chroma_cqt(y=y, sr=sr, n_chroma=24)
+    chroma_cqt = librosa.feature.chroma_cqt(y=y, sr=sr, n_chroma=36)
     tonnetz = librosa.feature.tonnetz(chroma=chroma_cqt, sr=sr)
     assert tonnetz.shape[1] == chroma_cqt.shape[1]
     assert tonnetz.shape[0] == 6