From ed4b036702f46a57002b1fa946af84a9c822503e Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Mon, 3 Oct 2016 13:56:13 -0400 Subject: [PATCH 1/3] implemented equivalent noise bandwidth check for cqt, expanded window options --- librosa/core/constantq.py | 73 +++++++++++++++++++++++----------- librosa/filters.py | 84 ++++++++++++++++++++++++++++----------- 2 files changed, 110 insertions(+), 47 deletions(-) diff --git a/librosa/core/constantq.py b/librosa/core/constantq.py index 50df1a1aa7..c02bb0642d 100644 --- a/librosa/core/constantq.py +++ b/librosa/core/constantq.py @@ -3,8 +3,6 @@ '''Pitch-tracking and tuning estimation''' from __future__ import division -from warnings import warn - import numpy as np import scipy.fftpack as fft @@ -23,7 +21,8 @@ @cache(level=20) def cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84, bins_per_octave=12, tuning=None, filter_scale=1, - norm=1, sparsity=0.01, real=util.Deprecated()): + norm=1, sparsity=0.01, window='hann', + real=util.Deprecated()): '''Compute the constant-Q transform of an audio signal. This implementation is based on the recursive sub-sampling method @@ -72,6 +71,10 @@ def cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84, Set `sparsity=0` to disable sparsification. + window : str, tuple, number, or function + Window specification for the basis filters. + See `filters.get_window` for details. + real : [DEPRECATED] .. warning:: This parameter name deprecated in librosa 0.5.0 It will be removed in librosa 0.6.0. @@ -159,7 +162,7 @@ def cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84, # Determine required resampling quality Q = float(filter_scale) / (2.0**(1. / bins_per_octave) - 1) - filter_cutoff = fmax_t * (1 + filters.window_bandwidth('hann') / Q) + filter_cutoff = fmax_t * (1 + 0.5 * filters.window_bandwidth(window) / Q) nyquist = sr / 2.0 if filter_cutoff < audio.BW_FASTEST * nyquist: res_type = 'kaiser_fast' @@ -182,7 +185,8 @@ def cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84, tuning, filter_scale, norm, - sparsity) + sparsity, + window=window) # Compute the CQT filter response and append it to the stack cqt_resp.append(__cqt_response(y, n_fft, hop_length, fft_basis)) @@ -191,7 +195,7 @@ def cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84, fmax_t /= 2 n_octaves -= 1 - filter_cutoff = fmax_t * (1 + filters.window_bandwidth('hann') / Q) + filter_cutoff = fmax_t * (1 + 0.5 * filters.window_bandwidth(window) / Q) res_type = 'kaiser_fast' @@ -209,7 +213,8 @@ def cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84, tuning, filter_scale, norm, - sparsity) + sparsity, + window=window) my_y, my_sr, my_hop = y, sr, hop_length @@ -220,9 +225,11 @@ def cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84, if i > 0: if len(my_y) < 2: raise ParameterError('Input signal length={} is too short for ' - '{:d}-octave CQT'.format(len_orig, n_octaves)) + '{:d}-octave CQT'.format(len_orig, + n_octaves)) - # The additional scaling of sqrt(2) here is to implicitly rescale the filters + # The additional scaling of sqrt(2) here is to implicitly rescale + # the filters my_y = np.sqrt(2) * audio.resample(my_y, my_sr, my_sr/2.0, res_type=res_type, scale=True) @@ -232,14 +239,13 @@ def cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84, # Compute the cqt filter response and append to the stack cqt_resp.append(__cqt_response(my_y, n_fft, my_hop, fft_basis)) - return __trim_stack(cqt_resp, n_bins) @cache(level=20) def hybrid_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84, bins_per_octave=12, tuning=None, filter_scale=1, - norm=1, sparsity=0.01): + norm=1, sparsity=0.01, window='hann'): '''Compute the hybrid constant-Q transform of an audio signal. Here, the hybrid CQT uses the pseudo CQT for higher frequencies where @@ -280,6 +286,11 @@ def hybrid_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84, Set `sparsity=0` to disable sparsification. + window : str, tuple, number, or function + Window specification for the basis filters. + See `filters.get_window` for details. + + Returns ------- CQT : np.ndarray [shape=(n_bins, t), dtype=np.float] @@ -321,7 +332,8 @@ def hybrid_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84, n_bins=n_bins, bins_per_octave=bins_per_octave, tuning=tuning, - filter_scale=filter_scale) + filter_scale=filter_scale, + window=window) # Determine which filters to use with Pseudo CQT # These are the ones that fit within 2 hop lengths after padding @@ -342,7 +354,8 @@ def hybrid_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84, tuning=tuning, filter_scale=filter_scale, norm=norm, - sparsity=sparsity)) + sparsity=sparsity, + window=window)) if n_bins_full > 0: cqt_resp.append(np.abs(cqt(y, sr, @@ -353,7 +366,8 @@ def hybrid_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84, tuning=tuning, filter_scale=filter_scale, norm=norm, - sparsity=sparsity))) + sparsity=sparsity, + window=window))) return __trim_stack(cqt_resp, n_bins) @@ -361,7 +375,7 @@ def hybrid_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84, @cache(level=20) def pseudo_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84, bins_per_octave=12, tuning=None, filter_scale=1, - norm=1, sparsity=0.01): + norm=1, sparsity=0.01, window='hann'): '''Compute the pseudo constant-Q transform of an audio signal. This uses a single fft size that is the smallest power of 2 that is greater @@ -404,6 +418,10 @@ def pseudo_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84, Set `sparsity=0` to disable sparsification. + window : str, tuple, number, or function + Window specification for the basis filters. + See `filters.get_window` for details. + Returns ------- @@ -417,7 +435,7 @@ def pseudo_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84, `2**(n_bins / bins_per_octave)` Or if `y` is too short to support the frequency range of the CQT. - + Notes ----- This function caches at level 20. @@ -435,7 +453,8 @@ def pseudo_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84, bins_per_octave, tuning, filter_scale, norm, sparsity, - hop_length=hop_length) + hop_length=hop_length, + window=window) fft_basis = np.abs(fft_basis) @@ -448,7 +467,8 @@ def pseudo_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84, @cache(level=10) def __cqt_filter_fft(sr, fmin, n_bins, bins_per_octave, tuning, - filter_scale, norm, sparsity, hop_length=None): + filter_scale, norm, sparsity, hop_length=None, + window='hann'): '''Generate the frequency domain constant-Q filter basis.''' basis, lengths = filters.constant_q(sr, @@ -458,12 +478,15 @@ def __cqt_filter_fft(sr, fmin, n_bins, bins_per_octave, tuning, tuning=tuning, filter_scale=filter_scale, norm=norm, - pad_fft=True) + pad_fft=True, + window=window) # Filters are padded up to the nearest integral power of 2 n_fft = basis.shape[1] - if hop_length is not None and n_fft < 2.0**(1 + np.ceil(np.log2(hop_length))): + if (hop_length is not None and + n_fft < 2.0**(1 + np.ceil(np.log2(hop_length)))): + n_fft = int(2.0 ** (1 + np.ceil(np.log2(hop_length)))) # re-normalize bases with respect to the FFT window length @@ -529,10 +552,12 @@ def __early_downsample(y, sr, hop_length, res_type, n_octaves, raise ParameterError('Input signal length={:d} is too short for ' '{:d}-octave CQT'.format(len(y), n_octaves)) - # The additional scaling of sqrt(downsample_factor) here is to implicitly - # rescale the filters - y = np.sqrt(downsample_factor) * audio.resample(y, sr, sr / downsample_factor, - res_type=res_type, scale=True) + # The additional scaling of sqrt(downsample_factor) here is to + # implicitly rescale the filters + y = np.sqrt(downsample_factor) * audio.resample(y, sr, + sr / downsample_factor, + res_type=res_type, + scale=True) sr /= downsample_factor diff --git a/librosa/filters.py b/librosa/filters.py index 04d7ced5a4..fc06dc800d 100644 --- a/librosa/filters.py +++ b/librosa/filters.py @@ -32,8 +32,6 @@ cq_to_chroma """ -import warnings - import numpy as np import scipy import scipy.signal @@ -46,9 +44,6 @@ from .core.time_frequency import note_to_hz, hz_to_midi, hz_to_octs from .core.time_frequency import fft_frequencies, mel_frequencies -# Dictionary of window function bandwidths -WINDOW_BANDWIDTHS = dict(hann=0.725) - __all__ = ['dct', 'mel', 'chroma', @@ -59,6 +54,50 @@ 'get_window'] +# Dictionary of window function bandwidths + +WINDOW_BANDWIDTHS = {'bart': 1.3334961334912805, + 'barthann': 1.4560255965133932, + 'bartlett': 1.3334961334912805, + 'bkh': 2.0045975283585014, + 'black': 1.7269681554262326, + 'blackharr': 2.0045975283585014, + 'blackman': 1.7269681554262326, + 'blackmanharris': 2.0045975283585014, + 'blk': 1.7269681554262326, + 'bman': 1.7859588613860062, + 'bmn': 1.7859588613860062, + 'bohman': 1.7859588613860062, + 'box': 1.0, + 'boxcar': 1.0, + 'brt': 1.3334961334912805, + 'brthan': 1.4560255965133932, + 'bth': 1.4560255965133932, + 'cosine': 1.2337005350199792, + 'flat': 2.7762255046484143, + 'flattop': 2.7762255046484143, + 'flt': 2.7762255046484143, + 'halfcosine': 1.2337005350199792, + 'ham': 1.3629455320350348, + 'hamm': 1.3629455320350348, + 'hamming': 1.3629455320350348, + 'han': 1.50018310546875, + 'hann': 1.50018310546875, + 'hanning': 1.50018310546875, + 'nut': 1.9763500280946082, + 'nutl': 1.9763500280946082, + 'nuttall': 1.9763500280946082, + 'ones': 1.0, + 'par': 1.9174603174603191, + 'parz': 1.9174603174603191, + 'parzen': 1.9174603174603191, + 'rect': 1.0, + 'rectangular': 1.0, + 'tri': 1.3331706523555851, + 'triang': 1.3331706523555851, + 'triangle': 1.3331706523555851} + + @cache(level=10) def dct(n_filters, n_input): """Discrete cosine transform (DCT type-III) basis. @@ -337,7 +376,7 @@ def chroma(sr, n_fft, n_chroma=12, A440=440.0, ctroct=5.0, return np.ascontiguousarray(wts[:, :int(1 + n_fft/2)]) -def __float_window(window_function): +def __float_window(window_spec): '''Decorator function for windows with fractional input. This function guarantees that for fractional `x`, the following hold: @@ -352,7 +391,7 @@ def _wrap(n, *args, **kwargs): '''The wrapped window''' n_min, n_max = int(np.floor(n)), int(np.ceil(n)) - window = window_function(n, *args, **kwargs) + window = get_window(window_spec, n) if len(window) < n_max: window = np.pad(window, [(0, n_max - len(window))], @@ -367,7 +406,7 @@ def _wrap(n, *args, **kwargs): @cache(level=10) def constant_q(sr, fmin=None, n_bins=84, bins_per_octave=12, tuning=0.0, - window=None, filter_scale=1, pad_fft=True, norm=1, + window='hann', filter_scale=1, pad_fft=True, norm=1, **kwargs): r'''Construct a constant-Q basis. @@ -395,9 +434,8 @@ def constant_q(sr, fmin=None, n_bins=84, bins_per_octave=12, tuning=0.0, tuning : float in `[-0.5, +0.5)` [scalar] Tuning deviation from A440 in fractions of a bin - window : function or `None` + window : string, tuple, number, or function Windowing function to apply to filters. - Default: `scipy.signal.hann` filter_scale : float > 0 [scalar] Scale of filter windows. @@ -472,9 +510,6 @@ def constant_q(sr, fmin=None, n_bins=84, bins_per_octave=12, tuning=0.0, if fmin is None: fmin = note_to_hz('C1') - if window is None: - window = scipy.signal.hann - # Pass-through parameters to get the filter lengths lengths = constant_q_lengths(sr, fmin, n_bins=n_bins, @@ -587,7 +622,7 @@ def constant_q_lengths(sr, fmin, n_bins=84, bins_per_octave=12, # Compute the frequencies freq = fmin * (2.0 ** (np.arange(n_bins, dtype=float) / bins_per_octave)) - if np.any(freq * (1 + window_bandwidth(window) / Q) > sr / 2.0): + if freq[-1] * (1 + 0.5 * window_bandwidth(window) / Q) > sr / 2.0: raise ParameterError('Filter pass-band lies beyond Nyquist') # Convert frequencies to filter lengths @@ -719,10 +754,10 @@ def cq_to_chroma(n_input, bins_per_octave=12, n_chroma=12, return cq_to_ch -def window_bandwidth(window, default=1.0): - '''Get the bandwidth of a window function. +@cache(level=10) +def window_bandwidth(window, n=1000): + '''Get the equivalent noise bandwidth of a window function. - If the window function is unknown, return a default value. Parameters ---------- @@ -732,17 +767,19 @@ def window_bandwidth(window, default=1.0): - scipy.signal.hann - 'boxcar' - default : float >= 0 - The default value, if `window` is unknown. + n : int > 0 + The number of coefficients to use in estimating the + window bandwidth Returns ------- bandwidth : float - The bandwidth of the given window function + The equivalent noise bandwidth (in FFT bins) of the + given window function See Also -------- - scipy.signal.get_window + get_window ''' if hasattr(window, '__name__'): @@ -751,9 +788,10 @@ def window_bandwidth(window, default=1.0): key = window if key not in WINDOW_BANDWIDTHS: - warnings.warn("Unknown window function '{:s}'.".format(key)) + win = get_window(window, n) + WINDOW_BANDWIDTHS[key] = np.sum(win**2) / np.sum(np.abs(win))**2 - return WINDOW_BANDWIDTHS.get(key, default) + return WINDOW_BANDWIDTHS[key] @cache(level=10) From 72a6d3acde933f59eef88fd48dc1332d28fdfccc Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Mon, 3 Oct 2016 14:10:50 -0400 Subject: [PATCH 2/3] revised window_bandwidth test --- tests/test_filters.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/tests/test_filters.py b/tests/test_filters.py index 07526f27bf..6d52a21887 100644 --- a/tests/test_filters.py +++ b/tests/test_filters.py @@ -240,14 +240,9 @@ def test_window_bandwidth(): librosa.filters.window_bandwidth(scipy.signal.hann)) +@raises(ValueError) def test_window_bandwidth_missing(): - warnings.resetwarnings() - with warnings.catch_warnings(record=True) as out: - x = librosa.filters.window_bandwidth('unknown_window') - eq_(x, 1) - assert len(out) > 0 - assert out[0].category is UserWarning - assert 'Unknown window function' in str(out[0].message) + librosa.filters.window_bandwidth('made up window name') def binstr(m): From 04fed1d666934fea9aee81ebc1f15fdcdf6f6755 Mon Sep 17 00:00:00 2001 From: Brian McFee Date: Mon, 3 Oct 2016 14:27:17 -0400 Subject: [PATCH 3/3] fixed enbw calculation, added a unit test for dynamic window bandwidth --- librosa/filters.py | 2 +- tests/test_filters.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/librosa/filters.py b/librosa/filters.py index fc06dc800d..87957445d7 100644 --- a/librosa/filters.py +++ b/librosa/filters.py @@ -789,7 +789,7 @@ def window_bandwidth(window, n=1000): if key not in WINDOW_BANDWIDTHS: win = get_window(window, n) - WINDOW_BANDWIDTHS[key] = np.sum(win**2) / np.sum(np.abs(win))**2 + WINDOW_BANDWIDTHS[key] = n * np.sum(win**2) / np.sum(np.abs(win))**2 return WINDOW_BANDWIDTHS[key] diff --git a/tests/test_filters.py b/tests/test_filters.py index 6d52a21887..d3d6083225 100644 --- a/tests/test_filters.py +++ b/tests/test_filters.py @@ -240,6 +240,14 @@ def test_window_bandwidth(): librosa.filters.window_bandwidth(scipy.signal.hann)) +def test_window_bandwidth_dynamic(): + + # Test with a window constructor guaranteed to not exist in + # the dictionary. + # should behave like a box filter, which has enbw == 1 + eq_(librosa.filters.window_bandwidth(lambda n: np.ones(n)), 1) + + @raises(ValueError) def test_window_bandwidth_missing(): librosa.filters.window_bandwidth('made up window name')