Skip to content

Commit

Permalink
Merge 08ef728 into 205663a
Browse files Browse the repository at this point in the history
  • Loading branch information
bmcfee committed Oct 8, 2016
2 parents 205663a + 08ef728 commit 6c7dd1a
Show file tree
Hide file tree
Showing 2 changed files with 115 additions and 21 deletions.
73 changes: 56 additions & 17 deletions librosa/core/constantq.py
Expand Up @@ -22,6 +22,7 @@
def cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
bins_per_octave=12, tuning=None, filter_scale=1,
norm=1, sparsity=0.01, window='hann',
scale=False,
real=util.Deprecated()):
'''Compute the constant-Q transform of an audio signal.
Expand Down Expand Up @@ -75,6 +76,13 @@ def cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
Window specification for the basis filters.
See `filters.get_window` for details.
scale : bool
If `True`, scale the CQT response by the square root of the length of
each channel's filter. This is analogous to `norm='ortho'` in FFT.
If `False`, do not scale the CQT. This is analogous to
`norm=None` in FFT.
real : [DEPRECATED]
.. warning:: This parameter name was deprecated in librosa 0.5.0
It will be removed in librosa 0.6.0.
Expand Down Expand Up @@ -172,7 +180,7 @@ def cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
y, sr, hop_length = __early_downsample(y, sr, hop_length,
res_type,
n_octaves,
nyquist, filter_cutoff)
nyquist, filter_cutoff, scale)

cqt_resp = []

Expand Down Expand Up @@ -239,13 +247,24 @@ def cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
# Compute the cqt filter response and append to the stack
cqt_resp.append(__cqt_response(my_y, n_fft, my_hop, fft_basis))

return __trim_stack(cqt_resp, n_bins)
C = __trim_stack(cqt_resp, n_bins)

if scale:
lengths = filters.constant_q_lengths(sr, fmin,
n_bins=n_bins,
bins_per_octave=bins_per_octave,
tuning=tuning,
window=window,
filter_scale=filter_scale)
C /= np.sqrt(lengths[:, np.newaxis])

return C


@cache(level=20)
def hybrid_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
bins_per_octave=12, tuning=None, filter_scale=1,
norm=1, sparsity=0.01, window='hann'):
norm=1, sparsity=0.01, window='hann', scale=False):
'''Compute the hybrid constant-Q transform of an audio signal.
Here, the hybrid CQT uses the pseudo CQT for higher frequencies where
Expand Down Expand Up @@ -346,6 +365,7 @@ def hybrid_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,

if n_bins_pseudo > 0:
fmin_pseudo = np.min(freqs[pseudo_filters])

cqt_resp.append(pseudo_cqt(y, sr,
hop_length=hop_length,
fmin=fmin_pseudo,
Expand All @@ -355,7 +375,8 @@ def hybrid_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
filter_scale=filter_scale,
norm=norm,
sparsity=sparsity,
window=window))
window=window,
scale=scale))

if n_bins_full > 0:
cqt_resp.append(np.abs(cqt(y, sr,
Expand All @@ -367,15 +388,16 @@ def hybrid_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
filter_scale=filter_scale,
norm=norm,
sparsity=sparsity,
window=window)))
window=window,
scale=scale)))

return __trim_stack(cqt_resp, n_bins)


@cache(level=20)
def pseudo_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
bins_per_octave=12, tuning=None, filter_scale=1,
norm=1, sparsity=0.01, window='hann'):
norm=1, sparsity=0.01, window='hann', scale=False):
'''Compute the pseudo constant-Q transform of an audio signal.
This uses a single fft size that is the smallest power of 2 that is greater
Expand Down Expand Up @@ -462,7 +484,21 @@ def pseudo_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
D = np.abs(stft(y, n_fft=n_fft, hop_length=hop_length))

# Project onto the pseudo-cqt basis
return fft_basis.dot(D)
C = fft_basis.dot(D)

if scale:
C /= np.sqrt(n_fft)
else:
lengths = filters.constant_q_lengths(sr, fmin,
n_bins=n_bins,
bins_per_octave=bins_per_octave,
tuning=tuning,
window=window,
filter_scale=filter_scale)

C *= np.sqrt(lengths[:, np.newaxis] / n_fft)

return C


@cache(level=10)
Expand Down Expand Up @@ -527,8 +563,8 @@ def __cqt_response(y, n_fft, hop_length, fft_basis):
def __early_downsample_count(nyquist, filter_cutoff, hop_length, n_octaves):
'''Compute the number of early downsampling operations'''

downsample_count1 = int(np.ceil(np.log2(audio.BW_FASTEST * nyquist /
filter_cutoff)) - 1)
downsample_count1 = max(0, int(np.ceil(np.log2(audio.BW_FASTEST * nyquist /
filter_cutoff)) - 1) - 1)

num_twos = __num_two_factors(hop_length)
downsample_count2 = max(0, num_twos - n_octaves + 1)
Expand All @@ -537,7 +573,7 @@ def __early_downsample_count(nyquist, filter_cutoff, hop_length, n_octaves):


def __early_downsample(y, sr, hop_length, res_type, n_octaves,
nyquist, filter_cutoff):
nyquist, filter_cutoff, scale):
'''Perform early downsampling on an audio signal, if it applies.'''

downsample_count = __early_downsample_count(nyquist, filter_cutoff,
Expand All @@ -552,14 +588,17 @@ def __early_downsample(y, sr, hop_length, res_type, n_octaves,
raise ParameterError('Input signal length={:d} is too short for '
'{:d}-octave CQT'.format(len(y), n_octaves))

# The additional scaling of sqrt(downsample_factor) here is to
# implicitly rescale the filters
y = np.sqrt(downsample_factor) * audio.resample(y, sr,
sr / downsample_factor,
res_type=res_type,
scale=True)
new_sr = sr / float(downsample_factor)
y = audio.resample(y, sr, new_sr,
res_type=res_type,
scale=True)

# If we're not going to length-scale after CQT, we
# need to compensate for the downsampling factor here
if not scale:
y *= np.sqrt(downsample_factor)

sr /= downsample_factor
sr = new_sr

return y, sr, hop_length

Expand Down
63 changes: 59 additions & 4 deletions tests/test_constantq.py
Expand Up @@ -187,7 +187,7 @@ def test_cqt_fail_short_early():
@raises(librosa.ParameterError)
def test_cqt_fail_short_late():

y = np.zeros(64)
y = np.zeros(16)
librosa.cqt(y, sr=22050, real=False)

def test_cqt_impulse():
Expand All @@ -198,7 +198,6 @@ def __test(sr, hop_length, y):

max_response = np.max(C, axis=1)


ref_response = np.max(max_response)
continuity = np.abs(np.diff(max_response))

Expand All @@ -215,7 +214,7 @@ def __test(sr, hop_length, y):
for hop_scale in range(1, 9):
hop_length = 64 * hop_scale
# Center the impulse response on a frame
center = (len(x) / (2 * float(hop_length))) * hop_length
center = int((len(x) / (2.0 * float(hop_length))) * hop_length)
x[center] = 1
yield __test, sr, hop_length, x

Expand Down Expand Up @@ -245,6 +244,62 @@ def __test(sr, hop_length, y):
for hop_scale in range(1, 9):
hop_length = 64 * hop_scale
# Center the impulse response on a frame
center = (len(x) / (2 * float(hop_length))) * hop_length
center = int((len(x) / (2.0 * float(hop_length))) * hop_length)
x[center] = 1
yield __test, sr, hop_length, x


def test_cqt_white_noise():
    # Generator-style test: yields one (check, fmin, n_bins, scale, sr, y)
    # case per combination of `scale`, `fmin` and octave count.
    # Each case checks that the CQT magnitude of a noise signal has roughly
    # constant mean and spread across frequency bins.

    def __test(fmin, n_bins, scale, sr, y):

        C = np.abs(librosa.cqt(y=y, sr=sr,
                               fmin=fmin,
                               n_bins=n_bins,
                               scale=scale,
                               real=False))

        if not scale:
            # Apply the filter-length normalization by hand so that both
            # settings of `scale` are held to the same expected statistics.
            lengths = librosa.filters.constant_q_lengths(sr, fmin,
                                                         n_bins=n_bins)
            C /= np.sqrt(lengths[:, np.newaxis])

        # Only compare statistics across the time dimension
        # we want ~ constant mean and variance across frequencies
        assert np.allclose(np.mean(C, axis=1), 1.0, atol=2.5e-1), np.mean(C, axis=1)
        assert np.allclose(np.std(C, axis=1), 0.5, atol=5e-1), np.std(C, axis=1)

    # Use a locally seeded generator rather than the global numpy RNG:
    # the assertions are statistical, so the input must be reproducible for
    # a failure to be re-run and debugged, and the test must not depend on
    # (or disturb) whatever random state other tests leave behind.
    rng = np.random.RandomState(20161008)

    for sr in [22050]:
        y = rng.randn(30 * sr)

        for scale in [False, True]:
            for fmin in librosa.note_to_hz(['C1', 'C2']):
                for n_octaves in range(2, 4):
                    yield __test, fmin, n_octaves * 12, scale, sr, y


def test_hcqt_white_noise():
    # Generator-style test: yields one (check, fmin, n_bins, scale, sr, y)
    # case per combination of `scale`, `fmin` and octave count.
    # Each case checks that the hybrid CQT of a noise signal has roughly
    # constant mean and spread across frequency bins.

    def __test(fmin, n_bins, scale, sr, y):

        # No np.abs here: the result of hybrid_cqt is compared directly.
        C = librosa.hybrid_cqt(y=y, sr=sr,
                               fmin=fmin,
                               n_bins=n_bins,
                               scale=scale)

        if not scale:
            # Apply the filter-length normalization by hand so that both
            # settings of `scale` are held to the same expected statistics.
            lengths = librosa.filters.constant_q_lengths(sr, fmin,
                                                         n_bins=n_bins)
            C /= np.sqrt(lengths[:, np.newaxis])

        # Statistics are taken over the time axis, one value per bin.
        assert np.allclose(np.mean(C, axis=1), 1.0, atol=2.5e-1), np.mean(C, axis=1)
        assert np.allclose(np.std(C, axis=1), 0.5, atol=5e-1), np.std(C, axis=1)

    # Use a locally seeded generator rather than the global numpy RNG:
    # the assertions are statistical, so the input must be reproducible for
    # a failure to be re-run and debugged, and the test must not depend on
    # (or disturb) whatever random state other tests leave behind.
    rng = np.random.RandomState(8102016)

    for sr in [22050]:
        y = rng.randn(30 * sr)

        for scale in [False, True]:
            for fmin in librosa.note_to_hz(['C1', 'C2']):
                for n_octaves in range(2, 4):
                    yield __test, fmin, n_octaves * 12, scale, sr, y

0 comments on commit 6c7dd1a

Please sign in to comment.