Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

All the windows in CQT #414

Merged
merged 3 commits into from
Oct 4, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 49 additions & 24 deletions librosa/core/constantq.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
'''Constant-Q transforms'''
from __future__ import division

from warnings import warn

import numpy as np
import scipy.fftpack as fft

Expand All @@ -23,7 +21,8 @@
@cache(level=20)
def cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
bins_per_octave=12, tuning=None, filter_scale=1,
norm=1, sparsity=0.01, real=util.Deprecated()):
norm=1, sparsity=0.01, window='hann',
real=util.Deprecated()):
'''Compute the constant-Q transform of an audio signal.

This implementation is based on the recursive sub-sampling method
Expand Down Expand Up @@ -72,6 +71,10 @@ def cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,

Set `sparsity=0` to disable sparsification.

window : str, tuple, number, or function
Window specification for the basis filters.
See `filters.get_window` for details.

real : [DEPRECATED]
.. warning:: This parameter name was deprecated in librosa 0.5.0
It will be removed in librosa 0.6.0.
Expand Down Expand Up @@ -159,7 +162,7 @@ def cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,

# Determine required resampling quality
Q = float(filter_scale) / (2.0**(1. / bins_per_octave) - 1)
filter_cutoff = fmax_t * (1 + filters.window_bandwidth('hann') / Q)
filter_cutoff = fmax_t * (1 + 0.5 * filters.window_bandwidth(window) / Q)
nyquist = sr / 2.0
if filter_cutoff < audio.BW_FASTEST * nyquist:
res_type = 'kaiser_fast'
Expand All @@ -182,7 +185,8 @@ def cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
tuning,
filter_scale,
norm,
sparsity)
sparsity,
window=window)

# Compute the CQT filter response and append it to the stack
cqt_resp.append(__cqt_response(y, n_fft, hop_length, fft_basis))
Expand All @@ -191,7 +195,7 @@ def cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
fmax_t /= 2
n_octaves -= 1

filter_cutoff = fmax_t * (1 + filters.window_bandwidth('hann') / Q)
filter_cutoff = fmax_t * (1 + 0.5 * filters.window_bandwidth(window) / Q)

res_type = 'kaiser_fast'

Expand All @@ -209,7 +213,8 @@ def cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
tuning,
filter_scale,
norm,
sparsity)
sparsity,
window=window)

my_y, my_sr, my_hop = y, sr, hop_length

Expand All @@ -220,9 +225,11 @@ def cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
if i > 0:
if len(my_y) < 2:
raise ParameterError('Input signal length={} is too short for '
'{:d}-octave CQT'.format(len_orig, n_octaves))
'{:d}-octave CQT'.format(len_orig,
n_octaves))

# The additional scaling of sqrt(2) here is to implicitly rescale the filters
# The additional scaling of sqrt(2) here is to implicitly rescale
# the filters
my_y = np.sqrt(2) * audio.resample(my_y, my_sr, my_sr/2.0,
res_type=res_type,
scale=True)
Expand All @@ -232,14 +239,13 @@ def cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
# Compute the cqt filter response and append to the stack
cqt_resp.append(__cqt_response(my_y, n_fft, my_hop, fft_basis))


return __trim_stack(cqt_resp, n_bins)


@cache(level=20)
def hybrid_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
bins_per_octave=12, tuning=None, filter_scale=1,
norm=1, sparsity=0.01):
norm=1, sparsity=0.01, window='hann'):
'''Compute the hybrid constant-Q transform of an audio signal.

Here, the hybrid CQT uses the pseudo CQT for higher frequencies where
Expand Down Expand Up @@ -280,6 +286,11 @@ def hybrid_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,

Set `sparsity=0` to disable sparsification.

window : str, tuple, number, or function
Window specification for the basis filters.
See `filters.get_window` for details.


Returns
-------
CQT : np.ndarray [shape=(n_bins, t), dtype=np.float]
Expand Down Expand Up @@ -321,7 +332,8 @@ def hybrid_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
n_bins=n_bins,
bins_per_octave=bins_per_octave,
tuning=tuning,
filter_scale=filter_scale)
filter_scale=filter_scale,
window=window)

# Determine which filters to use with Pseudo CQT
# These are the ones that fit within 2 hop lengths after padding
Expand All @@ -342,7 +354,8 @@ def hybrid_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
tuning=tuning,
filter_scale=filter_scale,
norm=norm,
sparsity=sparsity))
sparsity=sparsity,
window=window))

if n_bins_full > 0:
cqt_resp.append(np.abs(cqt(y, sr,
Expand All @@ -353,15 +366,16 @@ def hybrid_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
tuning=tuning,
filter_scale=filter_scale,
norm=norm,
sparsity=sparsity)))
sparsity=sparsity,
window=window)))

return __trim_stack(cqt_resp, n_bins)


@cache(level=20)
def pseudo_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
bins_per_octave=12, tuning=None, filter_scale=1,
norm=1, sparsity=0.01):
norm=1, sparsity=0.01, window='hann'):
'''Compute the pseudo constant-Q transform of an audio signal.

This uses a single fft size that is the smallest power of 2 that is greater
Expand Down Expand Up @@ -404,6 +418,10 @@ def pseudo_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,

Set `sparsity=0` to disable sparsification.

window : str, tuple, number, or function
Window specification for the basis filters.
See `filters.get_window` for details.


Returns
-------
Expand All @@ -417,7 +435,7 @@ def pseudo_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
`2**(n_bins / bins_per_octave)`

Or if `y` is too short to support the frequency range of the CQT.

Notes
-----
This function caches at level 20.
Expand All @@ -435,7 +453,8 @@ def pseudo_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
bins_per_octave,
tuning, filter_scale,
norm, sparsity,
hop_length=hop_length)
hop_length=hop_length,
window=window)

fft_basis = np.abs(fft_basis)

Expand All @@ -448,7 +467,8 @@ def pseudo_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,

@cache(level=10)
def __cqt_filter_fft(sr, fmin, n_bins, bins_per_octave, tuning,
filter_scale, norm, sparsity, hop_length=None):
filter_scale, norm, sparsity, hop_length=None,
window='hann'):
'''Generate the frequency domain constant-Q filter basis.'''

basis, lengths = filters.constant_q(sr,
Expand All @@ -458,12 +478,15 @@ def __cqt_filter_fft(sr, fmin, n_bins, bins_per_octave, tuning,
tuning=tuning,
filter_scale=filter_scale,
norm=norm,
pad_fft=True)
pad_fft=True,
window=window)

# Filters are padded up to the nearest integral power of 2
n_fft = basis.shape[1]

if hop_length is not None and n_fft < 2.0**(1 + np.ceil(np.log2(hop_length))):
if (hop_length is not None and
n_fft < 2.0**(1 + np.ceil(np.log2(hop_length)))):

n_fft = int(2.0 ** (1 + np.ceil(np.log2(hop_length))))

# re-normalize bases with respect to the FFT window length
Expand Down Expand Up @@ -529,10 +552,12 @@ def __early_downsample(y, sr, hop_length, res_type, n_octaves,
raise ParameterError('Input signal length={:d} is too short for '
'{:d}-octave CQT'.format(len(y), n_octaves))

# The additional scaling of sqrt(downsample_factor) here is to implicitly
# rescale the filters
y = np.sqrt(downsample_factor) * audio.resample(y, sr, sr / downsample_factor,
res_type=res_type, scale=True)
# The additional scaling of sqrt(downsample_factor) here is to
# implicitly rescale the filters
y = np.sqrt(downsample_factor) * audio.resample(y, sr,
sr / downsample_factor,
res_type=res_type,
scale=True)

sr /= downsample_factor

Expand Down
Loading