Skip to content

Commit

Permalink
Merge 19fa5a8 into 4a5d468
Browse files Browse the repository at this point in the history
  • Loading branch information
bmcfee committed Feb 9, 2017
2 parents 4a5d468 + 19fa5a8 commit f2ce5b6
Show file tree
Hide file tree
Showing 5 changed files with 363 additions and 18 deletions.
221 changes: 217 additions & 4 deletions librosa/core/constantq.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@
from .. import filters
from .. import util
from ..util.exceptions import ParameterError
from ..util.decorators import optional_jit

__all__ = ['cqt', 'hybrid_cqt', 'pseudo_cqt']
__all__ = ['cqt', 'hybrid_cqt', 'pseudo_cqt', 'icqt']


@cache(level=20)
Expand Down Expand Up @@ -244,9 +245,11 @@ def cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,

# The additional scaling of sqrt(2) here is to implicitly rescale
# the filters
my_y = np.sqrt(2) * audio.resample(my_y, my_sr, my_sr/2.0,
res_type=res_type,
scale=True)
my_y = audio.resample(my_y, my_sr, my_sr/2.0,
res_type=res_type,
scale=True)
my_y[:] *= np.sqrt(2)

my_sr /= 2.0
my_hop //= 2

Expand Down Expand Up @@ -516,6 +519,203 @@ def pseudo_cqt(y, sr=22050, hop_length=512, fmin=None, n_bins=84,
return C


@cache(level=40)
def icqt(C, sr=22050, hop_length=512, fmin=None,
bins_per_octave=12,
tuning=0.0,
filter_scale=1,
norm=1,
sparsity=0.01,
window='hann',
scale=True,
amin=np.sqrt(2)):
'''Compute the inverse constant-Q transform.
Given a constant-Q transform representation `C` of an audio signal `y`,
this function produces an approximation `y_hat`.
.. warning:: This implementation is unstable, and subject to change in
future versions of librosa. We recommend that its use be
limited to sonification and diagnostic applications.
Parameters
----------
C : np.ndarray, [shape=(n_bins, n_frames)]
Constant-Q representation as produced by `core.cqt`
hop_length : int > 0 [scalar]
number of samples between successive frames
fmin : float > 0 [scalar]
Minimum frequency. Defaults to C1 ~= 32.70 Hz
tuning : float in `[-0.5, 0.5)` [scalar]
Tuning offset in fractions of a bin (cents).
filter_scale : float > 0 [scalar]
Filter scale factor. Small values (<1) use shorter windows
for improved time resolution.
norm : {inf, -inf, 0, float > 0}
Type of norm to use for basis function normalization.
See `librosa.util.normalize`.
sparsity : float in [0, 1)
Sparsify the CQT basis by discarding up to `sparsity`
fraction of the energy in each basis.
Set `sparsity=0` to disable sparsification.
window : str, tuple, number, or function
Window specification for the basis filters.
See `filters.get_window` for details.
scale : bool
If `True`, scale the CQT response by square-root the length
of each channel's filter. This is analogous to `norm='ortho'` in FFT.
If `False`, do not scale the CQT. This is analogous to `norm=None`
in FFT.
amin : float or None
When applying squared window normalization, sample positions with
coefficients below `amin` will left as is.
If `None`, then `amin` is inferred as the smallest valid floating
point value.
Returns
-------
y : np.ndarray, [shape=(n_samples), dtype=np.float]
Audio time-series reconstructed from the CQT representation.
See Also
--------
cqt
Notes
-----
This function caches at level 40.
Examples
--------
Using default parameters
>>> y, sr = librosa.load(librosa.util.example_audio_file(), duration=15)
>>> C = librosa.cqt(y=y, sr=sr)
>>> y_hat = librosa.icqt(C=C, sr=sr)
Or with a different hop length and frequency resolution:
>>> hop_length = 256
>>> bins_per_octave = 12 * 3
>>> C = librosa.cqt(y=y, sr=sr, hop_length=256, n_bins=7*bins_per_octave,
... bins_per_octave=bins_per_octave)
>>> y_hat = librosa.icqt(C=C, sr=sr, hop_length=hop_length,
... bins_per_octave=bins_per_octave)
'''
n_bins, n_frames = C.shape
n_octaves = int(np.ceil(float(n_bins) / bins_per_octave))

if amin is None:
amin = util.tiny(C)

if fmin is None:
fmin = note_to_hz('C1')

freqs = cqt_frequencies(n_bins,
fmin,
bins_per_octave=bins_per_octave,
tuning=tuning)[-bins_per_octave:]

fmin_t = np.min(freqs)

# Make the filter bank
basis, lengths = filters.constant_q(sr=sr,
fmin=fmin_t,
n_bins=bins_per_octave,
bins_per_octave=bins_per_octave,
filter_scale=filter_scale,
tuning=tuning,
norm=norm,
window=window,
pad_fft=True)
n_fft = basis.shape[1]

# The extra factor of lengths**0.5 corrects for within-octave tapering
# The factor of sqrt(2) compensates for downsampling effects
basis = basis.conj() * lengths[:, np.newaxis]**0.5 / np.sqrt(2)
n_trim = basis.shape[1] // 2

if scale:
Cnorm = np.ones(n_bins)[:, np.newaxis]
else:
Cnorm = filters.constant_q_lengths(sr=sr,
fmin=fmin,
n_bins=n_bins,
bins_per_octave=bins_per_octave,
filter_scale=filter_scale,
tuning=tuning,
window=window)[:, np.newaxis]**0.5

y = None

# Revised algorithm:
# for each octave
# upsample old octave
# @--numba accelerate this loop?
# for each basis
# convolve with activation (valid-mode)
# divide by window sumsquare
# trim and add to total

for octave in range(n_octaves - 1, -1, -1):
# Compute the slice index for the current octave
slice_ = slice(-(octave+1) * bins_per_octave - 1,
-(octave) * bins_per_octave - 1)

# Project onto the basis
C_oct = C[slice_] / Cnorm[slice_]
basis_oct = basis[-C_oct.shape[0]:]

y_oct = None

# Make a dummy activation
oct_hop = hop_length // 2**octave
n = n_fft + (C_oct.shape[1] - 1) * oct_hop

for i in range(basis_oct.shape[0]-1, -1, -1):
wss = filters.window_sumsquare(window,
n_frames,
hop_length=oct_hop,
win_length=lengths[i],
n_fft=n_fft)

# Construct the response for this filter
y_oct_i = np.zeros(n, dtype=C_oct.dtype)
__activation_fill(y_oct_i, basis_oct[i], C_oct[i], oct_hop)
# Retain only the real part
# Only do squared window normalization for sufficiently large window
# coefficients
y_oct_i = y_oct_i.real / np.maximum(amin, wss)

if y_oct is None:
y_oct = y_oct_i
else:
y_oct += y_oct_i

# Remove the effects of zero-padding
y_oct = y_oct[n_trim:-n_trim]

if y is None:
y = y_oct
else:
# Up-sample the previous buffer and add in the new one
# Scipy-resampling is fast here, since it's a power-of-two relation
y = audio.resample(y, 1, 2, scale=True, res_type='scipy') + y_oct

return y


@cache(level=10)
def __cqt_filter_fft(sr, fmin, n_bins, bins_per_octave, tuning,
filter_scale, norm, sparsity, hop_length=None,
Expand Down Expand Up @@ -631,3 +831,16 @@ def __num_two_factors(x):
x //= 2

return num_twos


@optional_jit(nopython=True)
def __activation_fill(x, basis, activation, hop_length):
'''Helper function for icqt time-domain reconstruction'''

n = len(x)
n_fft = len(basis)
n_frames = len(activation)

for i in range(n_frames):
sample = i * hop_length
x[sample:min(n, sample + n_fft)] += activation[i] * basis[:max(0, min(n_fft, n - sample))]
12 changes: 8 additions & 4 deletions librosa/core/spectrum.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from ..util.decorators import moved
from ..util.deprecation import rename_kw, Deprecated
from ..util.exceptions import ParameterError
from ..filters import get_window
from ..filters import get_window, window_sumsquare

__all__ = ['stft', 'istft', 'magphase',
'ifgram', 'phase_vocoder',
Expand Down Expand Up @@ -277,8 +277,6 @@ def istft(stft_matrix, hop_length=None, win_length=None, window='hann',
n_frames = stft_matrix.shape[1]
expected_signal_len = n_fft + hop_length * (n_frames - 1)
y = np.zeros(expected_signal_len, dtype=dtype)
ifft_window_sum = np.zeros(expected_signal_len, dtype=dtype)
ifft_window_square = ifft_window * ifft_window

for i in range(n_frames):
sample = i * hop_length
Expand All @@ -287,9 +285,15 @@ def istft(stft_matrix, hop_length=None, win_length=None, window='hann',
ytmp = ifft_window * fft.ifft(spec).real

y[sample:(sample + n_fft)] = y[sample:(sample + n_fft)] + ytmp
ifft_window_sum[sample:(sample + n_fft)] += ifft_window_square

# Normalize by sum of squared window
ifft_window_sum = window_sumsquare(window,
n_frames,
win_length=win_length,
n_fft=n_fft,
hop_length=hop_length,
dtype=dtype)

approx_nonzero_indices = ifft_window_sum > util.tiny(ifft_window_sum)
y[approx_nonzero_indices] /= ifft_window_sum[approx_nonzero_indices]

Expand Down
91 changes: 90 additions & 1 deletion librosa/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
constant_q_lengths
cq_to_chroma
window_sumsquare
"""
import warnings

Expand All @@ -40,6 +42,7 @@

from . import cache
from . import util
from .util.decorators import optional_jit
from .util.exceptions import ParameterError

from .core.time_frequency import note_to_hz, hz_to_midi, hz_to_octs
Expand All @@ -52,7 +55,8 @@
'constant_q_lengths',
'cq_to_chroma',
'window_bandwidth',
'get_window']
'get_window',
'window_sumsquare']


# Dictionary of window function bandwidths
Expand Down Expand Up @@ -872,3 +876,88 @@ def get_window(window, Nx, fftbins=True):
'{:d} != {:d}'.format(len(window), Nx))
else:
raise ParameterError('Invalid window specification: {}'.format(window))


@optional_jit(nopython=True)
def __window_ss_fill(x, win_sq, n_frames, hop_length):
'''Helper function for window sum-square calculation.'''

n = len(x)
n_fft = len(win_sq)
for i in range(n_frames):
sample = i * hop_length
x[sample:min(n, sample + n_fft)] += win_sq[:max(0, min(n_fft, n - sample))]
pass


def window_sumsquare(window, n_frames, hop_length=512, win_length=None, n_fft=2048,
dtype=np.float32):
'''
Compute the sum-square envelope of a window function at a given hop length.
This is used to estimate modulation effects induced by windowing observations
in short-time fourier transforms.
Parameters
----------
window : string, tuple, number, callable, or list-like
Window specification, as in `get_window`
n_frames : int > 0
The number of analysis frames
hop_length : int > 0
The number of samples to advance between frames
win_length : [optional]
The length of the window function. By default, this matches `n_fft`.
n_fft : int > 0
The length of each analysis frame.
dtype : np.dtype
The data type of the output
Returns
-------
wss : np.ndarray, shape=`(n_fft + hop_length * (n_frames - 1))`
The sum-squared envelope of the window function
Examples
--------
For a fixed frame length (2048), compare modulation effects for a Hann window
at different hop lengths:
>>> n_frames = 50
>>> wss_256 = librosa.filters.window_sumsquare('hann', n_frames, hop_length=256)
>>> wss_512 = librosa.filters.window_sumsquare('hann', n_frames, hop_length=512)
>>> wss_1024 = librosa.filters.window_sumsquare('hann', n_frames, hop_length=1024)
>>> import matplotlib.pyplot as plt
>>> plt.figure()
>>> plt.subplot(3,1,1)
>>> plt.plot(wss_256)
>>> plt.title('hop_length=256')
>>> plt.subplot(3,1,2)
>>> plt.plot(wss_512)
>>> plt.title('hop_length=512')
>>> plt.subplot(3,1,3)
>>> plt.plot(wss_1024)
>>> plt.title('hop_length=1024')
>>> plt.tight_layout()
'''
if win_length is None:
win_length = n_fft

n = n_fft + hop_length * (n_frames - 1)
x = np.zeros(n, dtype=dtype)

# Compute the squared window at the desired length
win_sq = get_window(window, win_length)**2
win_sq = util.pad_center(win_sq, n_fft)

# Fill the envelope
__window_ss_fill(x, win_sq, n_frames, hop_length)

return x

0 comments on commit f2ce5b6

Please sign in to comment.