Skip to content

Commit

Permalink
refactoring spectrogram helper into core
Browse files Browse the repository at this point in the history
  • Loading branch information
bmcfee committed Jan 20, 2015
1 parent 859566f commit 5ddf210
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 35 deletions.
12 changes: 3 additions & 9 deletions librosa/core/pitch.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import numpy as np

from .spectrum import stft, ifgram
from .spectrum import ifgram, _spectrogram
from . import time_frequency
from .. import cache
from .. import util
Expand Down Expand Up @@ -302,7 +302,7 @@ def ifptrack(y, sr=22050, n_fft=4096, hop_length=None, fmin=None,

@cache
def piptrack(y=None, sr=22050, S=None, n_fft=4096, hop_length=None,
fmin=150.0, fmax=4000.0, threshold=.1):
fmin=150.0, fmax=4000.0, threshold=0.1):
'''Pitch tracking on thresholded parabolically-interpolated STFT
.. [1] https://ccrma.stanford.edu/~jos/sasp/Sinusoidal_Peak_Interpolation.html
Expand Down Expand Up @@ -364,18 +364,12 @@ def piptrack(y=None, sr=22050, S=None, n_fft=4096, hop_length=None,
if hop_length is None:
hop_length = int(n_fft / 4)

if S is None:
if y is None:
raise ValueError('Either "y" or "S" must be provided')
S = np.abs(stft(y, n_fft=n_fft, hop_length=hop_length))
S, n_fft = _spectrogram(y=y, S=S, n_fft=n_fft, hop_length=hop_length)

# Truncate to feasible region
fmin = np.maximum(fmin, 0)
fmax = np.minimum(fmax, float(sr) / 2)

# Pre-compute FFT frequencies
n_fft = 2 * (S.shape[0] - 1)

fft_freqs = time_frequency.fft_frequencies(sr=sr, n_fft=n_fft)

# Do the parabolic interpolation everywhere,
Expand Down
47 changes: 47 additions & 0 deletions librosa/core/spectrum.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,3 +639,50 @@ def perceptual_weighting(S, frequencies, **kwargs):
offset = time_frequency.A_weighting(frequencies).reshape((-1, 1))

return offset + logamplitude(S, **kwargs)


@cache
def _spectrogram(y=None, S=None, n_fft=2048, hop_length=512, power=1):
    '''Helper function to retrieve a magnitude spectrogram.

    This is primarily used in feature extraction functions that can operate on
    either audio time-series or spectrogram input.

    Parameters
    ----------
    y : None or np.ndarray [ndim=1]
        If provided, an audio time series.
        Only used when `S` is not provided.

    S : None or np.ndarray
        Spectrogram input, optional.
        If provided, it is returned unchanged: `y`, `hop_length` and `power`
        are ignored, and the input `n_fft` is replaced by a value inferred
        from the shape of `S`.

    n_fft : int > 0
        STFT window size

    hop_length : int > 0
        STFT hop length

    power : float > 0
        Exponent for the magnitude spectrogram,
        e.g., 1 for magnitude, 2 for power, etc.

    Returns
    -------
    S_out : np.ndarray
        - If `S` is provided as input, then `S_out == S`
        - Else, `S_out = |stft(y, n_fft=n_fft, hop_length=hop_length)|**power`

    n_fft : int > 0
        - If `S` is provided, then `n_fft` is inferred from `S`
          as `2 * (S.shape[0] - 1)`
        - Else, copied from input

    Raises
    ------
    ValueError
        If neither `y` nor `S` is provided
    '''

    if S is not None:
        # Infer n_fft from spectrogram shape
        n_fft = 2 * (S.shape[0] - 1)
    elif y is not None:
        # Otherwise, compute a magnitude spectrogram from input
        S = np.abs(stft(y, n_fft=n_fft, hop_length=hop_length))**power
    else:
        # Fail early with a clear message instead of passing None into stft
        raise ValueError('Either "y" or "S" must be provided')

    return S, n_fft
39 changes: 13 additions & 26 deletions librosa/feature/spectral.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,10 @@

from ..core.time_frequency import fft_frequencies
from ..core.audio import zero_crossings
from ..core.spectrum import stft, logamplitude
from ..core.spectrum import logamplitude, _spectrogram
from ..core.pitch import estimate_tuning


def __get_spec(y=None, sr=22050, S=None, n_fft=2048, hop_length=512, power=1):
    '''Helper function to retrieve a magnitude spectrogram.

    Parameters
    ----------
    y : None or np.ndarray
        Audio time series; only used when `S` is not provided

    sr : int
        Not used by this helper; accepted so that callers can forward
        their own `sr` argument

    S : None or np.ndarray
        Pre-computed spectrogram. If provided, it is returned unchanged
        and `y`, `hop_length` and `power` are ignored

    n_fft : int > 0
        STFT window size

    hop_length : int > 0
        STFT hop length

    power : float > 0
        Exponent applied to the STFT magnitude when computing from `y`

    Returns
    -------
    S : np.ndarray
        The input `S`, or `|stft(y)|**power` if `S` was not provided

    n_fft : int
        `2 * (S.shape[0] - 1)` if `S` was provided, else the input `n_fft`

    Raises
    ------
    ValueError
        If neither `y` nor `S` is provided
    '''

    if S is not None:
        # Infer n_fft from spectrogram shape
        n_fft = 2 * (S.shape[0] - 1)
    elif y is not None:
        # Otherwise, compute a magnitude spectrogram from input
        S = np.abs(stft(y, n_fft=n_fft, hop_length=hop_length))**power
    else:
        # Fail early with a clear message instead of passing None into stft
        raise ValueError('Either "y" or "S" must be provided')

    return S, n_fft


# -- Spectral features -- #
@cache
def spectral_centroid(y=None, sr=22050, S=None, n_fft=2048, hop_length=512,
Expand Down Expand Up @@ -93,7 +80,7 @@ def spectral_centroid(y=None, sr=22050, S=None, n_fft=2048, hop_length=512,
Instantaneous-frequency spectrogram
'''

S, n_fft = __get_spec(y=y, sr=sr, S=S, n_fft=n_fft, hop_length=hop_length)
S, n_fft = _spectrogram(y=y, S=S, n_fft=n_fft, hop_length=hop_length)

if not np.isrealobj(S):
raise ValueError('Spectral centroid is only defined '
Expand Down Expand Up @@ -178,7 +165,7 @@ def spectral_bandwidth(y=None, sr=22050, S=None, n_fft=2048, hop_length=512,
frequency bandwidth for each frame
'''

S, n_fft = __get_spec(y=y, sr=sr, S=S, n_fft=n_fft, hop_length=hop_length)
S, n_fft = _spectrogram(y=y, S=S, n_fft=n_fft, hop_length=hop_length)

if not np.isrealobj(S):
raise ValueError('Spectral bandwidth is only defined '
Expand Down Expand Up @@ -248,7 +235,7 @@ def spectral_contrast(y=None, sr=22050, S=None, n_fft=2048, hop_length=512,
octave-based frequency
'''

S, n_fft = __get_spec(y=y, sr=sr, S=S, n_fft=n_fft, hop_length=hop_length)
S, n_fft = _spectrogram(y=y, S=S, n_fft=n_fft, hop_length=hop_length)

# Compute the center frequencies of each bin
if freq is None:
Expand Down Expand Up @@ -338,7 +325,7 @@ def spectral_rolloff(y=None, sr=22050, S=None, n_fft=2048, hop_length=512,
roll-off frequency for each frame
'''

S, n_fft = __get_spec(y=y, sr=sr, S=S, n_fft=n_fft, hop_length=hop_length)
S, n_fft = _spectrogram(y=y, S=S, n_fft=n_fft, hop_length=hop_length)

if not np.isrealobj(S):
raise ValueError('Spectral centroid is only defined '
Expand Down Expand Up @@ -395,7 +382,7 @@ def rms(y=None, S=None, n_fft=2048, hop_length=512):
RMS value for each frame
'''

S, _ = __get_spec(y=y, S=S, n_fft=n_fft, hop_length=hop_length)
S, _ = _spectrogram(y=y, S=S, n_fft=n_fft, hop_length=hop_length)

return np.sqrt(np.mean(np.abs(S)**2, axis=0, keepdims=True))

Expand Down Expand Up @@ -453,7 +440,7 @@ def poly_features(y=None, sr=22050, S=None, n_fft=2048, hop_length=512,
polynomial coefficients for each frame
'''

S, n_fft = __get_spec(y=y, sr=sr, S=S, n_fft=n_fft, hop_length=hop_length)
S, n_fft = _spectrogram(y=y, S=S, n_fft=n_fft, hop_length=hop_length)

# Compute the center frequencies of each bin
if freq is None:
Expand Down Expand Up @@ -593,8 +580,8 @@ def logfsgram(y=None, sr=22050, S=None, n_fft=4096, hop_length=512, **kwargs):
`P[f, t]` contains the energy at pitch bin `f`, frame `t`.
'''

S, n_fft = __get_spec(y=y, sr=sr, S=S, n_fft=n_fft, hop_length=hop_length,
power=2)
S, n_fft = _spectrogram(y=y, S=S, n_fft=n_fft, hop_length=hop_length,
power=2)

# If we don't have tuning already, grab it from S
if 'tuning' not in kwargs:
Expand Down Expand Up @@ -678,8 +665,8 @@ def chromagram(y=None, sr=22050, S=None, norm=np.inf, n_fft=2048,
Vector normalization
"""

S, n_fft = __get_spec(y=y, sr=sr, S=S, n_fft=n_fft, hop_length=hop_length,
power=2)
S, n_fft = _spectrogram(y=y, S=S, n_fft=n_fft, hop_length=hop_length,
power=2)

n_chroma = kwargs.get('n_chroma', 12)

Expand Down Expand Up @@ -828,8 +815,8 @@ def melspectrogram(y=None, sr=22050, S=None, n_fft=2048, hop_length=512,
Short-time Fourier Transform
"""

S, n_fft = __get_spec(y=y, sr=sr, S=S, n_fft=n_fft, hop_length=hop_length,
power=2)
S, n_fft = _spectrogram(y=y, S=S, n_fft=n_fft, hop_length=hop_length,
power=2)

# Build a Mel filter
mel_basis = filters.mel(sr, n_fft, **kwargs)
Expand Down

0 comments on commit 5ddf210

Please sign in to comment.