Skip to content

Commit

Permalink
pushed mel guts into main
Browse files Browse the repository at this point in the history
  • Loading branch information
bmcfee committed Nov 5, 2012
1 parent df87b62 commit 9238552
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 75 deletions.
75 changes: 75 additions & 0 deletions librosa/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,78 @@ def istft(d, n_fft=None, hann_w=None, hop=None):
pass

return x

# Dead-simple mel spectrum conversion
def hz_to_mel(f):
    """Convert frequencies from Hz to the mel scale.

    Computes ``2595 * log10(1 + f / 700)`` elementwise.

    Parameters
    ----------
    f : scalar or array-like
        Frequency (or frequencies) in Hz.  Plain Python sequences are
        accepted and converted with ``numpy.asarray``.

    Returns
    -------
    mel : numpy scalar or numpy.ndarray
        Mel value(s), with the same shape as `f`.
    """
    # asarray lets lists/tuples through; existing ndarrays are not copied.
    f = numpy.asarray(f)
    return 2595.0 * numpy.log10(1.0 + f / 700.0)

def mel_to_hz(z):
    """Convert mel-scale values back to frequencies in Hz.

    Computes ``700 * (10 ** (z / 2595) - 1)`` elementwise; this is the
    algebraic inverse of ``2595 * log10(1 + f / 700)``.

    Parameters
    ----------
    z : scalar or array-like
        Mel value(s).  Plain Python sequences are accepted and converted
        with ``numpy.asarray``.

    Returns
    -------
    hz : numpy scalar or numpy.ndarray
        Frequency (or frequencies) in Hz, with the same shape as `z`.
    """
    # asarray lets lists/tuples through; existing ndarrays are not copied.
    z = numpy.asarray(z)
    return 700.0 * (10.0 ** (z / 2595.0) - 1.0)

# Stolen from ronw's mfcc.py
# https://github.com/ronw/frontend/blob/master/mfcc.py

def melfb(samplerate, nfft, nfilts=20, width=1.0, fmin=0, fmax=None):
    """Create a filterbank matrix to combine FFT bins into Mel-frequency bins.

    Each row is a triangular filter whose edges and peak are taken from
    ``nfilts + 2`` points spaced uniformly on the mel scale between `fmin`
    and `fmax`.  Rows are then scaled by ``2 / (upper_edge - lower_edge)``.

    Parameters
    ----------
    samplerate : int
        Sampling rate of the incoming signal.
    nfft : int
        FFT length to use.
    nfilts : int
        Number of Mel bands to use.
    width : float
        The constant width of each band relative to standard Mel.
        Defaults to 1.0.
    fmin : float
        Frequency in Hz of the lowest edge of the Mel bands. Defaults to 0.
    fmax : float
        Frequency in Hz of the upper edge of the Mel bands. Defaults
        to `samplerate` / 2.

    Returns
    -------
    wts : numpy.ndarray, shape (nfilts, nfft)
        Filterbank weights.  Columns correspond to all `nfft` FFT bins
        (bin ``k`` sits at ``k * samplerate / nfft`` Hz), not only the
        first ``nfft / 2 + 1``; slice the columns if only the lower half
        of the spectrum is needed.

    See Also
    --------
    hz_to_mel
    mel_to_hz
    """
    if fmax is None:
        # Float division: under Python 2 an odd integer sample rate would
        # otherwise be truncated.
        fmax = samplerate / 2.0

    # One column per FFT bin (full spectrum, see Returns).
    wts = numpy.zeros((nfilts, nfft))

    # Center frequency of each FFT bin.
    fftfreqs = numpy.arange(wts.shape[1], dtype=numpy.double) / nfft * samplerate

    # Band edges/centers: nfilts + 2 points uniformly spaced in mel.
    minmel = hz_to_mel(fmin)
    maxmel = hz_to_mel(fmax)
    steps = numpy.arange(nfilts + 2, dtype=numpy.double) / (nfilts + 1)
    binfreqs = mel_to_hz(minmel + steps * (maxmel - minmel))

    # range (not xrange) so the loop runs on both Python 2 and Python 3.
    for i in range(nfilts):
        # (lower edge, center, upper edge) of band i.
        freqs = binfreqs[i:i + 3]

        # Scale the band about its center by `width`.
        freqs = freqs[1] + width * (freqs - freqs[1])

        # Rising and falling slopes evaluated at every FFT bin.
        loslope = (fftfreqs - freqs[0]) / (freqs[1] - freqs[0])
        hislope = (freqs[2] - fftfreqs) / (freqs[2] - freqs[1])

        # Triangle = min of the two slopes, clipped at zero.
        wts[i, :] = numpy.maximum(0, numpy.minimum(loslope, hislope))

    # Slaney-style mel is scaled to be approx constant E per channel.
    enorm = 2.0 / (binfreqs[2:nfilts + 2] - binfreqs[:nfilts])

    # Row-wise scaling by broadcasting; same result as dot(diag(enorm), wts)
    # without building an nfilts x nfilts matrix.
    return enorm[:, numpy.newaxis] * wts


73 changes: 2 additions & 71 deletions librosa/_mfcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
http://www.ee.columbia.edu/~dpwe/resources/matlab/rastamat/
'''
import numpy as np
import librosa

def mfcc(framevector, samplerate, winfun=np.hamming, nmel=20, width=1.0, fmin=0, fmax=None):
'''Given a frame of arbitrary length and sample rate, compute the MFCCs
Expand All @@ -30,77 +31,7 @@ def mfcc(framevector, samplerate, winfun=np.hamming, nmel=20, width=1.0, fmin=0,
nfft = len(framevector)
F = np.abs(np.fft.fft(framevector * winfun(nfft)))
# transformation matrix from FFT bins to mel bins
fft2melmx = melfb(samplerate, nfft, nmel, width, fmin, fmax)
fft2melmx = librosa.melfb(samplerate, nfft, nmel, width, fmin, fmax)
# hope the dimension not messed up
return np.dot(fft2melmx, F)

# Stolen from ronw's mfcc.py
# https://github.com/ronw/frontend/blob/master/mfcc.py
def _hz_to_mel(f):
    """Convert frequencies from Hz to the mel scale.

    Computes ``2595 * log10(1 + f / 700)`` elementwise.

    Parameters
    ----------
    f : scalar or array-like
        Frequency (or frequencies) in Hz.  Plain Python sequences are
        accepted and converted with ``np.asarray``.

    Returns
    -------
    mel : numpy scalar or numpy.ndarray
        Mel value(s), with the same shape as `f`.
    """
    # asarray lets lists/tuples through; existing ndarrays are not copied.
    f = np.asarray(f)
    return 2595.0 * np.log10(1.0 + f / 700.0)

def _mel_to_hz(z):
    """Convert mel-scale values back to frequencies in Hz.

    Computes ``700 * (10 ** (z / 2595) - 1)`` elementwise; this is the
    algebraic inverse of ``2595 * log10(1 + f / 700)``.

    Parameters
    ----------
    z : scalar or array-like
        Mel value(s).  Plain Python sequences are accepted and converted
        with ``np.asarray``.

    Returns
    -------
    hz : numpy scalar or numpy.ndarray
        Frequency (or frequencies) in Hz, with the same shape as `z`.
    """
    # asarray lets lists/tuples through; existing ndarrays are not copied.
    z = np.asarray(z)
    return 700.0 * (10.0 ** (z / 2595.0) - 1.0)

def melfb(samplerate, nfft, nfilts=20, width=1.0, fmin=0, fmax=None):
    """Create a filterbank matrix to combine FFT bins into Mel-frequency bins.

    Each row is a triangular filter whose edges and peak are taken from
    ``nfilts + 2`` points spaced uniformly on the mel scale between `fmin`
    and `fmax`.  Rows are then scaled by ``2 / (upper_edge - lower_edge)``.

    Parameters
    ----------
    samplerate : int
        Sampling rate of the incoming signal.
    nfft : int
        FFT length to use.
    nfilts : int
        Number of Mel bands to use.
    width : float
        The constant width of each band relative to standard Mel.
        Defaults to 1.0.
    fmin : float
        Frequency in Hz of the lowest edge of the Mel bands. Defaults to 0.
    fmax : float
        Frequency in Hz of the upper edge of the Mel bands. Defaults
        to `samplerate` / 2.

    Returns
    -------
    wts : numpy.ndarray, shape (nfilts, nfft)
        Filterbank weights.  Columns correspond to all `nfft` FFT bins
        (bin ``k`` sits at ``k * samplerate / nfft`` Hz), not only the
        first ``nfft / 2 + 1``; slice the columns if only the lower half
        of the spectrum is needed.

    See Also
    --------
    _hz_to_mel
    _mel_to_hz
    """
    if fmax is None:
        # Float division: under Python 2 an odd integer sample rate would
        # otherwise be truncated.
        fmax = samplerate / 2.0

    # One column per FFT bin (full spectrum, see Returns).
    wts = np.zeros((nfilts, nfft))

    # Center frequency of each FFT bin.
    fftfreqs = np.arange(wts.shape[1], dtype=np.double) / nfft * samplerate

    # Band edges/centers: nfilts + 2 points uniformly spaced in mel.
    minmel = _hz_to_mel(fmin)
    maxmel = _hz_to_mel(fmax)
    steps = np.arange(nfilts + 2, dtype=np.double) / (nfilts + 1)
    binfreqs = _mel_to_hz(minmel + steps * (maxmel - minmel))

    # range (not xrange) so the loop runs on both Python 2 and Python 3.
    for i in range(nfilts):
        # (lower edge, center, upper edge) of band i.
        freqs = binfreqs[i:i + 3]

        # Scale the band about its center by `width`.
        freqs = freqs[1] + width * (freqs - freqs[1])

        # Rising and falling slopes evaluated at every FFT bin.
        loslope = (fftfreqs - freqs[0]) / (freqs[1] - freqs[0])
        hislope = (freqs[2] - fftfreqs) / (freqs[2] - freqs[1])

        # Triangle = min of the two slopes, clipped at zero.
        wts[i, :] = np.maximum(0, np.minimum(loslope, hislope))

    # Slaney-style mel is scaled to be approx constant E per channel.
    enorm = 2.0 / (binfreqs[2:nfilts + 2] - binfreqs[:nfilts])

    # Row-wise scaling by broadcasting; same result as dot(diag(enorm), wts)
    # without building an nfilts x nfilts matrix.
    return enorm[:, np.newaxis] * wts

8 changes: 4 additions & 4 deletions librosa/tf_agc.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import numpy
import scipy
import _mfcc
import stft
import librosa

def tf_agc(frame_iterator, sample_rate=22050, **kwargs):
'''
Expand Down Expand Up @@ -79,7 +79,7 @@ def tf_agc(frame_iterator, sample_rate=22050, **kwargs):
if f2a is None:
# initialize the mel filter bank after grabbing the first frame

f2a = _mfcc.melfb(sample_rate, len(frame), num_frequency_bands, mel_filter_width)
f2a = librosa.melfb(sample_rate, len(frame), num_frequency_bands, mel_filter_width)
f2a = f2a[:,:(round(len(frame)/2) + 1)]

#% map back to FFT grid, flatten bark loop gain
Expand All @@ -97,7 +97,7 @@ def tf_agc(frame_iterator, sample_rate=22050, **kwargs):

# FFT each frame
# D = scipy.fft(frame)
D = stft.stft(frame, n_fft=len(frame))
D = librosa.stft(frame, n_fft=len(frame))
# multiply by f2a
audiogram = numpy.dot(f2a, numpy.abs(D))

Expand All @@ -117,7 +117,7 @@ def tf_agc(frame_iterator, sample_rate=22050, **kwargs):
E[E<=0.0] = min(E[E>0.0])

#% invert back to waveform
y = stft.istft(D/E);
y = librosa.istft(D/E);
# y = numpy.real(scipy.ifft(D/E))

yield y
Expand Down

0 comments on commit 9238552

Please sign in to comment.