pushed mel guts into main

librosa · Nov 5, 2012 · 9238552 · 9238552
1 parent df87b62
commit 9238552
Show file tree

Hide file tree

Showing 3 changed files with 81 additions and 75 deletions.
diff --git a/librosa/__init__.py b/librosa/__init__.py
@@ -129,3 +129,78 @@ def istft(d, n_fft=None, hann_w=None, hop=None):
         pass
 
     return x
+
+# Dead-simple conversion between Hz and the mel frequency scale
+def hz_to_mel(f):
+    return 2595.0 * numpy.log10(1 + f / 700.0)
+
+def mel_to_hz(z):
+    return 700.0 * (10.0**(z / 2595.0) - 1.0)
+
+# Stolen from ronw's mfcc.py
+# https://github.com/ronw/frontend/blob/master/mfcc.py
+
+def melfb(samplerate, nfft, nfilts=20, width=1.0, fmin=0, fmax=None):
+    """Create a Filterbank matrix to combine FFT bins into Mel-frequency bins.
+
+    Parameters
+    ----------
+    samplerate : int
+        Sampling rate of the incoming signal.
+    nfft : int
+        FFT length to use.
+    nfilts : int
+        Number of Mel bands to use.
+    width : float
+        The constant width of each band relative to standard Mel. Defaults to 1.0.
+    fmin : float
+        Frequency in Hz of the lowest edge of the Mel bands. Defaults to 0.
+    fmax : float
+        Frequency in Hz of the upper edge of the Mel bands. Defaults
+        to `samplerate` / 2.
+
+    See Also
+    --------
+    Filterbank
+    MelSpec
+    """
+
+    if fmax is None:
+        fmax = samplerate / 2
+        pass
+
+    # Initialize the weights
+#     wts = numpy.zeros((nfilts, nfft / 2 + 1))
+    wts         = numpy.zeros( (nfilts, nfft) )
+
+    # Center freqs of each FFT bin
+#     fftfreqs = numpy.arange(nfft / 2 + 1, dtype=numpy.double) / nfft * samplerate
+    fftfreqs    = numpy.arange( wts.shape[1], dtype=numpy.double ) / nfft * samplerate
+
+    # 'Center freqs' of mel bands - uniformly spaced between limits
+    minmel      = hz_to_mel(fmin)
+    maxmel      = hz_to_mel(fmax)
+    binfreqs    = mel_to_hz(minmel + numpy.arange((nfilts+2), dtype=numpy.double) / (nfilts+1) * (maxmel - minmel))
+
+    for i in xrange(nfilts):
+        freqs       = binfreqs[i + numpy.arange(3)]
+
+        # scale by width
+        freqs       = freqs[1] + width * (freqs - freqs[1])
+
+        # lower and upper slopes for all bins
+        loslope     = (fftfreqs - freqs[0]) / (freqs[1] - freqs[0])
+        hislope     = (freqs[2] - fftfreqs) / (freqs[2] - freqs[1])
+
+        # .. then intersect them with each other and zero
+        wts[i,:]    = numpy.maximum(0, numpy.minimum(loslope, hislope))
+
+        pass
+
+    # Slaney-style mel is scaled to have approximately constant energy per channel
+    enorm   = 2.0 / (binfreqs[2:nfilts+2] - binfreqs[:nfilts])
+    wts     = numpy.dot(numpy.diag(enorm), wts)
+
+    return wts
+
+
diff --git a/librosa/_mfcc.py b/librosa/_mfcc.py
@@ -6,6 +6,7 @@
 http://www.ee.columbia.edu/~dpwe/resources/matlab/rastamat/
 '''
 import numpy as np
+import librosa
 
 def mfcc(framevector, samplerate, winfun=np.hamming, nmel=20, width=1.0, fmin=0, fmax=None):
     '''Given a frame of arbitrary length and sample rate, compute the MFCCs
@@ -30,77 +31,7 @@ def mfcc(framevector, samplerate, winfun=np.hamming, nmel=20, width=1.0, fmin=0,
     nfft = len(framevector)
     F = np.abs(np.fft.fft(framevector * winfun(nfft)))
     # transfermation matrix from FFT bin to mel bin
-    fft2melmx = melfb(samplerate, nfft, nmel, width, fmin, fmax)
+    fft2melmx = librosa.melfb(samplerate, nfft, nmel, width, fmin, fmax)
     # hope the dimension not messed up
     return np.dot(fft2melmx, F) 
 
-# Stolen from ronw's mfcc.py
-# https://github.com/ronw/frontend/blob/master/mfcc.py
-def _hz_to_mel(f):
-    return 2595.0 * np.log10(1 + f / 700.0)
-
-def _mel_to_hz(z):
-    return 700.0 * (10.0**(z / 2595.0) - 1.0)
-
-def melfb(samplerate, nfft, nfilts=20, width=1.0, fmin=0, fmax=None):
-    """Create a Filterbank matrix to combine FFT bins into Mel-frequency bins.
-
-    Parameters
-    ----------
-    samplerate : int
-        Sampling rate of the incoming signal.
-    nfft : int
-        FFT length to use.
-    nfilts : int
-        Number of Mel bands to use.
-    width : float
-        The constant width of each band relative to standard Mel. Defaults 1.0.
-    fmin : float
-        Frequency in Hz of the lowest edge of the Mel bands. Defaults to 0.
-    fmax : float
-        Frequency in Hz of the upper edge of the Mel bands. Defaults
-        to `samplerate` / 2.
-
-    See Also
-    --------
-    Filterbank
-    MelSpec
-    """
-
-    if fmax is None:
-        fmax = samplerate / 2
-
-    # Initialize the weights
-#     wts = np.zeros((nfilts, nfft / 2 + 1))
-    wts = np.zeros( (nfilts, nfft) )
-
-    # Center freqs of each FFT bin
-#     fftfreqs = np.arange(nfft / 2 + 1, dtype=np.double) / nfft * samplerate
-    fftfreqs = np.arange( wts.shape[1], dtype=np.double ) / nfft * samplerate
-
-    # 'Center freqs' of mel bands - uniformly spaced between limits
-    minmel      = _hz_to_mel(fmin)
-    maxmel      = _hz_to_mel(fmax)
-    binfreqs    = _mel_to_hz(minmel + np.arange((nfilts+2), dtype=np.double) / (nfilts+1) * (maxmel - minmel))
-
-    for i in xrange(nfilts):
-        freqs       = binfreqs[i + np.arange(3)]
-
-        # scale by width
-        freqs       = freqs[1] + width * (freqs - freqs[1])
-
-        # lower and upper slopes for all bins
-        loslope     = (fftfreqs - freqs[0]) / (freqs[1] - freqs[0])
-        hislope     = (freqs[2] - fftfreqs) / (freqs[2] - freqs[1])
-
-        # .. then intersect them with each other and zero
-        wts[i,:]    = np.maximum(0, np.minimum(loslope, hislope))
-
-        pass
-
-    # Slaney-style mel is scaled to be approx constant E per channel
-    enorm   = 2.0 / (binfreqs[2:nfilts+2] - binfreqs[:nfilts])
-    wts     = np.dot(np.diag(enorm), wts)
-
-    return wts
-
diff --git a/librosa/tf_agc.py b/librosa/tf_agc.py
@@ -11,7 +11,7 @@
 import numpy
 import scipy
 import _mfcc
-import stft
+import librosa
 
 def tf_agc(frame_iterator, sample_rate=22050, **kwargs):
     '''
@@ -79,7 +79,7 @@ def tf_agc(frame_iterator, sample_rate=22050, **kwargs):
             if f2a is None: 
                 # initialize the mel filter bank after grabbing the first frame
 
-                f2a = _mfcc.melfb(sample_rate, len(frame), num_frequency_bands, mel_filter_width)
+                f2a = librosa.melfb(sample_rate, len(frame), num_frequency_bands, mel_filter_width)
                 f2a = f2a[:,:(round(len(frame)/2) + 1)]
 
                 #% map back to FFT grid, flatten bark loop gain
@@ -97,7 +97,7 @@ def tf_agc(frame_iterator, sample_rate=22050, **kwargs):
 
             # FFT each frame
 #             D = scipy.fft(frame)
-            D = stft.stft(frame, n_fft=len(frame))
+            D = librosa.stft(frame, n_fft=len(frame))
             # multiply by f2a
             audiogram = numpy.dot(f2a, numpy.abs(D))
 
@@ -117,7 +117,7 @@ def tf_agc(frame_iterator, sample_rate=22050, **kwargs):
             E[E<=0.0] = min(E[E>0.0])
 
             #% invert back to waveform
-            y = stft.istft(D/E);
+            y = librosa.istft(D/E);
 #             y = numpy.real(scipy.ifft(D/E))
 
             yield y