Added fmin/fmax to melspectrogram; fixed logamplitude (gain_threshold is now applied relative to the peak value)

librosa · Mar 22, 2013 · commit 14056d4 · 1 parent c3accf3
Show file tree

Hide file tree

Showing 4 changed files with 78 additions and 64 deletions.
diff --git a/librosa/__init__.py b/librosa/__init__.py
@@ -193,11 +193,10 @@ def logamplitude(S, amin=1e-10, gain_threshold=-80.0):
         D                   =   S in dBs
     '''
 
-    SCALE   =   20.0
-    D       =   SCALE * numpy.log10(numpy.maximum(amin, numpy.abs(S)))
+    D       =   20.0 * numpy.log10(numpy.maximum(amin, numpy.abs(S)))
 
     if gain_threshold is not None:
-        D[D < gain_threshold] = gain_threshold
+        D = numpy.maximum(D, D.max() + gain_threshold)
         pass
 
     return D
@@ -219,36 +218,6 @@ def frames_to_time(frames, sr=22050, hop_length=64):
     return frames * float(hop_length) / float(sr)
 
 
-def feature_sync(X, F, agg=numpy.mean):
-    '''
-    Synchronous aggregation of a feature matrix
-
-    Input:
-        X:      d-by-T              | feature matrix 
-        F:      t-vector            | (ordered) array of frame numbers
-        agg:    aggregator function | default: numpy.mean
-
-    Output:
-        Y:      d-by-(<=t+1) vector
-        where 
-                Y[:, i] = agg(X[:, F[i-1]:F[i]], axis=1)
-
-        In order to ensure total coverage, boundary points are added to F
-    '''
-
-    F = numpy.unique(numpy.concatenate( ([0], F, [X.shape[1]]) ))
-
-    Y = numpy.zeros( (X.shape[0], len(F)-1) )
-
-    lb = F[0]
-
-    for (i, ub) in enumerate(F[1:]):
-        Y[:, i] = agg(X[:, lb:ub], axis=1)
-        lb = ub
-        pass
-
-    return Y
-
 def pad(w, d_pad, v=0.0, center=True):
     '''
     Pad a vector w out to d dimensions, using value v

diff --git a/librosa/beat.py b/librosa/beat.py
@@ -185,16 +185,16 @@ def onset_estimate_bpm(onsets, start_bpm, sr, hop_length):
 
     return start_bpm
 
-def onset_strength_percussive(y, sr=22050, window_length=2048, hop_length=256, mel_channels=128, S=None):
+def onset_strength_percussive(y, sr=22050, window_length=256, hop_length=32, mel_channels=40, S=None):
     '''
     Onset strength derived from harmonic-percussive source separation
 
     Input:
         y:                  time series signal
         sr:                 sample rate of y                    | default: 22050
-        window_length:      fourier analysis window length      | default: 2048
-        hop_length:         number of frames to hop             | default: 256
-        mel_channels:       number of mel bins to use           | default: 128
+        window_length:      fourier analysis window length      | default: 256
+        hop_length:         number of frames to hop             | default: 32
+        mel_channels:       number of mel bins to use           | default: 40
     '''
 
     # Step 1: compute spectrogram
@@ -231,7 +231,7 @@ def onset_strength_percussive(y, sr=22050, window_length=2048, hop_length=256, m
 
     return O / Onorm
 
-def onset_strength(y, sr=22050, window_length=2048, hop_length=256, mel_channels=40, rising=True, htk=False, S=None):
+def onset_strength(y, sr=22050, window_length=256, hop_length=32, mel_channels=40, htk=False, S=None):
     '''
     Adapted from McVicar, adapted from Ellis, etc...
     
@@ -240,10 +240,9 @@ def onset_strength(y, sr=22050, window_length=2048, hop_length=256, mel_channels
     INPUT:
         y               = time-series waveform (t-by-1 vector)
         sr              = sampling rate of the input signal     | default: 22050
-        window_length   = number of samples per frame           | default: 2048      | = 92.8ms @ 22KHz
-        hop_length      = offset between frames                 | default: 256       | = 11.6ms @ 22KHz
+        window_length   = number of samples per frame           | default: 256
+        hop_length      = offset between frames                 | default: 32
         mel_channels    = number of Mel bins to use             | default: 40
-        rising          = detect only rising edges of beats     | default: True
         htk             = use HTK mels instead of Slaney        | default: False
         S               = (optional) pre-computed spectrogram   | default: None
 
@@ -252,51 +251,42 @@ def onset_strength(y, sr=22050, window_length=2048, hop_length=256, mel_channels
         onset_envelope
     '''
 
-    gain_threshold  = 80.0
-
     # First, compute mel spectrogram
     if S is None:
-        S   = librosa.feature.melspectrogram(y, sr=sr, 
-                                                window_length=window_length, 
-                                                hop_length=hop_length, 
-                                                mel_channels=mel_channels, 
+        S   = librosa.feature.melspectrogram(y, sr              =   sr, 
+                                                window_length   =   window_length, 
+                                                hop_length      =   hop_length, 
+                                                mel_channels    =   mel_channels, 
                                                 htk=htk)
         # Convert to dBs
         S   = librosa.logamplitude(S)
 
         pass
 
-    ### Only look at top 80 dB
-    onsets  = numpy.maximum(S, S.max() - gain_threshold)
-
     ### Compute first difference
-    onsets  = numpy.diff(onsets, n=1, axis=1)
+    onsets  = numpy.diff(S, n=1, axis=1)
 
     ### Discard negatives (decreasing amplitude)
     #   falling edges could also be useful segmentation cues
     #   to catch falling edges, replace max(0,D) with abs(D)
-    if rising:
-        onsets  = numpy.maximum(0.0, onsets)
-    else:
-        onsets  = onsets**2
-        pass
+    onsets  = numpy.maximum(0.0, onsets)
 
     ### Average over mel bands
-    onsets      = numpy.mean(onsets, axis=0)
+    onsets  = numpy.mean(onsets, axis=0)
 
     ### remove the DC component
-    onsets      = scipy.signal.lfilter([1.0, -1.0], [1.0, -0.99], onsets)
+    onsets  = scipy.signal.lfilter([1.0, -1.0], [1.0, -0.99], onsets)
 
     ### Threshold at zero
     onsets   = numpy.maximum(0.0, onsets)
 
     ### Normalize by the maximum onset strength
-    Onorm = numpy.max(onsets)
-    if Onorm == 0:
-        Onorm = 1.0
+    Onorm = onsets.max()
+    if Onorm > 0:
+        onsets = onsets / Onorm
         pass
 
-    return onsets / Onorm
+    return onsets 
 
 def segment(X, k):
     '''

diff --git a/librosa/feature.py b/librosa/feature.py
@@ -297,7 +297,7 @@ def melfb(sr, nfft, nfilts=40, width=1.0, fmin=0.0, fmax=None, use_htk=False):
 
     return wts
 
-def melspectrogram(y, sr=22050, window_length=256, hop_length=128, mel_channels=40, htk=False, width=1):
+def melspectrogram(y, sr=22050, window_length=256, hop_length=128, mel_channels=40, htk=False, width=1, fmin=0, fmax=None):
     '''
     Compute a mel spectrogram from a time series
 
@@ -309,6 +309,8 @@ def melspectrogram(y, sr=22050, window_length=256, hop_length=128, mel_channels=
         mel_channels        =   number of Mel filters to use                | default: 40
         htk                 =   use HTK mels instead of Slaney              | default: False
         width               =   width of mel bins                           | default: 1
+        fmin                =   minimum frequency                           | default: 0
+        fmax                =   maximum frequency                           | default: sr/2
 
     Output:
         S                   =   Mel amplitude spectrogram
@@ -318,7 +320,7 @@ def melspectrogram(y, sr=22050, window_length=256, hop_length=128, mel_channels=
     S = librosa.stft(y, sr=sr, n_fft=window_length, hann_w=window_length, hop_length=hop_length)
 
     # Build a Mel filter
-    M = melfb(sr, window_length, nfilts=mel_channels, width=width, use_htk=htk)
+    M = melfb(sr, window_length, nfilts=mel_channels, width=width, use_htk=htk, fmin=fmin, fmax=fmax)
 
     # Remove everything past the nyquist frequency
     M = M[:, :(window_length / 2  + 1)]
@@ -327,3 +329,34 @@ def melspectrogram(y, sr=22050, window_length=256, hop_length=128, mel_channels=
 
     return S
 
+#-- miscellaneous utilities --#
+def sync(X, F, agg=numpy.mean):
+    '''
+    Synchronous aggregation of a feature matrix
+
+    Input:
+        X:      d-by-T              | feature matrix 
+        F:      t-vector            | (ordered) array of frame numbers
+        agg:    aggregator function | default: numpy.mean
+
+    Output:
+        Y:      d-by-(<=t+1) vector
+        where 
+                Y[:, i] = agg(X[:, F[i-1]:F[i]], axis=1)
+
+        In order to ensure total coverage, boundary points are added to F
+    '''
+
+    F = numpy.unique(numpy.concatenate( ([0], F, [X.shape[1]]) ))
+
+    Y = numpy.zeros( (X.shape[0], len(F)-1) )
+
+    lb = F[0]
+
+    for (i, ub) in enumerate(F[1:]):
+        Y[:, i] = agg(X[:, lb:ub], axis=1)
+        lb = ub
+        pass
+
+    return Y
+
diff --git a/tests/makeTestData.m b/tests/makeTestData.m
@@ -51,6 +51,9 @@ function testData(source_path, output_path)
     display('resample');
     testResample(output_path);
 
+    display('tempo');
+    testOnset(output_path);
+
     %% Done!
     display('Done.');
 end
@@ -327,3 +330,22 @@ function testISTFT(output_path)
         end
     end
 end
+
+function testOnset(output_path)
+
+    wavfile     = 'data/test2_8000.wav';
+
+    [y, sr]     = wavread(wavfile);
+    y           = mean(y, 2);               % Convert to mono
+
+    % Generate the onset envelope first
+    [t, xcr, D, onsetenv, oesr] = tempo2(y, sr);
+
+    filename    = sprintf('%s/beat-onset-000.mat', output_path);
+    display(['  `-- saving ', filename]);
+    save(filename, 'wavfile', 'onsetenv');
+
+    filename    = sprintf('%s/beat-tempo-000.mat', output_path);
+    display(['  `-- saving ', filename]);
+    save(filename, 'wavfile', 't');
+end