kaldi-asr · danpovey · Mar 5, 2019 · Dec 2, 2018
diff --git a/src/feat/feature-common-inl.h b/src/feat/feature-common-inl.h
@@ -33,26 +33,26 @@ void OfflineFeatureTpl<F>::ComputeFeatures(
     Matrix<BaseFloat> *output) {
   KALDI_ASSERT(output != NULL);
   BaseFloat new_sample_freq = computer_.GetFrameOptions().samp_freq;
-  if (sample_freq == new_sample_freq)
+  if (sample_freq == new_sample_freq) {
     Compute(wave, vtln_warp, output);
-  else {
-    if (new_sample_freq < sample_freq) {
-      if (! computer_.GetFrameOptions().allow_downsample)
+  } else {
+    if (new_sample_freq < sample_freq &&
+        ! computer_.GetFrameOptions().allow_downsample)
         KALDI_ERR << "Waveform and config sample Frequency mismatch: "
                   << sample_freq << " .vs " << new_sample_freq
-                  << " ( use --allow_downsample=true option to allow "
+                  << " (use --allow-downsample=true to allow "
                   << " downsampling the waveform).";
-
-      // Downsample the waveform.
-      Vector<BaseFloat> downsampled_wave(wave);
-      DownsampleWaveForm(sample_freq, wave,
-                         new_sample_freq, &downsampled_wave);
-      Compute(downsampled_wave, vtln_warp, output);
-    } else
-      KALDI_ERR << "New sample Frequency " << new_sample_freq
-                << " is larger than waveform original sampling frequency "
-                << sample_freq;
-
+    else if (new_sample_freq > sample_freq &&
+             ! computer_.GetFrameOptions().allow_upsample)
+      KALDI_ERR << "Waveform and config sample Frequency mismatch: "
+                  << sample_freq << " .vs " << new_sample_freq
+                << " (use --allow-upsample=true option to allow "
+                << " upsampling the waveform).";
+    // Resample the waveform.
+    Vector<BaseFloat> resampled_wave(wave);
+    ResampleWaveform(sample_freq, wave,
+                     new_sample_freq, &resampled_wave);
+    Compute(resampled_wave, vtln_warp, output);
   }
 }
 

diff --git a/src/feat/feature-window.h b/src/feat/feature-window.h
@@ -40,14 +40,15 @@ struct FrameExtractionOptions {
   BaseFloat preemph_coeff;  // Preemphasis coefficient.
   bool remove_dc_offset;  // Subtract mean of wave before FFT.
   std::string window_type;  // e.g. Hamming window
-  bool round_to_power_of_two;
-  BaseFloat blackman_coeff;
-  bool snip_edges;
-  bool allow_downsample;
   // May be "hamming", "rectangular", "povey", "hanning", "blackman"
   // "povey" is a window I made to be similar to Hamming but to go to zero at the
   // edges, it's pow((0.5 - 0.5*cos(n/N*2*pi)), 0.85)
   // I just don't think the Hamming window makes sense as a windowing function.
+  bool round_to_power_of_two;
+  BaseFloat blackman_coeff;
+  bool snip_edges;
+  bool allow_downsample;
+  bool allow_upsample;
   FrameExtractionOptions():
       samp_freq(16000),
       frame_shift_ms(10.0),
@@ -59,7 +60,8 @@ struct FrameExtractionOptions {
       round_to_power_of_two(true),
       blackman_coeff(0.42),
       snip_edges(true),
-      allow_downsample(false) { }
+      allow_downsample(false),
+      allow_upsample(false) { }
 
   void Register(OptionsItf *opts) {
     opts->Register("sample-frequency", &samp_freq,
@@ -90,6 +92,9 @@ struct FrameExtractionOptions {
     opts->Register("allow-downsample", &allow_downsample,
                    "If true, allow the input waveform to have a higher frequency than "
                    "the specified --sample-frequency (and we'll downsample).");
+    opts->Register("allow-upsample", &allow_upsample,
+                   "If true, allow the input waveform to have a lower frequency than "
+                   "the specified --sample-frequency (and we'll upsample).");
   }
   int32 WindowShift() const {
     return static_cast<int32>(samp_freq * 0.001 * frame_shift_ms);

diff --git a/src/feat/resample.cc b/src/feat/resample.cc
@@ -302,7 +302,7 @@ void ArbitraryResample::Resample(const VectorBase<BaseFloat> &input,
                                  VectorBase<BaseFloat> *output) const {
   KALDI_ASSERT(input.Dim() == num_samples_in_ &&
                output->Dim() == weights_.size());
-  
+
   int32 output_dim = output->Dim();
   for (int32 i = 0; i < output_dim; i++) {
     SubVector<BaseFloat> input_part(input, first_index_[i], weights_[i].Dim());
@@ -365,13 +365,13 @@ BaseFloat ArbitraryResample::FilterFunc(BaseFloat t) const {
   return filter * window;
 }
 
-void DownsampleWaveForm(BaseFloat orig_freq, const VectorBase<BaseFloat> &wave,
-                        BaseFloat new_freq, Vector<BaseFloat> *new_wave) {
-  KALDI_ASSERT(new_freq < orig_freq);
-  BaseFloat lowpass_cutoff = 0.99 * 0.5 * new_freq;
+void ResampleWaveform(BaseFloat orig_freq, const VectorBase<BaseFloat> &wave,
+                      BaseFloat new_freq, Vector<BaseFloat> *new_wave) {
+  BaseFloat min_freq = std::min(orig_freq, new_freq);  // lower of the two rates
+  BaseFloat lowpass_cutoff = 0.99 * 0.5 * min_freq;  // 0.99 of half the lower rate
   int32 lowpass_filter_width = 6;
-  LinearResample signal_downsampler(orig_freq, new_freq,
-                                    lowpass_cutoff, lowpass_filter_width);
-  signal_downsampler.Resample(wave, true, new_wave);
+  LinearResample resampler(orig_freq, new_freq,
+                           lowpass_cutoff, lowpass_filter_width);
+  resampler.Resample(wave, true, new_wave);  // NOTE(review): 'true' presumably means flush -- confirm
 }
 }  // namespace kaldi
diff --git a/src/feat/resample.h b/src/feat/resample.h
@@ -40,7 +40,7 @@ namespace kaldi {
 
 /**
    \file[resample.h]
-   
+
    This header contains declarations of classes for resampling signals.  The
    normal cases of resampling a signal are upsampling and downsampling
    (increasing and decreasing the sample rate of a signal, respectively),
@@ -51,7 +51,7 @@ namespace kaldi {
    The input signal is always evenly spaced, say sampled with frequency S, and
    we assume the original signal was band-limited to S/2 or lower.  The n'th
    input sample x_n (with n = 0, 1, ...) is interpreted as the original
-   signal's value at time n/S.  
+   signal's value at time n/S.
 
    For resampling, it is convenient to view the input signal as a
    continuous function x(t) of t, where each sample x_n becomes a delta function
@@ -73,14 +73,14 @@ namespace kaldi {
    means we window the sinc function out to its first zero on the left and right,
    w = 2 means the second zero, and so on; we normally choose w to be at least two.
    We call this num_zeros, not w, in the code.
-   
+
    Convolving the signal x(t) with this windowed filter h(t) = f(t)g(t) and evaluating the resulting
    signal s(t) at an arbitrary time t is easy: we have
     \f[          s(t) = 1/S \sum_n x_n h(t - n/S)        \f].
    (note: the sign of t - n/S might be wrong, but it doesn't matter as the filter
    and window are symmetric).
    This is true for arbitrary values of t.  What the class ArbitraryResample does
-   is to allow you to evaluate the signal for specified values of t.  
+   is to allow you to evaluate the signal for specified values of t.
 */
 
 
@@ -90,7 +90,7 @@ namespace kaldi {
    don't have to be linearly spaced.  The low-pass filter cutoff
    "filter_cutoff_hz" should be less than half the sample rate;
    "num_zeros" should probably be at least two preferably more; higher numbers give
-   sharper filters but will be less efficient. 
+   sharper filters but will be less efficient.
 */
 class ArbitraryResample {
  public:
@@ -115,7 +115,7 @@ class ArbitraryResample {
   /// This version of the Resample function processes just
   /// one vector.
   void Resample(const VectorBase<BaseFloat> &input,
-                VectorBase<BaseFloat> *output) const;  
+                VectorBase<BaseFloat> *output) const;
  private:
   void SetIndexes(const Vector<BaseFloat> &sample_points);
 
@@ -248,20 +248,35 @@ class LinearResample {
                                        ///< previously seen input signal.
 };
 
-/// Downsample a waveform. This is a convenience wrapper for the
-/// class 'LinearResample'.
-/// The low-pass filter cutoff used in 'LinearResample' is 0.99 of half of the
-/// new_freq and num_zeros is 6.
-/// The downsampling results is also checked wit sox resampling toolkit.
-/// Sox design is inspired by Laurent De Soras' paper,
-/// https://ccrma.stanford.edu/~jos/resample/Implementation.html
-/// It designs low pass filter using pass-band, stop-band, Nyquist freq
-/// and stop-band attenuation.
-/// e.g. The mainlob for Hanning window is 4pi/M, where the main-lobe width is
-/// equal to (pass-band-freq - stop-band-freq).
-/// Also the cutoff frequency is equal to (pass-band-freq - stop-band-freq).
-void DownsampleWaveForm(BaseFloat orig_freq, const VectorBase<BaseFloat> &wave,
-                        BaseFloat new_freq, Vector<BaseFloat> *new_wave);
+/**
+   Downsample or upsample a waveform. This is a convenience wrapper for the
+   class 'LinearResample'.
+   The low-pass filter cutoff used in 'LinearResample' is 0.99 of the Nyquist,
+   where the Nyquist is half of the minimum of (orig_freq, new_freq).  The
+   resampling is done with a symmetric FIR filter whose N_z (number of zeros)
+   is 6.
+
+   We compared the downsampling results with those from the sox resampling
+   toolkit.
+   Sox's design is inspired by Laurent De Soras' paper,
+   https://ccrma.stanford.edu/~jos/resample/Implementation.html
+
+   Note: although orig_freq and new_freq are of type BaseFloat, they are
+   required to have exact integer values (like 16000 or 8000), with a ratio
+   between them that can be expressed as a rational number with reasonably
+   small integer factors.
+*/
+void ResampleWaveform(BaseFloat orig_freq, const VectorBase<BaseFloat> &wave,
+                      BaseFloat new_freq, Vector<BaseFloat> *new_wave);
+
+
+/// Deprecated: call ResampleWaveform() directly.  This thin wrapper only forwards
+/// its arguments and is kept for backward compatibility, to avoid breaking older code.
+inline void DownsampleWaveForm(BaseFloat orig_freq, const VectorBase<BaseFloat> &wave,
+                               BaseFloat new_freq, Vector<BaseFloat> *new_wave) {
+  ResampleWaveform(orig_freq, wave, new_freq, new_wave);
+}
+
 
 /// @} End of "addtogroup feat"
 }  // namespace kaldi