Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[src] Allow upsampling in compute-mfcc-feats, etc. #3014

Merged
merged 1 commit into from
Mar 5, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 16 additions & 16 deletions src/feat/feature-common-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,26 +33,26 @@ void OfflineFeatureTpl<F>::ComputeFeatures(
Matrix<BaseFloat> *output) {
KALDI_ASSERT(output != NULL);
BaseFloat new_sample_freq = computer_.GetFrameOptions().samp_freq;
if (sample_freq == new_sample_freq)
if (sample_freq == new_sample_freq) {
Compute(wave, vtln_warp, output);
else {
if (new_sample_freq < sample_freq) {
if (! computer_.GetFrameOptions().allow_downsample)
} else {
if (new_sample_freq < sample_freq &&
! computer_.GetFrameOptions().allow_downsample)
KALDI_ERR << "Waveform and config sample Frequency mismatch: "
<< sample_freq << " .vs " << new_sample_freq
<< " ( use --allow_downsample=true option to allow "
<< " (use --allow-downsample=true to allow "
<< " downsampling the waveform).";

// Downsample the waveform.
Vector<BaseFloat> downsampled_wave(wave);
DownsampleWaveForm(sample_freq, wave,
new_sample_freq, &downsampled_wave);
Compute(downsampled_wave, vtln_warp, output);
} else
KALDI_ERR << "New sample Frequency " << new_sample_freq
<< " is larger than waveform original sampling frequency "
<< sample_freq;

else if (new_sample_freq > sample_freq &&
! computer_.GetFrameOptions().allow_upsample)
KALDI_ERR << "Waveform and config sample Frequency mismatch: "
<< sample_freq << " .vs " << new_sample_freq
<< " (use --allow-upsample=true option to allow "
<< " upsampling the waveform).";
// Resample the waveform.
Vector<BaseFloat> resampled_wave(wave);
ResampleWaveform(sample_freq, wave,
new_sample_freq, &resampled_wave);
Compute(resampled_wave, vtln_warp, output);
}
}

Expand Down
15 changes: 10 additions & 5 deletions src/feat/feature-window.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,15 @@ struct FrameExtractionOptions {
BaseFloat preemph_coeff; // Preemphasis coefficient.
bool remove_dc_offset; // Subtract mean of wave before FFT.
std::string window_type; // e.g. Hamming window
bool round_to_power_of_two;
BaseFloat blackman_coeff;
bool snip_edges;
bool allow_downsample;
// May be "hamming", "rectangular", "povey", "hanning", "blackman"
// "povey" is a window I made to be similar to Hamming but to go to zero at the
// edges, it's pow((0.5 - 0.5*cos(n/N*2*pi)), 0.85)
// I just don't think the Hamming window makes sense as a windowing function.
bool round_to_power_of_two;
BaseFloat blackman_coeff;
bool snip_edges;
bool allow_downsample;
bool allow_upsample;
FrameExtractionOptions():
samp_freq(16000),
frame_shift_ms(10.0),
Expand All @@ -59,7 +60,8 @@ struct FrameExtractionOptions {
round_to_power_of_two(true),
blackman_coeff(0.42),
snip_edges(true),
allow_downsample(false) { }
allow_downsample(false),
allow_upsample(false) { }

void Register(OptionsItf *opts) {
opts->Register("sample-frequency", &samp_freq,
Expand Down Expand Up @@ -90,6 +92,9 @@ struct FrameExtractionOptions {
opts->Register("allow-downsample", &allow_downsample,
"If true, allow the input waveform to have a higher frequency than "
"the specified --sample-frequency (and we'll downsample).");
opts->Register("allow-upsample", &allow_upsample,
"If true, allow the input waveform to have a lower frequency than "
"the specified --sample-frequency (and we'll upsample).");
}
int32 WindowShift() const {
return static_cast<int32>(samp_freq * 0.001 * frame_shift_ms);
Expand Down
16 changes: 8 additions & 8 deletions src/feat/resample.cc
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ void ArbitraryResample::Resample(const VectorBase<BaseFloat> &input,
VectorBase<BaseFloat> *output) const {
KALDI_ASSERT(input.Dim() == num_samples_in_ &&
output->Dim() == weights_.size());

int32 output_dim = output->Dim();
for (int32 i = 0; i < output_dim; i++) {
SubVector<BaseFloat> input_part(input, first_index_[i], weights_[i].Dim());
Expand Down Expand Up @@ -365,13 +365,13 @@ BaseFloat ArbitraryResample::FilterFunc(BaseFloat t) const {
return filter * window;
}

void DownsampleWaveForm(BaseFloat orig_freq, const VectorBase<BaseFloat> &wave,
BaseFloat new_freq, Vector<BaseFloat> *new_wave) {
KALDI_ASSERT(new_freq < orig_freq);
BaseFloat lowpass_cutoff = 0.99 * 0.5 * new_freq;
void ResampleWaveform(BaseFloat orig_freq, const VectorBase<BaseFloat> &wave,
BaseFloat new_freq, Vector<BaseFloat> *new_wave) {
BaseFloat min_freq = std::min(orig_freq, new_freq);
BaseFloat lowpass_cutoff = 0.99 * 0.5 * min_freq;
int32 lowpass_filter_width = 6;
LinearResample signal_downsampler(orig_freq, new_freq,
lowpass_cutoff, lowpass_filter_width);
signal_downsampler.Resample(wave, true, new_wave);
LinearResample resampler(orig_freq, new_freq,
lowpass_cutoff, lowpass_filter_width);
resampler.Resample(wave, true, new_wave);
}
} // namespace kaldi
55 changes: 35 additions & 20 deletions src/feat/resample.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ namespace kaldi {

/**
\file[resample.h]

This header contains declarations of classes for resampling signals. The
normal cases of resampling a signal are upsampling and downsampling
(increasing and decreasing the sample rate of a signal, respectively),
Expand All @@ -51,7 +51,7 @@ namespace kaldi {
The input signal is always evenly spaced, say sampled with frequency S, and
we assume the original signal was band-limited to S/2 or lower. The n'th
input sample x_n (with n = 0, 1, ...) is interpreted as the original
signal's value at time n/S.
signal's value at time n/S.

For resampling, it is convenient to view the input signal as a
continuous function x(t) of t, where each sample x_n becomes a delta function
Expand All @@ -73,14 +73,14 @@ namespace kaldi {
means we window the sinc function out to its first zero on the left and right,
w = 2 means the second zero, and so on; we normally choose w to be at least two.
We call this num_zeros, not w, in the code.

Convolving the signal x(t) with this windowed filter h(t) = f(t)g(t) and evaluating the resulting
signal s(t) at an arbitrary time t is easy: we have
\f[ s(t) = 1/S \sum_n x_n h(t - n/S) \f].
(note: the sign of t - n/S might be wrong, but it doesn't matter as the filter
and window are symmetric).
This is true for arbitrary values of t. What the class ArbitraryResample does
is to allow you to evaluate the signal for specified values of t.
is to allow you to evaluate the signal for specified values of t.
*/


Expand All @@ -90,7 +90,7 @@ namespace kaldi {
don't have to be linearly spaced. The low-pass filter cutoff
"filter_cutoff_hz" should be less than half the sample rate;
"num_zeros" should probably be at least two preferably more; higher numbers give
sharper filters but will be less efficient.
sharper filters but will be less efficient.
*/
class ArbitraryResample {
public:
Expand All @@ -115,7 +115,7 @@ class ArbitraryResample {
/// This version of the Resample function processes just
/// one vector.
void Resample(const VectorBase<BaseFloat> &input,
VectorBase<BaseFloat> *output) const;
VectorBase<BaseFloat> *output) const;
private:
void SetIndexes(const Vector<BaseFloat> &sample_points);

Expand Down Expand Up @@ -248,20 +248,35 @@ class LinearResample {
///< previously seen input signal.
};

/// Downsample a waveform. This is a convenience wrapper for the
/// class 'LinearResample'.
/// The low-pass filter cutoff used in 'LinearResample' is 0.99 of half of the
/// new_freq and num_zeros is 6.
/// The downsampling results were also checked against the sox resampling toolkit.
/// Sox design is inspired by Laurent De Soras' paper,
/// https://ccrma.stanford.edu/~jos/resample/Implementation.html
/// It designs low pass filter using pass-band, stop-band, Nyquist freq
/// and stop-band attenuation.
/// e.g. the main lobe for a Hanning window is 4pi/M, where the main-lobe width is
/// equal to (pass-band-freq - stop-band-freq).
/// Also the cutoff frequency is equal to (pass-band-freq - stop-band-freq).
void DownsampleWaveForm(BaseFloat orig_freq, const VectorBase<BaseFloat> &wave,
BaseFloat new_freq, Vector<BaseFloat> *new_wave);
/**
Downsample or upsample a waveform. This is a convenience wrapper for the
class 'LinearResample'.
The low-pass filter cutoff used in 'LinearResample' is 0.99 of the Nyquist,
where the Nyquist is half of the minimum of (orig_freq, new_freq). The
resampling is done with a symmetric FIR filter with N_z (number of zeros)
as 6.

We compared the downsampling results with those from the sox resampling
toolkit.
Sox's design is inspired by Laurent De Soras' paper,
https://ccrma.stanford.edu/~jos/resample/Implementation.html

Note: we expect that while orig_freq and new_freq are of type BaseFloat, they
are actually required to have exact integer values (like 16000 or 8000) with
a ratio between them that can be expressed as a rational number with
reasonably small integer factors.
*/
void ResampleWaveform(BaseFloat orig_freq, const VectorBase<BaseFloat> &wave,
BaseFloat new_freq, Vector<BaseFloat> *new_wave);


/// Deprecated alias for ResampleWaveform(), retained so that older code which
/// still calls DownsampleWaveForm() keeps compiling.  All four arguments are
/// forwarded unchanged; despite the name, this wrapper itself performs no
/// check that new_freq is lower than orig_freq.
inline void DownsampleWaveForm(BaseFloat orig_freq,
                               const VectorBase<BaseFloat> &wave,
                               BaseFloat new_freq,
                               Vector<BaseFloat> *new_wave) {
  ResampleWaveform(orig_freq, wave, new_freq, new_wave);
}


/// @} End of "addtogroup feat"
} // namespace kaldi
Expand Down