From b1d7bfe8da21c40e1b5fc81367c5bf4fad7eb2a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Dr=C3=B6ge?= Date: Sat, 8 Aug 2020 11:08:57 +0300 Subject: [PATCH] Port filter to Rust This is about 2x slower than the C implementation but completely unoptimized so far. With this the last component is ported to C and only porting the API layer and input chunking is remaining. --- Cargo.toml | 4 + benches/filter.rs | 266 ++++++++++++++ build.rs | 1 + src/c/ebur128.c | 245 ++----------- src/c/tests/filter.c | 215 ++++++++++++ src/filter.rs | 812 +++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 5 + src/true_peak.rs | 81 ----- 8 files changed, 1335 insertions(+), 294 deletions(-) create mode 100644 benches/filter.rs create mode 100644 src/c/tests/filter.c create mode 100644 src/filter.rs diff --git a/Cargo.toml b/Cargo.toml index 8605805..db9630c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,6 +41,10 @@ harness = false name = "history" harness = false +[[bench]] +name = "filter" +harness = false + [[bench]] name = "ebur128" harness = false diff --git a/benches/filter.rs b/benches/filter.rs new file mode 100644 index 0000000..8f8f617 --- /dev/null +++ b/benches/filter.rs @@ -0,0 +1,266 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion}; + +// Run all benchmarks without true peak calculation as that's just another function call +// and we measure that one in the true peak benchmarks already. + +#[cfg(feature = "internal-tests")] +fn filter_i16_c(rate: u32, channels: u32, src: &[i16], dest: &mut [f64], channel_map: &[u32]) { + use ebur128::filter; + + unsafe { + let f = filter::filter_create_c(rate, channels, 1, 0); + filter::filter_process_short_c( + f, + src.len() / channels as usize, + src.as_ptr(), + dest.as_mut_ptr(), + channel_map.as_ptr(), + ); + filter::filter_destroy_c(f); + } +} + +#[cfg(feature = "internal-tests")] +fn filter_i16(rate: u32, channels: u32, src: &[i16], dest: &mut [f64], channel_map: &[u32]) { + use ebur128::filter; + + let mut f = filter::Filter::new(rate, channels, true, false); + f.process(src, dest, channel_map); +} + +#[cfg(feature = "internal-tests")] +fn filter_i32_c(rate: u32, channels: u32, src: &[i32], dest: &mut [f64], channel_map: &[u32]) { + use ebur128::filter; + + unsafe { + let f = filter::filter_create_c(rate, channels, 1, 0); + filter::filter_process_int_c( + f, + src.len() / channels as usize, + src.as_ptr(), + dest.as_mut_ptr(), + channel_map.as_ptr(), + ); + filter::filter_destroy_c(f); + } +} + +#[cfg(feature = "internal-tests")] +fn filter_i32(rate: u32, channels: u32, src: &[i32], dest: &mut [f64], channel_map: &[u32]) { + use ebur128::filter; + + let mut f = filter::Filter::new(rate, channels, true, false); + f.process(src, dest, channel_map); +} + +#[cfg(feature = "internal-tests")] +fn filter_f32_c(rate: u32, channels: u32, src: &[f32], dest: &mut [f64], channel_map: &[u32]) { + use ebur128::filter; + + unsafe { + let f = filter::filter_create_c(rate, channels, 1, 0); + filter::filter_process_float_c( + f, + src.len() / channels as usize, + src.as_ptr(), + dest.as_mut_ptr(), + channel_map.as_ptr(), + ); + filter::filter_destroy_c(f); + } +} + +#[cfg(feature = "internal-tests")] +fn filter_f32(rate: u32, channels: u32, src: &[f32], dest: &mut [f64], channel_map: &[u32]) { + use ebur128::filter; + + let mut f = filter::Filter::new(rate, channels, true, false); + f.process(src, dest, channel_map); +} + +#[cfg(feature = "internal-tests")] +fn filter_f64_c(rate: u32, channels: u32, src: &[f64], dest: &mut [f64], channel_map: &[u32]) { + use ebur128::filter; + + unsafe { + let f = filter::filter_create_c(rate, channels, 1, 0); + filter::filter_process_double_c( + f, + src.len() / channels as usize, + src.as_ptr(), + dest.as_mut_ptr(), + channel_map.as_ptr(), + ); + filter::filter_destroy_c(f); + } +} + +#[cfg(feature = "internal-tests")] +fn filter_f64(rate: u32, channels: u32, src: &[f64], dest: &mut [f64], channel_map: &[u32]) { + use ebur128::filter; + + let mut f = filter::Filter::new(rate, channels, true, false); + f.process(src, dest, channel_map); +} + +pub fn criterion_benchmark(c: &mut Criterion) { + #[cfg(feature = "internal-tests")] + { + let channel_map = [1; 2]; + let mut data_out = vec![0.0f64; 19200 * 2]; + let mut data = vec![0i16; 19200 * 2]; + let mut accumulator = 0.0; + let step = 2.0 * std::f32::consts::PI * 440.0 / 48_000.0; + for out in data.chunks_exact_mut(2) { + let val = f32::sin(accumulator) * std::i16::MAX as f32; + out[0] = val as i16; + out[1] = val as i16; + accumulator += step; + } + + let mut group = c.benchmark_group("filter: 48kHz 2ch i16"); + + group.bench_function("C", |b| { + b.iter(|| { + filter_i16_c( + black_box(48_000), + black_box(2), + black_box(&data), + black_box(&mut data_out), + black_box(&channel_map), + ) + }) + }); + + group.bench_function("Rust", |b| { + b.iter(|| { + filter_i16( + black_box(48_000), + black_box(2), + black_box(&data), + black_box(&mut data_out), + black_box(&channel_map), + ) + }) + }); + + group.finish(); + + let mut data = vec![0i32; 19200 * 2]; + let mut accumulator = 0.0; + let step = 2.0 * std::f32::consts::PI * 440.0 / 48_000.0; + for out in data.chunks_exact_mut(2) { + let val = f32::sin(accumulator) * std::i32::MAX as f32; + out[0] = val as i32; + out[1] = val as i32; + accumulator += step; + } + + let mut group = c.benchmark_group("filter: 48kHz 2ch i32"); + + group.bench_function("C", |b| { + b.iter(|| { + filter_i32_c( + black_box(48_000), + black_box(2), + black_box(&data), + black_box(&mut data_out), + black_box(&channel_map), + ) + }) + }); + + group.bench_function("Rust", |b| { + b.iter(|| { + filter_i32( + black_box(48_000), + black_box(2), + black_box(&data), + black_box(&mut data_out), + black_box(&channel_map), + ) + }) + }); + + group.finish(); + + let mut data = vec![0.0f32; 19200 * 2]; + let mut accumulator = 0.0; + let step = 2.0 * std::f32::consts::PI * 440.0 / 48_000.0; + for out in data.chunks_exact_mut(2) { + let val = f32::sin(accumulator); + out[0] = val; + out[1] = val; + accumulator += step; + } + + let mut group = c.benchmark_group("filter: 48kHz 2ch f32"); + + group.bench_function("C", |b| { + b.iter(|| { + filter_f32_c( + black_box(48_000), + black_box(2), + black_box(&data), + black_box(&mut data_out), + black_box(&channel_map), + ) + }) + }); + + group.bench_function("Rust", |b| { + b.iter(|| { + filter_f32( + black_box(48_000), + black_box(2), + black_box(&data), + black_box(&mut data_out), + black_box(&channel_map), + ) + }) + }); + + group.finish(); + + let mut data = vec![0.0f64; 19200 * 2]; + let mut accumulator = 0.0; + let step = 2.0 * std::f32::consts::PI * 440.0 / 48_000.0; + for out in data.chunks_exact_mut(2) { + let val = f32::sin(accumulator); + out[0] = val as f64; + out[1] = val as f64; + accumulator += step; + } + + let mut group = c.benchmark_group("filter: 48kHz 2ch f64"); + + group.bench_function("C", |b| { + b.iter(|| { + filter_f64_c( + black_box(48_000), + black_box(2), + black_box(&data), + black_box(&mut data_out), + black_box(&channel_map), + ) + }) + }); + + group.bench_function("Rust", |b| { + b.iter(|| { + filter_f64( + black_box(48_000), + black_box(2), + black_box(&data), + black_box(&mut data_out), + black_box(&channel_map), + ) + }) + }); + + group.finish(); + } +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/build.rs b/build.rs index d14e75a..e3e49a6 100644 --- a/build.rs +++ b/build.rs @@ -7,6 +7,7 @@ fn main() { b.file("src/c/tests/interp.c"); b.file("src/c/tests/true_peak.c"); b.file("src/c/tests/history.c"); + b.file("src/c/tests/filter.c"); } b.file("src/c/ebur128.c"); diff --git a/src/c/ebur128.c b/src/c/ebur128.c index ae83f4a..c33b965 100644 --- a/src/c/ebur128.c +++ b/src/c/ebur128.c @@ -17,16 +17,18 @@ #define EBUR128_MAX(a, b) (((a) > (b)) ? (a) : (b)) #define ALMOST_ZERO 0.000001 -#define FILTER_STATE_SIZE 5 /* Rust implementations */ -typedef void * true_peak; -extern true_peak* true_peak_create(unsigned int rate, unsigned int channels); -extern void true_peak_destroy(true_peak* tp); -extern void true_peak_check_short(true_peak* tp, size_t frames, const int16_t* src, double* peaks); -extern void true_peak_check_int(true_peak* tp, size_t frames, const int32_t* src, double* peaks); -extern void true_peak_check_float(true_peak* tp, size_t frames, const float* src, double* peaks); -extern void true_peak_check_double(true_peak* tp, size_t frames, const double* src, double* peaks); +typedef void * filter; +extern filter* filter_create(unsigned int rate, unsigned int channels, int calculate_sample_peak, int calculate_true_peak); +extern void filter_destroy(filter* f); +extern void filter_process_short(filter* f, size_t frames, const int16_t* src, double* dest, const int* channel_map); +extern void filter_process_int(filter* f, size_t frames, const int32_t* src, double* dest, const int* channel_map); +extern void filter_process_float(filter* f, size_t frames, const float* src, double* dest, const int* channel_map); +extern void filter_process_double(filter* f, size_t frames, const double* src, double* dest, const int* channel_map); +extern const double* filter_sample_peak(const filter *f); +extern const double* filter_true_peak(const filter *f); +extern void filter_reset_peaks(filter *f); typedef void * history; extern history* history_create(int use_histogram, size_t max); @@ -37,9 +39,6 @@ extern double history_relative_threshold(const history* hist); extern double history_loudness_range(const history* hist); extern void history_destroy(history *hist); -/** BS.1770 filter state. */ -typedef double filter_state[FILTER_STATE_SIZE]; - struct ebur128_state_internal { /** Filtered audio data (used as ring buffer). */ double* audio_data; @@ -55,23 +54,17 @@ struct ebur128_state_internal { int* channel_map; /** How many samples fit in 100ms (rounded). */ unsigned long samples_in_100ms; - /** BS.1770 filter coefficients (nominator). */ - double b[5]; - /** BS.1770 filter coefficients (denominator). */ - double a[5]; - /** one filter_state per channel. */ - filter_state* v; + + filter* f; history* block_energy_history; history* short_term_block_energy_history; + /** Keeps track of when a new short term block is needed. */ size_t short_term_frame_counter; /** Maximum sample peak, one per channel */ double* sample_peak; - double* prev_sample_peak; /** Maximum true peak, one per channel */ double* true_peak; - double* prev_true_peak; - true_peak* tp; /** The maximum window duration in ms. */ unsigned long window; unsigned long history; @@ -83,66 +76,6 @@ static double relative_gate = -10.0; static double relative_gate_factor; static double minus_twenty_decibels; -static int ebur128_init_filter(ebur128_state* st) { - int errcode = EBUR128_SUCCESS; - int i, j; - - double f0 = 1681.974450955533; - double G = 3.999843853973347; - double Q = 0.7071752369554196; - - double K = tan(M_PI * f0 / (double) st->samplerate); - double Vh = pow(10.0, G / 20.0); - double Vb = pow(Vh, 0.4996667741545416); - - double pb[3] = { 0.0, 0.0, 0.0 }; - double pa[3] = { 1.0, 0.0, 0.0 }; - double rb[3] = { 1.0, -2.0, 1.0 }; - double ra[3] = { 1.0, 0.0, 0.0 }; - - double a0 = 1.0 + K / Q + K * K; - pb[0] = (Vh + Vb * K / Q + K * K) / a0; - pb[1] = 2.0 * (K * K - Vh) / a0; - pb[2] = (Vh - Vb * K / Q + K * K) / a0; - pa[1] = 2.0 * (K * K - 1.0) / a0; - pa[2] = (1.0 - K / Q + K * K) / a0; - - /* fprintf(stderr, "%.14f %.14f %.14f %.14f %.14f\n", - b1[0], b1[1], b1[2], a1[1], a1[2]); */ - - f0 = 38.13547087602444; - Q = 0.5003270373238773; - K = tan(M_PI * f0 / (double) st->samplerate); - - ra[1] = 2.0 * (K * K - 1.0) / (1.0 + K / Q + K * K); - ra[2] = (1.0 - K / Q + K * K) / (1.0 + K / Q + K * K); - - /* fprintf(stderr, "%.14f %.14f\n", a2[1], a2[2]); */ - - st->d->b[0] = pb[0] * rb[0]; - st->d->b[1] = pb[0] * rb[1] + pb[1] * rb[0]; - st->d->b[2] = pb[0] * rb[2] + pb[1] * rb[1] + pb[2] * rb[0]; - st->d->b[3] = pb[1] * rb[2] + pb[2] * rb[1]; - st->d->b[4] = pb[2] * rb[2]; - - st->d->a[0] = pa[0] * ra[0]; - st->d->a[1] = pa[0] * ra[1] + pa[1] * ra[0]; - st->d->a[2] = pa[0] * ra[2] + pa[1] * ra[1] + pa[2] * ra[0]; - st->d->a[3] = pa[1] * ra[2] + pa[2] * ra[1]; - st->d->a[4] = pa[2] * ra[2]; - - st->d->v = (filter_state*) malloc(st->channels * sizeof(filter_state)); - CHECK_ERROR(!st->d->v, EBUR128_ERROR_NOMEM, exit); - for (i = 0; i < (int) st->channels; ++i) { - for (j = 0; j < FILTER_STATE_SIZE; ++j) { - st->d->v[i][j] = 0.0; - } - } - -exit: - return errcode; -} - static int ebur128_init_channel_map(ebur128_state* st) { size_t i; st->d->channel_map = (int*) malloc(st->channels * sizeof(int)); @@ -226,17 +159,11 @@ ebur128_init(unsigned int channels, unsigned long samplerate, int mode) { st->d->sample_peak = (double*) malloc(channels * sizeof(double)); CHECK_ERROR(!st->d->sample_peak, 0, free_channel_map) - st->d->prev_sample_peak = (double*) malloc(channels * sizeof(double)); - CHECK_ERROR(!st->d->prev_sample_peak, 0, free_sample_peak) st->d->true_peak = (double*) malloc(channels * sizeof(double)); - CHECK_ERROR(!st->d->true_peak, 0, free_prev_sample_peak) - st->d->prev_true_peak = (double*) malloc(channels * sizeof(double)); - CHECK_ERROR(!st->d->prev_true_peak, 0, free_true_peak) + CHECK_ERROR(!st->d->true_peak, 0, free_sample_peak) for (i = 0; i < channels; ++i) { st->d->sample_peak[i] = 0.0; - st->d->prev_sample_peak[i] = 0.0; st->d->true_peak[i] = 0.0; - st->d->prev_true_peak[i] = 0.0; } st->d->history = ULONG_MAX; @@ -248,7 +175,7 @@ ebur128_init(unsigned int channels, unsigned long samplerate, int mode) { } else if ((mode & EBUR128_MODE_M) == EBUR128_MODE_M) { st->d->window = 400; } else { - goto free_prev_true_peak; + goto free_true_peak; } st->d->audio_data_frames = st->samplerate * st->d->window / 1000; if (st->d->audio_data_frames % st->d->samples_in_100ms) { @@ -259,19 +186,16 @@ ebur128_init(unsigned int channels, unsigned long samplerate, int mode) { } st->d->audio_data = (double*) malloc(st->d->audio_data_frames * st->channels * sizeof(double)); - CHECK_ERROR(!st->d->audio_data, 0, free_prev_true_peak) + CHECK_ERROR(!st->d->audio_data, 0, free_true_peak) for (j = 0; j < st->d->audio_data_frames * st->channels; ++j) { st->d->audio_data[j] = 0.0; } - errcode = ebur128_init_filter(st); - CHECK_ERROR(errcode, 0, free_audio_data) - st->d->block_energy_history = history_create(mode & EBUR128_MODE_HISTOGRAM ? 1 : 0, st->d->history / 100); st->d->short_term_block_energy_history = history_create(mode & EBUR128_MODE_HISTOGRAM ? 1 : 0, st->d->history / 3000); st->d->short_term_frame_counter = 0; - st->d->tp = true_peak_create(st->samplerate, st->channels); + st->d->f = filter_create(st->samplerate, st->channels, (st->mode & EBUR128_MODE_SAMPLE_PEAK) == EBUR128_MODE_SAMPLE_PEAK, (st->mode & EBUR128_MODE_TRUE_PEAK) == EBUR128_MODE_TRUE_PEAK); /* the first block needs 400ms of audio data */ st->d->needed_frames = st->d->samples_in_100ms * 4; @@ -280,14 +204,8 @@ ebur128_init(unsigned int channels, unsigned long samplerate, int mode) { return st; -free_audio_data: - free(st->d->audio_data); -free_prev_true_peak: - free(st->d->prev_true_peak); free_true_peak: free(st->d->true_peak); -free_prev_sample_peak: - free(st->d->prev_sample_peak); free_sample_peak: free(st->d->sample_peak); free_channel_map: @@ -303,99 +221,16 @@ ebur128_init(unsigned int channels, unsigned long samplerate, int mode) { void ebur128_destroy(ebur128_state** st) { history_destroy((*st)->d->short_term_block_energy_history); history_destroy((*st)->d->block_energy_history); - free((*st)->d->v); + filter_destroy((*st)->d->f); free((*st)->d->audio_data); free((*st)->d->channel_map); free((*st)->d->sample_peak); - free((*st)->d->prev_sample_peak); free((*st)->d->true_peak); - free((*st)->d->prev_true_peak); - true_peak_destroy((*st)->d->tp); free((*st)->d); free(*st); *st = NULL; } -#if defined(__SSE2_MATH__) || defined(_M_X64) || _M_IX86_FP >= 2 -#include -#define TURN_ON_FTZ \ - unsigned int mxcsr = _mm_getcsr(); \ - _mm_setcsr(mxcsr | _MM_FLUSH_ZERO_ON); -#define TURN_OFF_FTZ _mm_setcsr(mxcsr); -#define FLUSH_MANUALLY -#else -#warning "manual FTZ is being used, please enable SSE2 (-msse2 -mfpmath=sse)" -#define TURN_ON_FTZ -#define TURN_OFF_FTZ -#define FLUSH_MANUALLY \ - st->d->v[c][4] = fabs(st->d->v[c][4]) < DBL_MIN ? 0.0 : st->d->v[c][4]; \ - st->d->v[c][3] = fabs(st->d->v[c][3]) < DBL_MIN ? 0.0 : st->d->v[c][3]; \ - st->d->v[c][2] = fabs(st->d->v[c][2]) < DBL_MIN ? 0.0 : st->d->v[c][2]; \ - st->d->v[c][1] = fabs(st->d->v[c][1]) < DBL_MIN ? 0.0 : st->d->v[c][1]; -#endif - -#define EBUR128_FILTER(type, min_scale, max_scale) \ - static void ebur128_filter_##type(ebur128_state* st, const type* src, \ - size_t frames) { \ - static double scaling_factor = \ - EBUR128_MAX(-((double) (min_scale)), (double) (max_scale)); \ - \ - double* audio_data = st->d->audio_data + st->d->audio_data_index; \ - size_t i, c; \ - \ - TURN_ON_FTZ \ - \ - if ((st->mode & EBUR128_MODE_SAMPLE_PEAK) == EBUR128_MODE_SAMPLE_PEAK) { \ - for (c = 0; c < st->channels; ++c) { \ - double max = 0.0; \ - for (i = 0; i < frames; ++i) { \ - double cur = (double) src[i * st->channels + c]; \ - if (EBUR128_MAX(cur, -cur) > max) { \ - max = EBUR128_MAX(cur, -cur); \ - } \ - } \ - max /= scaling_factor; \ - if (max > st->d->prev_sample_peak[c]) { \ - st->d->prev_sample_peak[c] = max; \ - } \ - } \ - } \ - if ((st->mode & EBUR128_MODE_TRUE_PEAK) == EBUR128_MODE_TRUE_PEAK && \ - st->d->tp) { \ - true_peak_check_##type(st->d->tp, frames, src, st->d->prev_true_peak); \ - } \ - for (c = 0; c < st->channels; ++c) { \ - if (st->d->channel_map[c] == EBUR128_UNUSED) { \ - continue; \ - } \ - for (i = 0; i < frames; ++i) { \ - st->d->v[c][0] = \ - (double) ((double) src[i * st->channels + c] / scaling_factor) - \ - st->d->a[1] * st->d->v[c][1] - /**/ \ - st->d->a[2] * st->d->v[c][2] - /**/ \ - st->d->a[3] * st->d->v[c][3] - /**/ \ - st->d->a[4] * st->d->v[c][4]; \ - audio_data[i * st->channels + c] = /**/ \ - st->d->b[0] * st->d->v[c][0] + /**/ \ - st->d->b[1] * st->d->v[c][1] + /**/ \ - st->d->b[2] * st->d->v[c][2] + /**/ \ - st->d->b[3] * st->d->v[c][3] + /**/ \ - st->d->b[4] * st->d->v[c][4]; \ - st->d->v[c][4] = st->d->v[c][3]; \ - st->d->v[c][3] = st->d->v[c][2]; \ - st->d->v[c][2] = st->d->v[c][1]; \ - st->d->v[c][1] = st->d->v[c][0]; \ - } \ - FLUSH_MANUALLY \ - } \ - TURN_OFF_FTZ \ - } - -EBUR128_FILTER(short, SHRT_MIN, SHRT_MAX) -EBUR128_FILTER(int, INT_MIN, INT_MAX) -EBUR128_FILTER(float, -1.0f, 1.0f) -EBUR128_FILTER(double, -1.0, 1.0) - static double ebur128_energy_to_loudness(double energy) { return 10 * (log(energy) / log(10.0)) - 0.691; } @@ -501,12 +336,8 @@ int ebur128_change_parameters(ebur128_state* st, st->d->channel_map = NULL; free(st->d->sample_peak); st->d->sample_peak = NULL; - free(st->d->prev_sample_peak); - st->d->prev_sample_peak = NULL; free(st->d->true_peak); st->d->true_peak = NULL; - free(st->d->prev_true_peak); - st->d->prev_true_peak = NULL; st->channels = channels; errcode = ebur128_init_channel_map(st); @@ -514,17 +345,11 @@ int ebur128_change_parameters(ebur128_state* st, st->d->sample_peak = (double*) malloc(channels * sizeof(double)); CHECK_ERROR(!st->d->sample_peak, EBUR128_ERROR_NOMEM, exit) - st->d->prev_sample_peak = (double*) malloc(channels * sizeof(double)); - CHECK_ERROR(!st->d->prev_sample_peak, EBUR128_ERROR_NOMEM, exit) st->d->true_peak = (double*) malloc(channels * sizeof(double)); CHECK_ERROR(!st->d->true_peak, EBUR128_ERROR_NOMEM, exit) - st->d->prev_true_peak = (double*) malloc(channels * sizeof(double)); - CHECK_ERROR(!st->d->prev_true_peak, EBUR128_ERROR_NOMEM, exit) for (i = 0; i < channels; ++i) { st->d->sample_peak[i] = 0.0; - st->d->prev_sample_peak[i] = 0.0; st->d->true_peak[i] = 0.0; - st->d->prev_true_peak[i] = 0.0; } } if (samplerate != st->samplerate) { @@ -534,10 +359,8 @@ int ebur128_change_parameters(ebur128_state* st, /* If we're here, either samplerate or channels * have changed. Re-init filter. */ - free(st->d->v); - st->d->v = NULL; - errcode = ebur128_init_filter(st); - CHECK_ERROR(errcode, EBUR128_ERROR_NOMEM, exit) + filter_destroy(st->d->f); + st->d->f = filter_create(st->samplerate, st->channels, (st->mode & EBUR128_MODE_SAMPLE_PEAK) == EBUR128_MODE_SAMPLE_PEAK, (st->mode & EBUR128_MODE_TRUE_PEAK) == EBUR128_MODE_TRUE_PEAK); st->d->audio_data_frames = st->samplerate * st->d->window / 1000; if (st->d->audio_data_frames % st->d->samples_in_100ms) { @@ -553,9 +376,6 @@ int ebur128_change_parameters(ebur128_state* st, st->d->audio_data[j] = 0.0; } - true_peak_destroy(st->d->tp); - st->d->tp = true_peak_create(st->samplerate, st->channels); - /* the first block needs 400ms of audio data */ st->d->needed_frames = st->d->samples_in_100ms * 4; /* start at the beginning of the buffer */ @@ -636,13 +456,10 @@ static int ebur128_energy_shortterm(ebur128_state* st, double* out); size_t frames) { \ size_t src_index = 0; \ unsigned int c = 0; \ - for (c = 0; c < st->channels; c++) { \ - st->d->prev_sample_peak[c] = 0.0; \ - st->d->prev_true_peak[c] = 0.0; \ - } \ + filter_reset_peaks(st->d->f); \ while (frames > 0) { \ if (frames >= st->d->needed_frames) { \ - ebur128_filter_##type(st, src + src_index, st->d->needed_frames); \ + filter_process_##type(st->d->f, st->d->needed_frames, src + src_index, st->d->audio_data + st->d->audio_data_index, st->d->channel_map); \ src_index += st->d->needed_frames * st->channels; \ frames -= st->d->needed_frames; \ st->d->audio_data_index += st->d->needed_frames * st->channels; \ @@ -673,7 +490,7 @@ static int ebur128_energy_shortterm(ebur128_state* st, double* out); st->d->audio_data_index = 0; \ } \ } else { \ - ebur128_filter_##type(st, src + src_index, frames); \ + filter_process_##type(st->d->f, frames, src + src_index, st->d->audio_data + st->d->audio_data_index, st->d->channel_map); \ st->d->audio_data_index += frames * st->channels; \ if ((st->mode & EBUR128_MODE_LRA) == EBUR128_MODE_LRA) { \ st->d->short_term_frame_counter += frames; \ @@ -683,11 +500,13 @@ static int ebur128_energy_shortterm(ebur128_state* st, double* out); } \ } \ for (c = 0; c < st->channels; c++) { \ - if (st->d->prev_sample_peak[c] > st->d->sample_peak[c]) { \ - st->d->sample_peak[c] = st->d->prev_sample_peak[c]; \ + const double *prev_sample_peak = filter_sample_peak(st->d->f); \ + const double *prev_true_peak = filter_true_peak(st->d->f); \ + if (prev_sample_peak[c] > st->d->sample_peak[c]) { \ + st->d->sample_peak[c] = prev_sample_peak[c]; \ } \ - if (st->d->prev_true_peak[c] > st->d->true_peak[c]) { \ - st->d->true_peak[c] = st->d->prev_true_peak[c]; \ + if (prev_true_peak[c] > st->d->true_peak[c]) { \ + st->d->true_peak[c] = prev_true_peak[c]; \ } \ } \ return EBUR128_SUCCESS; \ @@ -831,7 +650,7 @@ int ebur128_prev_sample_peak(ebur128_state* st, return EBUR128_ERROR_INVALID_CHANNEL_INDEX; } - *out = st->d->prev_sample_peak[channel_number]; + *out = filter_sample_peak(st->d->f)[channel_number]; return EBUR128_SUCCESS; } @@ -862,7 +681,7 @@ int ebur128_prev_true_peak(ebur128_state* st, return EBUR128_ERROR_INVALID_CHANNEL_INDEX; } - *out = EBUR128_MAX(st->d->prev_true_peak[channel_number], - st->d->prev_sample_peak[channel_number]); + *out = EBUR128_MAX(filter_true_peak(st->d->f)[channel_number], + filter_sample_peak(st->d->f)[channel_number]); return EBUR128_SUCCESS; } diff --git a/src/c/tests/filter.c b/src/c/tests/filter.c new file mode 100644 index 0000000..09815aa --- /dev/null +++ b/src/c/tests/filter.c @@ -0,0 +1,215 @@ +#include +#include +#define _USE_MATH_DEFINES +#include +#include +#include + +typedef void * true_peak; +extern true_peak* true_peak_create_c(unsigned int rate, unsigned int channels); +extern void true_peak_destroy_c(true_peak* tp); +extern void true_peak_check_short_c(true_peak* tp, size_t frames, const int16_t* src, double* peaks); +extern void true_peak_check_int_c(true_peak* tp, size_t frames, const int32_t* src, double* peaks); +extern void true_peak_check_float_c(true_peak* tp, size_t frames, const float* src, double* peaks); +extern void true_peak_check_double_c(true_peak* tp, size_t frames, const double* src, double* peaks); + +#define FILTER_STATE_SIZE 5 +#define EBUR128_UNUSED 0 + +#define EBUR128_MAX(a, b) (((a) > (b)) ? (a) : (b)) + +/** BS.1770 filter state. */ +typedef double filter_state[FILTER_STATE_SIZE]; + +typedef struct { + unsigned int rate; + unsigned int channels; + + /** BS.1770 filter coefficients (nominator). */ + double b[5]; + /** BS.1770 filter coefficients (denominator). */ + double a[5]; + /** one filter_state per channel. */ + filter_state* v; + + /** Maximum sample peak, one per channel */ + int calculate_sample_peak; + double* sample_peak; + /** Maximum true peak, one per channel */ + true_peak *tp; + double* true_peak; +} filter; + +filter * filter_create_c(unsigned int rate, unsigned int channels, int calculate_sample_peak, int calculate_true_peak) { + filter* f = (filter*) calloc(1, sizeof(filter)); + int i, j; + + f->rate = rate; + f->channels = channels; + + double f0 = 1681.974450955533; + double G = 3.999843853973347; + double Q = 0.7071752369554196; + + double K = tan(M_PI * f0 / (double) f->rate); + double Vh = pow(10.0, G / 20.0); + double Vb = pow(Vh, 0.4996667741545416); + + double pb[3] = { 0.0, 0.0, 0.0 }; + double pa[3] = { 1.0, 0.0, 0.0 }; + double rb[3] = { 1.0, -2.0, 1.0 }; + double ra[3] = { 1.0, 0.0, 0.0 }; + + double a0 = 1.0 + K / Q + K * K; + pb[0] = (Vh + Vb * K / Q + K * K) / a0; + pb[1] = 2.0 * (K * K - Vh) / a0; + pb[2] = (Vh - Vb * K / Q + K * K) / a0; + pa[1] = 2.0 * (K * K - 1.0) / a0; + pa[2] = (1.0 - K / Q + K * K) / a0; + + /* fprintf(stderr, "%.14f %.14f %.14f %.14f %.14f\n", + b1[0], b1[1], b1[2], a1[1], a1[2]); */ + + f0 = 38.13547087602444; + Q = 0.5003270373238773; + K = tan(M_PI * f0 / (double) f->rate); + + ra[1] = 2.0 * (K * K - 1.0) / (1.0 + K / Q + K * K); + ra[2] = (1.0 - K / Q + K * K) / (1.0 + K / Q + K * K); + + /* fprintf(stderr, "%.14f %.14f\n", a2[1], a2[2]); */ + + f->b[0] = pb[0] * rb[0]; + f->b[1] = pb[0] * rb[1] + pb[1] * rb[0]; + f->b[2] = pb[0] * rb[2] + pb[1] * rb[1] + pb[2] * rb[0]; + f->b[3] = pb[1] * rb[2] + pb[2] * rb[1]; + f->b[4] = pb[2] * rb[2]; + + f->a[0] = pa[0] * ra[0]; + f->a[1] = pa[0] * ra[1] + pa[1] * ra[0]; + f->a[2] = pa[0] * ra[2] + pa[1] * ra[1] + pa[2] * ra[0]; + f->a[3] = pa[1] * ra[2] + pa[2] * ra[1]; + f->a[4] = pa[2] * ra[2]; + + f->v = (filter_state*) malloc(channels * sizeof(filter_state)); + for (i = 0; i < (int) channels; ++i) { + for (j = 0; j < FILTER_STATE_SIZE; ++j) { + f->v[i][j] = 0.0; + } + } + + f->calculate_sample_peak = calculate_sample_peak; + f->sample_peak = (double*) malloc(channels * sizeof(double)); + + f->tp = calculate_true_peak ? true_peak_create_c(rate, channels) : NULL; + f->true_peak = (double*) malloc(channels * sizeof(double)); + + for (i = 0; i < (int) channels; ++i) { + f->sample_peak[i] = 0.0; + f->true_peak[i] = 0.0; + } + + return f; +} + +void filter_destroy_c(filter* f) { + free(f->sample_peak); + true_peak_destroy_c(f->tp); + free(f->true_peak); + free(f); +} + +const double* filter_sample_peak_c(const filter *f) { + return f->sample_peak; +} + +const double* filter_true_peak_c(const filter *f) { + return f->true_peak; +} + +void filter_reset_peaks_c(filter *f) { + int i; + + for (i = 0; i < (int) f->channels; ++i) { + f->sample_peak[i] = 0.0; + f->true_peak[i] = 0.0; + } +} + +#if defined(__SSE2_MATH__) || defined(_M_X64) || _M_IX86_FP >= 2 +#include +#define TURN_ON_FTZ \ + unsigned int mxcsr = _mm_getcsr(); \ + _mm_setcsr(mxcsr | _MM_FLUSH_ZERO_ON); +#define TURN_OFF_FTZ _mm_setcsr(mxcsr); +#define FLUSH_MANUALLY +#else +#warning "manual FTZ is being used, please enable SSE2 (-msse2 -mfpmath=sse)" +#define TURN_ON_FTZ +#define TURN_OFF_FTZ +#define FLUSH_MANUALLY \ + f->v[c][4] = fabs(f->v[c][4]) < DBL_MIN ? 0.0 : f->v[c][4]; \ + f->v[c][3] = fabs(f->v[c][3]) < DBL_MIN ? 0.0 : f->v[c][3]; \ + f->v[c][2] = fabs(f->v[c][2]) < DBL_MIN ? 0.0 : f->v[c][2]; \ + f->v[c][1] = fabs(f->v[c][1]) < DBL_MIN ? 0.0 : f->v[c][1]; +#endif + +#define EBUR128_FILTER(type, min_scale, max_scale) \ + void filter_process_##type##_c(filter* f, size_t frames, const type* src, \ + double* audio_data, const unsigned int* channel_map) { \ + static double scaling_factor = \ + EBUR128_MAX(-((double) (min_scale)), (double) (max_scale)); \ + \ + size_t i, c; \ + \ + TURN_ON_FTZ \ + \ + if (f->calculate_sample_peak) { \ + for (c = 0; c < f->channels; ++c) { \ + double max = 0.0; \ + for (i = 0; i < frames; ++i) { \ + double cur = (double) src[i * f->channels + c]; \ + if (EBUR128_MAX(cur, -cur) > max) { \ + max = EBUR128_MAX(cur, -cur); \ + } \ + } \ + max /= scaling_factor; \ + if (max > f->sample_peak[c]) { \ + f->sample_peak[c] = max; \ + } \ + } \ + } \ + if (f->tp) { \ + true_peak_check_##type##_c(f->tp, frames, src, f->true_peak); \ + } \ + for (c = 0; c < f->channels; ++c) { \ + if (channel_map[c] == EBUR128_UNUSED) { \ + continue; \ + } \ + for (i = 0; i < frames; ++i) { \ + f->v[c][0] = \ + (double) ((double) src[i * f->channels + c] / scaling_factor) - \ + f->a[1] * f->v[c][1] - /**/ \ + f->a[2] * f->v[c][2] - /**/ \ + f->a[3] * f->v[c][3] - /**/ \ + f->a[4] * f->v[c][4]; \ + audio_data[i * f->channels + c] = /**/ \ + f->b[0] * f->v[c][0] + /**/ \ + f->b[1] * f->v[c][1] + /**/ \ + f->b[2] * f->v[c][2] + /**/ \ + f->b[3] * f->v[c][3] + /**/ \ + f->b[4] * f->v[c][4]; \ + f->v[c][4] = f->v[c][3]; \ + f->v[c][3] = f->v[c][2]; \ + f->v[c][2] = f->v[c][1]; \ + f->v[c][1] = f->v[c][0]; \ + } \ + FLUSH_MANUALLY \ + } \ + TURN_OFF_FTZ \ + } + +EBUR128_FILTER(short, SHRT_MIN, SHRT_MAX) +EBUR128_FILTER(int, INT_MIN, INT_MAX) +EBUR128_FILTER(float, -1.0f, 1.0f) +EBUR128_FILTER(double, -1.0, 1.0) diff --git a/src/filter.rs b/src/filter.rs new file mode 100644 index 0000000..6016b86 --- /dev/null +++ b/src/filter.rs @@ -0,0 +1,812 @@ +pub struct Filter { + channels: u32, + // BS.1770 filter coefficients (nominator). + b: [f64; 5], + // BS.1770 filter coefficients (denominator). + a: [f64; 5], + // One filter state per channel. + filter_state: Vec<[f64; 5]>, + + calculate_sample_peak: bool, + sample_peak: Vec, + + tp: Option, + true_peak: Vec, +} + +#[allow(non_snake_case)] +fn filter_coefficients(rate: f64) -> ([f64; 5], [f64; 5]) { + let f0 = 1681.974450955533; + let G = 3.999843853973347; + let Q = 0.7071752369554196; + + let K = f64::tan(std::f64::consts::PI * f0 / rate); + let Vh = f64::powf(10.0, G / 20.0); + let Vb = f64::powf(Vh, 0.4996667741545416); + + let mut pb = [0.0, 0.0, 0.0]; + let mut pa = [1.0, 0.0, 0.0]; + let rb = [1.0, -2.0, 1.0]; + let mut ra = [1.0, 0.0, 0.0]; + + let a0 = 1.0 + K / Q + K * K; + pb[0] = (Vh + Vb * K / Q + K * K) / a0; + pb[1] = 2.0 * (K * K - Vh) / a0; + pb[2] = (Vh - Vb * K / Q + K * K) / a0; + pa[1] = 2.0 * (K * K - 1.0) / a0; + pa[2] = (1.0 - K / Q + K * K) / a0; + + let f0 = 38.13547087602444; + let Q = 0.5003270373238773; + let K = f64::tan(std::f64::consts::PI * f0 / rate); + + ra[1] = 2.0 * (K * K - 1.0) / (1.0 + K / Q + K * K); + ra[2] = (1.0 - K / Q + K * K) / (1.0 + K / Q + K * K); + + ( + // Nominator + [ + pb[0] * rb[0], + pb[0] * rb[1] + pb[1] * rb[0], + pb[0] * rb[2] + pb[1] * rb[1] + pb[2] * rb[0], + pb[1] * rb[2] + pb[2] * rb[1], + pb[2] * rb[2], + ], + // Denominator + [ + pa[0] * ra[0], + pa[0] * ra[1] + pa[1] * ra[0], + pa[0] * ra[2] + pa[1] * ra[1] + pa[2] * ra[0], + pa[1] * ra[2] + pa[2] * ra[1], + pa[2] * ra[2], + ], + ) +} + +impl Filter { + pub fn new( + rate: u32, + channels: u32, + calculate_sample_peak: bool, + calculate_true_peak: bool, + ) -> Self { + assert!(rate > 0); + assert!(channels > 0); + + let (b, a) = filter_coefficients(rate as f64); + + let tp = if calculate_true_peak { + crate::true_peak::TruePeak::new(rate, channels) + } else { + None + }; + + Filter { + channels, + b, + a, + filter_state: vec![[0.0; 5]; channels as usize], + calculate_sample_peak, + sample_peak: vec![0.0; channels as usize], + tp, + true_peak: vec![0.0; channels as usize], + } + } + + pub fn reset_peaks(&mut self) { + for v in &mut self.sample_peak { + *v = 0.0; + } + + for v in &mut self.true_peak { + *v = 0.0; + } + } + + pub fn sample_peak(&self) -> &[f64] { + &*self.sample_peak + } + + pub fn true_peak(&self) -> &[f64] { + &*self.true_peak + } + + // FIXME: Channel map should go away + pub fn process(&mut self, src: &[T], dest: &mut [f64], channel_map: &[u32]) { + let ftz = ftz::Ftz::new(); + + assert!(src.len() == dest.len()); + assert!(src.len() % self.channels as usize == 0); + assert!(dest.len() % self.channels as usize == 0); + assert!(src.len() / self.channels as usize == dest.len() / self.channels as usize); + assert!(channel_map.len() == self.channels as usize); + assert!(self.filter_state.len() == self.channels as usize); + + // TODO: Deinterleaving into a &mut [f64] as a first step seems beneficial and + // would also prevent the deinterleaving that now happens inside check_true_peak() + // anyway + + if self.calculate_sample_peak { + assert!(self.sample_peak.len() == self.channels as usize); + + for frame in src.chunks_exact(self.channels as usize) { + for (c, sample) in frame.iter().enumerate() { + let v = sample.as_f64().abs(); + if v > self.sample_peak[c] { + self.sample_peak[c] = v; + } + } + } + } + + if let Some(ref mut tp) = self.tp { + assert!(self.true_peak.len() == self.channels as usize); + tp.check_true_peak(src, &mut *self.true_peak); + } + + for (c, channel_map) in channel_map.iter().enumerate() { + if *channel_map == crate::ffi::channel_EBUR128_UNUSED { + continue; + } + + let filter_state = &mut self.filter_state[c]; + for (src, dest) in src + .chunks_exact(self.channels as usize) + .zip(dest.chunks_exact_mut(self.channels as usize)) + { + filter_state[0] = src[c].as_f64() + - self.a[1] * filter_state[1] + - self.a[2] * filter_state[2] + - self.a[3] * filter_state[3] + - self.a[4] * filter_state[4]; + dest[c] = self.b[0] * filter_state[0] + + self.b[1] * filter_state[1] + + self.b[2] * filter_state[2] + + self.b[3] * filter_state[3] + + self.b[4] * filter_state[4]; + + filter_state[4] = filter_state[3]; + filter_state[3] = filter_state[2]; + filter_state[2] = filter_state[1]; + filter_state[1] = filter_state[0]; + } + + if ftz.is_none() { + for v in filter_state { + if v.abs() < f64::EPSILON { + *v = 0.0; + } + } + } + } + } +} + +pub trait AsF64: crate::true_peak::AsF32 + Copy + PartialOrd { + fn as_f64(self) -> f64; +} + +impl AsF64 for i16 { + fn as_f64(self) -> f64 { + self as f64 / -(std::i16::MIN as f64) + } +} + +impl AsF64 for i32 { + fn as_f64(self) -> f64 { + self as f64 / -(std::i32::MIN as f64) + } +} + +impl AsF64 for f32 { + fn as_f64(self) -> f64 { + self as f64 + } +} + +impl AsF64 for f64 { + fn as_f64(self) -> f64 { + self + } +} + +#[cfg(all( + any(target_arch = "x86", target_arch = "x86_64"), + target_feature = "sse2" +))] +mod ftz { + #[cfg(target_arch = "x86")] + use std::arch::x86::{_mm_getcsr, _mm_setcsr, _MM_FLUSH_ZERO_ON}; + #[cfg(target_arch = "x86_64")] + use std::arch::x86_64::{_mm_getcsr, _mm_setcsr, _MM_FLUSH_ZERO_ON}; + + pub struct Ftz(u32); + + impl Ftz { + pub fn new() -> Option { + unsafe { + let csr = _mm_getcsr(); + + _mm_setcsr(csr | _MM_FLUSH_ZERO_ON); + + Some(Ftz(csr)) + } + } + } + + impl Drop for Ftz { + fn drop(&mut self) { + unsafe { + _mm_setcsr(self.0); + } + } + } +} + +#[cfg(not(any(all( + any(target_arch = "x86", target_arch = "x86_64"), + target_feature = "sse2" +)),))] +mod ftz { + pub struct Ftz; + + impl Ftz { + pub fn new() -> Option { + None + } + } +} + +#[no_mangle] +pub unsafe extern "C" fn filter_create( + rate: u32, + channels: u32, + calculate_sample_peak: i32, + calculate_true_peak: i32, +) -> *mut Filter { + Box::into_raw(Box::new(Filter::new( + rate, + channels, + calculate_sample_peak != 0, + calculate_true_peak != 0, + ))) +} + +#[no_mangle] +pub unsafe extern "C" fn filter_reset_peaks(filter: *mut Filter) { + let filter = &mut *filter; + filter.reset_peaks(); +} + +#[no_mangle] +pub unsafe extern "C" fn filter_sample_peak(filter: *const Filter) -> *const f64 { + let filter = &*filter; + filter.sample_peak().as_ptr() +} + +#[no_mangle] +pub unsafe extern "C" fn filter_true_peak(filter: *const Filter) -> *const f64 { + let filter = &*filter; + filter.true_peak().as_ptr() +} + +#[no_mangle] +pub unsafe extern "C" fn filter_process_short( + filter: *mut Filter, + frames: usize, + src: *const i16, + dest: *mut f64, + channel_map: *const u32, +) { + use std::slice; + + let filter = &mut *filter; + let src = slice::from_raw_parts(src, filter.channels as usize * frames); + let dest = slice::from_raw_parts_mut(dest, filter.channels as usize * frames); + let channel_map = slice::from_raw_parts(channel_map, filter.channels as usize); + + filter.process(src, dest, channel_map); +} + +#[no_mangle] +pub unsafe extern "C" fn filter_process_int( + filter: *mut Filter, + frames: usize, + src: *const i32, + dest: *mut f64, + channel_map: *const u32, +) { + use std::slice; + + let filter = &mut *filter; + let src = slice::from_raw_parts(src, filter.channels as usize * frames); + let dest = slice::from_raw_parts_mut(dest, filter.channels as usize * frames); + let channel_map = slice::from_raw_parts(channel_map, filter.channels as usize); + + filter.process(src, dest, channel_map); +} + +#[no_mangle] +pub unsafe extern "C" fn filter_process_float( + filter: *mut Filter, + frames: usize, + src: *const f32, + dest: *mut f64, + channel_map: *const u32, +) { + use std::slice; + + let filter = &mut *filter; + let src = slice::from_raw_parts(src, filter.channels as usize * frames); + let dest = slice::from_raw_parts_mut(dest, filter.channels as usize * frames); + let channel_map = slice::from_raw_parts(channel_map, filter.channels as usize); + + filter.process(src, dest, channel_map); +} + +#[no_mangle] +pub unsafe extern "C" fn filter_process_double( + filter: *mut Filter, + frames: usize, + src: *const f64, + dest: *mut f64, + channel_map: *const u32, +) { + use std::slice; + + let filter = &mut *filter; + let src = slice::from_raw_parts(src, filter.channels as usize * frames); + let dest = slice::from_raw_parts_mut(dest, filter.channels as usize * frames); + let channel_map = slice::from_raw_parts(channel_map, filter.channels as usize); + + filter.process(src, dest, channel_map); +} + +#[no_mangle] +pub unsafe extern "C" fn filter_destroy(filter: *mut Filter) { + drop(Box::from_raw(filter)); +} + +#[cfg(feature = "internal-tests")] +use std::os::raw::c_void; + +#[cfg(feature = "internal-tests")] +extern "C" { + pub fn filter_create_c( + rate: u32, + channels: u32, + calculate_sample_peak: i32, + calculate_true_peak: i32, + ) -> *mut c_void; + pub fn filter_reset_peaks_c(filter: *mut c_void); + pub fn filter_sample_peak_c(filter: *const c_void) -> *const f64; + pub fn filter_true_peak_c(filter: *const c_void) -> *const f64; + pub fn filter_process_short_c( + filter: *mut c_void, + frames: usize, + src: *const i16, + dest: *mut f64, + channel_map: *const u32, + ); + pub fn filter_process_int_c( + filter: *mut c_void, + frames: usize, + src: *const i32, + dest: *mut f64, + channel_map: *const u32, + ); + pub fn filter_process_float_c( + filter: *mut c_void, + frames: usize, + src: *const f32, + dest: *mut f64, + channel_map: *const u32, + ); + pub fn filter_process_double_c( + filter: *mut c_void, + frames: usize, + src: *const f64, + dest: *mut f64, + channel_map: *const u32, + ); + pub fn filter_destroy_c(filter: *mut c_void); +} + +#[cfg(all(test, feature = "internal-tests"))] +mod tests { + use super::*; + use crate::tests::Signal; + use quickcheck_macros::quickcheck; + + #[quickcheck] + fn compare_c_impl_i16( + signal: Signal, + calculate_sample_peak: bool, + calculate_true_peak: bool, + ) { + use float_cmp::approx_eq; + + // Maximum of 400ms but our input is up to 5000ms, so distribute it evenly + // by shrinking accordingly. + let frames = signal.data.len() / signal.channels as usize; + let frames = std::cmp::min(2 * frames / 25, 4 * ((signal.rate as usize + 5) / 10)); + + let mut data_out = vec![0.0f64; frames * signal.channels as usize]; + let mut data_out_c = vec![0.0f64; frames * signal.channels as usize]; + + let channel_map = vec![1; signal.channels as usize]; + + let (sp, tp) = { + let mut f = Filter::new( + signal.rate, + signal.channels, + calculate_sample_peak, + calculate_true_peak, + ); + f.process( + &signal.data[..(frames * signal.channels as usize)], + &mut data_out, + &channel_map, + ); + + (Vec::from(f.sample_peak()), Vec::from(f.true_peak())) + }; + + let (sp_c, tp_c) = unsafe { + use std::slice; + + let f = filter_create_c( + signal.rate, + signal.channels, + if calculate_sample_peak { 1 } else { 1 }, + if calculate_true_peak { 1 } else { 0 }, + ); + filter_process_short_c( + f, + frames, + signal.data[..(frames * signal.channels as usize)].as_ptr(), + data_out_c.as_mut_ptr(), + channel_map.as_ptr(), + ); + + let sp = Vec::from(slice::from_raw_parts( + filter_sample_peak_c(f), + signal.channels as usize, + )); + let tp = Vec::from(slice::from_raw_parts( + filter_true_peak_c(f), + signal.channels as usize, + )); + filter_destroy_c(f); + (sp, tp) + }; + + if calculate_sample_peak { + for (i, (r, c)) in sp.iter().zip(sp_c.iter()).enumerate() { + assert!( + approx_eq!(f64, *r, *c, ulps = 2), + "Rust and C implementation differ at sample peak {}: {} != {}", + i, + r, + c + ); + } + } + + if calculate_true_peak { + for (i, (r, c)) in tp.iter().zip(tp_c.iter()).enumerate() { + assert!( + approx_eq!(f64, *r, *c, ulps = 2), + "Rust and C implementation differ at true peak {}: {} != {}", + i, + r, + c + ); + } + } + + for (i, (r, c)) in data_out.iter().zip(data_out_c.iter()).enumerate() { + assert!( + approx_eq!(f64, *r, *c, ulps = 2), + "Rust and C implementation differ at sample {}: {} != {}", + i, + r, + c + ); + } + } + + #[quickcheck] + fn compare_c_impl_i32( + signal: Signal, + calculate_sample_peak: bool, + calculate_true_peak: bool, + ) { + use float_cmp::approx_eq; + + // Maximum of 400ms but our input is up to 5000ms, so distribute it evenly + // by shrinking accordingly. + let frames = signal.data.len() / signal.channels as usize; + let frames = std::cmp::min(2 * frames / 25, 4 * ((signal.rate as usize + 5) / 10)); + + let mut data_out = vec![0.0f64; frames * signal.channels as usize]; + let mut data_out_c = vec![0.0f64; frames * signal.channels as usize]; + + let channel_map = vec![1; signal.channels as usize]; + + let (sp, tp) = { + let mut f = Filter::new( + signal.rate, + signal.channels, + calculate_sample_peak, + calculate_true_peak, + ); + f.process( + &signal.data[..(frames * signal.channels as usize)], + &mut data_out, + &channel_map, + ); + + (Vec::from(f.sample_peak()), Vec::from(f.true_peak())) + }; + + let (sp_c, tp_c) = unsafe { + use std::slice; + + let f = filter_create_c( + signal.rate, + signal.channels, + if calculate_sample_peak { 1 } else { 1 }, + if calculate_true_peak { 1 } else { 0 }, + ); + filter_process_int_c( + f, + frames, + signal.data[..(frames * signal.channels as usize)].as_ptr(), + data_out_c.as_mut_ptr(), + channel_map.as_ptr(), + ); + + let sp = Vec::from(slice::from_raw_parts( + filter_sample_peak_c(f), + signal.channels as usize, + )); + let tp = Vec::from(slice::from_raw_parts( + filter_true_peak_c(f), + signal.channels as usize, + )); + filter_destroy_c(f); + (sp, tp) + }; + + if calculate_sample_peak { + for (i, (r, c)) in sp.iter().zip(sp_c.iter()).enumerate() { + assert!( + approx_eq!(f64, *r, *c, ulps = 2), + "Rust and C implementation differ at sample peak {}: {} != {}", + i, + r, + c + ); + } + } + + if calculate_true_peak { + for (i, (r, c)) in tp.iter().zip(tp_c.iter()).enumerate() { + assert!( + approx_eq!(f64, *r, *c, ulps = 2), + "Rust and C implementation differ at true peak {}: {} != {}", + i, + r, + c + ); + } + } + + for (i, (r, c)) in data_out.iter().zip(data_out_c.iter()).enumerate() { + assert!( + approx_eq!(f64, *r, *c, ulps = 2), + "Rust and C implementation differ at sample {}: {} != {}", + i, + r, + c + ); + } + } + + #[quickcheck] + fn compare_c_impl_f32( + signal: Signal, + calculate_sample_peak: bool, + calculate_true_peak: bool, + ) { + use float_cmp::approx_eq; + + // Maximum of 400ms but our input is up to 5000ms, so distribute it evenly + // by shrinking accordingly. + let frames = signal.data.len() / signal.channels as usize; + let frames = std::cmp::min(2 * frames / 25, 4 * ((signal.rate as usize + 5) / 10)); + + let mut data_out = vec![0.0f64; frames * signal.channels as usize]; + let mut data_out_c = vec![0.0f64; frames * signal.channels as usize]; + + let channel_map = vec![1; signal.channels as usize]; + + let (sp, tp) = { + let mut f = Filter::new( + signal.rate, + signal.channels, + calculate_sample_peak, + calculate_true_peak, + ); + f.process( + &signal.data[..(frames * signal.channels as usize)], + &mut data_out, + &channel_map, + ); + + (Vec::from(f.sample_peak()), Vec::from(f.true_peak())) + }; + + let (sp_c, tp_c) = unsafe { + use std::slice; + + let f = filter_create_c( + signal.rate, + signal.channels, + if calculate_sample_peak { 1 } else { 1 }, + if calculate_true_peak { 1 } else { 0 }, + ); + filter_process_float_c( + f, + frames, + signal.data[..(frames * signal.channels as usize)].as_ptr(), + data_out_c.as_mut_ptr(), + channel_map.as_ptr(), + ); + + let sp = Vec::from(slice::from_raw_parts( + filter_sample_peak_c(f), + signal.channels as usize, + )); + let tp = Vec::from(slice::from_raw_parts( + filter_true_peak_c(f), + signal.channels as usize, + )); + filter_destroy_c(f); + (sp, tp) + }; + + if calculate_sample_peak { + for (i, (r, c)) in sp.iter().zip(sp_c.iter()).enumerate() { + assert!( + approx_eq!(f64, *r, *c, ulps = 2), + "Rust and C implementation differ at sample peak {}: {} != {}", + i, + r, + c + ); + } + } + + if calculate_true_peak { + for (i, (r, c)) in tp.iter().zip(tp_c.iter()).enumerate() { + assert!( + approx_eq!(f64, *r, *c, ulps = 2), + "Rust and C implementation differ at true peak {}: {} != {}", + i, + r, + c + ); + } + } + + for (i, (r, c)) in data_out.iter().zip(data_out_c.iter()).enumerate() { + assert!( + approx_eq!(f64, *r, *c, ulps = 2), + "Rust and C implementation differ at sample {}: {} != {}", + i, + r, + c + ); + } + } + + #[quickcheck] + fn compare_c_impl_f64( + signal: Signal, + calculate_sample_peak: bool, + calculate_true_peak: bool, + ) { + use float_cmp::approx_eq; + + // Maximum of 400ms but our input is up to 5000ms, so distribute it evenly + // by shrinking accordingly. + let frames = signal.data.len() / signal.channels as usize; + let frames = std::cmp::min(2 * frames / 25, 4 * ((signal.rate as usize + 5) / 10)); + + let mut data_out = vec![0.0f64; frames * signal.channels as usize]; + let mut data_out_c = vec![0.0f64; frames * signal.channels as usize]; + + let channel_map = vec![1; signal.channels as usize]; + + let (sp, tp) = { + let mut f = Filter::new( + signal.rate, + signal.channels, + calculate_sample_peak, + calculate_true_peak, + ); + f.process( + &signal.data[..(frames * signal.channels as usize)], + &mut data_out, + &channel_map, + ); + + (Vec::from(f.sample_peak()), Vec::from(f.true_peak())) + }; + + let (sp_c, tp_c) = unsafe { + use std::slice; + + let f = filter_create_c( + signal.rate, + signal.channels, + if calculate_sample_peak { 1 } else { 1 }, + if calculate_true_peak { 1 } else { 0 }, + ); + filter_process_double_c( + f, + frames, + signal.data[..(frames * signal.channels as usize)].as_ptr(), + data_out_c.as_mut_ptr(), + channel_map.as_ptr(), + ); + + let sp = Vec::from(slice::from_raw_parts( + filter_sample_peak_c(f), + signal.channels as usize, + )); + let tp = Vec::from(slice::from_raw_parts( + filter_true_peak_c(f), + signal.channels as usize, + )); + filter_destroy_c(f); + (sp, tp) + }; + + if calculate_sample_peak { + for (i, (r, c)) in sp.iter().zip(sp_c.iter()).enumerate() { + assert!( + approx_eq!(f64, *r, *c, ulps = 2), + "Rust and C implementation differ at sample peak {}: {} != {}", + i, + r, + c + ); + } + } + + if calculate_true_peak { + for (i, (r, c)) in tp.iter().zip(tp_c.iter()).enumerate() { + assert!( + approx_eq!(f64, *r, *c, ulps = 2), + "Rust and C implementation differ at true peak {}: {} != {}", + i, + r, + c + ); + } + } + + for (i, (r, c)) in data_out.iter().zip(data_out_c.iter()).enumerate() { + assert!( + approx_eq!(f64, *r, *c, ulps = 2), + "Rust and C implementation differ at sample {}: {} != {}", + i, + r, + c + ); + } + } +} diff --git a/src/lib.rs b/src/lib.rs index d9d02a2..10c1fdd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -34,6 +34,11 @@ pub mod history; #[cfg(not(feature = "internal-tests"))] pub(crate) mod history; +#[cfg(feature = "internal-tests")] +pub mod filter; +#[cfg(not(feature = "internal-tests"))] +pub(crate) mod filter; + #[cfg(test)] mod tests { #[derive(Clone, Debug)] diff --git a/src/true_peak.rs b/src/true_peak.rs index 18d7aeb..a0481f3 100644 --- a/src/true_peak.rs +++ b/src/true_peak.rs @@ -118,87 +118,6 @@ impl AsF32 for f64 { } } -#[no_mangle] -pub unsafe extern "C" fn true_peak_create(rate: u32, channels: u32) -> *mut TruePeak { - TruePeak::new(rate, channels) - .map(Box::new) - .map(Box::into_raw) - .unwrap_or(std::ptr::null_mut()) -} - -#[no_mangle] -pub unsafe extern "C" fn true_peak_check_short( - tp: *mut TruePeak, - frames: usize, - src: *const i16, - peaks: *mut f64, -) { - use std::slice; - - let tp = &mut *tp; - let src = slice::from_raw_parts(src, tp.channels as usize * frames); - let peaks = slice::from_raw_parts_mut(peaks, tp.channels as usize); - - tp.check_true_peak(src, peaks); -} - -#[no_mangle] -pub unsafe extern "C" fn true_peak_check_int( - tp: *mut TruePeak, - frames: usize, - src: *const i32, - peaks: *mut f64, -) { - use std::slice; - - let tp = &mut *tp; - let src = slice::from_raw_parts(src, tp.channels as usize * frames); - let peaks = slice::from_raw_parts_mut(peaks, tp.channels as usize); - - tp.check_true_peak(src, peaks); -} - -#[no_mangle] -pub unsafe extern "C" fn true_peak_check_float( - tp: *mut TruePeak, - frames: usize, - src: *const f32, - peaks: *mut f64, -) { - use std::slice; - - let tp = &mut *tp; - let src = slice::from_raw_parts(src, tp.channels as usize * frames); - let peaks = slice::from_raw_parts_mut(peaks, tp.channels as usize); - - tp.check_true_peak(src, peaks); -} - -#[no_mangle] -pub unsafe extern "C" fn true_peak_check_double( - tp: *mut TruePeak, - frames: usize, - src: *const f64, - peaks: *mut f64, -) { - use std::slice; - - let tp = &mut *tp; - let src = slice::from_raw_parts(src, tp.channels as usize * frames); - let peaks = slice::from_raw_parts_mut(peaks, tp.channels as usize); - - tp.check_true_peak(src, peaks); -} - -#[no_mangle] -pub unsafe extern "C" fn true_peak_destroy(tp: *mut TruePeak) { - if tp.is_null() { - return; - } - - drop(Box::from_raw(tp)); -} - #[cfg(feature = "internal-tests")] use std::os::raw::c_void;