|
/// Size in bytes of each benchmark buffer (`1 << 10` = 1024).
///
/// `bench_type!` divides this by the element size, so every integer width is
/// benchmarked over arrays occupying the same number of bytes.
const BYTES: usize = 1 << 10;
| 2 | + |
// Generates one `#[bench]` function named `$name` that times `$op` applied
// element-wise over fixed-size arrays.
//
// Arguments:
// * `$op`   - path to a two-argument function, invoked as `$op(value, mask)`.
// * `$name` - identifier of the generated benchmark function.
// * `$mask` - expression evaluating to a callable taking `(src2, k)` and
//             returning the mask used for element `k`.
//
// The expansion expects `U`, `Data` and `ITERATIONS` to be in scope at the
// call site, and uses `crate::bench_rng`, `::rand::Rng` and `::test`.
macro_rules! bench_template {
    ($op:path, $name:ident, $mask:expr) => {
        #[bench]
        fn $name(bench: &mut ::test::Bencher) {
            use ::rand::Rng;
            let mut rng = crate::bench_rng();
            // Buffers are built once, outside the timed closure. `src1` is
            // drawn before `src2`; keep that order so the data produced by
            // the generator stays the same.
            let mut dst = vec![0; ITERATIONS];
            let src1: Vec<U> = (0..ITERATIONS).map(|_| rng.random_range(0..=U::MAX)).collect();
            let mut src2: Vec<U> = (0..ITERATIONS).map(|_| rng.random_range(0..=U::MAX)).collect();
            // Fix the loop-invariant mask: element 0 is pinned to `U::MAX / 3`
            // instead of a random value.
            src2[0] = U::MAX / 3;
            // View each `Vec` as a fixed-size array; the vectors were created
            // with exactly `ITERATIONS` elements, so the unwraps cannot fail.
            let dst = dst.first_chunk_mut().unwrap();
            let src1 = src1.first_chunk().unwrap();
            let src2 = src2.first_chunk().unwrap();

            // NOTE(review): `vectored` is referenced just below, so this
            // `allow` looks redundant - confirm before removing.
            #[allow(unused)]
            fn vectored(dst: &mut Data, src1: &Data, src2: &Data) {
                let mask = $mask;
                for k in 0..ITERATIONS {
                    dst[k] = $op(src1[k], mask(src2, k));
                }
            }
            // Call through a `black_box`ed function pointer rather than
            // calling `vectored` directly from the timed closure.
            let f: fn(&mut Data, &Data, &Data) = vectored;
            let f = ::test::black_box(f);

            bench.iter(|| {
                f(dst, src1, src2);
            });
        }
    };
}
| 34 | + |
// Generates a module named after the integer type `$U` containing the
// benchmarks for that type.
//
// Inside the module:
// * `U` aliases the type under test,
// * `ITERATIONS` is the element count that makes one array `BYTES` bytes,
// * `Data` is the fixed-size array type passed to the benchmark kernels.
//
// Three mask flavours are instantiated through `bench_mask_kind!`:
// * `constant`  - a compile-time constant mask (`U::MAX / 3`),
// * `invariant` - the mask is read from `src[0]` on every iteration,
// * `variable`  - the mask is read from `src[k]`, a different element each
//                 iteration.
macro_rules! bench_type {
    ($U:ident) => {
        mod $U {
            type U = $U;
            const ITERATIONS: usize = super::BYTES / size_of::<U>();
            type Data = [U; ITERATIONS];
            bench_mask_kind!(constant, |_, _| const { U::MAX / 3 });
            bench_mask_kind!(invariant, |src: &Data, _| src[0]);
            bench_mask_kind!(variable, |src: &Data, k| src[k]);
        }
    };
}
| 47 | + |
// Generates a module `$mask_kind` holding one benchmark per operation
// (`gather_bits` and `scatter_bits`), both using the mask callable `$mask`.
//
// Arguments:
// * `$mask_kind` - name of the generated module.
// * `$mask`      - mask callable forwarded unchanged to `bench_template!`.
//
// The parent module must define `Data`, `ITERATIONS` and `U`; they are
// imported here so the expanded benchmark bodies can name them.
macro_rules! bench_mask_kind {
    ($mask_kind:ident, $mask:expr) => {
        mod $mask_kind {
            use super::{Data, ITERATIONS, U};
            bench_template!(U::gather_bits, gather_bits, $mask);
            bench_template!(U::scatter_bits, scatter_bits, $mask);
        }
    };
}
| 57 | + |
| 58 | +bench_type!(u8); |
| 59 | +bench_type!(u16); |
| 60 | +bench_type!(u32); |
| 61 | +bench_type!(u64); |
| 62 | +bench_type!(u128); |
0 commit comments