performance regression of binary_search #115271

kekeimiku · 2023-08-27T12:43:29Z

This affects my real project: `binary_search` on a `[usize]` slice seems to have a huge performance regression.

After investigating, I found that it is caused by the new `binary_search` implementation introduced in PR #74024.

Why wasn't the new implementation added as a separate `binary_search_unstable` instead?

MacBook Pro M2 (macOS)

test new_binary_search_l1            ... bench:          38 ns/iter (+/- 0)
test new_binary_search_l1_with_dups  ... bench:          22 ns/iter (+/- 0)
test new_binary_search_l1_worst_case ... bench:           7 ns/iter (+/- 1)
test new_binary_search_l2            ... bench:          56 ns/iter (+/- 6)
test new_binary_search_l2_with_dups  ... bench:          35 ns/iter (+/- 0)
test new_binary_search_l2_worst_case ... bench:          11 ns/iter (+/- 0)
test new_binary_search_l3            ... bench:         104 ns/iter (+/- 1)
test new_binary_search_l3_with_dups  ... bench:          85 ns/iter (+/- 1)
test new_binary_search_l3_worst_case ... bench:          19 ns/iter (+/- 0)
test old_binary_search_l1            ... bench:           5 ns/iter (+/- 0)
test old_binary_search_l1_with_dups  ... bench:           5 ns/iter (+/- 0)
test old_binary_search_l1_worst_case ... bench:           4 ns/iter (+/- 0)
test old_binary_search_l2            ... bench:           8 ns/iter (+/- 0)
test old_binary_search_l2_with_dups  ... bench:           8 ns/iter (+/- 0)
test old_binary_search_l2_worst_case ... bench:           7 ns/iter (+/- 0)
test old_binary_search_l3            ... bench:          24 ns/iter (+/- 0)
test old_binary_search_l3_with_dups  ... bench:          25 ns/iter (+/- 6)
test old_binary_search_l3_worst_case ... bench:          12 ns/iter (+/- 0)

lib.rs

#![feature(core_intrinsics)]

use std::cmp::Ord;
use std::cmp::Ordering::{self, Equal, Greater, Less};

/// Searches `s` for `x` by delegating to [`old_binary_search_by`] with
/// `Ord::cmp` as the comparator.
///
/// `s` is expected to be sorted in ascending order. Returns `Ok(index)` of
/// an element equal to `x`, or `Err(index)` with the position at which `x`
/// could be inserted while keeping the slice sorted.
pub fn old_binary_search<T>(s: &[T], x: &T) -> Result<usize, usize>
where
    T: Ord,
{
    old_binary_search_by(s, |p| p.cmp(x))
}

/// Binary search over `s` driven by the comparator `f`.
///
/// `f` receives a probed element and reports how it orders relative to the
/// target. The loop never exits early on `Equal`; it keeps halving until a
/// single candidate remains and compares that candidate once more.
///
/// Returns `Ok(index)` when the final candidate compares `Equal`, otherwise
/// `Err(index)` with the insertion point. An empty slice yields `Err(0)`.
#[inline]
pub fn old_binary_search_by<'a, T, F>(s: &'a [T], mut f: F) -> Result<usize, usize>
where
    F: FnMut(&'a T) -> Ordering,
{
    if s.is_empty() {
        return Err(0);
    }
    // Invariant: `span >= 1` and `lo + span <= s.len()`.
    let mut lo = 0usize;
    let mut span = s.len();
    while span > 1 {
        let step = span / 2;
        let probe = lo + step;
        // SAFETY: `step < span`, so `probe < lo + span <= s.len()`.
        let ord = f(unsafe { s.get_unchecked(probe) });
        // Kept as a value-select rather than a branchy assignment.
        lo = if ord == Greater { lo } else { probe };
        span -= step;
    }
    // SAFETY: the invariant with `span == 1` gives `lo + 1 <= s.len()`.
    let ord = f(unsafe { s.get_unchecked(lo) });
    if ord != Equal {
        // A `Less` candidate means the target belongs just after it.
        return Err(lo + usize::from(ord == Less));
    }
    Ok(lo)
}

/// Searches `s` for `x` by delegating to [`new_binary_search_by`] with
/// `Ord::cmp` as the comparator.
///
/// `s` is expected to be sorted in ascending order. Returns `Ok(index)` of
/// an element equal to `x`, or `Err(index)` with the position at which `x`
/// could be inserted while keeping the slice sorted.
pub fn new_binary_search<T>(s: &[T], x: &T) -> Result<usize, usize>
where
    T: Ord,
{
    new_binary_search_by(s, |p| p.cmp(x))
}

/// Binary search over `s` driven by the comparator `f`, returning as soon as
/// a probe compares `Equal`.
///
/// `f` receives a probed element and reports how it orders relative to the
/// target: `Less` moves the lower bound past the probe, `Greater` moves the
/// upper bound down to the probe.
///
/// Returns `Ok(index)` of the first probe that compares `Equal`, or
/// `Err(left)` with the insertion point once the range is empty.
///
/// Requires the unstable `core_intrinsics` feature for `assume`.
#[inline]
pub fn new_binary_search_by<'a, T, F>(s: &'a [T], mut f: F) -> Result<usize, usize>
where
    F: FnMut(&'a T) -> Ordering,
{
    // INVARIANTS:
    // - 0 <= left <= left + size = right <= s.len()
    // - f returns Less for everything in s[..left]
    // - f returns Greater for everything in s[right..]
    let mut size = s.len();
    let mut left = 0;
    let mut right = size;
    while left < right {
        let mid = left + size / 2;

        // SAFETY: the while condition means `size` is strictly positive, so
        // `size/2 < size`. Thus `left + size/2 < left + size`, which
        // coupled with the `left + size <= s.len()` invariant means
        // we have `left + size/2 < s.len()`, and this is in-bounds.
        let cmp = f(unsafe { s.get_unchecked(mid) });

        // The reason why we use if/else control flow rather than match
        // is because match reorders comparison operations, which is perf sensitive.
        // This is x86 asm for u8: https://rust.godbolt.org/z/8Y8Pra.
        if cmp == Less {
            left = mid + 1;
        } else if cmp == Greater {
            right = mid;
        } else {
            // SAFETY: same as the `get_unchecked` above
            // (presumably lets callers that index with the result skip a
            // bounds check — confirm against the upstream PR).
            unsafe { core::intrinsics::assume(mid < s.len()) };
            return Ok(mid);
        }

        size = right - left;
    }

    // SAFETY: directly true from the overall invariant.
    // Note that this is `<=`, unlike the assume in the `Ok` path.
    unsafe { core::intrinsics::assume(left <= s.len()) };
    Err(left)
}

bench.rs

  #![feature(test)]
extern crate test;

use test::black_box;
use test::Bencher;

use binary_search_bench::*;

/// Selects the input size for a benchmark run. The variants are named after
/// CPU cache levels; the benchmark helpers map each one to an element count.
enum Cache {
    /// Smallest input (1000 elements in the helpers).
    L1,
    /// Medium input (10_000 elements in the helpers).
    L2,
    /// Largest input (1_000_000 elements in the helpers).
    L3,
}

/// Shared driver for the randomized `old_binary_search` benchmarks.
///
/// Builds a `Vec<usize>` by applying `mapper` to every index in `0..len`
/// (`len` is chosen by `cache`). Each benchmark iteration then advances a
/// linear congruential generator and searches for `mapper(state % len)`.
///
/// NOTE(review): every needle is `mapper` applied to an in-range index, so
/// with a deterministic `mapper` it is always present in the haystack. That
/// looks like 100% hits, not a 50/50 hit/miss mix — confirm the intent.
fn old_bench_binary_search<F>(b: &mut Bencher, cache: Cache, mapper: F)
where
    F: Fn(usize) -> usize,
{
    // Element counts; the byte sizes assume 8-byte `usize` elements.
    let len = match cache {
        Cache::L1 => 1000,      // ~8 KB
        Cache::L2 => 10_000,    // ~80 KB
        Cache::L3 => 1_000_000, // ~8 MB
    };
    let haystack = (0..len).map(&mapper).collect::<Vec<_>>();
    let mut state = 0usize;
    b.iter(move || {
        // LCG step; constants from https://en.wikipedia.org/wiki/Numerical_Recipes.
        state = state.wrapping_mul(1664525).wrapping_add(1013904223);
        let needle = mapper(state % len);
        let found = old_binary_search(&haystack, &needle).is_ok();
        black_box(found);
    })
}

/// Driver for the fixed-needle "worst case" `old_binary_search` benchmark.
///
/// The haystack is all zeros except for a single `1` in the last slot, and
/// every iteration searches for that `1`.
///
/// NOTE(review): nothing here pins the element type to `usize`, so the
/// integer literals presumably fall back to `i32`; the byte footprint per
/// cache tier would then be half that of the `usize` benchmarks — confirm.
fn old_bench_binary_search_worst_case(b: &mut Bencher, cache: Cache) {
    // Element counts per cache tier.
    let len = match cache {
        Cache::L1 => 1000,
        Cache::L2 => 10_000,
        Cache::L3 => 1_000_000,
    };
    let needle = 1;
    let mut haystack = vec![0; len];
    haystack[len - 1] = needle;
    b.iter(move || {
        let found = old_binary_search(&haystack, &needle).is_ok();
        black_box(found);
    })
}

/// Runs `old_bench_binary_search` at `Cache::L1` with the mapper `i * 2`.
#[bench]
fn old_binary_search_l1(b: &mut Bencher) {
    old_bench_binary_search(b, Cache::L1, |i| i * 2);
}

/// Runs `old_bench_binary_search` at `Cache::L2` with the mapper `i * 2`.
#[bench]
fn old_binary_search_l2(b: &mut Bencher) {
    old_bench_binary_search(b, Cache::L2, |i| i * 2);
}

/// Runs `old_bench_binary_search` at `Cache::L3` with the mapper `i * 2`.
#[bench]
fn old_binary_search_l3(b: &mut Bencher) {
    old_bench_binary_search(b, Cache::L3, |i| i * 2);
}

/// Runs `old_bench_binary_search` at `Cache::L1` with the mapper
/// `i / 16 * 16`, which gives each run of 16 consecutive indices the same value.
#[bench]
fn old_binary_search_l1_with_dups(b: &mut Bencher) {
    old_bench_binary_search(b, Cache::L1, |i| i / 16 * 16);
}

/// Runs `old_bench_binary_search` at `Cache::L2` with the mapper
/// `i / 16 * 16`, which gives each run of 16 consecutive indices the same value.
#[bench]
fn old_binary_search_l2_with_dups(b: &mut Bencher) {
    old_bench_binary_search(b, Cache::L2, |i| i / 16 * 16);
}

/// Runs `old_bench_binary_search` at `Cache::L3` with the mapper
/// `i / 16 * 16`, which gives each run of 16 consecutive indices the same value.
#[bench]
fn old_binary_search_l3_with_dups(b: &mut Bencher) {
    old_bench_binary_search(b, Cache::L3, |i| i / 16 * 16);
}

/// Runs `old_bench_binary_search_worst_case` at `Cache::L1`.
#[bench]
fn old_binary_search_l1_worst_case(b: &mut Bencher) {
    old_bench_binary_search_worst_case(b, Cache::L1);
}

/// Runs `old_bench_binary_search_worst_case` at `Cache::L2`.
#[bench]
fn old_binary_search_l2_worst_case(b: &mut Bencher) {
    old_bench_binary_search_worst_case(b, Cache::L2);
}

/// Runs `old_bench_binary_search_worst_case` at `Cache::L3`.
#[bench]
fn old_binary_search_l3_worst_case(b: &mut Bencher) {
    old_bench_binary_search_worst_case(b, Cache::L3);
}

/// Shared driver for the randomized `new_binary_search` benchmarks.
///
/// Builds a `Vec<usize>` by applying `mapper` to every index in `0..len`
/// (`len` is chosen by `cache`). Each benchmark iteration then advances a
/// linear congruential generator and searches for `mapper(state % len)`.
///
/// NOTE(review): every needle is `mapper` applied to an in-range index, so
/// with a deterministic `mapper` it is always present in the haystack. That
/// looks like 100% hits, not a 50/50 hit/miss mix — confirm the intent.
fn new_bench_binary_search<F>(b: &mut Bencher, cache: Cache, mapper: F)
where
    F: Fn(usize) -> usize,
{
    // Element counts; the byte sizes assume 8-byte `usize` elements.
    let len = match cache {
        Cache::L1 => 1000,      // ~8 KB
        Cache::L2 => 10_000,    // ~80 KB
        Cache::L3 => 1_000_000, // ~8 MB
    };
    let haystack = (0..len).map(&mapper).collect::<Vec<_>>();
    let mut state = 0usize;
    b.iter(move || {
        // LCG step; constants from https://en.wikipedia.org/wiki/Numerical_Recipes.
        state = state.wrapping_mul(1664525).wrapping_add(1013904223);
        let needle = mapper(state % len);
        let found = new_binary_search(&haystack, &needle).is_ok();
        black_box(found);
    })
}

/// Driver for the fixed-needle "worst case" `new_binary_search` benchmark.
///
/// The haystack is all zeros except for a single `1` in the last slot, and
/// every iteration searches for that `1`.
///
/// NOTE(review): nothing here pins the element type to `usize`, so the
/// integer literals presumably fall back to `i32`; the byte footprint per
/// cache tier would then be half that of the `usize` benchmarks — confirm.
fn new_bench_binary_search_worst_case(b: &mut Bencher, cache: Cache) {
    // Element counts per cache tier.
    let len = match cache {
        Cache::L1 => 1000,
        Cache::L2 => 10_000,
        Cache::L3 => 1_000_000,
    };
    let needle = 1;
    let mut haystack = vec![0; len];
    haystack[len - 1] = needle;
    b.iter(move || {
        let found = new_binary_search(&haystack, &needle).is_ok();
        black_box(found);
    })
}

/// Runs `new_bench_binary_search` at `Cache::L1` with the mapper `i * 2`.
#[bench]
fn new_binary_search_l1(b: &mut Bencher) {
    new_bench_binary_search(b, Cache::L1, |i| i * 2);
}

/// Runs `new_bench_binary_search` at `Cache::L2` with the mapper `i * 2`.
#[bench]
fn new_binary_search_l2(b: &mut Bencher) {
    new_bench_binary_search(b, Cache::L2, |i| i * 2);
}

/// Runs `new_bench_binary_search` at `Cache::L3` with the mapper `i * 2`.
#[bench]
fn new_binary_search_l3(b: &mut Bencher) {
    new_bench_binary_search(b, Cache::L3, |i| i * 2);
}

/// Runs `new_bench_binary_search` at `Cache::L1` with the mapper
/// `i / 16 * 16`, which gives each run of 16 consecutive indices the same value.
#[bench]
fn new_binary_search_l1_with_dups(b: &mut Bencher) {
    new_bench_binary_search(b, Cache::L1, |i| i / 16 * 16);
}

/// Runs `new_bench_binary_search` at `Cache::L2` with the mapper
/// `i / 16 * 16`, which gives each run of 16 consecutive indices the same value.
#[bench]
fn new_binary_search_l2_with_dups(b: &mut Bencher) {
    new_bench_binary_search(b, Cache::L2, |i| i / 16 * 16);
}

/// Runs `new_bench_binary_search` at `Cache::L3` with the mapper
/// `i / 16 * 16`, which gives each run of 16 consecutive indices the same value.
#[bench]
fn new_binary_search_l3_with_dups(b: &mut Bencher) {
    new_bench_binary_search(b, Cache::L3, |i| i / 16 * 16);
}

/// Runs `new_bench_binary_search_worst_case` at `Cache::L1`.
#[bench]
fn new_binary_search_l1_worst_case(b: &mut Bencher) {
    new_bench_binary_search_worst_case(b, Cache::L1);
}

/// Runs `new_bench_binary_search_worst_case` at `Cache::L2`.
#[bench]
fn new_binary_search_l2_worst_case(b: &mut Bencher) {
    new_bench_binary_search_worst_case(b, Cache::L2);
}

/// Runs `new_bench_binary_search_worst_case` at `Cache::L3`.
#[bench]
fn new_binary_search_l3_worst_case(b: &mut Bencher) {
    new_bench_binary_search_worst_case(b, Cache::L3);
}

```[tasklist] ### Tasks ```

The text was updated successfully, but these errors were encountered:

the8472 · 2023-08-27T13:35:33Z

If your use-case permits it you can try partition_point instead which doesn't check the equality case. Its limitation is that it doesn't distinguish the presence/absence of the element at the returned index.

kekeimiku · 2023-11-24T06:59:06Z

The huge performance regression seems to occur only on aarch64.
x86_64 i7-9750H

okaneco · 2023-11-24T15:08:58Z

I haven't run your benchmark yet, but #117722 was merged which might reclaim some performance.

Using old_binary_search_by from your lib.rs,
New BSearch vs. #117722 BSearch - https://rust.godbolt.org/z/qEfPPsY59
Old BSearch vs. #117722 BSearch - https://rust.godbolt.org/z/8qE13cnvW

kekeimiku · 2023-11-25T08:58:42Z

@okaneco Hi, thank you very much for your contribution.

Here are the new benchmarks. (MacBook Pro M2).

It looks like #117722 is faster than `new_binary_search`, but still not as fast as `old_binary_search`.

1.76.0-nightly (37b2813a7 2023-11-24)
cargo bench

test binary_search_117722_l1            ... bench:          18 ns/iter (+/- 1)
test binary_search_117722_l1_with_dups  ... bench:          10 ns/iter (+/- 0)
test binary_search_117722_l1_worst_case ... bench:           7 ns/iter (+/- 0)
test binary_search_117722_l2            ... bench:          27 ns/iter (+/- 1)
test binary_search_117722_l2_with_dups  ... bench:          18 ns/iter (+/- 0)
test binary_search_117722_l2_worst_case ... bench:          13 ns/iter (+/- 0)
test binary_search_117722_l3            ... bench:          60 ns/iter (+/- 0)
test binary_search_117722_l3_with_dups  ... bench:          43 ns/iter (+/- 0)
test binary_search_117722_l3_worst_case ... bench:          24 ns/iter (+/- 0)
test new_binary_search_l1               ... bench:          39 ns/iter (+/- 0)
test new_binary_search_l1_with_dups     ... bench:          22 ns/iter (+/- 0)
test new_binary_search_l1_worst_case    ... bench:           7 ns/iter (+/- 0)
test new_binary_search_l2               ... bench:          55 ns/iter (+/- 0)
test new_binary_search_l2_with_dups     ... bench:          36 ns/iter (+/- 0)
test new_binary_search_l2_worst_case    ... bench:          11 ns/iter (+/- 0)
test new_binary_search_l3               ... bench:         105 ns/iter (+/- 0)
test new_binary_search_l3_with_dups     ... bench:          85 ns/iter (+/- 2)
test new_binary_search_l3_worst_case    ... bench:          19 ns/iter (+/- 0)
test old_binary_search_l1               ... bench:           5 ns/iter (+/- 0)
test old_binary_search_l1_with_dups     ... bench:           5 ns/iter (+/- 0)
test old_binary_search_l1_worst_case    ... bench:           4 ns/iter (+/- 0)
test old_binary_search_l2               ... bench:           8 ns/iter (+/- 0)
test old_binary_search_l2_with_dups     ... bench:           8 ns/iter (+/- 0)
test old_binary_search_l2_worst_case    ... bench:           7 ns/iter (+/- 0)
test old_binary_search_l3               ... bench:          24 ns/iter (+/- 0)
test old_binary_search_l3_with_dups     ... bench:          24 ns/iter (+/- 2)
test old_binary_search_l3_worst_case    ... bench:          12 ns/iter (+/- 0)

lib.rs

#![feature(core_intrinsics)]

use std::cmp::Ord;
use std::cmp::Ordering::{self, Equal, Greater, Less};

/// Searches `s` for `x` by delegating to [`old_binary_search_by`] with
/// `Ord::cmp` as the comparator.
///
/// `s` is expected to be sorted in ascending order. Returns `Ok(index)` of
/// an element equal to `x`, or `Err(index)` with the position at which `x`
/// could be inserted while keeping the slice sorted.
pub fn old_binary_search<T>(s: &[T], x: &T) -> Result<usize, usize>
where
    T: Ord,
{
    old_binary_search_by(s, |p| p.cmp(x))
}

/// Binary search over `s` driven by the comparator `f`.
///
/// `f` receives a probed element and reports how it orders relative to the
/// target. The loop never exits early on `Equal`; it keeps halving until a
/// single candidate remains and compares that candidate once more.
///
/// Returns `Ok(index)` when the final candidate compares `Equal`, otherwise
/// `Err(index)` with the insertion point. An empty slice yields `Err(0)`.
#[inline(always)]
pub fn old_binary_search_by<'a, T, F>(s: &'a [T], mut f: F) -> Result<usize, usize>
where
    F: FnMut(&'a T) -> Ordering,
{
    if s.is_empty() {
        return Err(0);
    }
    // Invariant: `span >= 1` and `lo + span <= s.len()`.
    let mut lo = 0usize;
    let mut span = s.len();
    while span > 1 {
        let step = span / 2;
        let probe = lo + step;
        // SAFETY: `step < span`, so `probe < lo + span <= s.len()`.
        let ord = f(unsafe { s.get_unchecked(probe) });
        // Kept as a value-select rather than a branchy assignment.
        lo = if ord == Greater { lo } else { probe };
        span -= step;
    }
    // SAFETY: the invariant with `span == 1` gives `lo + 1 <= s.len()`.
    let ord = f(unsafe { s.get_unchecked(lo) });
    if ord != Equal {
        // A `Less` candidate means the target belongs just after it.
        return Err(lo + usize::from(ord == Less));
    }
    Ok(lo)
}

/// Searches `s` for `x` by delegating to [`new_binary_search_by`] with
/// `Ord::cmp` as the comparator.
///
/// `s` is expected to be sorted in ascending order. Returns `Ok(index)` of
/// an element equal to `x`, or `Err(index)` with the position at which `x`
/// could be inserted while keeping the slice sorted.
pub fn new_binary_search<T>(s: &[T], x: &T) -> Result<usize, usize>
where
    T: Ord,
{
    new_binary_search_by(s, |p| p.cmp(x))
}

/// Binary search over `s` driven by the comparator `f`, returning as soon as
/// a probe compares `Equal`.
///
/// `f` receives a probed element and reports how it orders relative to the
/// target: `Less` moves the lower bound past the probe, `Greater` moves the
/// upper bound down to the probe.
///
/// Returns `Ok(index)` of the first probe that compares `Equal`, or
/// `Err(index)` with the insertion point once the range is empty.
#[inline(always)]
pub fn new_binary_search_by<'a, T, F>(s: &'a [T], mut f: F) -> Result<usize, usize>
where
    F: FnMut(&'a T) -> Ordering,
{
    // Loop invariants:
    // - 0 <= lo <= lo + span = hi <= s.len()
    // - `f` yields Less for every element of s[..lo]
    // - `f` yields Greater for every element of s[hi..]
    let mut lo = 0;
    let mut hi = s.len();
    let mut span = hi;
    while lo < hi {
        let probe = lo + span / 2;

        // SAFETY: `lo < hi` makes `span` strictly positive, hence
        // `span / 2 < span` and `probe < lo + span <= s.len()`.
        let ord = f(unsafe { s.get_unchecked(probe) });

        // Deliberately an if/else chain instead of `match`: `match` reorders
        // the comparisons, which matters for performance here.
        // x86 asm for u8: https://rust.godbolt.org/z/8Y8Pra.
        if ord == Less {
            lo = probe + 1;
        } else if ord == Greater {
            hi = probe;
        } else {
            return Ok(probe);
        }

        span = hi - lo;
    }
    Err(lo)
}

/// Searches `s` for `x` by delegating to [`binary_search_by_117722`] with
/// `Ord::cmp` as the comparator.
///
/// `s` is expected to be sorted in ascending order. Returns `Ok(index)` of
/// an element equal to `x`, or `Err(index)` with the position at which `x`
/// could be inserted while keeping the slice sorted.
pub fn binary_search_117722<T>(s: &[T], x: &T) -> Result<usize, usize>
where
    T: Ord,
{
    binary_search_by_117722(s, |p| p.cmp(x))
}

/// Binary search over `s` driven by the comparator `f`, with the bound
/// updates written as value-selects and an early return on `Equal`.
///
/// `f` receives a probed element and reports how it orders relative to the
/// target: `Less` moves the lower bound past the probe, `Greater` moves the
/// upper bound down to the probe.
///
/// Returns `Ok(index)` of the first probe that compares `Equal`, or
/// `Err(left)` with the insertion point once the range is empty.
///
/// Requires the unstable `core_intrinsics` feature for `assume`.
#[inline(always)]
pub fn binary_search_by_117722<'a, F, T>(s: &'a [T], mut f: F) -> Result<usize, usize>
where
    F: FnMut(&'a T) -> Ordering,
{
    // INVARIANTS:
    // - 0 <= left <= left + size = right <= s.len()
    // - f returns Less for everything in s[..left]
    // - f returns Greater for everything in s[right..]
    let mut size = s.len();
    let mut left = 0;
    let mut right = size;
    while left < right {
        let mid = left + size / 2;

        // SAFETY: the while condition means `size` is strictly positive, so
        // `size/2 < size`. Thus `left + size/2 < left + size`, which
        // coupled with the `left + size <= s.len()` invariant means
        // we have `left + size/2 < s.len()`, and this is in-bounds.
        let cmp = f(unsafe { s.get_unchecked(mid) });

        // This control flow produces conditional moves, which results in
        // fewer branches and instructions than if/else or matching on
        // cmp::Ordering.
        // This is x86 asm for u8: https://rust.godbolt.org/z/698eYffTx.
        left = if cmp == Less { mid + 1 } else { left };
        right = if cmp == Greater { mid } else { right };
        if cmp == Equal {
            // SAFETY: same as the `get_unchecked` above
            // (presumably lets callers that index with the result skip a
            // bounds check — confirm against the upstream PR).
            unsafe { std::intrinsics::assume(mid < s.len()) };
            return Ok(mid);
        }

        size = right - left;
    }

    // SAFETY: directly true from the overall invariant.
    // Note that this is `<=`, unlike the assume in the `Ok` path.
    unsafe { std::intrinsics::assume(left <= s.len()) };
    Err(left)
}

bench.rs

#![feature(test)]
extern crate test;

use test::black_box;
use test::Bencher;

use binary_search_bench::*;

/// Selects the input size for a benchmark run. The variants are named after
/// CPU cache levels; the benchmark helpers map each one to an element count.
enum Cache {
    /// Smallest input (1000 elements in the helpers).
    L1,
    /// Medium input (10_000 elements in the helpers).
    L2,
    /// Largest input (1_000_000 elements in the helpers).
    L3,
}

/// Shared driver for the randomized `old_binary_search` benchmarks.
///
/// Builds a `Vec<usize>` by applying `mapper` to every index in `0..len`
/// (`len` is chosen by `cache`). Each benchmark iteration then advances a
/// linear congruential generator and searches for `mapper(state % len)`.
///
/// NOTE(review): every needle is `mapper` applied to an in-range index, so
/// with a deterministic `mapper` it is always present in the haystack. That
/// looks like 100% hits, not a 50/50 hit/miss mix — confirm the intent.
fn old_bench_binary_search<F>(b: &mut Bencher, cache: Cache, mapper: F)
where
    F: Fn(usize) -> usize,
{
    // Element counts; the byte sizes assume 8-byte `usize` elements.
    let len = match cache {
        Cache::L1 => 1000,      // ~8 KB
        Cache::L2 => 10_000,    // ~80 KB
        Cache::L3 => 1_000_000, // ~8 MB
    };
    let haystack = (0..len).map(&mapper).collect::<Vec<_>>();
    let mut state = 0usize;
    b.iter(move || {
        // LCG step; constants from https://en.wikipedia.org/wiki/Numerical_Recipes.
        state = state.wrapping_mul(1664525).wrapping_add(1013904223);
        let needle = mapper(state % len);
        let found = old_binary_search(&haystack, &needle).is_ok();
        black_box(found);
    })
}

/// Driver for the fixed-needle "worst case" `old_binary_search` benchmark.
///
/// The haystack is all zeros except for a single `1` in the last slot, and
/// every iteration searches for that `1`.
///
/// NOTE(review): nothing here pins the element type to `usize`, so the
/// integer literals presumably fall back to `i32`; the byte footprint per
/// cache tier would then be half that of the `usize` benchmarks — confirm.
fn old_bench_binary_search_worst_case(b: &mut Bencher, cache: Cache) {
    // Element counts per cache tier.
    let len = match cache {
        Cache::L1 => 1000,
        Cache::L2 => 10_000,
        Cache::L3 => 1_000_000,
    };
    let needle = 1;
    let mut haystack = vec![0; len];
    haystack[len - 1] = needle;
    b.iter(move || {
        let found = old_binary_search(&haystack, &needle).is_ok();
        black_box(found);
    })
}

/// Runs `old_bench_binary_search` at `Cache::L1` with the mapper `i * 2`.
#[bench]
fn old_binary_search_l1(b: &mut Bencher) {
    old_bench_binary_search(b, Cache::L1, |i| i * 2);
}

/// Runs `old_bench_binary_search` at `Cache::L2` with the mapper `i * 2`.
#[bench]
fn old_binary_search_l2(b: &mut Bencher) {
    old_bench_binary_search(b, Cache::L2, |i| i * 2);
}

/// Runs `old_bench_binary_search` at `Cache::L3` with the mapper `i * 2`.
#[bench]
fn old_binary_search_l3(b: &mut Bencher) {
    old_bench_binary_search(b, Cache::L3, |i| i * 2);
}

/// Runs `old_bench_binary_search` at `Cache::L1` with the mapper
/// `i / 16 * 16`, which gives each run of 16 consecutive indices the same value.
#[bench]
fn old_binary_search_l1_with_dups(b: &mut Bencher) {
    old_bench_binary_search(b, Cache::L1, |i| i / 16 * 16);
}

/// Runs `old_bench_binary_search` at `Cache::L2` with the mapper
/// `i / 16 * 16`, which gives each run of 16 consecutive indices the same value.
#[bench]
fn old_binary_search_l2_with_dups(b: &mut Bencher) {
    old_bench_binary_search(b, Cache::L2, |i| i / 16 * 16);
}

/// Runs `old_bench_binary_search` at `Cache::L3` with the mapper
/// `i / 16 * 16`, which gives each run of 16 consecutive indices the same value.
#[bench]
fn old_binary_search_l3_with_dups(b: &mut Bencher) {
    old_bench_binary_search(b, Cache::L3, |i| i / 16 * 16);
}

/// Runs `old_bench_binary_search_worst_case` at `Cache::L1`.
#[bench]
fn old_binary_search_l1_worst_case(b: &mut Bencher) {
    old_bench_binary_search_worst_case(b, Cache::L1);
}

/// Runs `old_bench_binary_search_worst_case` at `Cache::L2`.
#[bench]
fn old_binary_search_l2_worst_case(b: &mut Bencher) {
    old_bench_binary_search_worst_case(b, Cache::L2);
}

/// Runs `old_bench_binary_search_worst_case` at `Cache::L3`.
#[bench]
fn old_binary_search_l3_worst_case(b: &mut Bencher) {
    old_bench_binary_search_worst_case(b, Cache::L3);
}

/// Shared driver for the randomized `new_binary_search` benchmarks.
///
/// Builds a `Vec<usize>` by applying `mapper` to every index in `0..len`
/// (`len` is chosen by `cache`). Each benchmark iteration then advances a
/// linear congruential generator and searches for `mapper(state % len)`.
///
/// NOTE(review): every needle is `mapper` applied to an in-range index, so
/// with a deterministic `mapper` it is always present in the haystack. That
/// looks like 100% hits, not a 50/50 hit/miss mix — confirm the intent.
fn new_bench_binary_search<F>(b: &mut Bencher, cache: Cache, mapper: F)
where
    F: Fn(usize) -> usize,
{
    // Element counts; the byte sizes assume 8-byte `usize` elements.
    let len = match cache {
        Cache::L1 => 1000,      // ~8 KB
        Cache::L2 => 10_000,    // ~80 KB
        Cache::L3 => 1_000_000, // ~8 MB
    };
    let haystack = (0..len).map(&mapper).collect::<Vec<_>>();
    let mut state = 0usize;
    b.iter(move || {
        // LCG step; constants from https://en.wikipedia.org/wiki/Numerical_Recipes.
        state = state.wrapping_mul(1664525).wrapping_add(1013904223);
        let needle = mapper(state % len);
        let found = new_binary_search(&haystack, &needle).is_ok();
        black_box(found);
    })
}

/// Driver for the fixed-needle "worst case" `new_binary_search` benchmark.
///
/// The haystack is all zeros except for a single `1` in the last slot, and
/// every iteration searches for that `1`.
///
/// NOTE(review): nothing here pins the element type to `usize`, so the
/// integer literals presumably fall back to `i32`; the byte footprint per
/// cache tier would then be half that of the `usize` benchmarks — confirm.
fn new_bench_binary_search_worst_case(b: &mut Bencher, cache: Cache) {
    // Element counts per cache tier.
    let len = match cache {
        Cache::L1 => 1000,
        Cache::L2 => 10_000,
        Cache::L3 => 1_000_000,
    };
    let needle = 1;
    let mut haystack = vec![0; len];
    haystack[len - 1] = needle;
    b.iter(move || {
        let found = new_binary_search(&haystack, &needle).is_ok();
        black_box(found);
    })
}

/// Runs `new_bench_binary_search` at `Cache::L1` with the mapper `i * 2`.
#[bench]
fn new_binary_search_l1(b: &mut Bencher) {
    new_bench_binary_search(b, Cache::L1, |i| i * 2);
}

/// Runs `new_bench_binary_search` at `Cache::L2` with the mapper `i * 2`.
#[bench]
fn new_binary_search_l2(b: &mut Bencher) {
    new_bench_binary_search(b, Cache::L2, |i| i * 2);
}

/// Runs `new_bench_binary_search` at `Cache::L3` with the mapper `i * 2`.
#[bench]
fn new_binary_search_l3(b: &mut Bencher) {
    new_bench_binary_search(b, Cache::L3, |i| i * 2);
}

/// Runs `new_bench_binary_search` at `Cache::L1` with the mapper
/// `i / 16 * 16`, which gives each run of 16 consecutive indices the same value.
#[bench]
fn new_binary_search_l1_with_dups(b: &mut Bencher) {
    new_bench_binary_search(b, Cache::L1, |i| i / 16 * 16);
}

/// Runs `new_bench_binary_search` at `Cache::L2` with the mapper
/// `i / 16 * 16`, which gives each run of 16 consecutive indices the same value.
#[bench]
fn new_binary_search_l2_with_dups(b: &mut Bencher) {
    new_bench_binary_search(b, Cache::L2, |i| i / 16 * 16);
}

/// Runs `new_bench_binary_search` at `Cache::L3` with the mapper
/// `i / 16 * 16`, which gives each run of 16 consecutive indices the same value.
#[bench]
fn new_binary_search_l3_with_dups(b: &mut Bencher) {
    new_bench_binary_search(b, Cache::L3, |i| i / 16 * 16);
}

/// Runs `new_bench_binary_search_worst_case` at `Cache::L1`.
#[bench]
fn new_binary_search_l1_worst_case(b: &mut Bencher) {
    new_bench_binary_search_worst_case(b, Cache::L1);
}

/// Runs `new_bench_binary_search_worst_case` at `Cache::L2`.
#[bench]
fn new_binary_search_l2_worst_case(b: &mut Bencher) {
    new_bench_binary_search_worst_case(b, Cache::L2);
}

/// Runs `new_bench_binary_search_worst_case` at `Cache::L3`.
#[bench]
fn new_binary_search_l3_worst_case(b: &mut Bencher) {
    new_bench_binary_search_worst_case(b, Cache::L3);
}

/// Shared driver for the randomized `binary_search_117722` benchmarks.
///
/// Builds a `Vec<usize>` by applying `mapper` to every index in `0..len`
/// (`len` is chosen by `cache`). Each benchmark iteration then advances a
/// linear congruential generator and searches for `mapper(state % len)`.
///
/// NOTE(review): every needle is `mapper` applied to an in-range index, so
/// with a deterministic `mapper` it is always present in the haystack. That
/// looks like 100% hits, not a 50/50 hit/miss mix — confirm the intent.
fn bench_binary_search_117722<F>(b: &mut Bencher, cache: Cache, mapper: F)
where
    F: Fn(usize) -> usize,
{
    // Element counts; the byte sizes assume 8-byte `usize` elements.
    let len = match cache {
        Cache::L1 => 1000,      // ~8 KB
        Cache::L2 => 10_000,    // ~80 KB
        Cache::L3 => 1_000_000, // ~8 MB
    };
    let haystack = (0..len).map(&mapper).collect::<Vec<_>>();
    let mut state = 0usize;
    b.iter(move || {
        // LCG step; constants from https://en.wikipedia.org/wiki/Numerical_Recipes.
        state = state.wrapping_mul(1664525).wrapping_add(1013904223);
        let needle = mapper(state % len);
        let found = binary_search_117722(&haystack, &needle).is_ok();
        black_box(found);
    })
}

/// Driver for the fixed-needle "worst case" `binary_search_117722` benchmark.
///
/// The haystack is all zeros except for a single `1` in the last slot, and
/// every iteration searches for that `1`.
///
/// NOTE(review): nothing here pins the element type to `usize`, so the
/// integer literals presumably fall back to `i32`; the byte footprint per
/// cache tier would then be half that of the `usize` benchmarks — confirm.
fn bench_binary_search_117722_worst_case(b: &mut Bencher, cache: Cache) {
    // Element counts per cache tier.
    let len = match cache {
        Cache::L1 => 1000,
        Cache::L2 => 10_000,
        Cache::L3 => 1_000_000,
    };
    let needle = 1;
    let mut haystack = vec![0; len];
    haystack[len - 1] = needle;
    b.iter(move || {
        let found = binary_search_117722(&haystack, &needle).is_ok();
        black_box(found);
    })
}

/// Runs `bench_binary_search_117722` at `Cache::L1` with the mapper `i * 2`.
#[bench]
fn binary_search_117722_l1(b: &mut Bencher) {
    bench_binary_search_117722(b, Cache::L1, |i| i * 2);
}

/// Runs `bench_binary_search_117722` at `Cache::L2` with the mapper `i * 2`.
#[bench]
fn binary_search_117722_l2(b: &mut Bencher) {
    bench_binary_search_117722(b, Cache::L2, |i| i * 2);
}

/// Runs `bench_binary_search_117722` at `Cache::L3` with the mapper `i * 2`.
#[bench]
fn binary_search_117722_l3(b: &mut Bencher) {
    bench_binary_search_117722(b, Cache::L3, |i| i * 2);
}

/// Runs `bench_binary_search_117722` at `Cache::L1` with the mapper
/// `i / 16 * 16`, which gives each run of 16 consecutive indices the same value.
#[bench]
fn binary_search_117722_l1_with_dups(b: &mut Bencher) {
    bench_binary_search_117722(b, Cache::L1, |i| i / 16 * 16);
}

/// Runs `bench_binary_search_117722` at `Cache::L2` with the mapper
/// `i / 16 * 16`, which gives each run of 16 consecutive indices the same value.
#[bench]
fn binary_search_117722_l2_with_dups(b: &mut Bencher) {
    bench_binary_search_117722(b, Cache::L2, |i| i / 16 * 16);
}

/// Runs `bench_binary_search_117722` at `Cache::L3` with the mapper
/// `i / 16 * 16`, which gives each run of 16 consecutive indices the same value.
#[bench]
fn binary_search_117722_l3_with_dups(b: &mut Bencher) {
    bench_binary_search_117722(b, Cache::L3, |i| i / 16 * 16);
}

/// Runs `bench_binary_search_117722_worst_case` at `Cache::L1`.
#[bench]
fn binary_search_117722_l1_worst_case(b: &mut Bencher) {
    bench_binary_search_117722_worst_case(b, Cache::L1);
}

/// Runs `bench_binary_search_117722_worst_case` at `Cache::L2`.
#[bench]
fn binary_search_117722_l2_worst_case(b: &mut Bencher) {
    bench_binary_search_117722_worst_case(b, Cache::L2);
}

/// Runs `bench_binary_search_117722_worst_case` at `Cache::L3`.
#[bench]
fn binary_search_117722_l3_worst_case(b: &mut Bencher) {
    bench_binary_search_117722_worst_case(b, Cache::L3);
}

Cargo.toml

[package]
name = "binary_search_bench"
version = "0.1.0"
edition = "2021"

[profile.bench]
opt-level = 3
lto = true
codegen-units = 1
panic = "abort"
strip = true
debug = false

kekeimiku · 2023-11-25T13:23:15Z

x86_64 i7-9750H.

rustbot added the needs-triage This issue may need triage. Remove it if it has been sufficiently triaged. label Aug 27, 2023

kekeimiku changed the title ~~Significant slowdown of binary_search~~ performance regression of binary_search Aug 27, 2023

kekeimiku mentioned this issue Dec 11, 2023

performance kekeimiku/PointerSearcher-X#26

Open

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

performance regression of binary_search #115271

performance regression of binary_search #115271

kekeimiku commented Aug 27, 2023 •

edited

the8472 commented Aug 27, 2023

kekeimiku commented Nov 24, 2023

okaneco commented Nov 24, 2023

kekeimiku commented Nov 25, 2023

kekeimiku commented Nov 25, 2023

performance regression of binary_search #115271

performance regression of binary_search #115271

Comments

kekeimiku commented Aug 27, 2023 • edited

the8472 commented Aug 27, 2023

kekeimiku commented Nov 24, 2023

okaneco commented Nov 24, 2023

kekeimiku commented Nov 25, 2023

kekeimiku commented Nov 25, 2023

kekeimiku commented Aug 27, 2023 •

edited