From 2b82c072c75d64c434a62f185b67fb41028d6f71 Mon Sep 17 00:00:00 2001 From: Ulrik Sverdrup Date: Fri, 7 Aug 2015 12:06:43 +0200 Subject: [PATCH] StrSearcher: Improve inner loop in TwoWaySearcher::next, next_back The innermost loop of TwoWaySearcher checks the boundary of the haystack vs position + needle.len(), and it checks the last byte of the needle against the byteset. If these two steps are combined by using the indexing of the last needle byte's position as the bounds check, the algorithm improves its throughput. We improve the innermost loop by reducing the number of instructions used, and eliminating the panic case for the checked indexing that was previously used. Selected benchmarks from the external/workspace testsuite. Benchmarks improve across the board. ``` before: test bb_in_aa::twoway_find ... bench: 4,229 ns/iter (+/- 1,305) = 23646 MB/s test bb_in_aa::twoway_rfind ... bench: 3,873 ns/iter (+/- 101) = 25819 MB/s test short_1let_long::twoway_find ... bench: 7,075 ns/iter (+/- 29) = 360 MB/s test short_1let_long::twoway_rfind ... bench: 6,640 ns/iter (+/- 79) = 384 MB/s test short_2let_long::twoway_find ... bench: 3,823 ns/iter (+/- 16) = 667 MB/s test short_2let_long::twoway_rfind ... bench: 3,774 ns/iter (+/- 44) = 675 MB/s test short_3let_long::twoway_find ... bench: 3,582 ns/iter (+/- 47) = 712 MB/s test short_3let_long::twoway_rfind ... bench: 3,616 ns/iter (+/- 34) = 705 MB/s with this commit: test bb_in_aa::twoway_find ... bench: 2,952 ns/iter (+/- 20) = 33875 MB/s test bb_in_aa::twoway_rfind ... bench: 2,939 ns/iter (+/- 99) = 34025 MB/s test short_1let_long::twoway_find ... bench: 4,593 ns/iter (+/- 4) = 555 MB/s test short_1let_long::twoway_rfind ... bench: 4,592 ns/iter (+/- 76) = 555 MB/s test short_2let_long::twoway_find ... bench: 2,804 ns/iter (+/- 3) = 909 MB/s test short_2let_long::twoway_rfind ... bench: 2,807 ns/iter (+/- 40) = 908 MB/s test short_3let_long::twoway_find ... 
bench: 3,105 ns/iter (+/- 120) = 821 MB/s test short_3let_long::twoway_rfind ... bench: 3,019 ns/iter (+/- 50) = 844 MB/s ``` - `bb_in_aa`: fast skip due to byteset filter loop improves. - 1/2/3let: Searches for 1, 2, or 3 ASCII bytes improve. --- src/libcore/str/pattern.rs | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/src/libcore/str/pattern.rs b/src/libcore/str/pattern.rs index 8bc1ba207bdfb..0ea3b38a3cf29 100644 --- a/src/libcore/str/pattern.rs +++ b/src/libcore/str/pattern.rs @@ -906,19 +906,25 @@ impl TwoWaySearcher { { // `next()` uses `self.position` as its cursor let old_pos = self.position; + let needle_last = needle.len() - 1; 'search: loop { // Check that we have room to search in - if needle.len() > haystack.len() - self.position { - self.position = haystack.len(); - return S::rejecting(old_pos, self.position); - } + // position + needle_last can not overflow if we assume slices + // are bounded by isize's range. + let tail_byte = match haystack.get(self.position + needle_last) { + Some(&b) => b, + None => { + self.position = haystack.len(); + return S::rejecting(old_pos, self.position); + } + }; if S::use_early_reject() && old_pos != self.position { return S::rejecting(old_pos, self.position); } // Quickly skip by large portions unrelated to our substring - if !self.byteset_contains(haystack[self.position + needle.len() - 1]) { + if !self.byteset_contains(tail_byte) { self.position += needle.len(); if !long_period { self.memory = 0; @@ -986,17 +992,23 @@ impl TwoWaySearcher { let old_end = self.end; 'search: loop { // Check that we have room to search in - if needle.len() > self.end { - self.end = 0; - return S::rejecting(0, old_end); - } + // end - needle.len() will wrap around when there is no more room, + // but due to slice length limits it can never wrap all the way back + // into the length of haystack. 
+ let front_byte = match haystack.get(self.end.wrapping_sub(needle.len())) { + Some(&b) => b, + None => { + self.end = 0; + return S::rejecting(0, old_end); + } + }; if S::use_early_reject() && old_end != self.end { return S::rejecting(self.end, old_end); } // Quickly skip by large portions unrelated to our substring - if !self.byteset_contains(haystack[self.end - needle.len()]) { + if !self.byteset_contains(front_byte) { self.end -= needle.len(); if !long_period { self.memory_back = needle.len();