Skip to content

Commit

Permalink
Rollup merge of #115331 - the8472:chars_advance, r=cuviper
Browse files Browse the repository at this point in the history
optimize str::iter::Chars::advance_by

```
OLD:
    str::iter::chars_advance_by_0001  0.00ns/iter  +/- 0.00ns
    str::iter::chars_advance_by_0010 13.00ns/iter  +/- 1.00ns
    str::iter::chars_advance_by_1000  1.20µs/iter +/- 15.00ns

NEW:
    str::iter::chars_advance_by_0001  0.00ns/iter +/- 0.00ns
    str::iter::chars_advance_by_0010  6.00ns/iter +/- 0.00ns
    str::iter::chars_advance_by_1000 75.00ns/iter +/- 1.00ns
```
  • Loading branch information
matthiaskrgr committed Nov 28, 2023
2 parents c2ec908 + 40cf1f9 commit 4af1f99
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 0 deletions.
11 changes: 11 additions & 0 deletions library/alloc/tests/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1170,6 +1170,17 @@ fn test_iterator() {
assert_eq!(s.chars().count(), v.len());
}

/// Exercises `Chars::advance_by` on multi-byte text: once with a single-step
/// advance and once with an advance of 33 chars, checking after each that the
/// iterator yields the char at the expected index.
#[test]
fn test_iterator_advance() {
    let text = "「赤錆」と呼ばれる鉄錆は、水の存在下での鉄の自然酸化によって生じる、オキシ水酸化鉄(III) 等の(含水)酸化物粒子の疎な凝集膜であるとみなせる。";
    // Reference sequence produced by plain iteration, indexed by char position.
    let expected: Vec<char> = text.chars().collect();
    let mut cursor = text.chars();

    // Skip one char: the next item must be the char at index 1.
    cursor.advance_by(1).unwrap();
    let after_one = cursor.next();
    assert_eq!(after_one, Some(expected[1]));

    // Index 1 was consumed by `next`, so skipping 33 more chars from index 2
    // lands on index 35.
    cursor.advance_by(33).unwrap();
    let after_many = cursor.next();
    assert_eq!(after_many, Some(expected[35]));
}

#[test]
fn test_rev_iterator() {
let s = "ศไทย中华Việt Nam";
Expand Down
1 change: 1 addition & 0 deletions library/core/benches/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#![feature(trusted_random_access)]
#![feature(iter_array_chunks)]
#![feature(iter_next_chunk)]
#![feature(iter_advance_by)]

extern crate test;

Expand Down
1 change: 1 addition & 0 deletions library/core/benches/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use test::{black_box, Bencher};

mod char_count;
mod corpora;
mod iter;

#[bench]
fn str_validate_emoji(b: &mut Bencher) {
Expand Down
17 changes: 17 additions & 0 deletions library/core/benches/str/iter.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
use super::corpora;
use test::{black_box, Bencher};

/// Benchmarks `Chars::advance_by(1000)` on a fresh `chars()` iterator over
/// `corpora::ru::LARGE`.
#[bench]
fn chars_advance_by_1000(b: &mut Bencher) {
    b.iter(|| {
        // Hide the corpus from the optimizer so the work is not const-folded.
        let text = black_box(corpora::ru::LARGE);
        text.chars().advance_by(1000)
    });
}

/// Benchmarks `Chars::advance_by(10)` on a fresh `chars()` iterator over
/// `corpora::ru::LARGE`.
#[bench]
fn chars_advance_by_0010(b: &mut Bencher) {
    b.iter(|| {
        // Hide the corpus from the optimizer so the work is not const-folded.
        let text = black_box(corpora::ru::LARGE);
        text.chars().advance_by(10)
    });
}

/// Benchmarks `Chars::advance_by(1)` on a fresh `chars()` iterator over
/// `corpora::ru::LARGE`.
#[bench]
fn chars_advance_by_0001(b: &mut Bencher) {
    b.iter(|| {
        // Hide the corpus from the optimizer so the work is not const-folded.
        let text = black_box(corpora::ru::LARGE);
        text.chars().advance_by(1)
    });
}
50 changes: 50 additions & 0 deletions library/core/src/str/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use crate::iter::{TrustedRandomAccess, TrustedRandomAccessNoCoerce};
use crate::ops::Try;
use crate::option;
use crate::slice::{self, Split as SliceSplit};
use core::num::NonZeroUsize;

use super::from_utf8_unchecked;
use super::pattern::Pattern;
Expand Down Expand Up @@ -49,6 +50,55 @@ impl<'a> Iterator for Chars<'a> {
super::count::count_chars(self.as_str())
}

/// Advances the iterator by `remainder` chars without decoding them.
///
/// Large advances first go through a bulk path that inspects the underlying
/// bytes in `CHUNK_SIZE`-byte arrays and counts the bytes that are not UTF-8
/// continuation bytes (each such byte starts one char). Whatever is left is
/// handled one char at a time, using the width implied by the char's first
/// byte.
///
/// Returns `Ok(())` when all requested chars were skipped, otherwise
/// `Err(k)` where `k` is the non-zero number of chars that could not be
/// skipped because the input ran out.
#[inline]
fn advance_by(&mut self, mut remainder: usize) -> Result<(), NonZeroUsize> {
// Number of bytes examined per iteration of the bulk path.
const CHUNK_SIZE: usize = 32;

// Bulk path: only entered when at least one full chunk's worth of chars
// was requested.
if remainder >= CHUNK_SIZE {
// Iterates over the remaining bytes in fixed-size `[u8; CHUNK_SIZE]` arrays.
let mut chunks = self.iter.as_slice().array_chunks::<CHUNK_SIZE>();
// Total bytes covered by the chunks consumed below; applied to
// `self.iter` in one step after the loop.
let mut bytes_skipped: usize = 0;

// A chunk contains at most CHUNK_SIZE char starts, so requiring
// `remainder > CHUNK_SIZE` guarantees the subtraction below cannot
// underflow and leaves at least one char for the code after the loop.
while remainder > CHUNK_SIZE
&& let Some(chunk) = chunks.next()
{
bytes_skipped += CHUNK_SIZE;

// start_bytes[i] is true when chunk[i] is not a continuation byte.
let mut start_bytes = [false; CHUNK_SIZE];

for i in 0..CHUNK_SIZE {
start_bytes[i] = !super::validations::utf8_is_cont_byte(chunk[i]);
}

// Count the char starts in this chunk. The count is at most
// CHUNK_SIZE (32), so summing in `u8` cannot overflow.
remainder -= start_bytes.into_iter().map(|i| i as u8).sum::<u8>() as usize;
}

// SAFETY: The amount of bytes exists since we just iterated over them,
// so advance_by will succeed.
unsafe { self.iter.advance_by(bytes_skipped).unwrap_unchecked() };

// skip trailing continuation bytes
// (a chunk boundary can fall inside a multi-byte char whose first byte
// was already counted above; the rest of that char must be consumed so
// the iterator is positioned at the start of a char again)
while self.iter.len() > 0 {
let b = self.iter.as_slice()[0];
if !super::validations::utf8_is_cont_byte(b) {
break;
}
// SAFETY: We just peeked at the byte, therefore it exists
unsafe { self.iter.advance_by(1).unwrap_unchecked() };
}
}

// Scalar path: skip one char per iteration until the request is
// satisfied or no bytes remain.
while (remainder > 0) && (self.iter.len() > 0) {
remainder -= 1;
let b = self.iter.as_slice()[0];
// Width in bytes of the char starting at `b`, taken from its first byte.
let slurp = super::validations::utf8_char_width(b);
// SAFETY: utf8 validity requires that the string must contain
// the continuation bytes (if any)
unsafe { self.iter.advance_by(slurp).unwrap_unchecked() };
}

// A zero remainder means every requested char was skipped; a non-zero
// remainder is reported as the error value.
NonZeroUsize::new(remainder).map_or(Ok(()), Err)
}

#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
let len = self.iter.len();
Expand Down

0 comments on commit 4af1f99

Please sign in to comment.