Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a fast-path to Debug ASCII &str #121150

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions library/core/benches/str.rs
Expand Up @@ -3,6 +3,7 @@ use test::{black_box, Bencher};

mod char_count;
mod corpora;
mod debug;
mod iter;

#[bench]
Expand Down
79 changes: 79 additions & 0 deletions library/core/benches/str/debug.rs
@@ -0,0 +1,79 @@
//! This primarily benchmarks `impl Debug for str`,
//! and it also explicitly tests that we minimize calls to the underlying `Write`r.
//! While that is an implementation detail and there are no guarantees about it,
//! we should still try to minimize those calls over time rather than regress them.

use std::fmt::{self, Write};
use test::{black_box, Bencher};

/// A `fmt::Write` sink that keeps everything written to it and counts how many
/// times `write_str` was invoked.
#[derive(Default)]
struct CountingWriter {
    /// Concatenation of every chunk received so far.
    buf: String,
    /// Number of `write_str` invocations observed so far.
    write_calls: usize,
}

impl Write for CountingWriter {
    /// Tallies the call and appends `chunk` to the buffer; always succeeds.
    fn write_str(&mut self, chunk: &str) -> fmt::Result {
        self.write_calls += 1;
        self.buf += chunk;
        Ok(())
    }
}

fn assert_fmt(s: &str, expected: &str, expected_write_calls: usize) {
let mut w = CountingWriter::default();

write!(&mut w, "{s:?}").unwrap();
assert_eq!(s.len(), 64);
assert_eq!(w.buf, expected);
assert_eq!(w.write_calls, expected_write_calls);
}

/// Benchmarks Debug-formatting of a 64-byte, ASCII-only string without escapes.
#[bench]
fn ascii_only(b: &mut Bencher) {
    let text = "just a bit of ascii text that has no escapes. 64 bytes exactly!!";
    let quoted = r#""just a bit of ascii text that has no escapes. 64 bytes exactly!!""#;

    // Check the output and the number of writer calls once, before timing.
    assert_fmt(text, quoted, 3);

    b.iter(|| {
        let rendered = format!("{:?}", black_box(text));
        black_box(rendered);
    });
}

/// Benchmarks Debug-formatting of a 64-byte ASCII string containing tabs, a
/// newline and double quotes.
#[bench]
fn ascii_escapes(b: &mut Bencher) {
    let text = "some\tmore\tascii\ttext\nthis time with some \"escapes\", also 64 byte";
    let quoted = r#""some\tmore\tascii\ttext\nthis time with some \"escapes\", also 64 byte""#;

    // Check the output and the number of writer calls once, before timing.
    assert_fmt(text, quoted, 15);

    b.iter(|| {
        let rendered = format!("{:?}", black_box(text));
        black_box(rendered);
    });
}

/// Benchmarks Debug-formatting of a 64-byte string that is mostly ASCII with a
/// few non-ASCII letters mixed in.
#[bench]
fn some_unicode(b: &mut Bencher) {
    let text = "egy kis szöveg néhány unicode betűvel. legyen ez is 64 byte.";
    let quoted = r#""egy kis szöveg néhány unicode betűvel. legyen ez is 64 byte.""#;

    // Check the output and the number of writer calls once, before timing.
    assert_fmt(text, quoted, 3);

    b.iter(|| {
        let rendered = format!("{:?}", black_box(text));
        black_box(rendered);
    });
}

/// Benchmarks Debug-formatting of a 64-byte string made of Cyrillic words,
/// spaces and a full stop.
#[bench]
fn mostly_unicode(b: &mut Bencher) {
    let text = "предложение из кириллических букв.";
    let quoted = r#""предложение из кириллических букв.""#;

    // Check the output and the number of writer calls once, before timing.
    assert_fmt(text, quoted, 3);

    b.iter(|| {
        let rendered = format!("{:?}", black_box(text));
        black_box(rendered);
    });
}

/// Benchmarks Debug-formatting of a 64-byte string mixing quotes, newlines, an
/// emoji followed by U+FE0F, and Latin and Cyrillic text.
#[bench]
fn mixed(b: &mut Bencher) {
    let text = "\"❤️\"\n\"hűha ez betű\"\n\"кириллических букв\".";
    let quoted = r#""\"❤\u{fe0f}\"\n\"hűha ez betű\"\n\"кириллических букв\".""#;

    // Check the output and the number of writer calls once, before timing.
    assert_fmt(text, quoted, 21);

    b.iter(|| {
        let rendered = format!("{:?}", black_box(text));
        black_box(rendered);
    });
}
69 changes: 51 additions & 18 deletions library/core/src/fmt/mod.rs
Expand Up @@ -2387,23 +2387,57 @@
// NOTE(review): this listing is a rendered diff. Lines of the earlier
// `char_indices`-based loop (`from`, `&self[from..i]`, `&self[from..]`) appear next to
// the chunked implementation built around `printable_range`, and CI annotations are
// embedded between the code lines.
impl Debug for str {
// Writes the string between double quotes, escaping characters along the way.
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
f.write_char('"')?;
let mut from = 0;
for (i, c) in self.char_indices() {
let esc = c.escape_debug_ext(EscapeDebugExtArgs {
escape_grapheme_extended: true,
escape_single_quote: false,
escape_double_quote: true,
});
// If char needs escaping, flush backlog so far and write, else skip
if esc.len() != 1 {
f.write_str(&self[from..i])?;
for c in esc {
f.write_char(c)?;

// `printable_range` is the span of `self` that has been scanned but not yet written;
// `rest` holds the bytes that still have to be scanned.
let mut printable_range = 0..0;
let mut rest = self.as_bytes();
while rest.len() > 0 {
// first, handle an ascii-only prefix
let non_ascii_position = rest.iter().position(|&b| b >= 0x80).unwrap_or(rest.len());
// SAFETY: `non_ascii_position` is either an index found by `position` in `rest`
// or `rest.len()`, so it never exceeds `rest.len()`.
let mut ascii_bytes = unsafe { rest.get_unchecked(..non_ascii_position) };

Check failure on line 2396 in library/core/src/fmt/mod.rs

View workflow job for this annotation

GitHub Actions / PR - mingw-check-tidy

undocumented unsafe
// SAFETY: same bound as for the slice just taken: `non_ascii_position <= rest.len()`.
rest = unsafe { rest.get_unchecked(non_ascii_position..) };

Check failure on line 2397 in library/core/src/fmt/mod.rs

View workflow job for this annotation

GitHub Actions / PR - mingw-check-tidy

undocumented unsafe

// Selects the ASCII bytes that are not written verbatim: anything above 0x7E or
// below 0x20, plus backslash and double quote.
// NOTE(review): these bytes are rendered with `crate::ascii::escape_default` below,
// whereas the earlier loop used `escape_debug_ext` for every char. Confirm both
// produce the same text for control bytes such as NUL and DEL; otherwise the
// Debug output changes.
fn needs_escape(b: u8) -> bool {
b > 0x7E || b < 0x20 || b == b'\\' || b == b'"'
}
while let Some(escape_position) = ascii_bytes.iter().position(|&b| needs_escape(b)) {
printable_range.end += escape_position;
// NOTE(review): sound only if `printable_range` lies within `self` and on char
// boundaries. It appears to advance by the same amounts as `rest`/`ascii_bytes`;
// confirm and record that as the SAFETY comment.
f.write_str(unsafe { self.get_unchecked(printable_range.clone()) })?;

Check failure on line 2404 in library/core/src/fmt/mod.rs

View workflow job for this annotation

GitHub Actions / PR - mingw-check-tidy

undocumented unsafe
// SAFETY: `escape_position` was returned by `position` on `ascii_bytes`, so it is
// a valid index into `ascii_bytes`.
f.write_str(
crate::ascii::escape_default(unsafe {

Check failure on line 2406 in library/core/src/fmt/mod.rs

View workflow job for this annotation

GitHub Actions / PR - mingw-check-tidy

undocumented unsafe
*ascii_bytes.get_unchecked(escape_position)
})
.as_str(),
)?;
// SAFETY: `escape_position < ascii_bytes.len()`, hence
// `escape_position + 1 <= ascii_bytes.len()`.
ascii_bytes = unsafe { ascii_bytes.get_unchecked(escape_position + 1..) };

Check failure on line 2411 in library/core/src/fmt/mod.rs

View workflow job for this annotation

GitHub Actions / PR - mingw-check-tidy

undocumented unsafe
// Restart the pending span as empty, one byte past the byte that was just escaped.
printable_range = (printable_range.end + 1)..(printable_range.end + 1);
}
// No byte left in this ASCII run needs escaping; keep the remainder pending.
printable_range.end += ascii_bytes.len();

// then, handle a unicode-only prefix
let ascii_position = rest.iter().position(|&b| b < 0x80).unwrap_or(rest.len());
// SAFETY: `ascii_position` is either an index found by `position` in `rest` or
// `rest.len()`, so it never exceeds `rest.len()`.
rest = unsafe { rest.get_unchecked(ascii_position..) };

Check failure on line 2418 in library/core/src/fmt/mod.rs

View workflow job for this annotation

GitHub Actions / PR - mingw-check-tidy

undocumented unsafe

// NOTE(review): assumes `printable_range.end` is the offset in `self` at which this
// non-ASCII run starts, so the slice covers exactly the `ascii_position` bytes that
// were just skipped in `rest` — confirm.
let unicode_chunk = unsafe {

Check failure on line 2420 in library/core/src/fmt/mod.rs

View workflow job for this annotation

GitHub Actions / PR - mingw-check-tidy

undocumented unsafe
self.get_unchecked(printable_range.end..printable_range.end + ascii_position)
};
for c in unicode_chunk.chars() {
// NOTE(review): the run was cut before the first byte below 0x80, so, provided the
// slice above is right, it contains no ASCII characters — confirm.
unsafe { crate::hint::assert_unchecked(c as u32 >= 0x80) };

Check failure on line 2424 in library/core/src/fmt/mod.rs

View workflow job for this annotation

GitHub Actions / PR - mingw-check-tidy

undocumented unsafe
let esc = c.escape_debug_ext(EscapeDebugExtArgs {
escape_grapheme_extended: true,
escape_single_quote: false,
escape_double_quote: true,
});
if esc.len() != 1 {
// NOTE(review): relies on the same `printable_range` invariant as the flush in the
// ASCII branch.
f.write_str(unsafe { self.get_unchecked(printable_range.clone()) })?;

Check failure on line 2431 in library/core/src/fmt/mod.rs

View workflow job for this annotation

GitHub Actions / PR - mingw-check-tidy

undocumented unsafe
Display::fmt(&esc, f)?;
printable_range.start = printable_range.end + c.len_utf8();
}
from = i + c.len_utf8();
printable_range.end += c.len_utf8();
}
}
f.write_str(&self[from..])?;

// Flush whatever is still pending.
// NOTE(review): relies on the same `printable_range` invariant as the earlier flushes.
f.write_str(unsafe { self.get_unchecked(printable_range) })?;

Check failure on line 2439 in library/core/src/fmt/mod.rs

View workflow job for this annotation

GitHub Actions / PR - mingw-check-tidy

undocumented unsafe

f.write_char('"')
}
}
Expand All @@ -2419,13 +2453,12 @@
impl Debug for char {
// Writes the character between single quotes, escaped via `escape_debug_ext` with
// single quotes escaped and double quotes left as they are.
// NOTE(review): this listing is a rendered diff. The `for c in … { f.write_char(c)? }`
// loop and the `let esc = …; Display::fmt(&esc, f)?` form are shown together; the
// latter appears to replace the former.
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
f.write_char('\'')?;
for c in self.escape_debug_ext(EscapeDebugExtArgs {
let esc = self.escape_debug_ext(EscapeDebugExtArgs {
escape_grapheme_extended: true,
escape_single_quote: true,
escape_double_quote: false,
}) {
f.write_char(c)?
}
});
Display::fmt(&esc, f)?;
f.write_char('\'')
}
}
Expand Down