From e48c68479eb9f898154843dd2ae291d53df952a3 Mon Sep 17 00:00:00 2001 From: Miccah Castorina Date: Sun, 24 Jan 2021 16:03:38 -0600 Subject: [PATCH 1/3] Add a check for ASCII characters in to_upper and to_lower This extra check has better performance. See discussion here: https://internals.rust-lang.org/t/to-upper-speed/13896 --- library/core/src/unicode/unicode_data.rs | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/library/core/src/unicode/unicode_data.rs b/library/core/src/unicode/unicode_data.rs index 9c92a8ba28ae4..16803bf2e83b9 100644 --- a/library/core/src/unicode/unicode_data.rs +++ b/library/core/src/unicode/unicode_data.rs @@ -549,16 +549,24 @@ pub mod white_space { #[rustfmt::skip] pub mod conversions { pub fn to_lower(c: char) -> [char; 3] { - match bsearch_case_table(c, LOWERCASE_TABLE) { - None => [c, '\0', '\0'], - Some(index) => LOWERCASE_TABLE[index].1, + if c.is_ascii() { + [(c as u8).to_ascii_lowercase() as char, '\0', '\0'] + } else { + match bsearch_case_table(c, LOWERCASE_TABLE) { + None => [c, '\0', '\0'], + Some(index) => LOWERCASE_TABLE[index].1, + } } } pub fn to_upper(c: char) -> [char; 3] { - match bsearch_case_table(c, UPPERCASE_TABLE) { - None => [c, '\0', '\0'], - Some(index) => UPPERCASE_TABLE[index].1, + if c.is_ascii() { + [(c as u8).to_ascii_uppercase() as char, '\0', '\0'] + } else { + match bsearch_case_table(c, UPPERCASE_TABLE) { + None => [c, '\0', '\0'], + Some(index) => UPPERCASE_TABLE[index].1, + } } } From 8acb5665072f499effeb77eb2c60fd9cf74a2955 Mon Sep 17 00:00:00 2001 From: Miccah Castorina Date: Sun, 24 Jan 2021 20:29:44 -0600 Subject: [PATCH 2/3] Add to_lowercase and to_uppercase char benchmarks --- library/core/benches/char/methods.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/library/core/benches/char/methods.rs b/library/core/benches/char/methods.rs index de4b63030fa7c..749cf979fad13 100644 --- a/library/core/benches/char/methods.rs +++ b/library/core/benches/char/methods.rs @@ -45,3 +45,13 @@ fn bench_to_ascii_uppercase(b: &mut Bencher) { fn bench_to_ascii_lowercase(b: &mut Bencher) { b.iter(|| CHARS.iter().cycle().take(10_000).map(|c| c.to_ascii_lowercase()).min()) } + +#[bench] +fn bench_char_to_uppercase(b: &mut Bencher) { + b.iter(|| (0..=255).cycle().take(10_000).map(|b| char::from(b).to_uppercase()).count()) +} + +#[bench] +fn bench_char_to_lowercase(b: &mut Bencher) { + b.iter(|| (0..=255).cycle().take(10_000).map(|b| char::from(b).to_lowercase()).count()) +} From 229fdf839a348ddbdd2c6cf411767a565394a8ae Mon Sep 17 00:00:00 2001 From: Miccah Castorina Date: Tue, 2 Feb 2021 20:18:03 -0600 Subject: [PATCH 3/3] Add two more benchmarks for strictly ASCII and non ASCII cases --- library/core/benches/char/methods.rs | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/library/core/benches/char/methods.rs b/library/core/benches/char/methods.rs index 749cf979fad13..9408f83c32f7c 100644 --- a/library/core/benches/char/methods.rs +++ b/library/core/benches/char/methods.rs @@ -47,11 +47,31 @@ fn bench_to_ascii_lowercase(b: &mut Bencher) { } #[bench] -fn bench_char_to_uppercase(b: &mut Bencher) { +fn bench_ascii_mix_to_uppercase(b: &mut Bencher) { b.iter(|| (0..=255).cycle().take(10_000).map(|b| char::from(b).to_uppercase()).count()) } #[bench] -fn bench_char_to_lowercase(b: &mut Bencher) { +fn bench_ascii_mix_to_lowercase(b: &mut Bencher) { b.iter(|| (0..=255).cycle().take(10_000).map(|b| char::from(b).to_lowercase()).count()) } + +#[bench] +fn bench_ascii_char_to_uppercase(b: &mut Bencher) { + b.iter(|| (0..=127).cycle().take(10_000).map(|b| char::from(b).to_uppercase()).count()) +} + +#[bench] +fn bench_ascii_char_to_lowercase(b: &mut Bencher) { + b.iter(|| (0..=127).cycle().take(10_000).map(|b| char::from(b).to_lowercase()).count()) +} + +#[bench] +fn bench_non_ascii_char_to_uppercase(b: &mut Bencher) { + b.iter(|| (128..=255).cycle().take(10_000).map(|b| char::from(b).to_uppercase()).count()) +} + +#[bench] +fn bench_non_ascii_char_to_lowercase(b: &mut Bencher) { + b.iter(|| (128..=255).cycle().take(10_000).map(|b| char::from(b).to_lowercase()).count()) +}