From acb42cf3ab0a6c53d9a0deb84e8758c085f466c6 Mon Sep 17 00:00:00 2001 From: George Bateman Date: Thu, 13 Oct 2022 22:47:18 +0100 Subject: [PATCH 1/4] Make is_ascii_hexdigit branchless Bitwise-or with 0x20 before checking if character in range a-z avoids need to check if it is in range A-Z. This makes the generated code shorter and faster. --- library/core/src/char/methods.rs | 3 ++- library/core/src/num/mod.rs | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs index 224bc9effe61e..3b5d0240842be 100644 --- a/library/core/src/char/methods.rs +++ b/library/core/src/char/methods.rs @@ -1510,7 +1510,8 @@ impl char { #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")] #[inline] pub const fn is_ascii_hexdigit(&self) -> bool { - matches!(*self, '0'..='9' | 'A'..='F' | 'a'..='f') + // Bitwise or can avoid need for branches in compiled code. + matches!(*self, '0'..='9') || matches!(*self as u32 | 0x20, 0x61..=0x7a) } /// Checks if the value is an ASCII punctuation character: diff --git a/library/core/src/num/mod.rs b/library/core/src/num/mod.rs index 311c5fa5b6834..7a87bc9c8c31b 100644 --- a/library/core/src/num/mod.rs +++ b/library/core/src/num/mod.rs @@ -688,7 +688,8 @@ impl u8 { #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")] #[inline] pub const fn is_ascii_hexdigit(&self) -> bool { - matches!(*self, b'0'..=b'9' | b'A'..=b'F' | b'a'..=b'f') + // Bitwise or can avoid need for branches in compiled code. + matches!(*self, b'0'..=b'9') || matches!(*self | 0x20, b'a'..=b'z') } /// Checks if the value is an ASCII punctuation character: From 5b624ac29f60ef425d7d741cde24d4d6ca9623a8 Mon Sep 17 00:00:00 2001 From: George Bateman Date: Thu, 13 Oct 2022 23:15:59 +0100 Subject: [PATCH 2/4] Fix typo in range --- library/core/src/char/methods.rs | 2 +- library/core/src/num/mod.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs index 3b5d0240842be..e2db7c261a5a9 100644 --- a/library/core/src/char/methods.rs +++ b/library/core/src/char/methods.rs @@ -1511,7 +1511,7 @@ impl char { #[inline] pub const fn is_ascii_hexdigit(&self) -> bool { // Bitwise or can avoid need for branches in compiled code. - matches!(*self, '0'..='9') || matches!(*self as u32 | 0x20, 0x61..=0x7a) + matches!(*self, '0'..='9') || matches!(*self as u32 | 0x20, 0x61..=0x66) } /// Checks if the value is an ASCII punctuation character: diff --git a/library/core/src/num/mod.rs b/library/core/src/num/mod.rs index 7a87bc9c8c31b..47ad5c9a2daaa 100644 --- a/library/core/src/num/mod.rs +++ b/library/core/src/num/mod.rs @@ -689,7 +689,7 @@ impl u8 { #[inline] pub const fn is_ascii_hexdigit(&self) -> bool { // Bitwise or can avoid need for branches in compiled code. - matches!(*self, b'0'..=b'9') || matches!(*self | 0x20, b'a'..=b'z') + matches!(*self, b'0'..=b'9') || matches!(*self | 0x20, b'a'..=b'f') } /// Checks if the value is an ASCII punctuation character: From f6248976b513856a8218c56dfe4969bc212d13df Mon Sep 17 00:00:00 2001 From: George Bateman Date: Sat, 15 Oct 2022 00:50:02 +0100 Subject: [PATCH 3/4] Clarify comment and use character literals in char::is_ascii_hexdigit --- library/core/src/char/methods.rs | 6 ++++-- library/core/src/num/mod.rs | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs index e2db7c261a5a9..cf687d5e495d4 100644 --- a/library/core/src/char/methods.rs +++ b/library/core/src/char/methods.rs @@ -1510,8 +1510,10 @@ impl char { #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")] #[inline] pub const fn is_ascii_hexdigit(&self) -> bool { - // Bitwise or can avoid need for branches in compiled code. - matches!(*self, '0'..='9') || matches!(*self as u32 | 0x20, 0x61..=0x66) + // Bitwise or converts A-Z to a-z, avoiding need for branches in compiled code. + const A: u32 = 'a' as u32; + const F: u32 = 'f' as u32; + matches!(*self, '0'..='9') || matches!(*self as u32 | 0x20, A..=F) } /// Checks if the value is an ASCII punctuation character: diff --git a/library/core/src/num/mod.rs b/library/core/src/num/mod.rs index 47ad5c9a2daaa..7dc10033dfb47 100644 --- a/library/core/src/num/mod.rs +++ b/library/core/src/num/mod.rs @@ -688,7 +688,7 @@ impl u8 { #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")] #[inline] pub const fn is_ascii_hexdigit(&self) -> bool { - // Bitwise or can avoid need for branches in compiled code. + // Bitwise or converts A-Z to a-z, avoiding need for branches in compiled code. matches!(*self, b'0'..=b'9') || matches!(*self | 0x20, b'a'..=b'f') } From 8d1d17f3b2edbdda77d0d957f314886704970fa9 Mon Sep 17 00:00:00 2001 From: George Bateman Date: Sat, 22 Oct 2022 22:59:20 +0100 Subject: [PATCH 4/4] Fully optimize is_ascii_hexdigit by hand This should compile to the same thing as the previous commit (at a suitable optimization level) but makes it very clear what is intended. --- library/core/src/char/methods.rs | 5 ++--- library/core/src/num/mod.rs | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs index cf687d5e495d4..015c4b041b53e 100644 --- a/library/core/src/char/methods.rs +++ b/library/core/src/char/methods.rs @@ -1511,9 +1511,8 @@ impl char { #[inline] pub const fn is_ascii_hexdigit(&self) -> bool { // Bitwise or converts A-Z to a-z, avoiding need for branches in compiled code. - const A: u32 = 'a' as u32; - const F: u32 = 'f' as u32; - matches!(*self, '0'..='9') || matches!(*self as u32 | 0x20, A..=F) + ((*self as u32).wrapping_sub('0' as u32) < 10) + | ((*self as u32 | 0x20).wrapping_sub('a' as u32) < 6) } /// Checks if the value is an ASCII punctuation character: diff --git a/library/core/src/num/mod.rs b/library/core/src/num/mod.rs index 7dc10033dfb47..82731fd9436c0 100644 --- a/library/core/src/num/mod.rs +++ b/library/core/src/num/mod.rs @@ -689,7 +689,7 @@ impl u8 { #[inline] pub const fn is_ascii_hexdigit(&self) -> bool { // Bitwise or converts A-Z to a-z, avoiding need for branches in compiled code. - matches!(*self, b'0'..=b'9') || matches!(*self | 0x20, b'a'..=b'f') + (self.wrapping_sub(b'0') < 10) | ((*self | 0x20).wrapping_sub(b'a') < 6) } /// Checks if the value is an ASCII punctuation character: