Skip to content

Commit

Permalink
perf(parser): more efficient number parsing (#3342)
Browse files Browse the repository at this point in the history
Follow-on after #3296.

Make parsing binary/octal/hex numeric literals a little more efficient.

These changes all rely on the fact that we know more than the compiler does:
strings passed to these `parse_*` functions can only contain a
certain set of characters.
  • Loading branch information
overlookmotel committed May 21, 2024
1 parent e7a6595 commit fdb31c3
Showing 1 changed file with 56 additions and 24 deletions.
80 changes: 56 additions & 24 deletions crates/oxc_parser/src/lexer/number.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
//! Parsing utilities for converting Javascript numbers to Rust f64
//! code copied from [jsparagus](https://github.com/mozilla-spidermonkey/jsparagus/blob/master/crates/parser/src/numeric_value.rs)

use static_assertions::const_assert_eq;

use num_bigint::BigInt;
use num_traits::Num as _;
use std::borrow::Cow;
Expand Down Expand Up @@ -56,58 +58,88 @@ fn parse_float_without_underscores(s: &str) -> Result<f64, &'static str> {
/// these numbers are usually not long. On x86_64, FMUL has a latency of 4 clock
/// cycles, which doesn't include addition. Some platforms support mul + add in a
/// single instruction, but many others do not.
///
/// Converts a non-empty string of binary digits (`0`/`1`, no `0b` prefix, no `_`
/// separators) to the nearest `f64`.
///
/// Literals of up to 64 significant digits are accumulated in a `u64` with shifts.
/// Wider literals keep their 64 leading bits plus a "sticky" bit for everything
/// below, so the result is still rounded to nearest (ties to even) instead of
/// silently losing the high bits. Values too large for `f64` become infinity.
// `u64 -> f64` is intentionally lossy: rounding to the nearest double is the goal.
#[allow(clippy::cast_precision_loss)]
fn parse_binary(s: &str) -> f64 {
    // b'0' is 0x30 and b'1' is 0x31.
    // So we can convert from binary digit to its value with `c & 1`.
    // This produces more compact assembly than `c - b'0'`.
    // https://godbolt.org/z/1vvrK78jf
    const fn byte_to_value(c: u8) -> u8 {
        debug_assert!(c == b'0' || c == b'1');
        c & 1
    }
    const_assert_eq!(byte_to_value(b'0'), 0);
    const_assert_eq!(byte_to_value(b'1'), 1);

    // Number of binary digits that are guaranteed to fit in a `u64`.
    const MAX_EXACT_DIGITS: usize = 64;

    debug_assert!(!s.is_empty());

    // Leading zeros carry no value. Dropping them (only when the literal looks too
    // long) keeps zero-padded literals on the exact path.
    let digits = if s.len() > MAX_EXACT_DIGITS { s.trim_start_matches('0') } else { s };
    let bytes = digits.as_bytes();
    let (head, tail) = bytes.split_at(bytes.len().min(MAX_EXACT_DIGITS));

    let mut result = 0_u64;
    for &c in head {
        debug_assert!(c != b'_');
        result <<= 1;
        result |= u64::from(byte_to_value(c));
    }

    if tail.is_empty() {
        return result as f64;
    }

    // Wide literal: `head` holds the 64 most significant bits (top bit set, because
    // leading zeros were trimmed). Any non-zero digit below them is folded into bit 0,
    // which lies far below the 53 bits an `f64` keeps, so it only breaks rounding ties.
    if tail.iter().any(|&c| byte_to_value(c) != 0) {
        result |= 1;
    }
    // Scaling by a power of two is exact; it can only overflow to infinity.
    tail.iter().fold(result as f64, |scaled, _| scaled * 2.0)
}

/// Converts a non-empty string of octal digits (`0`-`7`, no `0o` prefix, no `_`
/// separators) to the nearest `f64`.
///
/// Literals of up to 21 significant digits (63 bits) are accumulated in a `u64`
/// with shifts. Wider literals keep their 21 leading digits plus a "sticky" bit for
/// everything below, so the result is still rounded to nearest (ties to even)
/// instead of silently losing the high bits. Values too large for `f64` become
/// infinity.
// `u64 -> f64` is intentionally lossy: rounding to the nearest double is the goal.
#[allow(clippy::cast_precision_loss)]
fn parse_octal(s: &str) -> f64 {
    // b'0' is 0x30 and b'7' is 0x37.
    // So we can convert from any octal digit to its value with `c & 7`.
    // This produces more compact assembly than `c - b'0'`.
    // https://godbolt.org/z/9rYTsMoMM
    const fn byte_to_value(c: u8) -> u8 {
        debug_assert!(c >= b'0' && c <= b'7');
        c & 7
    }
    const_assert_eq!(byte_to_value(b'0'), 0);
    const_assert_eq!(byte_to_value(b'7'), 7);

    // Number of octal digits (3 bits each) that are guaranteed to fit in a `u64`.
    const MAX_EXACT_DIGITS: usize = 21;

    debug_assert!(!s.is_empty());

    // Leading zeros carry no value. Dropping them (only when the literal looks too
    // long) keeps zero-padded literals on the exact path.
    let digits = if s.len() > MAX_EXACT_DIGITS { s.trim_start_matches('0') } else { s };
    let bytes = digits.as_bytes();
    let (head, tail) = bytes.split_at(bytes.len().min(MAX_EXACT_DIGITS));

    let mut result = 0_u64;
    for &c in head {
        debug_assert!(c != b'_');
        result <<= 3;
        result |= u64::from(byte_to_value(c));
    }

    if tail.is_empty() {
        return result as f64;
    }

    // Wide literal: `head` holds the 21 most significant digits, and its first digit
    // is non-zero because leading zeros were trimmed, so at least 61 bits are in use.
    // Any non-zero digit below them is folded into bit 0, which lies far below the
    // 53 bits an `f64` keeps, so it only breaks rounding ties.
    if tail.iter().any(|&c| byte_to_value(c) != 0) {
        result |= 1;
    }
    // Scaling by a power of two is exact; it can only overflow to infinity.
    tail.iter().fold(result as f64, |scaled, _| scaled * 8.0)
}

/// Converts a non-empty string of hex digits (`0`-`9`, `A`-`F`, `a`-`f`, no `0x`
/// prefix, no `_` separators) to the nearest `f64`.
///
/// Literals of up to 16 significant digits (64 bits) are accumulated in a `u64`
/// with shifts. Wider literals keep their 16 leading digits plus a "sticky" bit for
/// everything below, so the result is still rounded to nearest (ties to even)
/// instead of silently losing the high bits. Values too large for `f64` become
/// infinity.
///
/// Callers must only pass valid hex digits: other bytes are caught by a
/// `debug_assert!` in debug builds and produce a meaningless value in release builds.
// `u64 -> f64` is intentionally lossy: rounding to the nearest double is the goal.
#[allow(clippy::cast_precision_loss)]
fn parse_hex(s: &str) -> f64 {
    // b'0' is 0x30 and b'9' is 0x39.
    // b'A' is 0x41 and b'F' is 0x46.
    // b'a' is 0x61 and b'f' is 0x66.
    // So we can convert from a digit to its value with `c & 15`,
    // and from `A-F` or `a-f` to its value with `(c & 15) + 9`.
    // We could use `(c & 7) + 9` for `A-F`, but we use `(c & 15) + 9`
    // so that both branches share the same `c & 15` operation.
    // This produces slightly more assembly than explicitly matching all possibilities,
    // but only because compiler unrolls the loop.
    // https://godbolt.org/z/5fsdv8rGo
    const fn byte_to_value(c: u8) -> u8 {
        debug_assert!(
            (c >= b'0' && c <= b'9') || (c >= b'A' && c <= b'F') || (c >= b'a' && c <= b'f')
        );
        if c < b'A' {
            c & 15 // 0-9
        } else {
            (c & 15) + 9 // A-F or a-f
        }
    }
    const_assert_eq!(byte_to_value(b'0'), 0);
    const_assert_eq!(byte_to_value(b'9'), 9);
    const_assert_eq!(byte_to_value(b'A'), 10);
    const_assert_eq!(byte_to_value(b'F'), 15);
    const_assert_eq!(byte_to_value(b'a'), 10);
    const_assert_eq!(byte_to_value(b'f'), 15);

    // Number of hex digits (4 bits each) that are guaranteed to fit in a `u64`.
    const MAX_EXACT_DIGITS: usize = 16;

    debug_assert!(!s.is_empty());

    // Leading zeros carry no value. Dropping them (only when the literal looks too
    // long) keeps zero-padded literals on the exact path.
    let digits = if s.len() > MAX_EXACT_DIGITS { s.trim_start_matches('0') } else { s };
    let bytes = digits.as_bytes();
    let (head, tail) = bytes.split_at(bytes.len().min(MAX_EXACT_DIGITS));

    let mut result = 0_u64;
    for &c in head {
        debug_assert!(c != b'_');
        result <<= 4;
        result |= u64::from(byte_to_value(c));
    }

    if tail.is_empty() {
        return result as f64;
    }

    // Wide literal: `head` holds the 16 most significant digits, and its first digit
    // is non-zero because leading zeros were trimmed, so at least 61 bits are in use.
    // Any non-zero digit below them is folded into bit 0, which lies far below the
    // 53 bits an `f64` keeps, so it only breaks rounding ties.
    if tail.iter().any(|&c| byte_to_value(c) != 0) {
        result |= 1;
    }
    // Scaling by a power of two is exact; it can only overflow to infinity.
    tail.iter().fold(result as f64, |scaled, _| scaled * 16.0)
}

Expand Down

0 comments on commit fdb31c3

Please sign in to comment.