Skip to content

Commit

Permalink
refactor(es/parser): Do not use lexical (#7758)
Browse files Browse the repository at this point in the history
This PR replaces the current usage of lexical within the swc_ecma_parser
crate with equivalent parsing of large numbers using BigInt.

**Description:**

As discussed in
#7752, lexical contains a
number of soundness issues and doesn't appear to be actively supported.
Given the relatively low integration surface, it seems reasonable to
replace the usage of lexical with another package to avoid this issue.

**Related issue:**

- Closes #7752
  • Loading branch information
gmcsorley-work committed Aug 6, 2023
1 parent c9a5560 commit e50cfde
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 38 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion crates/swc_ecma_parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ verify = ["swc_ecma_visit"]

[dependencies]
either = { version = "1.4" }
lexical = { version = "6.1.0", features = ["power-of-two", "parse-integers", "parse-floats"], default-features = false }
num-bigint = "0.4"
num-traits = "0.2.15"
serde = { version = "1", features = ["derive"] }
smallvec = "1.8.0"
smartstring = "1"
Expand Down
12 changes: 3 additions & 9 deletions crates/swc_ecma_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -294,15 +294,9 @@ impl<'a> Lexer<'a> {
let next = self.input.peek();

let bigint = match next {
Some('x') | Some('X') => {
self.read_radix_number::<16, { lexical::NumberFormatBuilder::hexadecimal() }>()
}
Some('o') | Some('O') => {
self.read_radix_number::<8, { lexical::NumberFormatBuilder::octal() }>()
}
Some('b') | Some('B') => {
self.read_radix_number::<2, { lexical::NumberFormatBuilder::binary() }>()
}
Some('x') | Some('X') => self.read_radix_number::<16>(),
Some('o') | Some('O') => self.read_radix_number::<8>(),
Some('b') | Some('B') => self.read_radix_number::<2>(),
_ => {
return self.read_number(false).map(|v| match v {
Left((value, raw)) => Num { value, raw },
Expand Down
51 changes: 24 additions & 27 deletions crates/swc_ecma_parser/src/lexer/number.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use std::{borrow::Cow, fmt::Write};

use either::Either;
use num_bigint::BigInt as BigIntValue;
use num_traits::{Num as NumTrait, ToPrimitive};
use smartstring::{LazyCompact, SmartString};
use swc_common::SyntaxContext;
use tracing::trace;
Expand Down Expand Up @@ -56,9 +57,7 @@ impl<'a> Lexer<'a> {
let starts_with_zero = self.cur().unwrap() == '0';

// Use read_number_no_dot to support long numbers.
let (val, s, mut raw, not_octal) = self
.read_number_no_dot_as_str::<10, { lexical::NumberFormatBuilder::from_radix(10) }>(
)?;
let (val, s, mut raw, not_octal) = self.read_number_no_dot_as_str::<10>()?;

if self.eat(b'n') {
raw.push('n');
Expand Down Expand Up @@ -102,17 +101,17 @@ impl<'a> Lexer<'a> {
self.emit_strict_mode_error(start, SyntaxError::LegacyDecimal);
} else {
// It's Legacy octal, and we should reinterpret value.
let val = lexical::parse_with_options::<
f64,
_,
{ lexical::NumberFormatBuilder::from_radix(8) },
>(
val_str,
&lexical::parse_float_options::Options::from_radix(8),
)
.unwrap_or_else(|err| {
panic!("failed to parse {} using `lexical`: {:?}", val_str, err)
});
let val = BigIntValue::from_str_radix(val_str, 8)
.unwrap_or_else(|err| {
panic!(
"failed to parse {} using `from_str_radix`: {:?}",
val_str, err
)
})
.to_f64()
.unwrap_or_else(|| {
panic!("failed to parse {} into float using BigInt", val_str)
});

return self.make_legacy_octal(start, val).map(|value| {
Either::Left((value, self.atoms.borrow_mut().intern(&*raw)))
Expand Down Expand Up @@ -235,7 +234,7 @@ impl<'a> Lexer<'a> {
}

/// Returns `Left(value)` or `Right(BigInt)`
pub(super) fn read_radix_number<const RADIX: u8, const FORMAT: u128>(
pub(super) fn read_radix_number<const RADIX: u8>(
&mut self,
) -> LexResult<Either<(f64, Atom), (Box<BigIntValue>, Atom)>> {
debug_assert!(
Expand Down Expand Up @@ -263,7 +262,7 @@ impl<'a> Lexer<'a> {

buf.push(c);

let (val, s, raw, _) = l.read_number_no_dot_as_str::<RADIX, FORMAT>()?;
let (val, s, raw, _) = l.read_number_no_dot_as_str::<RADIX>()?;

buf.push_str(&raw);

Expand Down Expand Up @@ -315,7 +314,7 @@ impl<'a> Lexer<'a> {
///
///
/// Returned bool is `true` if there was `8` or `9`.
fn read_number_no_dot_as_str<const RADIX: u8, const FORMAT: u128>(
fn read_number_no_dot_as_str<const RADIX: u8>(
&mut self,
) -> LexResult<(f64, LazyBigInt<RADIX>, SmartString<LazyCompact>, bool)> {
debug_assert!(
Expand Down Expand Up @@ -351,13 +350,12 @@ impl<'a> Lexer<'a> {
let raw_str = raw.0.take().unwrap();
// Remove number separator from number
let raw_number_str = raw_str.replace('_', "");

let parsed_float = BigIntValue::from_str_radix(&raw_number_str, RADIX as u32)
.expect("failed to parse float using BigInt")
.to_f64()
.expect("failed to parse float using BigInt");
Ok((
lexical::parse_with_options::<f64, _, FORMAT>(
raw_number_str.as_bytes(),
&lexical::parse_float_options::Options::from_radix(RADIX),
)
.expect("failed to parse float using lexical"),
parsed_float,
LazyBigInt::new(raw_number_str),
raw_str,
non_octal,
Expand Down Expand Up @@ -696,7 +694,7 @@ mod tests {
assert_eq!(
(0o73 as f64, "0o73".into()),
lex("0o73", |l| l
.read_radix_number::<8, { lexical::NumberFormatBuilder::octal() }>()
.read_radix_number::<8>()
.unwrap()
.left()
.unwrap())
Expand Down Expand Up @@ -750,18 +748,17 @@ mod tests {
111111111111111111111111111111111111111111111111111111111111111111\
111111111111111111111111111111111111111111111111111111111111111111\
0010111110001111111111";
const FORMAT: u128 = lexical::NumberFormatBuilder::binary();
assert_eq!(
lex(LONG, |l| l
.read_radix_number::<2, FORMAT>()
.read_radix_number::<2>()
.unwrap()
.left()
.unwrap()),
(9.671_406_556_917_009e24, LONG.into())
);
assert_eq!(
lex(VERY_LARGE_BINARY_NUMBER, |l| l
.read_radix_number::<2, FORMAT>()
.read_radix_number::<2>()
.unwrap()
.left()
.unwrap()),
Expand Down

1 comment on commit e50cfde

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark

Benchmark suite Current: e50cfde Previous: 1122de7 Ratio
es/full/bugs-1 304611 ns/iter (± 6064) 295914 ns/iter (± 8434) 1.03
es/full/minify/libraries/antd 1351980626 ns/iter (± 13265254) 1343013103 ns/iter (± 9171052) 1.01
es/full/minify/libraries/d3 287109775 ns/iter (± 5171984) 280868615 ns/iter (± 4462783) 1.02
es/full/minify/libraries/echarts 1113357930 ns/iter (± 15877315) 1079116256 ns/iter (± 11951375) 1.03
es/full/minify/libraries/jquery 85627841 ns/iter (± 292509) 85269434 ns/iter (± 260022) 1.00
es/full/minify/libraries/lodash 99406274 ns/iter (± 440203) 98954150 ns/iter (± 351279) 1.00
es/full/minify/libraries/moment 50170311 ns/iter (± 169746) 49959071 ns/iter (± 99283) 1.00
es/full/minify/libraries/react 18179381 ns/iter (± 88470) 18011389 ns/iter (± 35356) 1.01
es/full/minify/libraries/terser 226528784 ns/iter (± 2479283) 220850711 ns/iter (± 979269) 1.03
es/full/minify/libraries/three 409311239 ns/iter (± 8795904) 390107365 ns/iter (± 3902790) 1.05
es/full/minify/libraries/typescript 2755793001 ns/iter (± 11567024) 2666167376 ns/iter (± 9454880) 1.03
es/full/minify/libraries/victory 585847601 ns/iter (± 13282001) 561880222 ns/iter (± 8582351) 1.04
es/full/minify/libraries/vue 122059951 ns/iter (± 675238) 120267849 ns/iter (± 541279) 1.01
es/full/codegen/es3 34313 ns/iter (± 69) 35476 ns/iter (± 99) 0.97
es/full/codegen/es5 34197 ns/iter (± 117) 35484 ns/iter (± 73) 0.96
es/full/codegen/es2015 34277 ns/iter (± 111) 35387 ns/iter (± 115) 0.97
es/full/codegen/es2016 34221 ns/iter (± 66) 35596 ns/iter (± 71) 0.96
es/full/codegen/es2017 34265 ns/iter (± 162) 35538 ns/iter (± 147) 0.96
es/full/codegen/es2018 34256 ns/iter (± 88) 35566 ns/iter (± 107) 0.96
es/full/codegen/es2019 34285 ns/iter (± 103) 35519 ns/iter (± 219) 0.97
es/full/codegen/es2020 34229 ns/iter (± 88) 35501 ns/iter (± 93) 0.96
es/full/all/es3 173302261 ns/iter (± 669232) 172743214 ns/iter (± 1231695) 1.00
es/full/all/es5 164834820 ns/iter (± 2076179) 164858712 ns/iter (± 1337508) 1.00
es/full/all/es2015 125405254 ns/iter (± 985763) 124324434 ns/iter (± 896457) 1.01
es/full/all/es2016 124114410 ns/iter (± 1021988) 123434125 ns/iter (± 387921) 1.01
es/full/all/es2017 123728469 ns/iter (± 472193) 122917546 ns/iter (± 714464) 1.01
es/full/all/es2018 121482747 ns/iter (± 876580) 120743653 ns/iter (± 661044) 1.01
es/full/all/es2019 121123844 ns/iter (± 1003417) 119301080 ns/iter (± 737894) 1.02
es/full/all/es2020 116068942 ns/iter (± 795746) 115566620 ns/iter (± 639092) 1.00
es/full/parser 538508 ns/iter (± 6570) 533227 ns/iter (± 5610) 1.01
es/full/base/fixer 17190 ns/iter (± 73) 18659 ns/iter (± 136) 0.92
es/full/base/resolver_and_hygiene 80343 ns/iter (± 93) 81926 ns/iter (± 93) 0.98
serialization of serde 294 ns/iter (± 0) 291 ns/iter (± 0) 1.01
css/minify/libraries/bootstrap 30159893 ns/iter (± 304748) 28795234 ns/iter (± 75696) 1.05
css/visitor/compare/clone 1644030 ns/iter (± 4263) 1658944 ns/iter (± 3243) 0.99
css/visitor/compare/visit_mut_span 1768789 ns/iter (± 5336) 1784477 ns/iter (± 4373) 0.99
css/visitor/compare/visit_mut_span_panic 1854257 ns/iter (± 5601) 1858858 ns/iter (± 5680) 1.00
css/visitor/compare/fold_span 2588600 ns/iter (± 13902) 2609178 ns/iter (± 11360) 0.99
css/visitor/compare/fold_span_panic 2765447 ns/iter (± 17797) 2803620 ns/iter (± 12107) 0.99
css/lexer/bootstrap_5_1_3 4545648 ns/iter (± 11648) 4476079 ns/iter (± 1562) 1.02
css/lexer/foundation_6_7_4 3839590 ns/iter (± 2693) 3774179 ns/iter (± 2932) 1.02
css/lexer/tailwind_3_1_1 724657 ns/iter (± 840) 714509 ns/iter (± 310) 1.01
css/parser/bootstrap_5_1_3 18826427 ns/iter (± 43795) 19123151 ns/iter (± 33066) 0.98
css/parser/foundation_6_7_4 15128825 ns/iter (± 336787) 15223725 ns/iter (± 10994) 0.99
css/parser/tailwind_3_1_1 2930432 ns/iter (± 7372) 2948723 ns/iter (± 2167) 0.99
es/codegen/colors 736931 ns/iter (± 404624) 732336 ns/iter (± 401087) 1.01
es/codegen/large 3142810 ns/iter (± 1677548) 3124554 ns/iter (± 1672423) 1.01
es/codegen/with-parser/colors 45417 ns/iter (± 482) 44075 ns/iter (± 531) 1.03
es/codegen/with-parser/large 483585 ns/iter (± 1150) 483034 ns/iter (± 893) 1.00
es/minify/libraries/antd 1194922172 ns/iter (± 14047234) 1162761527 ns/iter (± 11970820) 1.03
es/minify/libraries/d3 244916359 ns/iter (± 2415276) 241981291 ns/iter (± 1316626) 1.01
es/minify/libraries/echarts 951955854 ns/iter (± 13297754) 928507728 ns/iter (± 12391128) 1.03
es/minify/libraries/jquery 74710698 ns/iter (± 1083377) 74402589 ns/iter (± 451664) 1.00
es/minify/libraries/lodash 89000965 ns/iter (± 294654) 88812779 ns/iter (± 274180) 1.00
es/minify/libraries/moment 43839026 ns/iter (± 212452) 43865647 ns/iter (± 68410) 1.00
es/minify/libraries/react 16144960 ns/iter (± 56754) 16097717 ns/iter (± 55855) 1.00
es/minify/libraries/terser 191433556 ns/iter (± 3337827) 189550086 ns/iter (± 1059551) 1.01
es/minify/libraries/three 330535480 ns/iter (± 4183569) 326866482 ns/iter (± 2753950) 1.01
es/minify/libraries/typescript 2331996731 ns/iter (± 23379655) 2308666259 ns/iter (± 12832078) 1.01
es/minify/libraries/victory 495862824 ns/iter (± 8265547) 486155814 ns/iter (± 6372371) 1.02
es/minify/libraries/vue 109825185 ns/iter (± 1035668) 108105368 ns/iter (± 410438) 1.02
es/visitor/compare/clone 1967309 ns/iter (± 5985) 1936366 ns/iter (± 7972) 1.02
es/visitor/compare/visit_mut_span 2286596 ns/iter (± 8099) 2278880 ns/iter (± 6056) 1.00
es/visitor/compare/visit_mut_span_panic 2339690 ns/iter (± 4159) 2319314 ns/iter (± 4206) 1.01
es/visitor/compare/fold_span 3390957 ns/iter (± 9847) 3326141 ns/iter (± 34974) 1.02
es/visitor/compare/fold_span_panic 3545470 ns/iter (± 14466) 3480809 ns/iter (± 4043) 1.02
es/lexer/colors 13289 ns/iter (± 28) 12383 ns/iter (± 43) 1.07
es/lexer/angular 6095098 ns/iter (± 5032) 5978412 ns/iter (± 5214) 1.02
es/lexer/backbone 795510 ns/iter (± 478) 781189 ns/iter (± 296) 1.02
es/lexer/jquery 4512857 ns/iter (± 7061) 4383192 ns/iter (± 2321) 1.03
es/lexer/jquery mobile 6894191 ns/iter (± 3812) 6715428 ns/iter (± 8313) 1.03
es/lexer/mootools 3568940 ns/iter (± 4574) 3476558 ns/iter (± 2887) 1.03
es/lexer/underscore 675493 ns/iter (± 797) 653043 ns/iter (± 440) 1.03
es/lexer/three 21417600 ns/iter (± 24584) 20609943 ns/iter (± 9886) 1.04
es/lexer/yui 3867867 ns/iter (± 4699) 3760274 ns/iter (± 960) 1.03
es/parser/colors 27114 ns/iter (± 47) 26301 ns/iter (± 43) 1.03
es/parser/angular 13638878 ns/iter (± 94419) 13398972 ns/iter (± 39035) 1.02
es/parser/backbone 2000314 ns/iter (± 11183) 1997031 ns/iter (± 8635) 1.00
es/parser/jquery 10962032 ns/iter (± 89225) 10807802 ns/iter (± 40233) 1.01
es/parser/jquery mobile 16794036 ns/iter (± 171872) 16585402 ns/iter (± 66178) 1.01
es/parser/mootools 8418050 ns/iter (± 31758) 8348554 ns/iter (± 19304) 1.01
es/parser/underscore 1730913 ns/iter (± 11081) 1715966 ns/iter (± 8968) 1.01
es/parser/three 48206891 ns/iter (± 435035) 46689038 ns/iter (± 278711) 1.03
es/parser/yui 8270789 ns/iter (± 35678) 8230237 ns/iter (± 25414) 1.00
es/preset-env/usage/builtin_type 138288 ns/iter (± 32666) 140491 ns/iter (± 32990) 0.98
es/preset-env/usage/property 16637 ns/iter (± 35) 16689 ns/iter (± 44) 1.00
es/resolver/typescript 90815550 ns/iter (± 1030956) 88147719 ns/iter (± 1083920) 1.03
es/fixer/typescript 65088772 ns/iter (± 732016) 63793701 ns/iter (± 687519) 1.02
es/hygiene/typescript 133423600 ns/iter (± 1019738) 131005544 ns/iter (± 696052) 1.02
es/resolver_with_hygiene/typescript 248468567 ns/iter (± 3262527) 240772424 ns/iter (± 1680146) 1.03
es/visitor/base-perf/module_clone 58671 ns/iter (± 140) 60030 ns/iter (± 275) 0.98
es/visitor/base-perf/fold_empty 62376 ns/iter (± 237) 64177 ns/iter (± 370) 0.97
es/visitor/base-perf/fold_noop_impl_all 63209 ns/iter (± 640) 63628 ns/iter (± 537) 0.99
es/visitor/base-perf/fold_noop_impl_vec 63020 ns/iter (± 423) 64263 ns/iter (± 266) 0.98
es/visitor/base-perf/boxing_boxed_clone 58 ns/iter (± 0) 56 ns/iter (± 0) 1.04
es/visitor/base-perf/boxing_unboxed_clone 39 ns/iter (± 0) 41 ns/iter (± 0) 0.95
es/visitor/base-perf/boxing_boxed 109 ns/iter (± 0) 110 ns/iter (± 0) 0.99
es/visitor/base-perf/boxing_unboxed 77 ns/iter (± 0) 79 ns/iter (± 0) 0.97
es/visitor/base-perf/visit_empty 0 ns/iter (± 0) 0 ns/iter (± 0) NaN
es/visitor/base-perf/visit_contains_this 2536 ns/iter (± 10) 2574 ns/iter (± 22) 0.99
es/base/parallel/resolver/typescript 4190055955 ns/iter (± 196233678) 3973775216 ns/iter (± 325370232) 1.05
es/base/parallel/hygiene/typescript 1462052990 ns/iter (± 15382848) 1487849691 ns/iter (± 3491625) 0.98
misc/visitors/time-complexity/time 5 156 ns/iter (± 5) 137 ns/iter (± 1) 1.14
misc/visitors/time-complexity/time 10 404 ns/iter (± 15) 402 ns/iter (± 5) 1.00
misc/visitors/time-complexity/time 15 901 ns/iter (± 26) 511 ns/iter (± 72) 1.76
misc/visitors/time-complexity/time 20 1496 ns/iter (± 88) 908 ns/iter (± 2) 1.65
misc/visitors/time-complexity/time 40 4359 ns/iter (± 274) 3497 ns/iter (± 39) 1.25
misc/visitors/time-complexity/time 60 11120 ns/iter (± 669) 7241 ns/iter (± 15) 1.54
es/full-target/es2016 232627 ns/iter (± 405) 237953 ns/iter (± 1510) 0.98
es/full-target/es2017 222057 ns/iter (± 873) 224845 ns/iter (± 2836) 0.99
es/full-target/es2018 209120 ns/iter (± 675) 214963 ns/iter (± 547) 0.97
es2020_nullish_coalescing 70224 ns/iter (± 226) 68436 ns/iter (± 468) 1.03
es2020_optional_chaining 81869 ns/iter (± 344) 79154 ns/iter (± 242) 1.03
es2022_class_properties 119609 ns/iter (± 334) 118447 ns/iter (± 306) 1.01
es2018_object_rest_spread 76278 ns/iter (± 504) 73513 ns/iter (± 175) 1.04
es2019_optional_catch_binding 64075 ns/iter (± 193) 62806 ns/iter (± 283) 1.02
es2017_async_to_generator 64376 ns/iter (± 178) 61670 ns/iter (± 247) 1.04
es2016_exponentiation 69222 ns/iter (± 231) 66485 ns/iter (± 228) 1.04
es2015_arrow 72477 ns/iter (± 195) 69179 ns/iter (± 158) 1.05
es2015_block_scoped_fn 68117 ns/iter (± 276) 67018 ns/iter (± 150) 1.02
es2015_block_scoping 123642 ns/iter (± 637) 121218 ns/iter (± 630) 1.02

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.