From aa22746d034c2579bcb0f0404866ff933b9037ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Donny/=EA=B0=95=EB=8F=99=EC=9C=A4?= Date: Tue, 7 Nov 2023 09:51:43 +0900 Subject: [PATCH] perf(atoms): Replace `string-cache` with `hstr` (#8126) **Description:** `hstr` is an alternative for `string-cache` which does not support static strings and does not use a global mutex. **Related issue:** - Closes #4946. - Closes #7974. --- Cargo.lock | 38 +++++----- crates/swc_atoms/Cargo.toml | 8 +- crates/swc_atoms/build.rs | 29 -------- crates/swc_atoms/src/lib.rs | 25 ++++--- .../swc_css_minifier/src/compressor/color.rs | 6 +- crates/swc_css_prefixer/src/prefixer.rs | 6 +- crates/swc_ecma_parser/src/lexer/jsx.rs | 12 +-- crates/swc_ecma_parser/src/lexer/mod.rs | 40 ++++++---- crates/swc_ecma_parser/src/lexer/number.rs | 27 ++++--- crates/swc_ecma_parser/src/lexer/util.rs | 4 +- crates/swc_ecma_parser/src/token.rs | 73 ++++++++++--------- 11 files changed, 133 insertions(+), 135 deletions(-) delete mode 100644 crates/swc_atoms/build.rs diff --git a/Cargo.lock b/Cargo.lock index f9084c9e61f5..0725fa1bd526 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1037,12 +1037,12 @@ dependencies = [ [[package]] name = "dashmap" -version = "5.4.0" +version = "5.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "907076dfda823b0b36d2a1bb5f90c96660a5bbcd7729e10727f07858f22c4edc" +checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" dependencies = [ "cfg-if", - "hashbrown 0.12.3", + "hashbrown 0.14.1", "lock_api", "once_cell", "parking_lot_core", @@ -1634,6 +1634,20 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +[[package]] +name = "hstr" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5700a2810cdc52f9a306f40c88031fac8d480f3d51ef38b9ca9ff5b8d4f3814" +dependencies = [ + "dashmap", + "new_debug_unreachable", + "once_cell", + "phf 0.11.2", + "rustc-hash", + "smallvec", +] + [[package]] name = "http" version = "0.2.9" @@ -3471,9 +3485,9 @@ checksum = "03b634d87b960ab1a38c4fe143b508576f075e7c978bfad18217645ebfdfa2ec" [[package]] name = "smallvec" -version = "1.10.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" +checksum = "942b4a808e05215192e39f4ab80813e599068285906cc91aa64f923db842bd5a" [[package]] name = "smartstring" @@ -3771,13 +3785,11 @@ name = "swc_atoms" version = "0.6.0" dependencies = [ "bytecheck", + "hstr", "once_cell", "rkyv", "rustc-hash", "serde", - "string_cache", - "string_cache_codegen", - "triomphe", ] [[package]] @@ -5870,16 +5882,6 @@ dependencies = [ "tracing-log", ] -[[package]] -name = "triomphe" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1ee9bd9239c339d714d657fac840c6d2a4f9c45f4f9ec7b0975113458be78db" -dependencies = [ - "serde", - "stable_deref_trait", -] - [[package]] name = "try-lock" version = "0.2.4" diff --git a/crates/swc_atoms/Cargo.toml b/crates/swc_atoms/Cargo.toml index 1b96d84c5312..3cd8ee7c13f3 100644 --- a/crates/swc_atoms/Cargo.toml +++ b/crates/swc_atoms/Cargo.toml @@ -1,6 +1,5 @@ [package] authors = ["강동윤 "] -build = "build.rs" description = "Atoms for the swc project." documentation = "https://rustdoc.swc.rs/swc_atoms/" edition = "2021" @@ -19,6 +18,7 @@ rkyv-impl = ["__rkyv", "rkyv", "bytecheck"] [dependencies] # bytecheck version should be in sync with rkyv version. Do not bump individually. bytecheck = { version = "0.6.10", optional = true } +hstr = "0.2.3" once_cell = "1" rkyv = { package = "rkyv", version = "=0.7.42", optional = true, features = [ "strict", @@ -26,9 +26,3 @@ rkyv = { package = "rkyv", version = "=0.7.42", optional = true, features = [ ] } rustc-hash = "1.1.0" serde = "1" -string_cache = "0.8.7" -triomphe = "0.1.8" - - -[build-dependencies] -string_cache_codegen = "0.5.2" diff --git a/crates/swc_atoms/build.rs b/crates/swc_atoms/build.rs deleted file mode 100644 index 7870949717ff..000000000000 --- a/crates/swc_atoms/build.rs +++ /dev/null @@ -1,29 +0,0 @@ -use std::{env, path::Path}; - -fn main() { - let strs = include_str!("words.txt") - .lines() - .map(|l| l.trim()) - .collect::>(); - gen("internal_word", "InternalWord", &strs); -} - -fn gen(mac_name: &str, type_name: &str, atoms: &[&str]) { - string_cache_codegen::AtomType::new(type_name, &format!("{}!", mac_name)) - .atoms(atoms) - .with_atom_doc( - " -[JsWord] is an interned string. - -This type should be used instead of [String] for values, because lots of -values are duplicated. For example, if an identifier is named `myVariable`, -there will be lots of identifier usages with the value `myVariable`. - -This type - - makes equality comparison faster. - - reduces memory usage. - ", - ) - .write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join(format!("{}.rs", mac_name))) - .unwrap(); -} diff --git a/crates/swc_atoms/src/lib.rs b/crates/swc_atoms/src/lib.rs index c5c7f6936d6d..2b20f40353f4 100644 --- a/crates/swc_atoms/src/lib.rs +++ b/crates/swc_atoms/src/lib.rs @@ -26,7 +26,7 @@ pub use self::{atom as js_word, Atom as JsWord}; #[derive(Clone, Default)] #[cfg_attr(feature = "rkyv-impl", derive(rkyv::bytecheck::CheckBytes))] #[cfg_attr(feature = "rkyv-impl", repr(C))] -pub struct Atom(string_cache::Atom); +pub struct Atom(hstr::Atom); /// Safety: We do not perform slicing of single [Atom] from multiple threads. /// In other words, typically [Atom] is created in a single thread (and in the @@ -39,11 +39,11 @@ unsafe impl Sync for Atom {} impl Atom { /// Creates a new [Atom] from a string. - pub fn new(s: S) -> Self + pub fn new<'i, S>(s: S) -> Self where - S: AsRef, + S: Into>, { - Atom(s.as_ref().into()) + Atom(hstr::Atom::from(s.into())) } #[inline] @@ -52,9 +52,6 @@ impl Atom { } } -/// API wrappers for [tendril]. -impl Atom {} - impl Deref for Atom { type Target = str; @@ -236,9 +233,17 @@ where #[doc(hidden)] pub type CahcedAtom = Lazy; -include!(concat!(env!("OUT_DIR"), "/internal_word.rs")); - /// This should be used as a key for hash maps and hash sets. /// /// This will be replaced with [Atom] in the future. -pub type StaticString = String; +pub type StaticString = Atom; + +#[derive(Default)] +pub struct AtomStore(hstr::AtomStore); + +impl AtomStore { + #[inline] + pub fn atom<'a>(&mut self, s: impl Into>) -> Atom { + Atom(self.0.atom(s)) + } +} diff --git a/crates/swc_css_minifier/src/compressor/color.rs b/crates/swc_css_minifier/src/compressor/color.rs index 7008df7d122d..072992f4b08d 100644 --- a/crates/swc_css_minifier/src/compressor/color.rs +++ b/crates/swc_css_minifier/src/compressor/color.rs @@ -351,12 +351,12 @@ impl Compressor { value, span, .. - })) => match &*value.to_ascii_lowercase() { - "transparent" => { + })) => match value.to_ascii_lowercase() { + ref s if *s == "transparent" => { *color = make_color!(*span, 0.0_f64, 0.0_f64, 0.0_f64, 0.0_f64); } name => { - if let Some(value) = NAMED_COLORS.get(name) { + if let Some(value) = NAMED_COLORS.get(&name) { *color = make_color!( *span, value.rgb[0] as f64, diff --git a/crates/swc_css_prefixer/src/prefixer.rs b/crates/swc_css_prefixer/src/prefixer.rs index 09c81fcd4e58..2cc477aaca03 100644 --- a/crates/swc_css_prefixer/src/prefixer.rs +++ b/crates/swc_css_prefixer/src/prefixer.rs @@ -5,7 +5,7 @@ use std::mem::take; use once_cell::sync::Lazy; use preset_env_base::{query::targets_to_versions, version::Version, BrowserData, Versions}; -use swc_atoms::{JsWord, StaticString}; +use swc_atoms::JsWord; use swc_common::{collections::AHashMap, EqIgnoreSpan, DUMMY_SP}; use swc_css_ast::*; use swc_css_utils::{ @@ -16,9 +16,9 @@ use swc_css_visit::{VisitMut, VisitMutWith}; use crate::options::Options; -static PREFIXES_AND_BROWSERS: Lazy>; 2]>> = +static PREFIXES_AND_BROWSERS: Lazy>; 2]>> = Lazy::new(|| { - let map: AHashMap>; 2]> = + let map: AHashMap>; 2]> = serde_json::from_str(include_str!("../data/prefixes_and_browsers.json")) .expect("failed to parse json"); diff --git a/crates/swc_ecma_parser/src/lexer/jsx.rs b/crates/swc_ecma_parser/src/lexer/jsx.rs index b70acbde1d7a..ebe8fa57eb60 100644 --- a/crates/swc_ecma_parser/src/lexer/jsx.rs +++ b/crates/swc_ecma_parser/src/lexer/jsx.rs @@ -1,5 +1,4 @@ use either::Either; -use swc_atoms::Atom; use super::*; use crate::token::Token; @@ -48,7 +47,7 @@ impl<'a> Lexer<'a> { }); return Ok(Some(Token::JSXText { - raw: Atom::new(out), + raw: self.atoms.borrow_mut().atom(out), })); } '>' => { @@ -323,9 +322,10 @@ impl<'a> Lexer<'a> { raw.push(quote); + let mut b = self.atoms.borrow_mut(); Ok(Token::Str { - value: out.into(), - raw: Atom::new(raw), + value: b.atom(out), + raw: b.atom(raw), }) } @@ -350,7 +350,9 @@ impl<'a> Lexer<'a> { } }); - Ok(Token::JSXName { name: slice.into() }) + Ok(Token::JSXName { + name: self.atoms.borrow_mut().atom(slice), + }) } } diff --git a/crates/swc_ecma_parser/src/lexer/mod.rs b/crates/swc_ecma_parser/src/lexer/mod.rs index 075d892558cb..a37842e220f8 100644 --- a/crates/swc_ecma_parser/src/lexer/mod.rs +++ b/crates/swc_ecma_parser/src/lexer/mod.rs @@ -5,7 +5,7 @@ use std::{cell::RefCell, char, iter::FusedIterator, rc::Rc}; use either::Either::{Left, Right}; use smallvec::{smallvec, SmallVec}; use smartstring::SmartString; -use swc_atoms::Atom; +use swc_atoms::{Atom, AtomStore}; use swc_common::{comments::Comments, input::StringInput, BytePos, Span}; use swc_ecma_ast::{op, AssignOp, EsVersion}; @@ -132,6 +132,8 @@ pub struct Lexer<'a> { module_errors: Rc>>, buf: Rc>, + + atoms: Rc>, } impl FusedIterator for Lexer<'_> {} @@ -157,6 +159,7 @@ impl<'a> Lexer<'a> { errors: Default::default(), module_errors: Default::default(), buf: Rc::new(RefCell::new(String::with_capacity(256))), + atoms: Default::default(), } } @@ -764,8 +767,10 @@ impl<'a> Lexer<'a> { fn read_ident_unknown(&mut self) -> LexResult { debug_assert!(self.cur().is_some()); - let (word, _) = - self.read_word_as_str_with(|s, _, _| Word::Ident(IdentLike::Other(s.into())))?; + let atoms = self.atoms.clone(); + let (word, _) = self.read_word_as_str_with(|s, _, _| { + Word::Ident(IdentLike::Other(atoms.borrow_mut().atom(s))) + })?; Ok(Word(word)) } @@ -778,6 +783,8 @@ impl<'a> Lexer<'a> { ) -> LexResult> { debug_assert!(self.cur().is_some()); + let atoms = self.atoms.clone(); + let start = self.cur_pos(); let (word, has_escape) = self.read_word_as_str_with(|s, _, can_be_known| { if can_be_known { @@ -786,7 +793,7 @@ impl<'a> Lexer<'a> { } } - Word::Ident(IdentLike::Other(s.into())) + Word::Ident(IdentLike::Other(atoms.borrow_mut().atom(s))) })?; // Note: ctx is store in lexer because of this error. @@ -1000,6 +1007,7 @@ impl<'a> Lexer<'a> { self.bump(); // '"' + let atoms = self.atoms.clone(); self.with_buf(|l, out| { while let Some(c) = { // Optimization @@ -1018,9 +1026,10 @@ impl<'a> Lexer<'a> { l.bump(); + let mut b = atoms.borrow_mut(); return Ok(Token::Str { - value: (&**out).into(), - raw: raw.into(), + value: b.atom(&*out), + raw: b.atom(raw), }); } '\\' => { @@ -1052,9 +1061,10 @@ impl<'a> Lexer<'a> { l.emit_error(start, SyntaxError::UnterminatedStrLit); + let mut b = atoms.borrow_mut(); Ok(Token::Str { - value: (&**out).into(), - raw: raw.into(), + value: b.atom(&*out), + raw: b.atom(raw), }) }) } @@ -1074,6 +1084,8 @@ impl<'a> Lexer<'a> { let (mut escaped, mut in_class) = (false, false); + let atoms = self.atoms.clone(); + let content = self.with_buf(|l, buf| { while let Some(c) = l.cur() { // This is ported from babel. @@ -1102,7 +1114,7 @@ impl<'a> Lexer<'a> { buf.push(c); } - Ok(Atom::new(&**buf)) + Ok(atoms.borrow_mut().atom(&**buf)) })?; // input is terminated without following `/` @@ -1122,9 +1134,9 @@ impl<'a> Lexer<'a> { // let flags_start = self.cur_pos(); let flags = { match self.cur() { - Some(c) if c.is_ident_start() => { - self.read_word_as_str_with(|s, _, _| s.into()).map(Some) - } + Some(c) if c.is_ident_start() => self + .read_word_as_str_with(|s, _, _| atoms.borrow_mut().atom(s)) + .map(Some), _ => Ok(None), } }? @@ -1145,7 +1157,7 @@ impl<'a> Lexer<'a> { self.input.bump(); } let s = self.input.uncons_while(|c| !c.is_line_terminator()); - Ok(Some(Atom::new(s))) + Ok(Some(self.atoms.borrow_mut().atom(s))) } fn read_tmpl_token(&mut self, start_of_tpl: BytePos) -> LexResult { @@ -1170,7 +1182,7 @@ impl<'a> Lexer<'a> { // TODO: Handle error return Ok(Token::Template { cooked: cooked.map(Atom::from), - raw: Atom::new(&*raw), + raw: self.atoms.borrow_mut().atom(&*raw), }); } diff --git a/crates/swc_ecma_parser/src/lexer/number.rs b/crates/swc_ecma_parser/src/lexer/number.rs index f063d824a77c..8c2ef403095f 100644 --- a/crates/swc_ecma_parser/src/lexer/number.rs +++ b/crates/swc_ecma_parser/src/lexer/number.rs @@ -62,7 +62,10 @@ impl<'a> Lexer<'a> { if self.eat(b'n') { raw.push('n'); - return Ok(Either::Right((Box::new(s.into_value()), (&*raw).into()))); + return Ok(Either::Right(( + Box::new(s.into_value()), + self.atoms.borrow_mut().atom(&*raw), + ))); } write!(raw_val, "{}", &s.value).unwrap(); @@ -81,9 +84,9 @@ impl<'a> Lexer<'a> { // e.g. `000` is octal if start.0 != self.last_pos().0 - 1 { // `-1` is utf 8 length of `0` - return self - .make_legacy_octal(start, 0f64) - .map(|value| Either::Left((value, (&*raw).into()))); + return self.make_legacy_octal(start, 0f64).map(|value| { + Either::Left((value, self.atoms.borrow_mut().atom(&*raw))) + }); } } else { // strict mode hates non-zero decimals starting with zero. @@ -110,9 +113,9 @@ impl<'a> Lexer<'a> { panic!("failed to parse {} into float using BigInt", val_str) }); - return self - .make_legacy_octal(start, val) - .map(|value| Either::Left((value, (&*raw).into()))); + return self.make_legacy_octal(start, val).map(|value| { + Either::Left((value, self.atoms.borrow_mut().atom(&*raw))) + }); } } } @@ -224,7 +227,7 @@ impl<'a> Lexer<'a> { self.ensure_not_ident()?; - Ok(Either::Left((val, (&*raw_str).into()))) + Ok(Either::Left((val, self.atoms.borrow_mut().atom(&*raw_str)))) } /// Returns `Left(value)` or `Right(BigInt)` @@ -238,6 +241,7 @@ impl<'a> Lexer<'a> { ); debug_assert_eq!(self.cur(), Some('0')); + let atoms = self.atoms.clone(); self.with_buf(|l, buf| { l.bump(); @@ -263,12 +267,15 @@ impl<'a> Lexer<'a> { if l.eat(b'n') { buf.push('n'); - return Ok(Either::Right((Box::new(s.into_value()), (&**buf).into()))); + return Ok(Either::Right(( + Box::new(s.into_value()), + atoms.borrow_mut().atom(&**buf), + ))); } l.ensure_not_ident()?; - Ok(Either::Left((val, (&**buf).into()))) + Ok(Either::Left((val, atoms.borrow_mut().atom(&**buf)))) }) } diff --git a/crates/swc_ecma_parser/src/lexer/util.rs b/crates/swc_ecma_parser/src/lexer/util.rs index e765e8402b35..6e5b515d4be7 100644 --- a/crates/swc_ecma_parser/src/lexer/util.rs +++ b/crates/swc_ecma_parser/src/lexer/util.rs @@ -257,7 +257,7 @@ impl<'a> Lexer<'a> { let cmt = Comment { kind: CommentKind::Line, span: Span::new(start, end, SyntaxContext::empty()), - text: s.into(), + text: self.atoms.borrow_mut().atom(s), }; if is_for_next { @@ -343,7 +343,7 @@ impl<'a> Lexer<'a> { let cmt = Comment { kind: CommentKind::Block, span: Span::new(start, end, SyntaxContext::empty()), - text: s.into(), + text: self.atoms.borrow_mut().atom(s), }; let _ = self.input.peek(); diff --git a/crates/swc_ecma_parser/src/token.rs b/crates/swc_ecma_parser/src/token.rs index c46cb192b972..5a23967f9a8a 100644 --- a/crates/swc_ecma_parser/src/token.rs +++ b/crates/swc_ecma_parser/src/token.rs @@ -7,7 +7,7 @@ use std::{ }; use num_bigint::BigInt as BigIntValue; -use swc_atoms::{atom, Atom, JsWord}; +use swc_atoms::{atom, Atom, AtomStore, JsWord}; use swc_common::{Span, Spanned}; use swc_ecma_ast::{AssignOp, BinaryOp}; @@ -504,38 +504,25 @@ pub enum IdentLike { Other(JsWord), } -impl Word { - pub(crate) fn kind(&self) -> WordKind { - match self { - Word::Keyword(k) => WordKind::Keyword(*k), - Word::Null => WordKind::Null, - Word::True => WordKind::True, - Word::False => WordKind::False, - Word::Ident(IdentLike::Known(i)) => WordKind::Ident(IdentKind::Known(*i)), - Word::Ident(IdentLike::Other(..)) => WordKind::Ident(IdentKind::Other), - } +impl From<&'_ str> for IdentLike { + fn from(s: &str) -> Self { + s.parse::() + .map(Self::Known) + .unwrap_or_else(|_| Self::Other(s.into())) } } -impl WordKind { - pub(crate) const fn before_expr(self) -> bool { - match self { - Self::Keyword(k) => k.before_expr(), - _ => false, - } - } - - pub(crate) const fn starts_expr(self) -> bool { - match self { - Self::Keyword(k) => k.starts_expr(), - _ => true, - } +impl IdentLike { + pub(crate) fn from_str(atoms: &mut AtomStore, s: &str) -> IdentLike { + s.parse::() + .map(Self::Known) + .unwrap_or_else(|_| Self::Other(atoms.atom(s))) } } -impl From<&'_ str> for Word { - fn from(i: &str) -> Self { - match i { +impl Word { + pub fn from_str(atoms: &mut AtomStore, s: &str) -> Self { + match s { "null" => Word::Null, "true" => Word::True, "false" => Word::False, @@ -574,17 +561,35 @@ impl From<&'_ str> for Word { "typeof" => TypeOf.into(), "void" => Void.into(), "delete" => Delete.into(), - _ => Word::Ident(i.into()), + _ => Word::Ident(IdentLike::from_str(atoms, s)), + } + } + + pub(crate) fn kind(&self) -> WordKind { + match self { + Word::Keyword(k) => WordKind::Keyword(*k), + Word::Null => WordKind::Null, + Word::True => WordKind::True, + Word::False => WordKind::False, + Word::Ident(IdentLike::Known(i)) => WordKind::Ident(IdentKind::Known(*i)), + Word::Ident(IdentLike::Other(..)) => WordKind::Ident(IdentKind::Other), } } } -impl From<&'_ str> for IdentLike { - #[inline] - fn from(s: &str) -> Self { - s.parse::() - .map(Self::Known) - .unwrap_or_else(|_| Self::Other(s.into())) +impl WordKind { + pub(crate) const fn before_expr(self) -> bool { + match self { + Self::Keyword(k) => k.before_expr(), + _ => false, + } + } + + pub(crate) const fn starts_expr(self) -> bool { + match self { + Self::Keyword(k) => k.starts_expr(), + _ => true, + } } }