Skip to content

Commit

Permalink
perf(es/parser): Use smarter lookup table for lexer (#8226)
Browse files Browse the repository at this point in the history
**Description:**

`phf` is slower than expected.
  • Loading branch information
kdy1 committed Nov 6, 2023
1 parent f00238d commit d4ae44a
Show file tree
Hide file tree
Showing 3 changed files with 303 additions and 74 deletions.
44 changes: 32 additions & 12 deletions crates/swc_ecma_ast/src/ident.rs
Expand Up @@ -151,6 +151,12 @@ impl From<Ident> for Id {
}
}

#[repr(C, align(64))]
struct Align64<T>(pub(crate) T);

const T: bool = true;
const F: bool = false;

impl Ident {
/// In `op`, [EqIgnoreSpan] of [Ident] will ignore the syntax context.
pub fn within_ignored_ctxt<F, Ret>(op: F) -> Ret
Expand All @@ -175,26 +181,40 @@ impl Ident {
/// Returns true if `c` is a valid character for an identifier start.
#[inline]
pub fn is_valid_start(c: char) -> bool {
c == '$' || c == '_' || c.is_ascii_alphabetic() || {
if c.is_ascii() {
false
} else {
UnicodeID::is_id_start(c)
}
// This contains `$` (36) and `_` (95)
const ASCII_START: Align64<[bool; 128]> = Align64([
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
F, F, F, F, F, F, F, T, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
F, F, F, F, F, F, F, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,
T, T, T, T, F, F, F, F, T, F, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,
T, T, T, T, T, T, T, F, F, F, F, F,
]);

if c.is_ascii() {
return ASCII_START.0[c as usize];
}

UnicodeID::is_id_start(c)
}

/// Returns true if `c` is a valid character for an identifier part after
/// start.
#[inline]
pub fn is_valid_continue(c: char) -> bool {
c == '$' || c == '_' || c == '\u{200c}' || c == '\u{200d}' || c.is_ascii_alphanumeric() || {
if c.is_ascii() {
false
} else {
UnicodeID::is_id_continue(c)
}
// This contains `$` (36)
const ASCII_CONTINUE: Align64<[bool; 128]> = Align64([
F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
F, F, F, F, F, F, F, T, F, F, F, F, F, F, F, F, F, F, F, T, T, T, T, T, T, T, T, T, T,
F, F, F, F, F, F, F, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,
T, T, T, T, F, F, F, F, T, F, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,
T, T, T, T, T, T, T, F, F, F, F, F,
]);

if c.is_ascii() {
return ASCII_CONTINUE.0[c as usize];
}

UnicodeID::is_id_continue(c) || c == '\u{200c}' || c == '\u{200d}'
}

/// Alternative for `toIdentifier` of babel.
Expand Down
93 changes: 40 additions & 53 deletions crates/swc_ecma_parser/src/lexer/mod.rs
Expand Up @@ -21,7 +21,7 @@ pub use self::{
};
use crate::{
error::{Error, SyntaxError},
token::{BinOpToken, Keyword, Token, Word},
token::{BinOpToken, IdentLike, Token, Word},
Context, Syntax,
};

Expand Down Expand Up @@ -759,59 +759,34 @@ impl<'a> Lexer<'a> {
Ok(Some(token))
}

/// See https://tc39.github.io/ecma262/#sec-names-and-keywords
fn read_ident_or_keyword(&mut self) -> LexResult<Token> {
static KNOWN_WORDS: phf::Map<&str, Word> = phf::phf_map! {
"await" => Word::Keyword(Keyword::Await),
"break" => Word::Keyword(Keyword::Break),
"case" => Word::Keyword(Keyword::Case),
"catch" => Word::Keyword(Keyword::Catch),
"class" => Word::Keyword(Keyword::Class),
"const" => Word::Keyword(Keyword::Const),
"continue" => Word::Keyword(Keyword::Continue),
"debugger" => Word::Keyword(Keyword::Debugger),
"default" => Word::Keyword(Keyword::Default_),
"delete" => Word::Keyword(Keyword::Delete),
"do" => Word::Keyword(Keyword::Do),
"else" => Word::Keyword(Keyword::Else),
"export" => Word::Keyword(Keyword::Export),
"extends" => Word::Keyword(Keyword::Extends),
"false" => Word::False,
"finally" => Word::Keyword(Keyword::Finally),
"for" => Word::Keyword(Keyword::For),
"function" => Word::Keyword(Keyword::Function),
"if" => Word::Keyword(Keyword::If),
"import" => Word::Keyword(Keyword::Import),
"in" => Word::Keyword(Keyword::In),
"instanceof" => Word::Keyword(Keyword::InstanceOf),
"let" => Word::Keyword(Keyword::Let),
"new" => Word::Keyword(Keyword::New),
"null" => Word::Null,
"return" => Word::Keyword(Keyword::Return),
"super" => Word::Keyword(Keyword::Super),
"switch" => Word::Keyword(Keyword::Switch),
"this" => Word::Keyword(Keyword::This),
"throw" => Word::Keyword(Keyword::Throw),
"true" => Word::True,
"try" => Word::Keyword(Keyword::Try),
"typeof" => Word::Keyword(Keyword::TypeOf),
"var" => Word::Keyword(Keyword::Var),
"void" => Word::Keyword(Keyword::Void),
"while" => Word::Keyword(Keyword::While),
"with" => Word::Keyword(Keyword::With),
"yield" => Word::Keyword(Keyword::Yield),
/// This can be used if there's no keyword starting with the first
/// character.
fn read_ident_unknown(&mut self) -> LexResult<Token> {
debug_assert!(self.cur().is_some());

};
let (word, _) =
self.read_word_as_str_with(|s, _, _| Word::Ident(IdentLike::Other(s.into())))?;

Ok(Word(word))
}

/// This can be used if there's no keyword starting with the first
/// character.
fn read_word_with(
&mut self,
convert: impl FnOnce(&str) -> Option<Word>,
) -> LexResult<Option<Token>> {
debug_assert!(self.cur().is_some());
let start = self.cur_pos();

let (word, has_escape) = self.read_word_as_str_with(|s| {
if let Some(word) = KNOWN_WORDS.get(s) {
return word.clone();
let start = self.cur_pos();
let (word, has_escape) = self.read_word_as_str_with(|s, _, can_be_known| {
if can_be_known {
if let Some(word) = convert(s) {
return word;
}
}

Word::Ident(s.into())
Word::Ident(IdentLike::Other(s.into()))
})?;

// Note: ctx is store in lexer because of this error.
Expand All @@ -824,25 +799,35 @@ impl<'a> Lexer<'a> {
SyntaxError::EscapeInReservedWord { word: word.into() },
)?
} else {
Ok(Word(word))
Ok(Some(Token::Word(word)))
}
}

/// This method is optimized for texts without escape sequences.
///
/// `convert(text, has_escape, can_be_keyword)`
fn read_word_as_str_with<F, Ret>(&mut self, convert: F) -> LexResult<(Ret, bool)>
where
F: FnOnce(&str) -> Ret,
F: FnOnce(&str, bool, bool) -> Ret,
{
debug_assert!(self.cur().is_some());
let mut first = true;
let mut can_be_keyword = true;

self.with_buf(|l, buf| {
let mut has_escape = false;

while let Some(c) = {
// Optimization
{
let s = l.input.uncons_while(|c| c.is_ident_part());
let s = l.input.uncons_while(|c| {
// Performance optimization
if c.is_ascii_uppercase() || c.is_ascii_digit() || !c.is_ascii() {
can_be_keyword = false;
}

c.is_ident_part()
});
if !s.is_empty() {
first = false;
}
Expand Down Expand Up @@ -892,7 +877,7 @@ impl<'a> Lexer<'a> {
}
first = false;
}
let value = convert(buf);
let value = convert(buf, has_escape, can_be_keyword);

Ok((value, has_escape))
})
Expand Down Expand Up @@ -1137,7 +1122,9 @@ impl<'a> Lexer<'a> {
// let flags_start = self.cur_pos();
let flags = {
match self.cur() {
Some(c) if c.is_ident_start() => self.read_word_as_str_with(|s| s.into()).map(Some),
Some(c) if c.is_ident_start() => {
self.read_word_as_str_with(|s, _, _| s.into()).map(Some)
}
_ => Ok(None),
}
}?
Expand Down

0 comments on commit d4ae44a

Please sign in to comment.