Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf(es/lexer): Use jump table for read_token #7058

Merged
merged 25 commits into from
Mar 11, 2023
259 changes: 90 additions & 169 deletions crates/swc_ecma_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@ use swc_atoms::{Atom, AtomGenerator};
use swc_common::{comments::Comments, input::StringInput, BytePos, Span};
use swc_ecma_ast::{op, EsVersion};

use self::{comments_buffer::CommentsBuffer, state::State, util::*};
use self::{
comments_buffer::CommentsBuffer,
state::State,
table::{ByteHandler, BYTE_HANDLERS},
util::*,
};
pub use self::{
input::Input,
state::{TokenContext, TokenContexts},
Expand All @@ -25,6 +30,7 @@ pub mod input;
mod jsx;
mod number;
mod state;
mod table;
#[cfg(test)]
mod tests;
pub mod util;
Expand Down Expand Up @@ -161,178 +167,24 @@ impl<'a> Lexer<'a> {

/// babel: `getTokenFromCode`
fn read_token(&mut self) -> LexResult<Option<Token>> {
let c = self.input.cur_as_ascii();

match c {
None => {}
Some(c) => {
match c {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm curious: Why didn't the Rust compiler make a switch table for this match?

b'#' => return self.read_token_number_sign(),

//
b'.' => return self.read_token_dot().map(Some),

b'(' | b')' | b';' | b',' | b'[' | b']' | b'{' | b'}' | b'@' | b'`' | b'~' => {
// These tokens are emitted directly.
self.input.bump();
return Ok(Some(match c {
b'(' => LParen,
b')' => RParen,
b';' => Semi,
b',' => Comma,
b'[' => LBracket,
b']' => RBracket,
b'{' => LBrace,
b'}' => RBrace,
b'@' => At,
b'`' => tok!('`'),
b'~' => tok!('~'),

_ => unreachable!(),
}));
}

b'?' => return self.read_token_question_mark().map(Some),

b':' => return self.read_token_colon().map(Some),

b'0' => return self.read_token_zero().map(Some),

b'1'..=b'9' => {
return self
.read_number(false)
.map(|v| match v {
Left((value, raw)) => Num { value, raw },
Right((value, raw)) => BigInt { value, raw },
})
.map(Some);
}

b'"' | b'\'' => return self.read_str_lit().map(Some),

b'/' => return self.read_slash(),

b'%' | b'*' => return self.read_token_mul_mod(c).map(Some),

// Logical operators
b'|' | b'&' => return self.read_token_logical(c).map(Some),
b'^' => {
// Bitwise xor
self.input.bump();
return Ok(Some(if self.input.cur() == Some('=') {
self.input.bump();
AssignOp(BitXorAssign)
} else {
BinOp(BitXor)
}));
}

b'+' | b'-' => {
let start = self.cur_pos();

self.input.bump();

// '++', '--'
return Ok(Some(if self.input.cur() == Some(c as char) {
self.input.bump();

// Handle -->
if self.state.had_line_break && c == b'-' && self.eat(b'>') {
self.emit_module_mode_error(
start,
SyntaxError::LegacyCommentInModule,
);
self.skip_line_comment(0);
self.skip_space::<true>()?;
return self.read_token();
}

if c == b'+' {
PlusPlus
} else {
MinusMinus
}
} else if self.input.eat_byte(b'=') {
AssignOp(if c == b'+' { AddAssign } else { SubAssign })
} else {
BinOp(if c == b'+' { Add } else { Sub })
}));
}

b'<' | b'>' => return self.read_token_lt_gt(),

b'!' | b'=' => {
let start = self.cur_pos();
let had_line_break_before_last = self.had_line_break_before_last();

self.input.bump();

return Ok(Some(if self.input.eat_byte(b'=') {
// "=="

if self.input.eat_byte(b'=') {
if c == b'!' {
BinOp(NotEqEq)
} else {
// =======
// ^
if had_line_break_before_last && self.is_str("====") {
self.emit_error_span(
fixed_len_span(start, 7),
SyntaxError::TS1185,
);
self.skip_line_comment(4);
self.skip_space::<true>()?;
return self.read_token();
}

BinOp(EqEqEq)
}
} else if c == b'!' {
BinOp(NotEq)
} else {
BinOp(EqEq)
}
} else if c == b'=' && self.input.eat_byte(b'>') {
// "=>"

Arrow
} else if c == b'!' {
Bang
} else {
AssignOp(Assign)
}));
}
let byte = match self.input.as_str().as_bytes().first() {
Some(&v) => v,
None => return Ok(None),
};

b'a'..=b'z' | b'A'..=b'Z' | b'$' | b'_' | b'\\' => {
// Fast path for ascii identifiers.
return self.read_ident_or_keyword().map(Some);
}
_ => {}
}
}
}
let handler = unsafe { *(&BYTE_HANDLERS as *const ByteHandler).offset(byte as isize) };

let c = match self.input.cur() {
Some(c) => c,
match handler {
Some(handler) => handler(self),
None => {
return Ok(None);
}
};

let token = {
// Identifier or keyword. '\uXXXX' sequences are allowed in
// identifiers, so '\' also dispatches to that.
if c == '\\' || c.is_ident_start() {
return self.read_ident_or_keyword().map(Some);
let start = self.cur_pos();
self.input.bump_bytes(1);
self.error_span(
pos_span(start),
SyntaxError::UnexpectedChar { c: byte as _ },
)
}

let start = self.cur_pos();
self.input.bump();
self.error_span(pos_span(start), SyntaxError::UnexpectedChar { c })?
};

Ok(Some(token))
}
}

/// `#`
Expand Down Expand Up @@ -695,6 +547,75 @@ impl<'a> Lexer<'a> {

Ok(Some(vec![c.into()]))
}

/// Lexes a token that begins with `+` or `-`.
///
/// `c` is the byte at the current position. Callers are expected to pass
/// `b'+'` or `b'-'`; any byte other than `b'+'` is treated as the minus
/// family when choosing the resulting token.
///
/// Produces one of:
/// - `PlusPlus` / `MinusMinus` when the same character follows,
/// - `AssignOp(AddAssign)` / `AssignOp(SubAssign)` when `=` follows,
/// - `BinOp(Add)` / `BinOp(Sub)` otherwise.
///
/// When `--` is followed by `>` and `self.state.had_line_break` is set, the
/// sequence is handled as a legacy `-->` comment: a module-mode error is
/// emitted, the comment and following whitespace are skipped, and the next
/// token is returned instead.
fn read_token_plus_minus(&mut self, c: u8) -> LexResult<Option<Token>> {
    let token_start = self.cur_pos();
    let is_plus = c == b'+';

    // Consume the leading `+` / `-`.
    self.input.bump();

    // Not a doubled operator: either a compound assignment or the plain
    // binary operator.
    if self.input.cur() != Some(c as char) {
        let token = if self.input.eat_byte(b'=') {
            AssignOp(if is_plus { AddAssign } else { SubAssign })
        } else {
            BinOp(if is_plus { Add } else { Sub })
        };
        return Ok(Some(token));
    }

    // Consume the second `+` / `-`.
    self.input.bump();

    // `-->` after a line break. The `eat` call must stay last in the
    // condition because it consumes the `>` only when everything before it
    // already holds.
    if self.state.had_line_break && c == b'-' && self.eat(b'>') {
        self.emit_module_mode_error(token_start, SyntaxError::LegacyCommentInModule);
        self.skip_line_comment(0);
        self.skip_space::<true>()?;
        return self.read_token();
    }

    Ok(Some(if is_plus { PlusPlus } else { MinusMinus }))
}

/// Lexes a token that begins with `!` or `=`.
///
/// `c` is the byte at the current position. Callers are expected to pass
/// `b'!'` or `b'='`.
///
/// Produces one of:
/// - `BinOp(NotEqEq)` / `BinOp(EqEqEq)` for `!==` / `===`,
/// - `BinOp(NotEq)` / `BinOp(EqEq)` for `!=` / `==`,
/// - `Arrow` for `=>`,
/// - `Bang` for a lone `!`,
/// - `AssignOp(Assign)` for a lone `=`.
///
/// When `===` is immediately followed by `====` and
/// `had_line_break_before_last()` reported true, `SyntaxError::TS1185` is
/// emitted over a 7-byte span starting at the first `=`, the rest of the
/// line and following whitespace are skipped, and the next token is
/// returned instead.
/// NOTE(review): this looks like handling for a `=======` merge-conflict
/// marker line — confirm against the definition of `TS1185`.
fn read_token_bang_or_eq(&mut self, c: u8) -> LexResult<Option<Token>> {
    let token_start = self.cur_pos();
    // Sampled before consuming anything, as the check below needs the state
    // that preceded this token.
    let line_break_before_last = self.had_line_break_before_last();
    let is_bang = c == b'!';

    // Consume the leading `!` / `=`.
    self.input.bump();

    // No `=` follows: `=>`, `!`, or plain `=`.
    if !self.input.eat_byte(b'=') {
        let token = if c == b'=' && self.input.eat_byte(b'>') {
            Arrow
        } else if is_bang {
            Bang
        } else {
            AssignOp(Assign)
        };
        return Ok(Some(token));
    }

    // Exactly one `=` follows: `!=` or `==`.
    if !self.input.eat_byte(b'=') {
        return Ok(Some(BinOp(if is_bang { NotEq } else { EqEq })));
    }

    // Two `=` follow.
    if is_bang {
        return Ok(Some(BinOp(NotEqEq)));
    }

    // `===` consumed so far; four more `=` make a full `=======` line.
    if line_break_before_last && self.is_str("====") {
        self.emit_error_span(fixed_len_span(token_start, 7), SyntaxError::TS1185);
        self.skip_line_comment(4);
        self.skip_space::<true>()?;
        return self.read_token();
    }

    Ok(Some(BinOp(EqEqEq)))
}
}

impl<'a> Lexer<'a> {
Expand Down