Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf(es/lexer): Use jump table for skip_space #7073

Merged
merged 15 commits into from
Mar 13, 2023
Merged
2 changes: 1 addition & 1 deletion crates/swc_ecma_parser/scripts/instrument/bench.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ set -eu
export RUST_LOG=off
export MIMALLOC_SHOW_STATS=1

cargo profile instruments --release -t time --features tracing/release_max_level_info --features swc_common/concurrent --features swc_common/parking_lot --bench parser -- --bench --color
# Extra arguments given to this script are appended to the benchmark
# invocation. "$@" is quoted so that arguments containing spaces or glob
# characters are forwarded unchanged instead of being re-split by the shell.
cargo profile instruments --release -t time --features tracing/release_max_level_info --features swc_common/concurrent --features swc_common/parking_lot --bench parser -- --bench --color "$@"
1 change: 1 addition & 0 deletions crates/swc_ecma_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ mod table;
#[cfg(test)]
mod tests;
pub mod util;
mod whitespace;

pub(crate) type LexResult<T> = Result<T, Error>;

Expand Down
51 changes: 19 additions & 32 deletions crates/swc_ecma_parser/src/lexer/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@ use swc_common::{
use swc_ecma_ast::Ident;
use tracing::warn;

use super::{comments_buffer::BufferedComment, input::Input, Char, LexResult, Lexer};
use super::{
comments_buffer::BufferedComment, input::Input, whitespace::SkipWhitespace, Char, LexResult,
Lexer,
};
use crate::{
error::{Error, SyntaxError},
lexer::comments_buffer::BufferedCommentKind,
Expand Down Expand Up @@ -184,18 +187,20 @@ impl<'a> Lexer<'a> {
/// See https://tc39.github.io/ecma262/#sec-white-space
pub(super) fn skip_space<const LEX_COMMENTS: bool>(&mut self) -> LexResult<()> {
loop {
let cur_b = self.input.cur_as_ascii();
let (offset, newline) = {
let mut skip = SkipWhitespace {
input: self.input.as_str(),
newline: false,
offset: 0,
};

if matches!(cur_b, Some(b'\n' | b'\r')) {
self.input.bump();
self.state.had_line_break = true;
continue;
}
skip.scan();

if matches!(cur_b, Some(b'\x09' | b'\x0b' | b'\x0c' | b'\x20' | b'\xa0')) {
self.input.bump();
continue;
}
(skip.offset, skip.newline)
};

self.input.bump_bytes(offset);
self.state.had_line_break |= newline;

if LEX_COMMENTS && self.input.is_byte(b'/') {
if self.peek() == Some('/') {
Expand All @@ -205,34 +210,15 @@ impl<'a> Lexer<'a> {
self.skip_block_comment()?;
continue;
}
break;
}

let c = self.cur();
let c = match c {
Some(v) => v,
None => break,
};

match c {
// white spaces
'\u{feff}' => {}
// line breaks
'\u{2028}' | '\u{2029}' => {
self.state.had_line_break = true;
}

_ if c.is_whitespace() => {}

_ => break,
}

self.bump();
break;
}

Ok(())
}

#[inline(never)]
pub(super) fn skip_line_comment(&mut self, start_skip: usize) {
let start = self.cur_pos();
self.input.bump_bytes(start_skip);
Expand Down Expand Up @@ -282,6 +268,7 @@ impl<'a> Lexer<'a> {
}

/// Expects current char to be '/' and next char to be '*'.
#[inline(never)]
pub(super) fn skip_block_comment(&mut self) -> LexResult<()> {
let start = self.cur_pos();

Expand Down
100 changes: 100 additions & 0 deletions crates/swc_ecma_parser/src/lexer/whitespace.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/// Handler invoked for a single byte of input while skipping whitespace.
///
/// `None` means the byte does not start whitespace and the scan stops.
/// `Some(f)` is called with the scanner state and returns the number of bytes
/// to advance by; a return value of `0` also stops the scan.
pub(super) type ByteHandler = Option<for<'aa> fn(&mut SkipWhitespace<'aa>) -> usize>;

/// Lookup table mapping every possible byte value to its whitespace handler.
///
/// Each row is the high nibble of the byte and each column the low nibble.
/// `SPC` covers 0x09 (tab), 0x0B (vertical tab), 0x0C (form feed) and 0x20
/// (space); `NLN` covers 0x0A (LF) and 0x0D (CR); every byte >= 0x80 goes to
/// `UNI`, which decodes the whole character; all remaining bytes are `___`
/// and stop the scan.
static BYTE_HANDLERS: [ByteHandler; 256] = [
//   0    1    2    3    4    5    6    7    8    9    A    B    C    D    E    F   //
    ___, ___, ___, ___, ___, ___, ___, ___, ___, SPC, NLN, SPC, SPC, NLN, ___, ___, // 0
    ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 1
    SPC, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 2
    ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 3
    ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 4
    ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 5
    ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 6
    ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 7
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // 8
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // 9
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // A
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // B
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // C
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // D
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // E
    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // F
];

/// Stop: no handler is registered for this byte, so the scan ends in front
/// of it without consuming anything.
const ___: ByteHandler = None;

/// Line terminator byte: records that a newline was seen and consumes the
/// single byte.
const NLN: ByteHandler = Some(|state| {
    state.newline = true;
    1
});

/// Single-byte whitespace: consumes one byte and leaves the scanner state
/// untouched.
const SPC: ByteHandler = Some(|_state| 1);

/// Non-ASCII byte: decodes the character that starts at the current offset.
///
/// Returns the character's UTF-8 length when it is U+FEFF, a line or
/// paragraph separator (U+2028 / U+2029, which also set `newline`), or any
/// other `char::is_whitespace` character. Returns `0` for everything else so
/// that the scan stops in front of it.
const UNI: ByteHandler = Some(|state| {
    // SAFETY: handlers run only after `scan` found a byte at `state.offset`,
    // so the offset is inside `input`. It is assumed to lie on a char boundary
    // because the offset only ever advances by whole characters.
    // NOTE(review): relies on callers starting the scan at a char boundary.
    let rest = unsafe { state.input.get_unchecked(state.offset..) };

    // SAFETY: `rest` begins at the byte that selected this handler, so it is
    // non-empty and yields at least one character.
    let ch = unsafe { rest.chars().next().unwrap_unchecked() };

    if matches!(ch, '\u{2028}' | '\u{2029}') {
        // Line separator / paragraph separator count as line breaks.
        state.newline = true;
    } else if ch != '\u{feff}' && !ch.is_whitespace() {
        // Neither a BOM nor Unicode whitespace: stop without consuming.
        return 0;
    }

    ch.len_utf8()
});

/// State for a whitespace scan over a string slice.
///
/// API is taken from oxc by Boshen (https://github.com/Boshen/oxc/pull/26)
pub(super) struct SkipWhitespace<'a> {
    /// Text being scanned.
    pub input: &'a str,

    /// Total offset, in bytes, that `scan` has advanced into `input`.
    pub offset: usize,

    /// Found newline: set to `true` by the handlers once a line terminator
    /// has been skipped.
    pub newline: bool,
}

impl SkipWhitespace<'_> {
    /// Advances `offset` past the run of whitespace that starts at the
    /// current `offset` in `input`.
    ///
    /// Each byte is dispatched through `BYTE_HANDLERS`. The scan stops at the
    /// end of the input, at a byte that has no handler, or when a handler
    /// reports that it consumed `0` bytes. Handlers set `newline` when they
    /// skip a line terminator.
    #[inline(always)]
    pub fn scan(&mut self) {
        while let Some(&byte) = self.input.as_bytes().get(self.offset) {
            // A `u8` can only index entries 0..=255 of the 256-entry table,
            // so plain indexing is always in bounds; the compiler can elide
            // the bounds check and no `unsafe` pointer arithmetic is needed.
            let handler = match BYTE_HANDLERS[usize::from(byte)] {
                Some(handler) => handler,
                // Not whitespace: leave `offset` pointing at this byte.
                None => return,
            };

            let delta = handler(self);
            if delta == 0 {
                // The handler decoded a non-whitespace character.
                return;
            }
            self.offset += delta;
        }
    }
}