From 4986151dda8c16d592cd2d3148aa1cc4a722ff3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Donny/=EA=B0=95=EB=8F=99=EC=9C=A4?= Date: Sat, 11 Mar 2023 21:24:06 +0900 Subject: [PATCH 01/15] Add whitespace module --- crates/swc_ecma_parser/src/lexer/mod.rs | 1 + crates/swc_ecma_parser/src/lexer/whitespace.rs | 1 + 2 files changed, 2 insertions(+) create mode 100644 crates/swc_ecma_parser/src/lexer/whitespace.rs diff --git a/crates/swc_ecma_parser/src/lexer/mod.rs b/crates/swc_ecma_parser/src/lexer/mod.rs index 05b855f18a2b..06e8cefa1a7c 100644 --- a/crates/swc_ecma_parser/src/lexer/mod.rs +++ b/crates/swc_ecma_parser/src/lexer/mod.rs @@ -34,6 +34,7 @@ mod table; #[cfg(test)] mod tests; pub mod util; +mod whitespace; pub(crate) type LexResult = Result; diff --git a/crates/swc_ecma_parser/src/lexer/whitespace.rs b/crates/swc_ecma_parser/src/lexer/whitespace.rs new file mode 100644 index 000000000000..8b137891791f --- /dev/null +++ b/crates/swc_ecma_parser/src/lexer/whitespace.rs @@ -0,0 +1 @@ + From 1c209b661792c378e8553662a63f5da75a200a12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Donny/=EA=B0=95=EB=8F=99=EC=9C=A4?= Date: Sat, 11 Mar 2023 21:44:50 +0900 Subject: [PATCH 02/15] Use lookup table --- crates/swc_ecma_parser/src/lexer/util.rs | 48 +++---- .../swc_ecma_parser/src/lexer/whitespace.rs | 121 ++++++++++++++++++ 2 files changed, 137 insertions(+), 32 deletions(-) diff --git a/crates/swc_ecma_parser/src/lexer/util.rs b/crates/swc_ecma_parser/src/lexer/util.rs index 7365d4b72432..d5585beb430d 100644 --- a/crates/swc_ecma_parser/src/lexer/util.rs +++ b/crates/swc_ecma_parser/src/lexer/util.rs @@ -12,7 +12,10 @@ use swc_common::{ use swc_ecma_ast::Ident; use tracing::warn; -use super::{comments_buffer::BufferedComment, input::Input, Char, LexResult, Lexer}; +use super::{ + comments_buffer::BufferedComment, input::Input, whitespace::SkipWhitespace, Char, LexResult, + Lexer, +}; use crate::{ error::{Error, SyntaxError}, lexer::comments_buffer::BufferedCommentKind, @@ -184,18 +187,20 @@ impl<'a> Lexer<'a> { /// See https://tc39.github.io/ecma262/#sec-white-space pub(super) fn skip_space(&mut self) -> LexResult<()> { loop { - let cur_b = self.input.cur_as_ascii(); + let (offset, newline) = { + let mut skip = SkipWhitespace { + input: self.input.as_str(), + newline: false, + offset: 0, + }; - if matches!(cur_b, Some(b'\n' | b'\r')) { - self.input.bump(); - self.state.had_line_break = true; - continue; - } + skip.scan(); - if matches!(cur_b, Some(b'\x09' | b'\x0b' | b'\x0c' | b'\x20' | b'\xa0')) { - self.input.bump(); - continue; - } + (skip.offset, skip.newline) + }; + + self.input.bump_bytes(offset); + self.state.had_line_break |= newline; if LEX_COMMENTS && self.input.is_byte(b'/') { if self.peek() == Some('/') { @@ -207,27 +212,6 @@ impl<'a> Lexer<'a> { } break; } - - let c = self.cur(); - let c = match c { - Some(v) => v, - None => break, - }; - - match c { - // white spaces - '\u{feff}' => {} - // line breaks - '\u{2028}' | '\u{2029}' => { - self.state.had_line_break = true; - } - - _ if c.is_whitespace() => {} - - _ => break, - } - - self.bump(); } Ok(()) diff --git a/crates/swc_ecma_parser/src/lexer/whitespace.rs b/crates/swc_ecma_parser/src/lexer/whitespace.rs index 8b137891791f..85f1633ef06d 100644 --- a/crates/swc_ecma_parser/src/lexer/whitespace.rs +++ b/crates/swc_ecma_parser/src/lexer/whitespace.rs @@ -1 +1,122 @@ +/// Returns true if it's done +pub(super) type ByteHandler = Option fn(&mut SkipWhitespace<'aa>) -> bool>; +/// Lookup table for whitespace +static BYTE_HANDLERS: [ByteHandler; 256] = [ + // 0 1 2 3 4 5 6 7 8 9 A B C D E F // + ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, SPC, ___, SPC, ___, ___, ___, // 0 + ___, NLN, ___, ___, NLN, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 1 + ___, SPC, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 2 + ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 3 + ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 4 + ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 5 + ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 6 + ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 7 + UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // 8 + UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // 9 + UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // A + UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // B + UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // C + UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // D + UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // E + UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // F +]; + +/// Stop +const ___: ByteHandler = None; + +/// Newline +const NLN: ByteHandler = Some(|skip| { + skip.offset += 1; + skip.newline = true; + + false +}); + +/// Space +const SPC: ByteHandler = Some(|skip| { + skip.offset += 1; + + false +}); + +/// Unicode +const UNI: ByteHandler = Some(|skip| { + let s = unsafe { + // Safety: `skip.offset` is always valid + skip.input.get_unchecked(skip.offset..) + }; + + let (len, c) = unsafe { + // Safety: Byte handlers are called only when `skip.input` is not empty + s.char_indices().next().unwrap_unchecked() + }; + + match c { + // white spaces + '\u{feff}' => {} + // line breaks + '\u{2028}' | '\u{2029}' => { + skip.newline = true; + } + + _ if c.is_whitespace() => {} + + _ => return true, + } + + skip.offset += len; + + false +}); + +/// API is taked from oxc by Boshen (https://github.com/Boshen/oxc/pull/26) +pub(super) struct SkipWhitespace<'a> { + pub input: &'a str, + + /// Total offset + pub offset: usize, + + /// Found newline + pub newline: bool, +} + +macro_rules! unwind_loop { + ($e:expr) => {{ + $e; + $e; + $e; + $e; + $e; + $e; + $e; + $e; + }}; +} + +impl SkipWhitespace<'_> { + pub fn scan(&mut self) { + let mut byte; + + loop { + unwind_loop!({ + byte = self.input.as_bytes().get(self.offset).copied(); + + if let Some(byte) = byte { + let handler = + unsafe { *(&BYTE_HANDLERS as *const ByteHandler).offset(byte as isize) }; + + if let Some(handler) = handler { + if handler(self) { + return; + } + } else { + return; + } + } else { + return; + } + }) + } + } +} From 94212785ef6d239632e30c3bc834b46ccbf70d03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Donny/=EA=B0=95=EB=8F=99=EC=9C=A4?= Date: Sat, 11 Mar 2023 21:46:53 +0900 Subject: [PATCH 03/15] Fix --- crates/swc_ecma_parser/src/lexer/util.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/swc_ecma_parser/src/lexer/util.rs b/crates/swc_ecma_parser/src/lexer/util.rs index d5585beb430d..615a0150ffb3 100644 --- a/crates/swc_ecma_parser/src/lexer/util.rs +++ b/crates/swc_ecma_parser/src/lexer/util.rs @@ -210,8 +210,9 @@ impl<'a> Lexer<'a> { self.skip_block_comment()?; continue; } - break; } + + break; } Ok(()) From 4b7a1eac093c219c2648b2906abb1429f0de6ace Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Donny/=EA=B0=95=EB=8F=99=EC=9C=A4?= Date: Sat, 11 Mar 2023 21:51:06 +0900 Subject: [PATCH 04/15] Fix tabelx --- crates/swc_ecma_parser/src/lexer/whitespace.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/swc_ecma_parser/src/lexer/whitespace.rs b/crates/swc_ecma_parser/src/lexer/whitespace.rs index 85f1633ef06d..20ad839489bf 100644 --- a/crates/swc_ecma_parser/src/lexer/whitespace.rs +++ b/crates/swc_ecma_parser/src/lexer/whitespace.rs @@ -6,7 +6,7 @@ static BYTE_HANDLERS: [ByteHandler; 256] = [ // 0 1 2 3 4 5 6 7 8 9 A B C D E F // ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, SPC, ___, SPC, ___, ___, ___, // 0 ___, NLN, ___, ___, NLN, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 1 - ___, SPC, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 2 + SPC, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 2 ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 3 ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 4 ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 5 From 3e5cdb405d01d1f943c55b9cbbb072f01f50b3a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Donny/=EA=B0=95=EB=8F=99=EC=9C=A4?= Date: Sat, 11 Mar 2023 21:59:10 +0900 Subject: [PATCH 05/15] Fix table --- crates/swc_ecma_parser/src/lexer/whitespace.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crates/swc_ecma_parser/src/lexer/whitespace.rs b/crates/swc_ecma_parser/src/lexer/whitespace.rs index 20ad839489bf..a578f9f3db97 100644 --- a/crates/swc_ecma_parser/src/lexer/whitespace.rs +++ b/crates/swc_ecma_parser/src/lexer/whitespace.rs @@ -4,8 +4,8 @@ pub(super) type ByteHandler = Option fn(&mut SkipWhitespace<'aa>) -> bo /// Lookup table for whitespace static BYTE_HANDLERS: [ByteHandler; 256] = [ // 0 1 2 3 4 5 6 7 8 9 A B C D E F // - ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, SPC, ___, SPC, ___, ___, ___, // 0 - ___, NLN, ___, ___, NLN, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 1 + ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, NLN, ___, SPC, NLN, ___, ___, // 0 + ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 1 SPC, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 2 ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 3 ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 4 @@ -101,6 +101,7 @@ impl SkipWhitespace<'_> { loop { unwind_loop!({ byte = self.input.as_bytes().get(self.offset).copied(); + dbg!(byte); if let Some(byte) = byte { let handler = From 90970c73cd5d69cefebac77df6e4115cb558f9db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Donny/=EA=B0=95=EB=8F=99=EC=9C=A4?= Date: Sat, 11 Mar 2023 22:00:15 +0900 Subject: [PATCH 06/15] remove dbg --- crates/swc_ecma_parser/src/lexer/whitespace.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/swc_ecma_parser/src/lexer/whitespace.rs b/crates/swc_ecma_parser/src/lexer/whitespace.rs index a578f9f3db97..3ec154fc4aad 100644 --- a/crates/swc_ecma_parser/src/lexer/whitespace.rs +++ b/crates/swc_ecma_parser/src/lexer/whitespace.rs @@ -101,7 +101,6 @@ impl SkipWhitespace<'_> { loop { unwind_loop!({ byte = self.input.as_bytes().get(self.offset).copied(); - dbg!(byte); if let Some(byte) = byte { let handler = From c5e92038a8289176bf06915dac57f2c929e38e2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Donny/=EA=B0=95=EB=8F=99=EC=9C=A4?= Date: Sat, 11 Mar 2023 22:04:07 +0900 Subject: [PATCH 07/15] Remove unwind --- .../swc_ecma_parser/src/lexer/whitespace.rs | 38 ++++++------------- 1 file changed, 12 insertions(+), 26 deletions(-) diff --git a/crates/swc_ecma_parser/src/lexer/whitespace.rs b/crates/swc_ecma_parser/src/lexer/whitespace.rs index 3ec154fc4aad..45095d14f51c 100644 --- a/crates/swc_ecma_parser/src/lexer/whitespace.rs +++ b/crates/swc_ecma_parser/src/lexer/whitespace.rs @@ -81,42 +81,28 @@ pub(super) struct SkipWhitespace<'a> { pub newline: bool, } -macro_rules! unwind_loop { - ($e:expr) => {{ - $e; - $e; - $e; - $e; - $e; - $e; - $e; - $e; - }}; -} - impl SkipWhitespace<'_> { + #[inline(always)] pub fn scan(&mut self) { let mut byte; loop { - unwind_loop!({ - byte = self.input.as_bytes().get(self.offset).copied(); - - if let Some(byte) = byte { - let handler = - unsafe { *(&BYTE_HANDLERS as *const ByteHandler).offset(byte as isize) }; - - if let Some(handler) = handler { - if handler(self) { - return; - } - } else { + byte = self.input.as_bytes().get(self.offset).copied(); + + if let Some(byte) = byte { + let handler = + unsafe { *(&BYTE_HANDLERS as *const ByteHandler).offset(byte as isize) }; + + if let Some(handler) = handler { + if handler(self) { return; } } else { return; } - }) + } else { + return; + } } } } From 1c486691573e722be295e7bb2cf94f42e987f8b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Donny/=EA=B0=95=EB=8F=99=EC=9C=A4?= Date: Sat, 11 Mar 2023 22:05:18 +0900 Subject: [PATCH 08/15] Fix table --- crates/swc_ecma_parser/src/lexer/whitespace.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/swc_ecma_parser/src/lexer/whitespace.rs b/crates/swc_ecma_parser/src/lexer/whitespace.rs index 45095d14f51c..2f9a5cfb266e 100644 --- a/crates/swc_ecma_parser/src/lexer/whitespace.rs +++ b/crates/swc_ecma_parser/src/lexer/whitespace.rs @@ -4,7 +4,7 @@ pub(super) type ByteHandler = Option fn(&mut SkipWhitespace<'aa>) -> bo /// Lookup table for whitespace static BYTE_HANDLERS: [ByteHandler; 256] = [ // 0 1 2 3 4 5 6 7 8 9 A B C D E F // - ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, NLN, ___, SPC, NLN, ___, ___, // 0 + ___, ___, ___, ___, ___, ___, ___, ___, ___, SPC, NLN, ___, SPC, NLN, ___, ___, // 0 ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 1 SPC, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 2 ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 3 From 231f13b53360def55923291cbcd4a32ece2c8b5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Donny/=EA=B0=95=EB=8F=99=EC=9C=A4?= Date: Sat, 11 Mar 2023 22:23:39 +0900 Subject: [PATCH 09/15] $@ --- crates/swc_ecma_parser/scripts/instrument/bench.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/swc_ecma_parser/scripts/instrument/bench.sh b/crates/swc_ecma_parser/scripts/instrument/bench.sh index 1f8c08c49775..e0b4d34c4903 100755 --- a/crates/swc_ecma_parser/scripts/instrument/bench.sh +++ b/crates/swc_ecma_parser/scripts/instrument/bench.sh @@ -4,4 +4,4 @@ set -eu export RUST_LOG=off export MIMALLOC_SHOW_STATS=1 -cargo profile instruments --release -t time --features tracing/release_max_level_info --features swc_common/concurrent --features swc_common/parking_lot --bench parser -- --bench --color +cargo profile instruments --release -t time --features tracing/release_max_level_info --features swc_common/concurrent --features swc_common/parking_lot --bench parser -- --bench --color $@ From 9711ffc79039984594ce8557f2673cf909da8972 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Donny/=EA=B0=95=EB=8F=99=EC=9C=A4?= Date: Sat, 11 Mar 2023 22:23:52 +0900 Subject: [PATCH 10/15] Use for loop --- crates/swc_ecma_parser/src/lexer/whitespace.rs | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/crates/swc_ecma_parser/src/lexer/whitespace.rs b/crates/swc_ecma_parser/src/lexer/whitespace.rs index 2f9a5cfb266e..f5c8deba1d0c 100644 --- a/crates/swc_ecma_parser/src/lexer/whitespace.rs +++ b/crates/swc_ecma_parser/src/lexer/whitespace.rs @@ -84,20 +84,11 @@ pub(super) struct SkipWhitespace<'a> { impl SkipWhitespace<'_> { #[inline(always)] pub fn scan(&mut self) { - let mut byte; + for &byte in self.input.as_bytes() { + let handler = unsafe { *(&BYTE_HANDLERS as *const ByteHandler).offset(byte as isize) }; - loop { - byte = self.input.as_bytes().get(self.offset).copied(); - - if let Some(byte) = byte { - let handler = - unsafe { *(&BYTE_HANDLERS as *const ByteHandler).offset(byte as isize) }; - - if let Some(handler) = handler { - if handler(self) { - return; - } - } else { + if let Some(handler) = handler { + if handler(self) { return; } } else { From 3a62f0ff281aaa690e4ae1c67ed81456415166d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Donny/=EA=B0=95=EB=8F=99=EC=9C=A4?= Date: Sat, 11 Mar 2023 22:39:39 +0900 Subject: [PATCH 11/15] inline --- crates/swc_ecma_parser/src/lexer/util.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/swc_ecma_parser/src/lexer/util.rs b/crates/swc_ecma_parser/src/lexer/util.rs index 615a0150ffb3..d31fddc594bb 100644 --- a/crates/swc_ecma_parser/src/lexer/util.rs +++ b/crates/swc_ecma_parser/src/lexer/util.rs @@ -218,6 +218,7 @@ impl<'a> Lexer<'a> { Ok(()) } + #[inline(never)] pub(super) fn skip_line_comment(&mut self, start_skip: usize) { let start = self.cur_pos(); self.input.bump_bytes(start_skip); @@ -267,6 +268,7 @@ impl<'a> Lexer<'a> { } /// Expects current char to be '/' and next char to be '*'. + #[inline(never)] pub(super) fn skip_block_comment(&mut self) -> LexResult<()> { let start = self.cur_pos(); From 532619ceea7bf8b6492eb56ffb49faa740788693 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Donny/=EA=B0=95=EB=8F=99=EC=9C=A4?= Date: Sat, 11 Mar 2023 23:05:28 +0900 Subject: [PATCH 12/15] Use return value --- .../swc_ecma_parser/src/lexer/whitespace.rs | 21 +++++++------------ 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/crates/swc_ecma_parser/src/lexer/whitespace.rs b/crates/swc_ecma_parser/src/lexer/whitespace.rs index f5c8deba1d0c..70945db70017 100644 --- a/crates/swc_ecma_parser/src/lexer/whitespace.rs +++ b/crates/swc_ecma_parser/src/lexer/whitespace.rs @@ -1,5 +1,5 @@ /// Returns true if it's done -pub(super) type ByteHandler = Option fn(&mut SkipWhitespace<'aa>) -> bool>; +pub(super) type ByteHandler = Option fn(&mut SkipWhitespace<'aa>) -> usize>; /// Lookup table for whitespace static BYTE_HANDLERS: [ByteHandler; 256] = [ @@ -27,18 +27,13 @@ const ___: ByteHandler = None; /// Newline const NLN: ByteHandler = Some(|skip| { - skip.offset += 1; skip.newline = true; - false + 1 }); /// Space -const SPC: ByteHandler = Some(|skip| { - skip.offset += 1; - - false -}); +const SPC: ByteHandler = Some(|_| 1); /// Unicode const UNI: ByteHandler = Some(|skip| { @@ -62,12 +57,10 @@ const UNI: ByteHandler = Some(|skip| { _ if c.is_whitespace() => {} - _ => return true, + _ => return 0, } - skip.offset += len; - - false + len }); /// API is taked from oxc by Boshen (https://github.com/Boshen/oxc/pull/26) @@ -88,9 +81,11 @@ impl SkipWhitespace<'_> { let handler = unsafe { *(&BYTE_HANDLERS as *const ByteHandler).offset(byte as isize) }; if let Some(handler) = handler { - if handler(self) { + let delta = handler(self); + if delta == 0 { return; } + self.offset += delta; } else { return; } From e44706b9cf85fa63b7b22dfa0a337e7c3d39035a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Donny/=EA=B0=95=EB=8F=99=EC=9C=A4?= Date: Mon, 13 Mar 2023 16:06:40 +0900 Subject: [PATCH 13/15] Fix `scan` --- crates/swc_ecma_parser/src/lexer/whitespace.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/crates/swc_ecma_parser/src/lexer/whitespace.rs b/crates/swc_ecma_parser/src/lexer/whitespace.rs index 70945db70017..4304661647f6 100644 --- a/crates/swc_ecma_parser/src/lexer/whitespace.rs +++ b/crates/swc_ecma_parser/src/lexer/whitespace.rs @@ -77,7 +77,13 @@ pub(super) struct SkipWhitespace<'a> { impl SkipWhitespace<'_> { #[inline(always)] pub fn scan(&mut self) { - for &byte in self.input.as_bytes() { + let mut byte; + loop { + byte = match self.input.as_bytes().get(self.offset).copied() { + Some(v) => v, + None => return, + }; + let handler = unsafe { *(&BYTE_HANDLERS as *const ByteHandler).offset(byte as isize) }; if let Some(handler) = handler { From df74d1b917e19ed7d99ec1840702286520006c1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Donny/=EA=B0=95=EB=8F=99=EC=9C=A4?= Date: Mon, 13 Mar 2023 16:07:39 +0900 Subject: [PATCH 14/15] Fix --- crates/swc_ecma_parser/src/lexer/whitespace.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/swc_ecma_parser/src/lexer/whitespace.rs b/crates/swc_ecma_parser/src/lexer/whitespace.rs index 4304661647f6..313e00f563cb 100644 --- a/crates/swc_ecma_parser/src/lexer/whitespace.rs +++ b/crates/swc_ecma_parser/src/lexer/whitespace.rs @@ -42,9 +42,9 @@ const UNI: ByteHandler = Some(|skip| { skip.input.get_unchecked(skip.offset..) }; - let (len, c) = unsafe { + let c = unsafe { // Safety: Byte handlers are called only when `skip.input` is not empty - s.char_indices().next().unwrap_unchecked() + s.chars().next().unwrap_unchecked() }; match c { @@ -60,7 +60,7 @@ const UNI: ByteHandler = Some(|skip| { _ => return 0, } - len + c.len_utf8() }); /// API is taked from oxc by Boshen (https://github.com/Boshen/oxc/pull/26) From 2ab9af3cfe8f02ff9cf6d029fbdc30e58bffb25d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Donny/=EA=B0=95=EB=8F=99=EC=9C=A4?= Date: Mon, 13 Mar 2023 16:11:40 +0900 Subject: [PATCH 15/15] Fix tabel --- crates/swc_ecma_parser/src/lexer/whitespace.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/swc_ecma_parser/src/lexer/whitespace.rs b/crates/swc_ecma_parser/src/lexer/whitespace.rs index 313e00f563cb..a38cffa05a03 100644 --- a/crates/swc_ecma_parser/src/lexer/whitespace.rs +++ b/crates/swc_ecma_parser/src/lexer/whitespace.rs @@ -4,7 +4,7 @@ pub(super) type ByteHandler = Option fn(&mut SkipWhitespace<'aa>) -> us /// Lookup table for whitespace static BYTE_HANDLERS: [ByteHandler; 256] = [ // 0 1 2 3 4 5 6 7 8 9 A B C D E F // - ___, ___, ___, ___, ___, ___, ___, ___, ___, SPC, NLN, ___, SPC, NLN, ___, ___, // 0 + ___, ___, ___, ___, ___, ___, ___, ___, ___, SPC, NLN, SPC, SPC, NLN, ___, ___, // 0 ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 1 SPC, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 2 ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 3