Skip to content

Commit

Permalink
refactor(parser): remove TokenValue::RegExp from Token
Browse files Browse the repository at this point in the history
This PR is part of #1880.

`Token` size is reduced from 48 to 40 bytes.

To reconstruct the regex pattern and flags within the parser, the regex string is
re-parsed from the end by reading all valid flags.

In order to make things work nicely, the lexer will no longer recover
from an invalid regex.
  • Loading branch information
Boshen committed Jan 7, 2024
1 parent dad94f7 commit aa6aa76
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 55 deletions.
2 changes: 1 addition & 1 deletion crates/oxc_ast/src/ast/literal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ impl<'a> Hash for NumberLiteral<'a> {
}
}

#[derive(Debug, Clone, Hash)]
#[derive(Debug, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize), serde(tag = "type"))]
pub struct BigintLiteral {
#[cfg_attr(feature = "serde", serde(flatten))]
Expand Down
9 changes: 7 additions & 2 deletions crates/oxc_ast/src/ast_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,13 @@ impl<'a> AstBuilder<'a> {
TemplateElementValue { raw, cooked }
}

pub fn reg_exp_literal(&self, span: Span, pattern: Atom, flags: RegExpFlags) -> RegExpLiteral {
RegExpLiteral { span, value: EmptyObject, regex: RegExp { pattern, flags } }
pub fn reg_exp_literal(
&self,
span: Span,
pattern: &'a str,
flags: RegExpFlags,
) -> RegExpLiteral {
RegExpLiteral { span, value: EmptyObject, regex: RegExp { pattern: pattern.into(), flags } }
}

pub fn literal_string_expression(&self, literal: StringLiteral) -> Expression<'a> {
Expand Down
36 changes: 22 additions & 14 deletions crates/oxc_parser/src/js/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,8 @@ impl<'a> Parser<'a> {
Kind::LParen => self.parse_parenthesized_expression(span),
Kind::Slash | Kind::SlashEq => {
self.read_regex();
self.parse_literal_regexp()
.map(|literal| self.ast.literal_regexp_expression(literal))
let literal = self.parse_literal_regexp();
Ok(self.ast.literal_regexp_expression(literal))
}
// JSXElement, JSXFragment
Kind::LAngle if self.source_type.is_jsx() => self.parse_jsx_expression(),
Expand Down Expand Up @@ -315,20 +315,28 @@ impl<'a> Parser<'a> {
Ok(self.ast.bigint_literal(self.end_span(span), value, base))
}

pub(crate) fn parse_literal_regexp(&mut self) -> Result<RegExpLiteral> {
pub(crate) fn parse_literal_regexp(&mut self) -> RegExpLiteral {
let span = self.start_span();
let r = match self.cur_kind() {
Kind::RegExp => self.cur_token().value.as_regex(),
_ => return Err(self.unexpected()),
};
let pattern = Atom::from(r.pattern);
let flags = r.flags;

// split out the flag part of `/regex/flag` by looking for `/` from the end
let regex_src = self.cur_src();
let mut flags = RegExpFlags::empty();

let mut split_index = None;
for (i, c) in regex_src.char_indices().rev() {
if let Ok(flag) = RegExpFlags::try_from(c) {
flags |= flag;
} else {
split_index.replace(i);
break;
}
}

// `/` are omitted from the pattern
let pattern = split_index.map_or(regex_src, |i| regex_src.get(1..i).unwrap_or(""));

self.bump_any();
Ok(RegExpLiteral {
span: self.end_span(span),
value: EmptyObject {},
regex: RegExp { pattern, flags },
})
self.ast.reg_exp_literal(self.end_span(span), pattern, flags)
}

pub(crate) fn parse_literal_string(&mut self) -> Result<StringLiteral> {
Expand Down
30 changes: 8 additions & 22 deletions crates/oxc_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use oxc_syntax::{
},
unicode_id_start::is_id_start_unicode,
};
pub use token::{RegExp, Token, TokenValue};
pub use token::{Token, TokenValue};

pub use self::{kind::Kind, number::parse_big_int};
use self::{
Expand Down Expand Up @@ -819,7 +819,6 @@ impl<'a> Lexer<'a> {

/// 12.9.5 Regular Expression Literals
fn read_regex(&mut self) -> Kind {
let start = self.current.token.start + 1; // +1 to exclude `/`
let mut in_escape = false;
let mut in_character_class = false;
loop {
Expand Down Expand Up @@ -848,40 +847,27 @@ impl<'a> Lexer<'a> {
}
}

let end = self.offset() - 1; // -1 to exclude `/`
let pattern = &self.source[start as usize..end as usize];

let mut flags = RegExpFlags::empty();

while let Some(ch @ ('$' | '_' | 'a'..='z' | 'A'..='Z' | '0'..='9')) = self.peek() {
self.current.chars.next();
if !ch.is_ascii_lowercase() {
self.error(diagnostics::RegExpFlag(ch, self.current_offset()));
continue;
return Kind::Undetermined;
}
let flag = match ch {
'g' => RegExpFlags::G,
'i' => RegExpFlags::I,
'm' => RegExpFlags::M,
's' => RegExpFlags::S,
'u' => RegExpFlags::U,
'y' => RegExpFlags::Y,
'd' => RegExpFlags::D,
'v' => RegExpFlags::V,
_ => {
self.error(diagnostics::RegExpFlag(ch, self.current_offset()));
continue;
}
let flag = if let Ok(flag) = RegExpFlags::try_from(ch) {
flag
} else {
self.error(diagnostics::RegExpFlag(ch, self.current_offset()));
return Kind::Undetermined;
};
if flags.contains(flag) {
self.error(diagnostics::RegExpFlagTwice(ch, self.current_offset()));
continue;
return Kind::Undetermined;
}
flags |= flag;
}

self.current.token.value = TokenValue::RegExp(RegExp { pattern, flags });

Kind::RegExp
}

Expand Down
17 changes: 1 addition & 16 deletions crates/oxc_parser/src/lexer/token.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
//! Token

use oxc_ast::ast::RegExpFlags;
use oxc_span::Span;

use super::kind::Kind;
Expand Down Expand Up @@ -29,7 +28,7 @@ pub struct Token<'a> {
mod size_asserts {
use oxc_index::assert_eq_size;

assert_eq_size!(super::Token, [u8; 48]);
assert_eq_size!(super::Token, [u8; 40]);
}

impl<'a> Token<'a> {
Expand All @@ -43,13 +42,6 @@ pub enum TokenValue<'a> {
None,
Number(f64),
String(&'a str),
RegExp(RegExp<'a>),
}

#[derive(Debug, Copy, Clone)]
pub struct RegExp<'a> {
pub pattern: &'a str,
pub flags: RegExpFlags,
}

impl<'a> Default for TokenValue<'a> {
Expand All @@ -66,13 +58,6 @@ impl<'a> TokenValue<'a> {
}
}

pub fn as_regex(&self) -> &RegExp<'a> {
match self {
Self::RegExp(regex) => regex,
_ => unreachable!("expected regex!"),
}
}

pub fn get_string(&self) -> Option<&str> {
match self {
Self::String(s) => Some(s),
Expand Down

0 comments on commit aa6aa76

Please sign in to comment.