Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(parser): remove TokenValue::RegExp from Token #1926

Merged
merged 1 commit into from
Jan 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions crates/oxc_ast/src/ast_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,13 @@ impl<'a> AstBuilder<'a> {
TemplateElementValue { raw, cooked }
}

pub fn reg_exp_literal(&self, span: Span, pattern: Atom, flags: RegExpFlags) -> RegExpLiteral {
RegExpLiteral { span, value: EmptyObject, regex: RegExp { pattern, flags } }
pub fn reg_exp_literal(
&self,
span: Span,
pattern: &'a str,
flags: RegExpFlags,
) -> RegExpLiteral {
RegExpLiteral { span, value: EmptyObject, regex: RegExp { pattern: pattern.into(), flags } }
}

pub fn literal_string_expression(&self, literal: StringLiteral) -> Expression<'a> {
Expand Down
36 changes: 22 additions & 14 deletions crates/oxc_parser/src/js/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,8 @@ impl<'a> Parser<'a> {
Kind::LParen => self.parse_parenthesized_expression(span),
Kind::Slash | Kind::SlashEq => {
self.read_regex();
self.parse_literal_regexp()
.map(|literal| self.ast.literal_regexp_expression(literal))
let literal = self.parse_literal_regexp();
Ok(self.ast.literal_regexp_expression(literal))
}
// JSXElement, JSXFragment
Kind::LAngle if self.source_type.is_jsx() => self.parse_jsx_expression(),
Expand Down Expand Up @@ -315,20 +315,28 @@ impl<'a> Parser<'a> {
Ok(self.ast.bigint_literal(self.end_span(span), value, base))
}

pub(crate) fn parse_literal_regexp(&mut self) -> Result<RegExpLiteral> {
pub(crate) fn parse_literal_regexp(&mut self) -> RegExpLiteral {
let span = self.start_span();
let r = match self.cur_kind() {
Kind::RegExp => self.cur_token().value.as_regex(),
_ => return Err(self.unexpected()),
};
let pattern = Atom::from(r.pattern);
let flags = r.flags;

// split out the flag part of `/regex/flag` by looking for `/` from the end
let regex_src = self.cur_src();
let mut flags = RegExpFlags::empty();

let mut split_index = None;
for (i, c) in regex_src.char_indices().rev() {
if let Ok(flag) = RegExpFlags::try_from(c) {
flags |= flag;
} else {
split_index.replace(i);
break;
}
}

// `/` are omitted from the pattern
let pattern = split_index.map_or(regex_src, |i| regex_src.get(1..i).unwrap_or(""));

self.bump_any();
Ok(RegExpLiteral {
span: self.end_span(span),
value: EmptyObject {},
regex: RegExp { pattern, flags },
})
self.ast.reg_exp_literal(self.end_span(span), pattern, flags)
}

pub(crate) fn parse_literal_string(&mut self) -> Result<StringLiteral> {
Expand Down
30 changes: 8 additions & 22 deletions crates/oxc_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use oxc_syntax::{
},
unicode_id_start::is_id_start_unicode,
};
pub use token::{RegExp, Token, TokenValue};
pub use token::{Token, TokenValue};

pub use self::{kind::Kind, number::parse_big_int};
use self::{
Expand Down Expand Up @@ -819,7 +819,6 @@ impl<'a> Lexer<'a> {

/// 12.9.5 Regular Expression Literals
fn read_regex(&mut self) -> Kind {
let start = self.current.token.start + 1; // +1 to exclude `/`
let mut in_escape = false;
let mut in_character_class = false;
loop {
Expand Down Expand Up @@ -848,40 +847,27 @@ impl<'a> Lexer<'a> {
}
}

let end = self.offset() - 1; // -1 to exclude `/`
let pattern = &self.source[start as usize..end as usize];

let mut flags = RegExpFlags::empty();

while let Some(ch @ ('$' | '_' | 'a'..='z' | 'A'..='Z' | '0'..='9')) = self.peek() {
self.current.chars.next();
if !ch.is_ascii_lowercase() {
self.error(diagnostics::RegExpFlag(ch, self.current_offset()));
continue;
return Kind::Undetermined;
}
let flag = match ch {
'g' => RegExpFlags::G,
'i' => RegExpFlags::I,
'm' => RegExpFlags::M,
's' => RegExpFlags::S,
'u' => RegExpFlags::U,
'y' => RegExpFlags::Y,
'd' => RegExpFlags::D,
'v' => RegExpFlags::V,
_ => {
self.error(diagnostics::RegExpFlag(ch, self.current_offset()));
continue;
}
let flag = if let Ok(flag) = RegExpFlags::try_from(ch) {
flag
} else {
self.error(diagnostics::RegExpFlag(ch, self.current_offset()));
return Kind::Undetermined;
};
if flags.contains(flag) {
self.error(diagnostics::RegExpFlagTwice(ch, self.current_offset()));
continue;
return Kind::Undetermined;
}
flags |= flag;
}

self.current.token.value = TokenValue::RegExp(RegExp { pattern, flags });

Kind::RegExp
}

Expand Down
17 changes: 1 addition & 16 deletions crates/oxc_parser/src/lexer/token.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
//! Token

use oxc_ast::ast::RegExpFlags;
use oxc_span::Span;

use super::kind::Kind;
Expand Down Expand Up @@ -29,7 +28,7 @@ pub struct Token<'a> {
mod size_asserts {
use oxc_index::assert_eq_size;

assert_eq_size!(super::Token, [u8; 48]);
assert_eq_size!(super::Token, [u8; 40]);
}

impl<'a> Token<'a> {
Expand All @@ -43,13 +42,6 @@ pub enum TokenValue<'a> {
None,
Number(f64),
String(&'a str),
RegExp(RegExp<'a>),
}

#[derive(Debug, Copy, Clone)]
pub struct RegExp<'a> {
pub pattern: &'a str,
pub flags: RegExpFlags,
}

impl<'a> Default for TokenValue<'a> {
Expand All @@ -66,13 +58,6 @@ impl<'a> TokenValue<'a> {
}
}

pub fn as_regex(&self) -> &RegExp<'a> {
match self {
Self::RegExp(regex) => regex,
_ => unreachable!("expected regex!"),
}
}

pub fn get_string(&self) -> Option<&str> {
match self {
Self::String(s) => Some(s),
Expand Down
27 changes: 27 additions & 0 deletions tasks/coverage/parser_babel.snap
Original file line number Diff line number Diff line change
Expand Up @@ -1141,6 +1141,12 @@ Expect to Parse: "typescript/types/const-type-parameters-babel-7/input.ts"
╭─[core/uncategorised/380/input.js:1:1]
1 │ var x = /
· ──
2 │ /
╰────

× Unexpected token
╭─[core/uncategorised/380/input.js:1:1]
1 │ var x = /
2 │ /
╰────

Expand Down Expand Up @@ -1523,6 +1529,12 @@ Expect to Parse: "typescript/types/const-type-parameters-babel-7/input.ts"
╭─[core/uncategorised/441/input.js:1:1]
1 │ /a\
· ────
2 │ /
╰────

× Unexpected token
╭─[core/uncategorised/441/input.js:1:1]
1 │ /a\
2 │ /
╰────

Expand Down Expand Up @@ -7967,6 +7979,11 @@ Expect to Parse: "typescript/types/const-type-parameters-babel-7/input.ts"
2 │ /
╰────

× Unexpected token
╭─[esprima/invalid-syntax/migrated_0040/input.js:2:1]
2 │ /
╰────

× Invalid Unicode escape sequence
╭─[esprima/invalid-syntax/migrated_0041/input.js:1:1]
1 │ var x = /[a-z]/\ux
Expand Down Expand Up @@ -8141,6 +8158,11 @@ Expect to Parse: "typescript/types/const-type-parameters-babel-7/input.ts"
2 │ /
╰────

× Unexpected token
╭─[esprima/invalid-syntax/migrated_0062/input.js:2:1]
2 │ /
╰────

× Unterminated string
╭─[esprima/invalid-syntax/migrated_0063/input.js:1:1]
1 │ var x = "
Expand Down Expand Up @@ -8681,6 +8703,11 @@ Expect to Parse: "typescript/types/const-type-parameters-babel-7/input.ts"
2 │ /
╰────

× Unexpected token
╭─[esprima/invalid-syntax/migrated_0157/input.js:2:1]
2 │ /
╰────

× Unexpected token
╭─[esprima/invalid-syntax/migrated_0158/input.js:2:1]
2 │
Expand Down
25 changes: 25 additions & 0 deletions tasks/coverage/parser_test262.snap
Original file line number Diff line number Diff line change
Expand Up @@ -18251,6 +18251,11 @@ Expect Syntax Error: "language/import/import-attributes/json-named-bindings.js"
· ─
╰────

× Unexpected token
╭─[language/line-terminators/invalid-regexp-cr.js:18:1]
18 │ /
╰────

× Unterminated regular expression
╭─[language/line-terminators/invalid-regexp-lf.js:16:1]
16 │
Expand All @@ -18259,20 +18264,35 @@ Expect Syntax Error: "language/import/import-attributes/json-named-bindings.js"
18 │ /
╰────

× Unexpected token
╭─[language/line-terminators/invalid-regexp-lf.js:18:1]
18 │ /
╰────

× Unterminated regular expression
╭─[language/line-terminators/invalid-regexp-ls.js:16:1]
16 │
17 │ /
/
· ──
╰────

× Unexpected token
╭─[language/line-terminators/invalid-regexp-ls.js:17:1]
17 │ /
/
╰────

× Unterminated regular expression
╭─[language/line-terminators/invalid-regexp-ps.js:16:1]
16 │
17 │ /
/
· ──
╰────

× Unexpected token
╭─[language/line-terminators/invalid-regexp-ps.js:17:1]
17 │ /
/
╰────

× Unterminated string
╭─[language/line-terminators/invalid-string-cr.js:15:1]
15 │
Expand Down Expand Up @@ -31537,6 +31557,11 @@ Expect Syntax Error: "language/import/import-attributes/json-named-bindings.js"
· ───────
╰────

× Expected `}` but found `EOF`
╭─[language/statements/function/invalid-function-body-1.js:17:1]
17 │ function __func(){/ ABC}
╰────

× Unexpected token
╭─[language/statements/function/invalid-function-body-2.js:16:1]
16 │
Expand Down
5 changes: 5 additions & 0 deletions tasks/coverage/parser_typescript.snap
Original file line number Diff line number Diff line change
Expand Up @@ -17136,6 +17136,11 @@ Expect to Parse: "conformance/salsa/plainJSRedeclare3.ts"
· ────────────
╰────

× Expected `)` but found `EOF`
╭─[conformance/parser/ecmascript5/RegularExpressions/parserRegularExpressionDivideAmbiguity4.ts:1:1]
1 │ foo(/notregexp);
╰────

× Expected a semicolon or an implicit semicolon after a statement, but found none
╭─[conformance/parser/ecmascript5/RegularExpressions/parserRegularExpressionDivideAmbiguity7.ts:1:1]
1 │ (a/8
Expand Down