diff --git a/crates/swc_ecma_parser/src/lexer/mod.rs b/crates/swc_ecma_parser/src/lexer/mod.rs index 0e6c0330e5db..7f64433c1685 100644 --- a/crates/swc_ecma_parser/src/lexer/mod.rs +++ b/crates/swc_ecma_parser/src/lexer/mod.rs @@ -969,24 +969,42 @@ impl<'a> Lexer<'a> { fn read_str_lit(&mut self) -> LexResult { debug_assert!(self.cur() == Some('\'') || self.cur() == Some('"')); let start = self.cur_pos(); - let quote = self.cur().unwrap(); + let quote = self.cur().unwrap() as u8; self.bump(); // '"' - self.with_buf(|l, out| { - while let Some(c) = { - // Optimization - { - let s = l - .input - .uncons_while(|c| c != quote && c != '\\' && !c.is_line_break()); - out.push_str(s); - } - l.cur() - } { - match c { - c if c == quote => { - l.bump(); + let mut has_escape = false; + let mut slice_start = self.input.cur_pos(); + + self.with_buf(|l, buf| { + loop { + if let Some(c) = l.input.cur_as_ascii() { + if c == quote { + let value_end = l.cur_pos(); + + let value = if !has_escape { + let s = unsafe { + // Safety: slice_start and value_end are valid position because we + // got them from `self.input` + l.input.slice(slice_start, value_end) + }; + + l.atoms.atom(s) + } else { + let s = unsafe { + // Safety: slice_start and value_end are valid position because we + // got them from `self.input` + l.input.slice(slice_start, value_end) + }; + buf.push_str(s); + + l.atoms.atom(&**buf) + }; + + unsafe { + // Safety: cur is quote + l.input.bump(); + } let end = l.cur_pos(); @@ -995,28 +1013,67 @@ impl<'a> Lexer<'a> { // `self.input` l.input.slice(start, end) }; + let raw = l.atoms.atom(raw); - return Ok(Token::Str { - value: l.atoms.atom(&*out), - raw: l.atoms.atom(raw), - }); + return Ok(Token::Str { value, raw }); } - '\\' => { + + if c == b'\\' { + has_escape = true; + + { + let end = l.cur_pos(); + let s = unsafe { + // Safety: start and end are valid position because we got them from + // `self.input` + l.input.slice(slice_start, end) + }; + buf.push_str(s); + } + if let Some(chars) = l.read_escaped_char(false)? { for c in chars { - out.extend(c); + buf.extend(c); } } + + slice_start = l.cur_pos(); + continue; } - c if c.is_line_break() => { + + if (c as char).is_line_break() { break; } - _ => { - out.push(c); - l.bump(); + unsafe { + // Safety: cur is a ascii character + l.input.bump(); } + continue; } + + match l.input.cur() { + Some(c) => { + if c.is_line_break() { + break; + } + unsafe { + // Safety: cur is Some(c) + l.input.bump(); + } + } + None => break, + } + } + + { + let end = l.cur_pos(); + let s = unsafe { + // Safety: start and end are valid position because we got them from + // `self.input` + l.input.slice(slice_start, end) + }; + buf.push_str(s); } l.emit_error(start, SyntaxError::UnterminatedStrLit); @@ -1029,7 +1086,7 @@ impl<'a> Lexer<'a> { l.input.slice(start, end) }; Ok(Token::Str { - value: l.atoms.atom(&*out), + value: l.atoms.atom(&*buf), raw: l.atoms.atom(raw), }) })