Skip to content

Commit

Permalink
fix(css/parser): Fix input buffer and improve error recovery (#4948)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-akait committed Jun 12, 2022
1 parent 3bb75f1 commit bf74d05
Show file tree
Hide file tree
Showing 18 changed files with 136 additions and 123 deletions.
34 changes: 18 additions & 16 deletions crates/swc_css_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
use std::{cell::RefCell, char::REPLACEMENT_CHARACTER, rc::Rc};
use std::{cell::RefCell, char::REPLACEMENT_CHARACTER, mem::take, rc::Rc};

use swc_atoms::{js_word, JsWord};
use swc_common::{input::Input, BytePos, Span};
use swc_css_ast::{NumberType, Token, TokenAndSpan};

use crate::{
error::{Error, ErrorKind},
parser::{input::ParserInput, PResult, ParserConfig},
parser::{input::ParserInput, ParserConfig},
};

pub(crate) type LexResult<T> = Result<T, ErrorKind>;
Expand All @@ -27,6 +27,7 @@ where
raw_buf: Rc<RefCell<String>>,
sub_buf: Rc<RefCell<String>>,
sub_raw_buf: Rc<RefCell<String>>,
errors: Vec<Error>,
}

impl<I> Lexer<I>
Expand All @@ -47,6 +48,7 @@ where
raw_buf: Rc::new(RefCell::new(String::with_capacity(256))),
sub_buf: Rc::new(RefCell::new(String::with_capacity(32))),
sub_raw_buf: Rc::new(RefCell::new(String::with_capacity(32))),
errors: vec![],
}
}

Expand Down Expand Up @@ -138,16 +140,6 @@ where
{
type State = LexerState;

fn next(&mut self) -> PResult<TokenAndSpan> {
let token = self.read_token();
let end = self.last_pos.take().unwrap_or_else(|| self.input.cur_pos());
let span = Span::new(self.start_pos, end, Default::default());

token
.map(|token| TokenAndSpan { span, token })
.map_err(|kind| Error::new(span, kind))
}

fn start_pos(&mut self) -> swc_common::BytePos {
self.input.cur_pos()
}
Expand All @@ -161,6 +153,10 @@ where
fn reset(&mut self, state: &Self::State) {
self.input.reset_to(state.pos);
}

/// Returns the errors accumulated in `self.errors`, leaving an empty `Vec` in their place.
fn take_errors(&mut self) -> Vec<Error> {
take(&mut self.errors)
}
}

impl<I> Lexer<I>
Expand Down Expand Up @@ -479,10 +475,16 @@ where

break;
}
Some(_) => {}
None => {
return Err(ErrorKind::UnterminatedBlockComment);
let end = self.last_pos.take().unwrap_or_else(|| self.input.cur_pos());
let span = Span::new(self.start_pos, end, Default::default());

self.errors
.push(Error::new(span, ErrorKind::UnterminatedBlockComment));

return Ok(());
}
_ => {}
}
}
}
Expand All @@ -501,10 +503,10 @@ where
Some(c) if is_newline(c) => {
break;
}
Some(_) => {}
None => {
return Err(ErrorKind::UnterminatedBlockComment);
return Ok(());
}
_ => {}
}
}
}
Expand Down
56 changes: 25 additions & 31 deletions crates/swc_css_parser/src/parser/input.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::fmt::Debug;
use std::{fmt::Debug, mem::take};

use swc_common::{BytePos, Span, SyntaxContext};
use swc_css_ast::{Token, TokenAndSpan, Tokens};
Expand All @@ -9,13 +9,13 @@ use crate::error::{Error, ErrorKind};
pub trait ParserInput: Iterator<Item = TokenAndSpan> {
type State: Debug;

fn next(&mut self) -> PResult<TokenAndSpan>;

fn start_pos(&mut self) -> BytePos;

fn state(&mut self) -> Self::State;

fn reset(&mut self, state: &Self::State);

fn take_errors(&mut self) -> Vec<Error>;
}

#[derive(Debug)]
Expand All @@ -35,6 +35,7 @@ where
{
pub fn new(mut input: I) -> Self {
let last_pos = input.start_pos();

Buffer {
cur: None,
peeked: None,
Expand Down Expand Up @@ -73,7 +74,7 @@ where
self.cur()?;

if self.peeked.is_none() {
self.peeked = Some(ParserInput::next(&mut self.input)?);
self.peeked = self.input.next();
}

Ok(self.peeked.as_ref().map(|v| &v.token))
Expand Down Expand Up @@ -109,20 +110,18 @@ where
}

if self.cur.is_none() {
let res = ParserInput::next(&mut self.input);

if let Err(err) = &res {
if let ErrorKind::Eof = err.kind() {
return Ok(());
}
}
let token_and_span = self.input.next();

self.cur = res.map(Some)?;
self.cur = token_and_span;
}

Ok(())
}

pub fn take_errors(&mut self) -> Vec<Error> {
take(&mut self.input.take_errors())
}

pub(super) fn skip_ws(&mut self) -> PResult<()> {
loop {
match self.cur.as_ref().map(|v| &v.token) {
Expand Down Expand Up @@ -171,30 +170,23 @@ impl<'a> TokensInput<'a> {
}

fn cur(&mut self) -> PResult<&TokenAndSpan> {
let ret = self.tokens.tokens.get(self.idx);
let ret = match ret {
let token_and_span = match self.tokens.tokens.get(self.idx) {
Some(v) => v,
None => {
let bp = self.tokens.span.hi;
let span = Span::new(bp, bp, SyntaxContext::empty());

return Err(Error::new(span, ErrorKind::Eof));
}
};

Ok(ret)
Ok(token_and_span)
}
}

impl<'a> ParserInput for TokensInput<'a> {
type State = TokensState;

fn next(&mut self) -> PResult<TokenAndSpan> {
let ret = self.cur()?.clone();
self.idx += 1;

Ok(ret)
}

fn start_pos(&mut self) -> BytePos {
self.tokens.span.lo
}
Expand All @@ -206,21 +198,23 @@ impl<'a> ParserInput for TokensInput<'a> {
fn reset(&mut self, state: &Self::State) {
self.idx = state.idx;
}

/// Always returns an empty list: this input reports no errors of its own.
fn take_errors(&mut self) -> Vec<Error> {
    Vec::new()
}
}

impl<'a> Iterator for TokensInput<'a> {
type Item = TokenAndSpan;

fn next(&mut self) -> Option<Self::Item> {
let token_and_span = ParserInput::next(self);
let token_and_span = match self.cur() {
Ok(token_and_span) => token_and_span.clone(),
_ => return None,
};

match token_and_span {
Ok(token_and_span) => {
return Some(token_and_span);
}
Err(..) => {
return None;
}
}
self.idx += 1;

Some(token_and_span)
}
}
19 changes: 18 additions & 1 deletion crates/swc_css_parser/src/parser/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,13 @@ macro_rules! cur {
None => {
let last_pos = $parser.input.last_pos()?;
let span = swc_common::Span::new(last_pos, last_pos, Default::default());

for error in $parser.input.take_errors() {
let (span, kind) = *error.into_inner();

$parser.errors.push(Error::new(span, kind));
}

return Err(crate::error::Error::new(span, crate::error::ErrorKind::Eof));
}
}
Expand Down Expand Up @@ -116,7 +123,17 @@ macro_rules! is_one_of_case_insensitive_ident {

macro_rules! is {
($parser:expr, EOF) => {{
$parser.input.cur()?.is_none()
let is_eof = $parser.input.cur()?.is_none();

if is_eof {
for error in $parser.input.take_errors() {
let (span, kind) = *error.into_inner();

$parser.errors.push(Error::new(span, kind));
}
}

is_eof
}};

($parser:expr, $tt:tt) => {{
Expand Down
88 changes: 15 additions & 73 deletions crates/swc_css_parser/tests/fixture.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ use std::path::PathBuf;
use swc_common::{errors::Handler, input::SourceFileInput, Span, Spanned};
use swc_css_ast::*;
use swc_css_parser::{
error::ErrorKind,
lexer::Lexer,
parse_tokens,
parser::{input::ParserInput, Parser, ParserConfig},
Expand All @@ -24,12 +23,12 @@ fn tokens_input(input: PathBuf) {

let tokens = {
let mut lexer = Lexer::new(SourceFileInput::from(&*fm), Default::default());

let mut tokens = vec![];

while let Ok(t) = ParserInput::next(&mut lexer) {
tokens.push(t);
while let Some(token_and_span) = lexer.next() {
tokens.push(token_and_span);
}

Tokens {
span: Span::new(fm.start_pos, fm.end_pos, Default::default()),
tokens,
Expand Down Expand Up @@ -78,30 +77,20 @@ fn test_pass(input: PathBuf, config: ParserConfig) {
actual_json.clone().compare_to_file(&ref_json_path).unwrap();

if !config.allow_wrong_line_comments {
let mut errors = vec![];

let mut lexer = Lexer::new(SourceFileInput::from(&*fm), Default::default());
let mut tokens = Tokens {
span: Span::new(fm.start_pos, fm.end_pos, Default::default()),
tokens: vec![],
};

loop {
let res = ParserInput::next(&mut lexer);

match res {
Ok(t) => {
tokens.tokens.push(t);
}

Err(e) => {
if matches!(e.kind(), ErrorKind::Eof) {
break;
}
panic!("failed to lex tokens: {:?}", e)
}
}
while let Some(token_and_span) = lexer.next() {
tokens.tokens.push(token_and_span);
}

let mut errors = vec![];
errors.extend(lexer.take_errors());

let ss_tok: Stylesheet = parse_tokens(
&tokens,
ParserConfig {
Expand Down Expand Up @@ -190,30 +179,20 @@ fn recovery(input: PathBuf) {
actual_json.clone().compare_to_file(&ref_json_path).unwrap();

{
let mut errors = vec![];

let mut lexer = Lexer::new(SourceFileInput::from(&*fm), Default::default());
let mut tokens = Tokens {
span: Span::new(fm.start_pos, fm.end_pos, Default::default()),
tokens: vec![],
};

loop {
let res = ParserInput::next(&mut lexer);

match res {
Ok(t) => {
tokens.tokens.push(t);
}

Err(e) => {
if matches!(e.kind(), ErrorKind::Eof) {
break;
}
panic!("failed to lex tokens: {:?}", e)
}
}
while let Some(token_and_span) = lexer.next() {
tokens.tokens.push(token_and_span);
}

let mut errors = vec![];
errors.extend(lexer.take_errors());

let ss_tok: Stylesheet = parse_tokens(
&tokens,
ParserConfig {
Expand Down Expand Up @@ -541,40 +520,3 @@ fn span(input: PathBuf) {
.compare_to_file(&dir.join("span.rust-debug"))
.unwrap();
}

#[testing::fixture("tests/errors/**/input.css")]
fn fail(input: PathBuf) {
let stderr_path = input.parent().unwrap().join("output.stderr");

let stderr = testing::run_test2(false, |cm, handler| -> Result<(), _> {
let config = ParserConfig {
..Default::default()
};

let fm = cm.load_file(&input).unwrap();
let lexer = Lexer::new(SourceFileInput::from(&*fm), config);
let mut parser = Parser::new(lexer, config);

let stylesheet = parser.parse_all();

match stylesheet {
Ok(..) => {}
Err(err) => {
err.to_diagnostics(&handler).emit();
}
}

for err in parser.take_errors() {
err.to_diagnostics(&handler).emit();
}

if !handler.has_errors() {
panic!("should error")
}

Err(())
})
.unwrap_err();

stderr.compare_to_file(&stderr_path).unwrap();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
// Only comment // test

1 comment on commit bf74d05

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark

Benchmark suite Current: bf74d05 Previous: 42a1d7c Ratio
es/full/minify/libraries/antd 1800978820 ns/iter (± 31212582) 2134152294 ns/iter (± 14404779) 0.84
es/full/minify/libraries/d3 382405866 ns/iter (± 3696986) 522759033 ns/iter (± 15316032) 0.73
es/full/minify/libraries/echarts 2049264502 ns/iter (± 20048399) 2428538393 ns/iter (± 16131581) 0.84
es/full/minify/libraries/jquery 83283638 ns/iter (± 3403270) 110265964 ns/iter (± 2713996) 0.76
es/full/minify/libraries/lodash 125596993 ns/iter (± 657569) 149703217 ns/iter (± 4594558) 0.84
es/full/minify/libraries/moment 52603055 ns/iter (± 217468) 61932187 ns/iter (± 518760) 0.85
es/full/minify/libraries/react 17084607 ns/iter (± 864351) 20769485 ns/iter (± 165332) 0.82
es/full/minify/libraries/terser 373705026 ns/iter (± 6496608) 739060620 ns/iter (± 12493222) 0.51
es/full/minify/libraries/three 497739722 ns/iter (± 16184291) 682932209 ns/iter (± 18312032) 0.73
es/full/minify/libraries/typescript 3887916109 ns/iter (± 21947667) 4750763891 ns/iter (± 39273284) 0.82
es/full/minify/libraries/victory 651114134 ns/iter (± 14790348) 914040266 ns/iter (± 7712927) 0.71
es/full/minify/libraries/vue 136828233 ns/iter (± 856331) 160320376 ns/iter (± 1973577) 0.85
es/full/codegen/es3 34022 ns/iter (± 135) 37310 ns/iter (± 1639) 0.91
es/full/codegen/es5 29960 ns/iter (± 129) 38942 ns/iter (± 2047) 0.77
es/full/codegen/es2015 29962 ns/iter (± 132) 39153 ns/iter (± 2057) 0.77
es/full/codegen/es2016 29973 ns/iter (± 126) 37828 ns/iter (± 1858) 0.79
es/full/codegen/es2017 29940 ns/iter (± 128) 37996 ns/iter (± 1487) 0.79
es/full/codegen/es2018 29933 ns/iter (± 129) 38044 ns/iter (± 1971) 0.79
es/full/codegen/es2019 29961 ns/iter (± 122) 39454 ns/iter (± 1927) 0.76
es/full/codegen/es2020 29936 ns/iter (± 127) 37365 ns/iter (± 2538) 0.80
es/full/all/es3 170684596 ns/iter (± 2689585) 214642660 ns/iter (± 6521519) 0.80
es/full/all/es5 173241642 ns/iter (± 8281382) 203848548 ns/iter (± 6208748) 0.85
es/full/all/es2015 142937140 ns/iter (± 2608520) 163096855 ns/iter (± 3585537) 0.88
es/full/all/es2016 143474648 ns/iter (± 2610776) 160370698 ns/iter (± 5212629) 0.89
es/full/all/es2017 140190784 ns/iter (± 2540786) 161619281 ns/iter (± 5255918) 0.87
es/full/all/es2018 139420474 ns/iter (± 3121559) 156946218 ns/iter (± 6776063) 0.89
es/full/all/es2019 116853482 ns/iter (± 8910200) 149513523 ns/iter (± 4726936) 0.78
es/full/all/es2020 112265549 ns/iter (± 1065330) 152637747 ns/iter (± 4705684) 0.74
es/full/parser 517472 ns/iter (± 51598) 772020 ns/iter (± 44925) 0.67
es/full/base/fixer 23818 ns/iter (± 141) 34272 ns/iter (± 1093) 0.69
es/full/base/resolver_and_hygiene 122821 ns/iter (± 1775) 182671 ns/iter (± 5938) 0.67
serialization of ast node 180 ns/iter (± 0) 208 ns/iter (± 7) 0.87
serialization of serde 181 ns/iter (± 0) 200 ns/iter (± 6) 0.91

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.