Cargo.toml (2 changes: 1 addition & 1 deletion)
@@ -1,7 +1,7 @@
[package]

name = "cssparser"
version = "0.16.1"
version = "0.17.0"
authors = [ "Simon Sapin <simon.sapin@exyr.org>" ]

description = "Rust implementation of CSS Syntax Level 3"
src/parser.rs (20 changes: 12 additions & 8 deletions)
@@ -25,12 +25,12 @@ pub struct SourcePosition {
pub enum BasicParseError<'a> {
/// An unexpected token was encountered.
UnexpectedToken(Token<'a>),
-/// A particular token was expected but not found.
-ExpectedToken(Token<'a>),
/// The end of the input was encountered unexpectedly.
EndOfInput,
/// An `@` rule was encountered that was invalid.
-AtRuleInvalid,
+AtRuleInvalid(CompactCowStr<'a>),
+/// The body of an '@' rule was invalid.
+AtRuleBodyInvalid,
/// A qualified rule was encountered that was invalid.
QualifiedRuleInvalid,
}
@@ -188,6 +188,11 @@ impl<'i: 't, 't> Parser<'i, 't> {
}
}

+/// Return the current line that is being parsed.
Review comment (Member): Newlines are not significant in CSS. Why is the current line relevant? Is this counting on authors not using minification, adding newlines between declarations, and not writing multi-line declarations?

Reply (Member, Author): Gecko's error messages include a sourceLine property which is expected to be the current line being parsed.

+pub fn current_line(&self) -> &'i str {
+self.tokenizer.0.current_source_line()
+}
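
A minimal sketch of how a consumer might use `current_line()` for error reporting, in the spirit of the Gecko sourceLine use case discussed above (the `report` helper and its message format are illustrative, not part of this diff):

```rust
use cssparser::{Parser, ParserInput};

// Illustrative reporter: quote the source line being parsed when an
// error is surfaced, the way Gecko's sourceLine property expects.
fn report<'i, 't>(parser: &Parser<'i, 't>, message: &str) -> String {
    format!("{} (source line: {:?})", message, parser.current_line())
}

fn main() {
    let mut input = ParserInput::new("color: re%d;\nwidth: 10px;");
    let mut parser = Parser::new(&mut input);
    let _ = parser.next(); // advance into the first line
    assert_eq!(parser.current_line(), "color: re%d;");
    println!("{}", report(&parser, "invalid value"));
}
```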

/// Check whether the input is exhausted. That is, if `.next()` would return a token.
///
/// This ignores whitespace and comments.
@@ -357,9 +362,9 @@ impl<'i: 't, 't> Parser<'i, 't> {
#[inline]
pub fn parse_entirely<F, T, E>(&mut self, parse: F) -> Result<T, ParseError<'i, E>>
where F: FnOnce(&mut Parser<'i, 't>) -> Result<T, ParseError<'i, E>> {
-let result = parse(self);
+let result = parse(self)?;
self.expect_exhausted()?;
-result
+Ok(result)
}
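
With the `?`, the closure's error is now propagated before `expect_exhausted` runs, so the first error wins instead of being masked by the exhaustion check (the new `parse_entirely_reports_first_error` test below covers the custom-error case). A sketch of the basic-error case, assuming the 0.17 API shown in this diff:

```rust
use cssparser::{BasicParseError, ParseError, Parser, ParserInput};

// expect_number fails on the leading ident, and that error is returned
// even though the input would also fail the exhaustion check afterwards.
fn parse_num<'i, 't>(parser: &mut Parser<'i, 't>) -> Result<f32, ParseError<'i, ()>> {
    parser.parse_entirely(|p| p.expect_number().map_err(ParseError::Basic))
}

fn main() {
    let mut input = ParserInput::new("px 10");
    let mut parser = Parser::new(&mut input);
    match parse_num(&mut parser) {
        Err(ParseError::Basic(BasicParseError::UnexpectedToken(t))) => {
            println!("first error wins: {:?}", t); // Ident("px")
        }
        other => println!("unexpected: {:?}", other),
    }
}
```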

/// Parse a list of comma-separated values, all with the same syntax.
@@ -482,8 +487,7 @@ impl<'i: 't, 't> Parser<'i, 't> {
match self.next()? {
Token::UnquotedUrl(value) => Ok(value),
Token::Function(ref name) if name.eq_ignore_ascii_case("url") => {
-self.parse_nested_block(|input| input.expect_string()
-.map_err(|e| ParseError::Basic(e)))
+self.parse_nested_block(|input| input.expect_string().map_err(ParseError::Basic))
.map_err(ParseError::<()>::basic)
},
t => Err(BasicParseError::UnexpectedToken(t))
@@ -497,7 +501,7 @@ impl<'i: 't, 't> Parser<'i, 't> {
Token::UnquotedUrl(value) => Ok(value),
Token::QuotedString(value) => Ok(value),
Token::Function(ref name) if name.eq_ignore_ascii_case("url") => {
-self.parse_nested_block(|input| input.expect_string().map_err(|e| ParseError::Basic(e)))
+self.parse_nested_block(|input| input.expect_string().map_err(ParseError::Basic))
.map_err(ParseError::<()>::basic)
},
t => Err(BasicParseError::UnexpectedToken(t))
src/rules_and_declarations.rs (18 changes: 8 additions & 10 deletions)
@@ -116,7 +116,7 @@ pub trait AtRuleParser<'i> {
-> Result<AtRuleType<Self::Prelude, Self::AtRule>, ParseError<'i, Self::Error>> {
let _ = name;
let _ = input;
-Err(ParseError::Basic(BasicParseError::AtRuleInvalid))
+Err(ParseError::Basic(BasicParseError::AtRuleInvalid(name)))
}

/// Parse the content of a `{ /* ... */ }` block for the body of the at-rule.
@@ -131,7 +131,7 @@ pub trait AtRuleParser<'i> {
-> Result<Self::AtRule, ParseError<'i, Self::Error>> {
let _ = prelude;
let _ = input;
-Err(ParseError::Basic(BasicParseError::AtRuleInvalid))
+Err(ParseError::Basic(BasicParseError::AtRuleBodyInvalid))
}
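
A sketch of what the richer errors enable on the consumer side (the `describe` helper is hypothetical; only the variants come from this diff):

```rust
use cssparser::BasicParseError;

// Hypothetical helper: turn the 0.17 parse errors into user-facing messages.
fn describe<'i>(err: &BasicParseError<'i>) -> String {
    match err {
        BasicParseError::AtRuleInvalid(name) => {
            let name: &str = name; // CompactCowStr derefs to str
            format!("unsupported at-rule: @{}", name)
        }
        BasicParseError::AtRuleBodyInvalid => "invalid at-rule body".to_owned(),
        other => format!("{:?}", other),
    }
}
```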

/// An `OptionalBlock` prelude was followed by `;`.
@@ -257,9 +257,9 @@ where P: DeclarationParser<'i, Declaration = I, Error = E> +
Ok(Token::AtKeyword(name)) => {
return Some(parse_at_rule(start_position, name, self.input, &mut self.parser))
}
-Ok(_) => {
+Ok(t) => {
return Some(self.input.parse_until_after(Delimiter::Semicolon,
-|_| Err(ParseError::Basic(BasicParseError::ExpectedToken(Token::Semicolon))))
+|_| Err(ParseError::Basic(BasicParseError::UnexpectedToken(t))))
.map_err(|e| PreciseParseError {
error: e,
span: start_position..self.input.position()
@@ -462,16 +462,14 @@ fn parse_at_rule<'i: 't, 't, P, E>(start_position: SourcePosition, name: Compact
_ => unreachable!()
}
}
-Err(_) => {
+Err(error) => {
let end_position = input.position();
-let error = match input.next() {
-Ok(Token::CurlyBracketBlock) => BasicParseError::UnexpectedToken(Token::CurlyBracketBlock),
-Ok(Token::Semicolon) => BasicParseError::UnexpectedToken(Token::Semicolon),
-Err(e) => e,
+match input.next() {
+Ok(Token::CurlyBracketBlock) | Ok(Token::Semicolon) | Err(_) => {},
+_ => unreachable!()
};
Err(PreciseParseError {
-error: ParseError::Basic(error),
+error: error,
span: start_position..end_position,
})
}
src/serializer.rs (8 changes: 4 additions & 4 deletions)
@@ -129,8 +129,8 @@ impl<'a> ToCss for Token<'a> {
Token::SquareBracketBlock => dest.write_str("[")?,
Token::CurlyBracketBlock => dest.write_str("{")?,

-Token::BadUrl => dest.write_str("url(<bad url>)")?,
-Token::BadString => dest.write_str("\"<bad string>\n")?,
+Token::BadUrl(_) => dest.write_str("url(<bad url>)")?,
+Token::BadString(_) => dest.write_str("\"<bad string>\n")?,
Token::CloseParenthesis => dest.write_str(")")?,
Token::CloseSquareBracket => dest.write_str("]")?,
Token::CloseCurlyBracket => dest.write_str("}")?,
@@ -376,7 +376,7 @@ impl<'a> Token<'a> {
TokenSerializationType(match *self {
Token::Ident(_) => Ident,
Token::AtKeyword(_) | Token::Hash(_) | Token::IDHash(_) => AtKeywordOrHash,
-Token::UnquotedUrl(_) | Token::BadUrl => UrlOrBadUrl,
+Token::UnquotedUrl(_) | Token::BadUrl(_) => UrlOrBadUrl,
Token::Delim('#') => DelimHash,
Token::Delim('@') => DelimAt,
Token::Delim('.') | Token::Delim('+') => DelimDotOrPlus,
@@ -400,7 +400,7 @@ impl<'a> Token<'a> {
Token::ParenthesisBlock => OpenParen,
Token::SquareBracketBlock | Token::CurlyBracketBlock |
Token::CloseParenthesis | Token::CloseSquareBracket | Token::CloseCurlyBracket |
-Token::QuotedString(_) | Token::BadString |
+Token::QuotedString(_) | Token::BadString(_) |
Token::Delim(_) | Token::Colon | Token::Semicolon | Token::Comma | Token::CDO |
Token::IncludeMatch | Token::PrefixMatch | Token::SuffixMatch
=> Other,
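
Serialization is unchanged in substance: the captured slice is dropped and a fixed placeholder is written. A quick sketch of that behavior (using the `From<&str>` conversion that the tests in this diff also rely on):

```rust
use cssparser::{ToCss, Token};

fn main() {
    // The payload is ignored by the serializer; a placeholder is emitted.
    let mut out = String::new();
    Token::BadUrl("not a url".into()).to_css(&mut out).unwrap();
    assert_eq!(out, "url(<bad url>)");
}
```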
src/tests.rs (51 changes: 40 additions & 11 deletions)
@@ -451,26 +451,26 @@ fn serialize_rgba_two_digit_float_if_roundtrips() {
fn line_numbers() {
let mut input = ParserInput::new("foo bar\nbaz\r\n\n\"a\\\r\nb\"");
let mut input = Parser::new(&mut input);
-assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 1 });
+assert_eq!(input.current_source_location(), SourceLocation { line: 0, column: 0 });
assert_eq!(input.next_including_whitespace(), Ok(Token::Ident("foo".into())));
-assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 4 });
+assert_eq!(input.current_source_location(), SourceLocation { line: 0, column: 3 });
assert_eq!(input.next_including_whitespace(), Ok(Token::WhiteSpace(" ")));
-assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 5 });
+assert_eq!(input.current_source_location(), SourceLocation { line: 0, column: 4 });
assert_eq!(input.next_including_whitespace(), Ok(Token::Ident("bar".into())));
-assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 8 });
+assert_eq!(input.current_source_location(), SourceLocation { line: 0, column: 7 });
assert_eq!(input.next_including_whitespace(), Ok(Token::WhiteSpace("\n")));
-assert_eq!(input.current_source_location(), SourceLocation { line: 2, column: 1 });
+assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 0 });
assert_eq!(input.next_including_whitespace(), Ok(Token::Ident("baz".into())));
-assert_eq!(input.current_source_location(), SourceLocation { line: 2, column: 4 });
+assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 3 });
let position = input.position();

assert_eq!(input.next_including_whitespace(), Ok(Token::WhiteSpace("\r\n\n")));
-assert_eq!(input.current_source_location(), SourceLocation { line: 4, column: 1 });
+assert_eq!(input.current_source_location(), SourceLocation { line: 3, column: 0 });

-assert_eq!(input.source_location(position), SourceLocation { line: 2, column: 4 });
+assert_eq!(input.source_location(position), SourceLocation { line: 1, column: 3 });

assert_eq!(input.next_including_whitespace(), Ok(Token::QuotedString("ab".into())));
-assert_eq!(input.current_source_location(), SourceLocation { line: 5, column: 3 });
+assert_eq!(input.current_source_location(), SourceLocation { line: 4, column: 2 });
assert!(input.next_including_whitespace().is_err());
}

@@ -848,8 +848,8 @@ fn one_component_value_to_json(token: Token, input: &mut Parser) -> Json {
v.extend(nested(input));
v
}),
-Token::BadUrl => JArray!["error", "bad-url"],
-Token::BadString => JArray!["error", "bad-string"],
+Token::BadUrl(_) => JArray!["error", "bad-url"],
+Token::BadString(_) => JArray!["error", "bad-string"],
Token::CloseParenthesis => JArray!["error", ")"],
Token::CloseSquareBracket => JArray!["error", "]"],
Token::CloseCurlyBracket => JArray!["error", "}"],
@@ -920,3 +920,32 @@ fn parse_until_before_stops_at_delimiter_or_end_of_input() {
}
}
}

+#[test]
+fn parser_maintains_current_line() {
+let mut input = ParserInput::new("ident ident;\nident ident ident;\nident");
+let mut parser = Parser::new(&mut input);
+assert_eq!(parser.current_line(), "ident ident;");
+assert_eq!(parser.next(), Ok(Token::Ident("ident".into())));
+assert_eq!(parser.next(), Ok(Token::Ident("ident".into())));
+assert_eq!(parser.next(), Ok(Token::Semicolon));
+
+assert_eq!(parser.next(), Ok(Token::Ident("ident".into())));
+assert_eq!(parser.current_line(), "ident ident ident;");
+assert_eq!(parser.next(), Ok(Token::Ident("ident".into())));
+assert_eq!(parser.next(), Ok(Token::Ident("ident".into())));
+assert_eq!(parser.next(), Ok(Token::Semicolon));
+
+assert_eq!(parser.next(), Ok(Token::Ident("ident".into())));
+assert_eq!(parser.current_line(), "ident");
+}
+
+#[test]
+fn parse_entirely_reports_first_error() {
+#[derive(PartialEq, Debug)]
+enum E { Foo }
+let mut input = ParserInput::new("ident");
+let mut parser = Parser::new(&mut input);
+let result: Result<(), _> = parser.parse_entirely(|_| Err(ParseError::Custom(E::Foo)));
+assert_eq!(result, Err(ParseError::Custom(E::Foo)));
+}
src/tokenizer.rs (45 changes: 32 additions & 13 deletions)
@@ -157,12 +157,12 @@ pub enum Token<'a> {
/// A `<bad-url-token>`
///
/// This token always indicates a parse error.
-BadUrl,
+BadUrl(CompactCowStr<'a>),

/// A `<bad-string-token>`
///
/// This token always indicates a parse error.
-BadString,
+BadString(CompactCowStr<'a>),

/// A `<)-token>`
///
@@ -194,7 +194,7 @@ impl<'a> Token<'a> {
pub fn is_parse_error(&self) -> bool {
matches!(
*self,
-BadUrl | BadString | CloseParenthesis | CloseSquareBracket | CloseCurlyBracket
+BadUrl(_) | BadString(_) | CloseParenthesis | CloseSquareBracket | CloseCurlyBracket
)
}
}
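
A sketch of what the new payloads make possible once the tokenizer changes below are in place (the input is contrived for illustration):

```rust
use cssparser::{Parser, ParserInput, Token};

fn main() {
    // An unescaped newline inside a quoted string yields a
    // <bad-string-token>; it now carries the partial string content.
    let mut input = ParserInput::new("\"broken\nrest");
    let mut parser = Parser::new(&mut input);
    match parser.next_including_whitespace() {
        Ok(Token::BadString(raw)) => println!("bad string content: {:?}", raw),
        other => println!("unexpected: {:?}", other),
    }
}
```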
@@ -226,7 +226,7 @@ impl<'a> Tokenizer<'a> {
input: input,
position: 0,
last_known_source_location: Cell::new((SourcePosition(0),
-SourceLocation { line: 1, column: 1 })),
+SourceLocation { line: 0, column: 0 })),
Review comment (Member): SourceLocation's doc-comments need to be updated accordingly.

var_functions: SeenStatus::DontCare,
viewport_percentages: SeenStatus::DontCare,
}
@@ -287,6 +287,17 @@ impl<'a> Tokenizer<'a> {
self.source_location(position)
}

+pub fn current_source_line(&self) -> &'a str {
+let current = self.position;
+let start = self.input[0..current]
+.rfind(|c| matches!(c, '\r' | '\n' | '\x0C'))
+.map_or(0, |start| start + 1);
+let end = self.input[current..]
+.find(|c| matches!(c, '\r' | '\n' | '\x0C'))
+.map_or(self.input.len(), |end| current + end);
+&self.input[start..end]
+}

pub fn source_location(&self, position: SourcePosition) -> SourceLocation {
let target = position.0;
let mut location;
@@ -301,7 +312,7 @@ impl<'a> Tokenizer<'a> {
// So if the requested position is before the last known one,
// start over from the beginning.
position = 0;
-location = SourceLocation { line: 1, column: 1 };
+location = SourceLocation { line: 0, column: 0 };
}
let mut source = &self.input[position..target];
while let Some(newline_position) = source.find(|c| matches!(c, '\n' | '\r' | '\x0C')) {
@@ -310,7 +321,7 @@ impl<'a> Tokenizer<'a> {
source = &source[offset..];
position += offset;
location.line += 1;
-location.column = 1;
+location.column = 0;
}
debug_assert!(position <= target);
location.column += (target - position) as u32;
@@ -386,10 +397,10 @@ pub struct SourcePosition(usize);
/// The line and column number for a given position within the input.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub struct SourceLocation {
-/// The line number, starting at 1 for the first line.
+/// The line number, starting at 0 for the first line.
pub line: u32,

-/// The column number within a line, starting at 1 for the first character of the line.
+/// The column number within a line, starting at 0 for the first character of the line.
pub column: u32,
}
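
With both fields now 0-based, the first character of the input is at line 0, column 0. A small sketch consistent with the updated `line_numbers` test above:

```rust
use cssparser::{Parser, ParserInput, SourceLocation};

fn main() {
    let mut input = ParserInput::new("a\nb");
    let mut parser = Parser::new(&mut input);
    assert_eq!(parser.current_source_location(),
               SourceLocation { line: 0, column: 0 });
    parser.next_including_whitespace().unwrap(); // Ident("a")
    parser.next_including_whitespace().unwrap(); // WhiteSpace("\n")
    assert_eq!(parser.current_source_location(),
               SourceLocation { line: 1, column: 0 });
}
```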

@@ -556,14 +567,14 @@ fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
fn consume_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool) -> Token<'a> {
match consume_quoted_string(tokenizer, single_quote) {
Ok(value) => QuotedString(value),
-Err(()) => BadString
+Err(value) => BadString(value)
}
}


/// Return `Err(())` on syntax error (ie. unescaped newline)
fn consume_quoted_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool)
--> Result<CompactCowStr<'a>, ()> {
+-> Result<CompactCowStr<'a>, CompactCowStr<'a>> {
tokenizer.advance(1); // Skip the initial quote
// start_pos is at code point boundary, after " or '
let start_pos = tokenizer.position();
@@ -596,15 +607,22 @@ fn consume_quoted_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool)
string_bytes = tokenizer.slice_from(start_pos).as_bytes().to_owned();
break
}
-b'\n' | b'\r' | b'\x0C' => { return Err(()) },
+b'\n' | b'\r' | b'\x0C' => {
+return Err(tokenizer.slice_from(start_pos).into())
+},
_ => {}
}
tokenizer.consume_byte();
}

while !tokenizer.is_eof() {
if matches!(tokenizer.next_byte_unchecked(), b'\n' | b'\r' | b'\x0C') {
-return Err(());
+return Err(
+// string_bytes is well-formed UTF-8, see other comments.
+unsafe {
+from_utf8_release_unchecked(string_bytes)
+}.into()
+);
}
let b = tokenizer.consume_byte();
match_byte! { b,
@@ -1013,6 +1031,7 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>,
}

fn consume_bad_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
+let start_pos = tokenizer.position();
// Consume up to the closing )
while !tokenizer.is_eof() {
match_byte! { tokenizer.consume_byte(),
Expand All @@ -1023,7 +1042,7 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>,
_ => {},
}
}
-BadUrl
+BadUrl(tokenizer.slice_from(start_pos).into())
}
}

Expand Down