Parser changes for Gecko integration #168
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged

9 commits:
5bab829 Add API to retrieve contents of current line being parsed. (jdm)
f71a5a2 Differentiate between default errors for at-rule parsing. (jdm)
0c8a8a9 parse_entirely should not prioritize EndOfInput errors. (jdm)
b0c8cdf Style cleanup for existing parser code. (jdm)
0d93af0 Report actual unexpected token in declarations. (jdm)
c4c8f78 Propagate error from parsing at rules. (jdm)
5a2d392 Store bad string and url values. (jdm)
a8e2252 Make column and line numbers match Gecko's CSS parser. (jdm)
c6156c0 Increase package version. (jdm)
```diff
@@ -157,12 +157,12 @@ pub enum Token<'a> {
     /// A `<bad-url-token>`
     ///
     /// This token always indicates a parse error.
-    BadUrl,
+    BadUrl(CompactCowStr<'a>),

     /// A `<bad-string-token>`
     ///
     /// This token always indicates a parse error.
-    BadString,
+    BadString(CompactCowStr<'a>),

     /// A `<)-token>`
     ///
@@ -194,7 +194,7 @@ impl<'a> Token<'a> {
     pub fn is_parse_error(&self) -> bool {
         matches!(
             *self,
-            BadUrl | BadString | CloseParenthesis | CloseSquareBracket | CloseCurlyBracket
+            BadUrl(_) | BadString(_) | CloseParenthesis | CloseSquareBracket | CloseCurlyBracket
         )
     }
 }
```
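Since `BadUrl` and `BadString` now carry the raw text that failed to tokenize, callers can surface that text in diagnostics. Below is a minimal sketch of what a consumer might do with the new payload-carrying variants; the `describe_token` helper is invented for illustration and is not part of this PR:

```rust
use cssparser::Token;

// Hypothetical helper: turn a parse-error token into a human-readable message
// using the raw text now stored inside BadString/BadUrl.
fn describe_token(token: &Token<'_>) -> Option<String> {
    match *token {
        Token::BadString(ref raw) => Some(format!("unterminated string: {:?}", &**raw)),
        Token::BadUrl(ref raw) => Some(format!("invalid url(): {:?}", &**raw)),
        _ => None,
    }
}
```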
```diff
@@ -226,7 +226,7 @@ impl<'a> Tokenizer<'a> {
             input: input,
             position: 0,
             last_known_source_location: Cell::new((SourcePosition(0),
-                                                   SourceLocation { line: 1, column: 1 })),
+                                                   SourceLocation { line: 0, column: 0 })),
             var_functions: SeenStatus::DontCare,
             viewport_percentages: SeenStatus::DontCare,
         }
```
```diff
@@ -287,6 +287,17 @@ impl<'a> Tokenizer<'a> {
         self.source_location(position)
     }

+    pub fn current_source_line(&self) -> &'a str {
+        let current = self.position;
+        let start = self.input[0..current]
+            .rfind(|c| matches!(c, '\r' | '\n' | '\x0C'))
+            .map_or(0, |start| start + 1);
+        let end = self.input[current..]
+            .find(|c| matches!(c, '\r' | '\n' | '\x0C'))
+            .map_or(self.input.len(), |end| current + end);
+        &self.input[start..end]
+    }
+
     pub fn source_location(&self, position: SourcePosition) -> SourceLocation {
         let target = position.0;
         let mut location;
```
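To make the behaviour of the new `current_source_line` concrete, here is a standalone sketch of the same line-extraction logic written against a plain `&str`, so it can be tried outside the crate (the free function `current_line` is mine, not part of the PR):

```rust
// Returns the full text of the line containing `position`, where lines are
// delimited by '\r', '\n', or '\x0C', mirroring the method added above.
fn current_line(input: &str, position: usize) -> &str {
    let is_newline = |c: char| matches!(c, '\r' | '\n' | '\x0C');
    let start = input[..position].rfind(is_newline).map_or(0, |i| i + 1);
    let end = input[position..]
        .find(is_newline)
        .map_or(input.len(), |i| position + i);
    &input[start..end]
}

fn main() {
    let css = "p { color: red }\n.foo { width: 10px }";
    // Position 20 falls inside the second line.
    assert_eq!(current_line(css, 20), ".foo { width: 10px }");
}
```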
```diff
@@ -301,7 +312,7 @@ impl<'a> Tokenizer<'a> {
             // So if the requested position is before the last known one,
             // start over from the beginning.
             position = 0;
-            location = SourceLocation { line: 1, column: 1 };
+            location = SourceLocation { line: 0, column: 0 };
         }
         let mut source = &self.input[position..target];
         while let Some(newline_position) = source.find(|c| matches!(c, '\n' | '\r' | '\x0C')) {
@@ -310,7 +321,7 @@ impl<'a> Tokenizer<'a> {
             source = &source[offset..];
             position += offset;
             location.line += 1;
-            location.column = 1;
+            location.column = 0;
         }
         debug_assert!(position <= target);
         location.column += (target - position) as u32;
```
```diff
@@ -386,10 +397,10 @@ pub struct SourcePosition(usize);
 /// The line and column number for a given position within the input.
 #[derive(PartialEq, Eq, Debug, Clone, Copy)]
 pub struct SourceLocation {
-    /// The line number, starting at 1 for the first line.
+    /// The line number, starting at 0 for the first line.
     pub line: u32,

-    /// The column number within a line, starting at 1 for first the character of the line.
+    /// The column number within a line, starting at 0 for first the character of the line.
     pub column: u32,
 }
```
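`SourceLocation` is now zero-based on both axes, matching Gecko's convention rather than the 1-based numbers most tools display. Below is a small, hypothetical adapter (not part of this PR) that an embedder could use when formatting locations for humans:

```rust
use cssparser::SourceLocation;

// Hypothetical display helper: convert the 0-based location produced by the
// tokenizer into the 1-based form usually shown in editors and error messages.
fn display_location(loc: SourceLocation) -> String {
    format!("line {}, column {}", loc.line + 1, loc.column + 1)
}

fn main() {
    // The first character of the input is now reported as line 0, column 0.
    let start = SourceLocation { line: 0, column: 0 };
    assert_eq!(display_location(start), "line 1, column 1");
}
```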
```diff
@@ -556,14 +567,14 @@ fn next_token<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>, ()> {
 fn consume_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool) -> Token<'a> {
     match consume_quoted_string(tokenizer, single_quote) {
         Ok(value) => QuotedString(value),
-        Err(()) => BadString
+        Err(value) => BadString(value)
     }
 }


 /// Return `Err(())` on syntax error (ie. unescaped newline)
 fn consume_quoted_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool)
-                             -> Result<CompactCowStr<'a>, ()> {
+                             -> Result<CompactCowStr<'a>, CompactCowStr<'a>> {
     tokenizer.advance(1); // Skip the initial quote
     // start_pos is at code point boundary, after " or '
     let start_pos = tokenizer.position();
@@ -596,15 +607,22 @@ fn consume_quoted_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool)
                     string_bytes = tokenizer.slice_from(start_pos).as_bytes().to_owned();
                     break
                 }
-                b'\n' | b'\r' | b'\x0C' => { return Err(()) },
+                b'\n' | b'\r' | b'\x0C' => {
+                    return Err(tokenizer.slice_from(start_pos).into())
+                },
                 _ => {}
             }
             tokenizer.consume_byte();
         }

         while !tokenizer.is_eof() {
             if matches!(tokenizer.next_byte_unchecked(), b'\n' | b'\r' | b'\x0C') {
-                return Err(());
+                return Err(
+                    // string_bytes is well-formed UTF-8, see other comments.
+                    unsafe {
+                        from_utf8_release_unchecked(string_bytes)
+                    }.into()
+                );
             }
             let b = tokenizer.consume_byte();
             match_byte! { b,
```
```diff
@@ -1013,6 +1031,7 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>,
     }

     fn consume_bad_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
+        let start_pos = tokenizer.position();
         // Consume up to the closing )
         while !tokenizer.is_eof() {
             match_byte! { tokenizer.consume_byte(),
@@ -1023,7 +1042,7 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>,
                 _ => {},
             }
         }
-        BadUrl
+        BadUrl(tokenizer.slice_from(start_pos).into())
     }
 }
```
Review discussion:
Newlines are not significant in CSS. Why is the current line relevant? Is this counting on authors not using minification, adding newlines between declarations, and not writing multi-line declarations?
Gecko's error messages include a `sourceLine` property, which is expected to be the current line being parsed.
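Below is a rough sketch of how an embedder might populate such a `sourceLine` field from the new API. The `ParseErrorReport` struct is invented for illustration, and it is assumed that `Parser` exposes `current_source_line()` and `current_source_location()` as thin wrappers over the tokenizer methods in this diff:

```rust
use cssparser::Parser;

// Invented for this sketch: the shape of a Gecko-style error report.
struct ParseErrorReport {
    source_line: String, // full text of the line containing the error
    line: u32,           // 0-based, matching Gecko after this PR
    column: u32,         // 0-based
    message: String,
}

// Assumes Parser re-exposes the tokenizer methods added in this PR.
fn build_report(parser: &Parser, message: &str) -> ParseErrorReport {
    let location = parser.current_source_location();
    ParseErrorReport {
        source_line: parser.current_source_line().to_owned(),
        line: location.line,
        column: location.column,
        message: message.to_owned(),
    }
}
```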