From d9435a25c4563750daa2c7242d8c96f14a578502 Mon Sep 17 00:00:00 2001
From: Simon Sapin
Date: Tue, 25 Jul 2017 15:24:17 +0200
Subject: [PATCH] Fix another panic in bad-url token parsing

https://bugzilla.mozilla.org/show_bug.cgi?id=1383975
---
 Cargo.toml       |  2 +-
 src/tests.rs     | 12 +++++++++++-
 src/tokenizer.rs | 26 +++++++++++++-------------
 3 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index c3fed7a8..32aebe42 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,7 +1,7 @@
 [package]

 name = "cssparser"
-version = "0.18.1"
+version = "0.18.2"
 authors = [ "Simon Sapin " ]

 description = "Rust implementation of CSS Syntax Level 3"
diff --git a/src/tests.rs b/src/tests.rs
index 55107636..4f10fcf1 100644
--- a/src/tests.rs
+++ b/src/tests.rs
@@ -276,7 +276,17 @@ fn outer_block_end_consumed() {
 fn bad_url_slice_out_of_bounds() {
     let mut input = ParserInput::new("url(\u{1}\\");
     let mut parser = Parser::new(&mut input);
-    let _ = parser.next_including_whitespace_and_comments(); // This used to panic
+    let result = parser.next_including_whitespace_and_comments(); // This used to panic
+    assert_eq!(result, Ok(&Token::BadUrl("\u{1}\\".into())));
+}
+
+/// https://bugzilla.mozilla.org/show_bug.cgi?id=1383975
+#[test]
+fn bad_url_slice_not_at_char_boundary() {
+    let mut input = ParserInput::new("url(9\n۰");
+    let mut parser = Parser::new(&mut input);
+    let result = parser.next_including_whitespace_and_comments(); // This used to panic
+    assert_eq!(result, Ok(&Token::BadUrl("9\n۰".into())));
 }

 #[test]
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 7504a49f..2fdfc024 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -964,7 +964,7 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>,
                 b' ' | b'\t' | b'\n' | b'\r' | b'\x0C' => {
                     let value = tokenizer.slice_from(start_pos);
                     tokenizer.advance(1);
-                    return consume_url_end(tokenizer, value.into())
+                    return consume_url_end(tokenizer, start_pos, value.into())
                 }
                 b')' => {
                     let value = tokenizer.slice_from(start_pos);
@@ -974,7 +974,7 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>,
                 b'\x01'...b'\x08' | b'\x0B' | b'\x0E'...b'\x1F' | b'\x7F' // non-printable
                 | b'"' | b'\'' | b'(' => {
                     tokenizer.advance(1);
-                    return consume_bad_url(tokenizer)
+                    return consume_bad_url(tokenizer, start_pos)
                 },
                 b'\\' | b'\0' => {
                     // * The tokenizer’s input is UTF-8 since it’s `&str`.
@@ -993,22 +993,20 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>,
         while !tokenizer.is_eof() {
             match_byte! { tokenizer.consume_byte(),
                 b' ' | b'\t' | b'\n' | b'\r' | b'\x0C' => {
-                    return consume_url_end(
-                        tokenizer,
-                        // string_bytes is well-formed UTF-8, see other comments.
-                        unsafe { from_utf8_release_unchecked(string_bytes) }.into()
-                    )
+                    // string_bytes is well-formed UTF-8, see other comments.
+                    let string = unsafe { from_utf8_release_unchecked(string_bytes) }.into();
+                    return consume_url_end(tokenizer, start_pos, string)
                 }
                 b')' => {
                     break;
                 }
                 b'\x01'...b'\x08' | b'\x0B' | b'\x0E'...b'\x1F' | b'\x7F' // non-printable
                 | b'"' | b'\'' | b'(' => {
-                    return consume_bad_url(tokenizer);
+                    return consume_bad_url(tokenizer, start_pos);
                 }
                 b'\\' => {
                     if tokenizer.has_newline_at(0) {
-                        return consume_bad_url(tokenizer)
+                        return consume_bad_url(tokenizer, start_pos)
                     }

                     // This pushes one well-formed code point to string_bytes
@@ -1028,21 +1026,23 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result<Token<'a>,
         )
     }

-    fn consume_url_end<'a>(tokenizer: &mut Tokenizer<'a>, string: CowRcStr<'a>) -> Token<'a> {
+    fn consume_url_end<'a>(tokenizer: &mut Tokenizer<'a>,
+                           start_pos: SourcePosition,
+                           string: CowRcStr<'a>)
+                           -> Token<'a> {
         while !tokenizer.is_eof() {
             match_byte! { tokenizer.consume_byte(),
                 b' ' | b'\t' | b'\n' | b'\r' | b'\x0C' => {},
                 b')' => { break },
                 _ => {
-                    return consume_bad_url(tokenizer);
+                    return consume_bad_url(tokenizer, start_pos);
                 }
             }
         }
         UnquotedUrl(string)
     }

-    fn consume_bad_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> {
-        let start_pos = tokenizer.position();
+    fn consume_bad_url<'a>(tokenizer: &mut Tokenizer<'a>, start_pos: SourcePosition) -> Token<'a> {
         // Consume up to the closing )
         while !tokenizer.is_eof() {
             match_byte! { tokenizer.consume_byte(),