diff --git a/Cargo.toml b/Cargo.toml index 5459a734..3073ff44 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "cssparser" -version = "0.14.0" +version = "0.15.0" authors = [ "Simon Sapin " ] description = "Rust implementation of CSS Syntax Level 3" diff --git a/src/color.rs b/src/color.rs index d0da405c..095e7a41 100644 --- a/src/color.rs +++ b/src/color.rs @@ -6,7 +6,6 @@ use std::fmt; use std::f32::consts::PI; use super::{Token, Parser, ToCss, ParseError, BasicParseError}; -use tokenizer::NumericValue; #[cfg(feature = "serde")] use serde::{Deserialize, Deserializer, Serialize, Serializer}; @@ -430,11 +429,11 @@ fn parse_color_function<'i, 't>(name: &str, arguments: &mut Parser<'i, 't>) -> R }; let token = try!(arguments.next()); match token { - Token::Number(NumericValue { value: v, .. }) => { + Token::Number { value: v, .. } => { clamp_unit_f32(v) } - Token::Percentage(ref v) => { - clamp_unit_f32(v.unit_value) + Token::Percentage { unit_value: v, .. } => { + clamp_unit_f32(v) } t => { return Err(BasicParseError::UnexpectedToken(t)) @@ -459,10 +458,10 @@ fn parse_rgb_components_rgb<'i, 't>(arguments: &mut Parser<'i, 't>) -> Result<(u // Either integers or percentages, but all the same type. // https://drafts.csswg.org/css-color/#rgb-functions match try!(arguments.next()) { - Token::Number(NumericValue { value: v, .. }) => { + Token::Number { value: v, .. } => { red = clamp_floor_256_f32(v); green = clamp_floor_256_f32(match try!(arguments.next()) { - Token::Number(NumericValue { value: v, .. }) => v, + Token::Number { value: v, .. } => v, Token::Comma => { uses_commas = true; try!(arguments.expect_number()) @@ -474,10 +473,10 @@ fn parse_rgb_components_rgb<'i, 't>(arguments: &mut Parser<'i, 't>) -> Result<(u } blue = clamp_floor_256_f32(try!(arguments.expect_number())); } - Token::Percentage(ref v) => { - red = clamp_unit_f32(v.unit_value); + Token::Percentage { unit_value, .. 
} => { + red = clamp_unit_f32(unit_value); green = clamp_unit_f32(match try!(arguments.next()) { - Token::Percentage(ref v) => v.unit_value, + Token::Percentage { unit_value, .. } => unit_value, Token::Comma => { uses_commas = true; try!(arguments.expect_percentage()) @@ -501,8 +500,8 @@ fn parse_rgb_components_hsl<'i, 't>(arguments: &mut Parser<'i, 't>) -> Result<(u // https://drafts.csswg.org/css-values/#angles let token = try!(arguments.next()); let hue_degrees = match token { - Token::Number(NumericValue { value: v, .. }) => Ok(v), - Token::Dimension(NumericValue { value: v, .. }, ref unit) => { + Token::Number { value: v, .. } => Ok(v), + Token::Dimension { value: v, ref unit, .. } => { match_ignore_ascii_case! { &*unit, "deg" => Ok(v), "grad" => Ok(v * 360. / 400.), @@ -521,7 +520,7 @@ fn parse_rgb_components_hsl<'i, 't>(arguments: &mut Parser<'i, 't>) -> Result<(u // Saturation and lightness are clamped to 0% ... 100% // https://drafts.csswg.org/css-color/#the-hsl-notation let saturation = match try!(arguments.next()) { - Token::Percentage(ref v) => v.unit_value, + Token::Percentage { unit_value, .. } => unit_value, Token::Comma => { uses_commas = true; try!(arguments.expect_percentage()) diff --git a/src/compact_cow_str.rs b/src/compact_cow_str.rs new file mode 100644 index 00000000..c57d24da --- /dev/null +++ b/src/compact_cow_str.rs @@ -0,0 +1,246 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +use std::borrow::{Borrow, Cow}; +use std::cmp; +use std::fmt; +use std::hash; +use std::marker::PhantomData; +use std::mem; +use std::ops::Deref; +use std::slice; +use std::str; + +// All bits set except the highest +const MAX_LEN: usize = !0 >> 1; + +// Only the highest bit +const OWNED_TAG: usize = MAX_LEN + 1; + +/// Like `Cow<'a, str>`, but with smaller `std::mem::size_of`. 
(Two words instead of four.) +pub struct CompactCowStr<'a> { + // `tagged_len` is a tag in its highest bit, and the string length in the rest of the bits. + // + // * If the tag is 1, the memory pointed to by `ptr` is owned + // and the lifetime parameter is irrelevant. + // `ptr` and `len` are the components of a `Box`. + // + // * If the tag is 0, the memory is borrowed. + // `ptr` and `len` are the components of a `&'a str`. + + // FIXME: https://github.com/rust-lang/rust/issues/27730 use NonZero or Shared + ptr: *const u8, + tagged_len: usize, + phantom: PhantomData<&'a str>, +} + +impl<'a> From<&'a str> for CompactCowStr<'a> { + #[inline] + fn from(s: &'a str) -> Self { + let len = s.len(); + assert!(len <= MAX_LEN); + CompactCowStr { + ptr: s.as_ptr(), + tagged_len: len, + phantom: PhantomData, + } + } +} + +impl<'a> From> for CompactCowStr<'a> { + #[inline] + fn from(s: Box) -> Self { + let ptr = s.as_ptr(); + let len = s.len(); + assert!(len <= MAX_LEN); + mem::forget(s); + CompactCowStr { + ptr: ptr, + tagged_len: len | OWNED_TAG, + phantom: PhantomData, + } + } +} + +impl<'a> CompactCowStr<'a> { + /// Whether this string refers to borrowed memory + /// (as opposed to owned, which would be freed when `CompactCowStr` goes out of scope). + #[inline] + pub fn is_borrowed(&self) -> bool { + (self.tagged_len & OWNED_TAG) == 0 + } + + /// The length of this string + #[inline] + pub fn len(&self) -> usize { + self.tagged_len & !OWNED_TAG + } + + // Intentionally private since it is easy to use incorrectly. + #[inline] + fn as_raw_str(&self) -> *const str { + unsafe { + str::from_utf8_unchecked(slice::from_raw_parts(self.ptr, self.len())) + } + } + + /// If this string is borrowed, return a slice with the original lifetime, + /// not borrowing `self`. + /// + /// (`Deref` is implemented unconditionally, but returns a slice with a shorter lifetime.) 
+ #[inline] + pub fn as_str(&self) -> Option<&'a str> { + if self.is_borrowed() { + Some(unsafe { &*self.as_raw_str() }) + } else { + None + } + } + + /// Convert into `String`, re-using the memory allocation if it was already owned. + #[inline] + pub fn into_owned(self) -> String { + unsafe { + let raw = self.as_raw_str(); + let is_borrowed = self.is_borrowed(); + mem::forget(self); + if is_borrowed { + String::from(&*raw) + } else { + Box::from_raw(raw as *mut str).into_string() + } + } + } +} + +impl<'a> Clone for CompactCowStr<'a> { + #[inline] + fn clone(&self) -> Self { + if self.is_borrowed() { + CompactCowStr { ..*self } + } else { + Self::from(String::from(&**self).into_boxed_str()) + } + } +} + +impl<'a> Drop for CompactCowStr<'a> { + #[inline] + fn drop(&mut self) { + if !self.is_borrowed() { + unsafe { + Box::from_raw(self.as_raw_str() as *mut str); + } + } + } +} + +impl<'a> Deref for CompactCowStr<'a> { + type Target = str; + + #[inline] + fn deref(&self) -> &str { + unsafe { + &*self.as_raw_str() + } + } +} + +impl<'a> From> for Cow<'a, str> { + #[inline] + fn from(cow: CompactCowStr<'a>) -> Self { + unsafe { + let raw = cow.as_raw_str(); + let is_borrowed = cow.is_borrowed(); + mem::forget(cow); + if is_borrowed { + Cow::Borrowed(&*raw) + } else { + Cow::Owned(Box::from_raw(raw as *mut str).into_string()) + } + } + } +} + +impl<'a> From for CompactCowStr<'a> { + #[inline] + fn from(s: String) -> Self { + Self::from(s.into_boxed_str()) + } +} + +impl<'a> From> for CompactCowStr<'a> { + #[inline] + fn from(s: Cow<'a, str>) -> Self { + match s { + Cow::Borrowed(s) => Self::from(s), + Cow::Owned(s) => Self::from(s), + } + } +} + +impl<'a> AsRef for CompactCowStr<'a> { + #[inline] + fn as_ref(&self) -> &str { + self + } +} + +impl<'a> Borrow for CompactCowStr<'a> { + #[inline] + fn borrow(&self) -> &str { + self + } +} + +impl<'a> Default for CompactCowStr<'a> { + #[inline] + fn default() -> Self { + Self::from("") + } +} + +impl<'a> hash::Hash for 
CompactCowStr<'a> { + #[inline] + fn hash(&self, hasher: &mut H) { + str::hash(self, hasher) + } +} + +impl<'a, T: AsRef> PartialEq for CompactCowStr<'a> { + #[inline] + fn eq(&self, other: &T) -> bool { + str::eq(self, other.as_ref()) + } +} + +impl<'a, T: AsRef> PartialOrd for CompactCowStr<'a> { + #[inline] + fn partial_cmp(&self, other: &T) -> Option { + str::partial_cmp(self, other.as_ref()) + } +} + +impl<'a> Eq for CompactCowStr<'a> {} + +impl<'a> Ord for CompactCowStr<'a> { + #[inline] + fn cmp(&self, other: &Self) -> cmp::Ordering { + str::cmp(self, other) + } +} + +impl<'a> fmt::Display for CompactCowStr<'a> { + #[inline] + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + str::fmt(self, formatter) + } +} + +impl<'a> fmt::Debug for CompactCowStr<'a> { + #[inline] + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + str::fmt(self, formatter) + } +} diff --git a/src/lib.rs b/src/lib.rs index e7ab4b7d..f6a8a362 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -80,7 +80,7 @@ fn parse_border_spacing(_context: &ParserContext, input: &mut Parser) pub use cssparser_macros::*; -pub use tokenizer::{Token, NumericValue, PercentageValue, SourceLocation}; +pub use tokenizer::{Token, SourceLocation}; pub use rules_and_declarations::{parse_important}; pub use rules_and_declarations::{DeclarationParser, DeclarationListParser, parse_one_declaration}; pub use rules_and_declarations::{RuleListParser, parse_one_rule}; @@ -91,6 +91,7 @@ pub use nth::parse_nth; pub use serializer::{ToCss, CssStringWriter, serialize_identifier, serialize_string, TokenSerializationType}; pub use parser::{Parser, Delimiter, Delimiters, SourcePosition, ParseError, BasicParseError, ParserInput}; pub use unicode_range::UnicodeRange; +pub use compact_cow_str::CompactCowStr; // For macros #[doc(hidden)] pub use macros::_internal__to_lowercase; @@ -116,6 +117,7 @@ mod color; mod nth; mod serializer; mod unicode_range; +mod compact_cow_str; -#[cfg(test)] -mod tests; +#[cfg(test)] 
mod tests; +#[cfg(test)] mod size_of_tests; diff --git a/src/nth.rs b/src/nth.rs index 680ab5aa..70bef563 100644 --- a/src/nth.rs +++ b/src/nth.rs @@ -14,26 +14,17 @@ use super::{Token, Parser, BasicParseError}; pub fn parse_nth<'i, 't>(input: &mut Parser<'i, 't>) -> Result<(i32, i32), BasicParseError<'i>> { let token = try!(input.next()); match token { - Token::Number(ref value) => { - match value.int_value { - Some(v) => Ok((0, v as i32)), - None => Err(()), - } + Token::Number { int_value: Some(b), .. } => { + Ok((0, b)) } - Token::Dimension(value, ref unit) => { - match value.int_value { - Some(v) => { - let a = v as i32; - match_ignore_ascii_case! { - &unit, - "n" => Ok(try!(parse_b(input, a))), - "n-" => Ok(try!(parse_signless_b(input, a, -1))), - _ => { - parse_n_dash_digits(&*unit).map(|val| (a, val)) - } - } + Token::Dimension { int_value: Some(a), ref unit, .. } => { + match_ignore_ascii_case! { + &unit, + "n" => Ok(try!(parse_b(input, a))), + "n-" => Ok(try!(parse_signless_b(input, a, -1))), + _ => { + parse_n_dash_digits(&*unit).map(|val| (a, val)) } - None => Err(()), } } Token::Ident(ref value) => { @@ -72,12 +63,7 @@ fn parse_b<'i, 't>(input: &mut Parser<'i, 't>, a: i32) -> Result<(i32, i32), Bas match token { Ok(Token::Delim('+')) => Ok(try!(parse_signless_b(input, a, 1))), Ok(Token::Delim('-')) => Ok(try!(parse_signless_b(input, a, -1))), - Ok(Token::Number(ref value)) if value.has_sign => { - match value.int_value { - Some(v) => Ok((a, v as i32)), - None => Err(()), - } - } + Ok(Token::Number { has_sign: true, int_value: Some(b), .. 
}) => Ok((a, b)), _ => { input.reset(start_position); Ok((a, 0)) @@ -88,12 +74,7 @@ fn parse_b<'i, 't>(input: &mut Parser<'i, 't>, a: i32) -> Result<(i32, i32), Bas fn parse_signless_b<'i, 't>(input: &mut Parser<'i, 't>, a: i32, b_sign: i32) -> Result<(i32, i32), BasicParseError<'i>> { let token = try!(input.next()); match token { - Token::Number(ref value) if !value.has_sign => { - match value.int_value { - Some(v) => Ok((a, b_sign * v as i32)), - None => Err(()), - } - } + Token::Number { has_sign: false, int_value: Some(b), .. } => Ok((a, b_sign * b)), _ => Err(()) }.map_err(|()| BasicParseError::UnexpectedToken(token)) } diff --git a/src/parser.rs b/src/parser.rs index a92a3d5b..d7bb00ec 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2,11 +2,11 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +use compact_cow_str::CompactCowStr; use std::ops::Range; use std::ascii::AsciiExt; use std::ops::BitOr; -use std::borrow::Cow; -use tokenizer::{self, Token, NumericValue, PercentageValue, Tokenizer, SourceLocation}; +use tokenizer::{self, Token, Tokenizer, SourceLocation}; /// A capture of the internal state of a `Parser` (including the position within the input), @@ -440,7 +440,7 @@ impl<'i: 't, 't> Parser<'i, 't> { /// Parse a and return the unescaped value. #[inline] - pub fn expect_ident(&mut self) -> Result, BasicParseError<'i>> { + pub fn expect_ident(&mut self) -> Result, BasicParseError<'i>> { match try!(self.next()) { Token::Ident(value) => Ok(value), t => Err(BasicParseError::UnexpectedToken(t)) @@ -458,7 +458,7 @@ impl<'i: 't, 't> Parser<'i, 't> { /// Parse a and return the unescaped value. 
#[inline] - pub fn expect_string(&mut self) -> Result, BasicParseError<'i>> { + pub fn expect_string(&mut self) -> Result, BasicParseError<'i>> { match try!(self.next()) { Token::QuotedString(value) => Ok(value), t => Err(BasicParseError::UnexpectedToken(t)) @@ -467,7 +467,7 @@ impl<'i: 't, 't> Parser<'i, 't> { /// Parse either a or a , and return the unescaped value. #[inline] - pub fn expect_ident_or_string(&mut self) -> Result, BasicParseError<'i>> { + pub fn expect_ident_or_string(&mut self) -> Result, BasicParseError<'i>> { match try!(self.next()) { Token::Ident(value) => Ok(value), Token::QuotedString(value) => Ok(value), @@ -477,7 +477,7 @@ impl<'i: 't, 't> Parser<'i, 't> { /// Parse a and return the unescaped value. #[inline] - pub fn expect_url(&mut self) -> Result, BasicParseError<'i>> { + pub fn expect_url(&mut self) -> Result, BasicParseError<'i>> { match try!(self.next()) { Token::UnquotedUrl(value) => Ok(value), Token::Function(ref name) if name.eq_ignore_ascii_case("url") => { @@ -491,7 +491,7 @@ impl<'i: 't, 't> Parser<'i, 't> { /// Parse either a or a , and return the unescaped value. #[inline] - pub fn expect_url_or_string(&mut self) -> Result, BasicParseError<'i>> { + pub fn expect_url_or_string(&mut self) -> Result, BasicParseError<'i>> { match try!(self.next()) { Token::UnquotedUrl(value) => Ok(value), Token::QuotedString(value) => Ok(value), @@ -507,7 +507,7 @@ impl<'i: 't, 't> Parser<'i, 't> { #[inline] pub fn expect_number(&mut self) -> Result> { match try!(self.next()) { - Token::Number(NumericValue { value, .. }) => Ok(value), + Token::Number { value, .. } => Ok(value), t => Err(BasicParseError::UnexpectedToken(t)) } } @@ -517,7 +517,7 @@ impl<'i: 't, 't> Parser<'i, 't> { pub fn expect_integer(&mut self) -> Result> { let token = try!(self.next()); match token { - Token::Number(NumericValue { int_value: Some(int_value), .. }) => { + Token::Number { int_value: Some(int_value), .. 
} => { Ok(int_value) } t => Err(BasicParseError::UnexpectedToken(t)) @@ -529,7 +529,7 @@ impl<'i: 't, 't> Parser<'i, 't> { #[inline] pub fn expect_percentage(&mut self) -> Result> { match try!(self.next()) { - Token::Percentage(PercentageValue { unit_value, .. }) => Ok(unit_value), + Token::Percentage { unit_value, .. } => Ok(unit_value), t => Err(BasicParseError::UnexpectedToken(t)) } } @@ -607,7 +607,7 @@ impl<'i: 't, 't> Parser<'i, 't> { /// /// If the result is `Ok`, you can then call the `Parser::parse_nested_block` method. #[inline] - pub fn expect_function(&mut self) -> Result, BasicParseError<'i>> { + pub fn expect_function(&mut self) -> Result, BasicParseError<'i>> { match try!(self.next()) { Token::Function(name) => Ok(name), t => Err(BasicParseError::UnexpectedToken(t)) diff --git a/src/rules_and_declarations.rs b/src/rules_and_declarations.rs index 6d8470de..04caf526 100644 --- a/src/rules_and_declarations.rs +++ b/src/rules_and_declarations.rs @@ -4,10 +4,10 @@ // https://drafts.csswg.org/css-syntax/#parsing +use compact_cow_str::CompactCowStr; use parser::{parse_until_before, parse_until_after, parse_nested_block}; use std::ascii::AsciiExt; use std::ops::Range; -use std::borrow::Cow; use super::{Token, Parser, Delimiter, SourcePosition, ParseError, BasicParseError}; @@ -72,7 +72,7 @@ pub trait DeclarationParser<'i> { /// If `!important` can be used in a given context, /// `input.try(parse_important).is_ok()` should be used at the end /// of the implementation of this method and the result should be part of the return value. - fn parse_value<'t>(&mut self, name: Cow<'i, str>, input: &mut Parser<'i, 't>) + fn parse_value<'t>(&mut self, name: CompactCowStr<'i>, input: &mut Parser<'i, 't>) -> Result>; } @@ -112,7 +112,7 @@ pub trait AtRuleParser<'i> { /// The given `input` is a "delimited" parser /// that ends wherever the prelude should end. /// (Before the next semicolon, the next `{`, or the end of the current block.) 
- fn parse_prelude<'t>(&mut self, name: Cow<'i, str>, input: &mut Parser<'i, 't>) + fn parse_prelude<'t>(&mut self, name: CompactCowStr<'i>, input: &mut Parser<'i, 't>) -> Result, ParseError<'i, Self::Error>> { let _ = name; let _ = input; @@ -407,7 +407,7 @@ pub struct PreciseParseError<'i, E: 'i> { pub span: Range, } -fn parse_at_rule<'i: 't, 't, P, E>(start_position: SourcePosition, name: Cow<'i, str>, +fn parse_at_rule<'i: 't, 't, P, E>(start_position: SourcePosition, name: CompactCowStr<'i>, input: &mut Parser<'i, 't>, parser: &mut P) -> Result<

>::AtRule, PreciseParseError<'i, E>> where P: AtRuleParser<'i, Error = E> { diff --git a/src/serializer.rs b/src/serializer.rs index 9f7a4aa5..7b7b166f 100644 --- a/src/serializer.rs +++ b/src/serializer.rs @@ -5,7 +5,7 @@ use std::ascii::AsciiExt; use std::fmt::{self, Write}; -use super::{Token, NumericValue, PercentageValue}; +use super::Token; /// Trait for things the can serialize themselves in CSS syntax. @@ -43,20 +43,21 @@ pub trait ToCss { #[inline] -fn write_numeric(value: NumericValue, dest: &mut W) -> fmt::Result where W: fmt::Write { +fn write_numeric(value: f32, int_value: Option, has_sign: bool, dest: &mut W) + -> fmt::Result where W: fmt::Write { // `value.value >= 0` is true for negative 0. - if value.has_sign && value.value.is_sign_positive() { + if has_sign && value.is_sign_positive() { try!(dest.write_str("+")); } - if value.value == 0.0 && value.value.is_sign_negative() { + if value == 0.0 && value.is_sign_negative() { // Negative zero. Work around #20596. try!(dest.write_str("-0")) } else { - try!(write!(dest, "{}", value.value)) + try!(write!(dest, "{}", value)) } - if value.int_value.is_none() && value.value.fract() == 0. { + if int_value.is_none() && value.fract() == 0. 
{ try!(dest.write_str(".0")); } Ok(()) @@ -87,18 +88,15 @@ impl<'a> ToCss for Token<'a> { }, Token::Delim(value) => try!(write!(dest, "{}", value)), - Token::Number(value) => try!(write_numeric(value, dest)), - Token::Percentage(PercentageValue { unit_value, int_value, has_sign }) => { - let value = NumericValue { - value: unit_value * 100., - int_value: int_value, - has_sign: has_sign, - }; - try!(write_numeric(value, dest)); + Token::Number { value, int_value, has_sign } => { + try!(write_numeric(value, int_value, has_sign, dest)) + } + Token::Percentage { unit_value, int_value, has_sign } => { + try!(write_numeric(unit_value * 100., int_value, has_sign, dest)); try!(dest.write_str("%")); }, - Token::Dimension(value, ref unit) => { - try!(write_numeric(value, dest)); + Token::Dimension { value, int_value, has_sign, ref unit } => { + try!(write_numeric(value, int_value, has_sign, dest)); // Disambiguate with scientific notation. let unit = &**unit; if unit == "e" || unit == "E" || unit.starts_with("e-") || unit.starts_with("E-") { @@ -389,9 +387,9 @@ impl<'a> Token<'a> { Token::Delim('|') => DelimBar, Token::Delim('/') => DelimSlash, Token::Delim('*') => DelimAsterisk, - Token::Number(_) => Number, - Token::Percentage(_) => Percentage, - Token::Dimension(..) => Dimension, + Token::Number { .. } => Number, + Token::Percentage { .. } => Percentage, + Token::Dimension { .. } => Dimension, Token::WhiteSpace(_) => WhiteSpace, Token::Comment(_) => DelimSlash, Token::DashMatch => DashMatch, diff --git a/src/size_of_tests.rs b/src/size_of_tests.rs new file mode 100644 index 00000000..ba4fbf62 --- /dev/null +++ b/src/size_of_tests.rs @@ -0,0 +1,37 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +use compact_cow_str::CompactCowStr; +use std::borrow::Cow; +use tokenizer::Token; + +#[macro_export] +macro_rules! 
size_of_test { + ($testname: ident, $t: ty, $expected_size: expr) => { + #[test] + fn $testname() { + let new = ::std::mem::size_of::<$t>(); + let old = $expected_size; + if new < old { + panic!( + "Your changes have decreased the stack size of {} from {} to {}. \ + Good work! Please update the expected size in {}.", + stringify!($t), old, new, file!() + ) + } else if new > old { + panic!( + "Your changes have increased the stack size of {} from {} to {}. \ + Please consider choosing a design which avoids this increase. \ + If you feel that the increase is necessary, update the size in {}.", + stringify!($t), old, new, file!() + ) + } + } + } +} + +// These assume 64-bit +size_of_test!(token, Token, 32); +size_of_test!(std_cow_str, Cow<'static, str>, 32); +size_of_test!(compact_cow_str, CompactCowStr, 16); diff --git a/src/tests.rs b/src/tests.rs index 90d321b8..d601aa9c 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -6,18 +6,17 @@ extern crate test; use encoding_rs; -use std::borrow::Cow::{self, Borrowed}; use rustc_serialize::json::{self, Json, ToJson}; #[cfg(feature = "bench")] use self::test::Bencher; -use super::{Parser, Delimiter, Token, NumericValue, PercentageValue, SourceLocation, ParseError, +use super::{Parser, Delimiter, Token, SourceLocation, ParseError, DeclarationListParser, DeclarationParser, RuleListParser, BasicParseError, AtRuleType, AtRuleParser, QualifiedRuleParser, ParserInput, parse_one_declaration, parse_one_rule, parse_important, stylesheet_encoding, EncodingSupport, - TokenSerializationType, + TokenSerializationType, CompactCowStr, Color, RGBA, parse_nth, UnicodeRange, ToCss}; macro_rules! 
JArray { @@ -290,12 +289,12 @@ fn unquoted_url_escaping() { )\ "); let mut input = ParserInput::new(&serialized); - assert_eq!(Parser::new(&mut input).next(), Ok(token)) + assert_eq!(Parser::new(&mut input).next(), Ok(token)); } #[test] fn test_expect_url() { - fn parse<'a>(s: &mut ParserInput<'a>) -> Result, BasicParseError<'a>> { + fn parse<'a>(s: &mut ParserInput<'a>) -> Result, BasicParseError<'a>> { Parser::new(s).expect_url() } let mut input = ParserInput::new("url()"); @@ -453,15 +452,15 @@ fn line_numbers() { let mut input = ParserInput::new("foo bar\nbaz\r\n\n\"a\\\r\nb\""); let mut input = Parser::new(&mut input); assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 1 }); - assert_eq!(input.next_including_whitespace(), Ok(Token::Ident(Borrowed("foo")))); + assert_eq!(input.next_including_whitespace(), Ok(Token::Ident("foo".into()))); assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 4 }); assert_eq!(input.next_including_whitespace(), Ok(Token::WhiteSpace(" "))); assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 5 }); - assert_eq!(input.next_including_whitespace(), Ok(Token::Ident(Borrowed("bar")))); + assert_eq!(input.next_including_whitespace(), Ok(Token::Ident("bar".into()))); assert_eq!(input.current_source_location(), SourceLocation { line: 1, column: 8 }); assert_eq!(input.next_including_whitespace(), Ok(Token::WhiteSpace("\n"))); assert_eq!(input.current_source_location(), SourceLocation { line: 2, column: 1 }); - assert_eq!(input.next_including_whitespace(), Ok(Token::Ident(Borrowed("baz")))); + assert_eq!(input.next_including_whitespace(), Ok(Token::Ident("baz".into()))); assert_eq!(input.current_source_location(), SourceLocation { line: 2, column: 4 }); let position = input.position(); @@ -470,7 +469,7 @@ fn line_numbers() { assert_eq!(input.source_location(position), SourceLocation { line: 2, column: 4 }); - assert_eq!(input.next_including_whitespace(), 
Ok(Token::QuotedString(Borrowed("ab")))); + assert_eq!(input.next_including_whitespace(), Ok(Token::QuotedString("ab".into()))); assert_eq!(input.current_source_location(), SourceLocation { line: 5, column: 3 }); assert!(input.next_including_whitespace().is_err()); } @@ -679,7 +678,7 @@ impl<'i> DeclarationParser<'i> for JsonParser { type Declaration = Json; type Error = (); - fn parse_value<'t>(&mut self, name: Cow<'i, str>, input: &mut Parser<'i, 't>) + fn parse_value<'t>(&mut self, name: CompactCowStr<'i>, input: &mut Parser<'i, 't>) -> Result> { let mut value = vec![]; let mut important = false; @@ -720,7 +719,7 @@ impl<'i> AtRuleParser<'i> for JsonParser { type AtRule = Json; type Error = (); - fn parse_prelude<'t>(&mut self, name: Cow<'i, str>, input: &mut Parser<'i, 't>) + fn parse_prelude<'t>(&mut self, name: CompactCowStr<'i>, input: &mut Parser<'i, 't>) -> Result, Json>, ParseError<'i, ()>> { Ok(AtRuleType::OptionalBlock(vec![ "at-rule".to_json(), @@ -769,11 +768,15 @@ fn component_values_to_json(input: &mut Parser) -> Vec { } fn one_component_value_to_json(token: Token, input: &mut Parser) -> Json { - fn numeric(value: NumericValue) -> Vec { + fn numeric(value: f32, int_value: Option, has_sign: bool) -> Vec { vec![ - Token::Number(value).to_css_string().to_json(), - match value.int_value { Some(i) => i.to_json(), None => value.value.to_json() }, - match value.int_value { Some(_) => "integer", None => "number" }.to_json() + Token::Number { + value: value, + int_value: int_value, + has_sign: has_sign, + }.to_css_string().to_json(), + match int_value { Some(i) => i.to_json(), None => value.to_json() }, + match int_value { Some(_) => "integer", None => "number" }.to_json() ] } @@ -794,23 +797,19 @@ fn one_component_value_to_json(token: Token, input: &mut Parser) -> Json { Token::Delim('\\') => "\\".to_json(), Token::Delim(value) => value.to_string().to_json(), - Token::Number(value) => Json::Array({ + Token::Number { value, int_value, has_sign } => 
Json::Array({ let mut v = vec!["number".to_json()]; - v.extend(numeric(value)); + v.extend(numeric(value, int_value, has_sign)); v }), - Token::Percentage(PercentageValue { unit_value, int_value, has_sign }) => Json::Array({ + Token::Percentage { unit_value, int_value, has_sign } => Json::Array({ let mut v = vec!["percentage".to_json()]; - v.extend(numeric(NumericValue { - value: unit_value * 100., - int_value: int_value, - has_sign: has_sign, - })); + v.extend(numeric(unit_value * 100., int_value, has_sign)); v }), - Token::Dimension(value, unit) => Json::Array({ + Token::Dimension { value, int_value, has_sign, unit } => Json::Array({ let mut v = vec!["dimension".to_json()]; - v.extend(numeric(value)); + v.extend(numeric(value, int_value, has_sign)); v.push(unit.to_json()); v }), diff --git a/src/tokenizer.rs b/src/tokenizer.rs index c0e50df3..11559b18 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -8,59 +8,93 @@ use std::ops::Range; use std::cell::Cell; use std::char; use std::ascii::AsciiExt; -use std::borrow::{Cow, ToOwned}; -use std::borrow::Cow::{Owned, Borrowed}; use std::i32; +use compact_cow_str::CompactCowStr; use self::Token::*; /// One of the pieces the CSS input is broken into. /// -/// Some components use `CowString` in order to borrow from the original input string +/// Some components use `Cow` in order to borrow from the original input string /// and avoid allocating/copying when possible. #[derive(PartialEq, Debug, Clone)] pub enum Token<'a> { /// A [``](https://drafts.csswg.org/css-syntax/#ident-token-diagram) - Ident(Cow<'a, str>), + Ident(CompactCowStr<'a>), /// A [``](https://drafts.csswg.org/css-syntax/#at-keyword-token-diagram) /// /// The value does not include the `@` marker. - AtKeyword(Cow<'a, str>), + AtKeyword(CompactCowStr<'a>), /// A [``](https://drafts.csswg.org/css-syntax/#hash-token-diagram) with the type flag set to "unrestricted" /// /// The value does not include the `#` marker. 
- Hash(Cow<'a, str>), + Hash(CompactCowStr<'a>), /// A [`<hash-token>`](https://drafts.csswg.org/css-syntax/#hash-token-diagram) with the type flag set to "id" /// /// The value does not include the `#` marker. - IDHash(Cow<'a, str>), // Hash that is a valid ID selector. + IDHash(CompactCowStr<'a>), // Hash that is a valid ID selector. /// A [`<string-token>`](https://drafts.csswg.org/css-syntax/#string-token-diagram) /// /// The value does not include the quotes. - QuotedString(Cow<'a, str>), + QuotedString(CompactCowStr<'a>), /// A [`<url-token>`](https://drafts.csswg.org/css-syntax/#url-token-diagram) or `url( <string-token> )` function /// /// The value does not include the `url(` `)` markers or the quotes. - UnquotedUrl(Cow<'a, str>), + UnquotedUrl(CompactCowStr<'a>), /// A `<delim-token>` Delim(char), /// A [`<number-token>`](https://drafts.csswg.org/css-syntax/#number-token-diagram) - Number(NumericValue), + Number { + /// Whether the number had a `+` or `-` sign. + /// + /// This is used in some cases like the micro syntax. (See the `parse_nth` function.) + has_sign: bool, + + /// The value as a float + value: f32, + + /// If the origin source did not include a fractional part, the value as an integer. + int_value: Option, + }, /// A [`<percentage-token>`](https://drafts.csswg.org/css-syntax/#percentage-token-diagram) - Percentage(PercentageValue), + Percentage { + /// Whether the number had a `+` or `-` sign. + has_sign: bool, + + /// The value as a float, divided by 100 so that the nominal range is 0.0 to 1.0. + unit_value: f32, + + /// If the origin source did not include a fractional part, the value as an integer. + /// It is **not** divided by 100. + int_value: Option, + }, /// A [`<dimension-token>`](https://drafts.csswg.org/css-syntax/#dimension-token-diagram) - Dimension(NumericValue, Cow<'a, str>), + Dimension { + /// Whether the number had a `+` or `-` sign. + /// + /// This is used in some cases like the micro syntax. (See the `parse_nth` function.) 
+ has_sign: bool, + + /// The value as a float + value: f32, + + /// If the origin source did not include a fractional part, the value as an integer. + int_value: Option, + + /// The unit, e.g. "px" in `12px` + unit: CompactCowStr<'a> + }, /// A [``](https://drafts.csswg.org/css-syntax/#whitespace-token-diagram) WhiteSpace(&'a str), @@ -109,7 +143,7 @@ pub enum Token<'a> { /// A [``](https://drafts.csswg.org/css-syntax/#function-token-diagram) /// /// The value (name) does not include the `(` marker. - Function(Cow<'a, str>), + Function(CompactCowStr<'a>), /// A `<(-token>` ParenthesisBlock, @@ -166,36 +200,6 @@ impl<'a> Token<'a> { } -/// The numeric value of `Number` and `Dimension` tokens. -#[derive(PartialEq, Debug, Copy, Clone)] -pub struct NumericValue { - /// The value as a float - pub value: f32, - - /// If the origin source did not include a fractional part, the value as an integer. - pub int_value: Option, - - /// Whether the number had a `+` or `-` sign. - /// - /// This is used is some cases like the micro syntax. (See the `parse_nth` function.) - pub has_sign: bool, -} - - -/// The numeric value of `Percentage` tokens. -#[derive(PartialEq, Debug, Copy, Clone)] -pub struct PercentageValue { - /// The value as a float, divided by 100 so that the nominal range is 0.0 to 1.0. - pub unit_value: f32, - - /// If the origin source did not include a fractional part, the value as an integer. It is **not** divided by 100. - pub int_value: Option, - - /// Whether the number had a `+` or `-` sign. - pub has_sign: bool, -} - - #[derive(Clone)] pub struct Tokenizer<'a> { input: &'a str, @@ -559,28 +563,28 @@ fn consume_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool) -> Toke /// Return `Err(())` on syntax error (ie. 
unescaped newline) fn consume_quoted_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool) - -> Result, ()> { + -> Result, ()> { tokenizer.advance(1); // Skip the initial quote // start_pos is at code point boundary, after " or ' let start_pos = tokenizer.position(); let mut string_bytes; loop { if tokenizer.is_eof() { - return Ok(Borrowed(tokenizer.slice_from(start_pos))) + return Ok(tokenizer.slice_from(start_pos).into()) } match_byte! { tokenizer.next_byte_unchecked(), b'"' => { if !single_quote { let value = tokenizer.slice_from(start_pos); tokenizer.advance(1); - return Ok(Borrowed(value)) + return Ok(value.into()) } } b'\'' => { if single_quote { let value = tokenizer.slice_from(start_pos); tokenizer.advance(1); - return Ok(Borrowed(value)) + return Ok(value.into()) } } b'\\' | b'\0' => { @@ -644,10 +648,10 @@ fn consume_quoted_string<'a>(tokenizer: &mut Tokenizer<'a>, single_quote: bool) string_bytes.push(b); } - Ok(Owned( + Ok( // string_bytes is well-formed UTF-8, see other comments. - unsafe { from_utf8_release_unchecked(string_bytes) } - )) + unsafe { from_utf8_release_unchecked(string_bytes) }.into() + ) } @@ -688,13 +692,13 @@ fn consume_ident_like<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> { } } -fn consume_name<'a>(tokenizer: &mut Tokenizer<'a>) -> Cow<'a, str> { +fn consume_name<'a>(tokenizer: &mut Tokenizer<'a>) -> CompactCowStr<'a> { // start_pos is the end of the previous token, therefore at a code point boundary let start_pos = tokenizer.position(); let mut value_bytes; loop { if tokenizer.is_eof() { - return Borrowed(tokenizer.slice_from(start_pos)) + return tokenizer.slice_from(start_pos).into() } match_byte! 
{ tokenizer.next_byte_unchecked(), b'a'...b'z' | b'A'...b'Z' | b'0'...b'9' | b'_' | b'-' => { tokenizer.advance(1) }, @@ -709,7 +713,7 @@ fn consume_name<'a>(tokenizer: &mut Tokenizer<'a>) -> Cow<'a, str> { } b => { if b.is_ascii() { - return Borrowed(tokenizer.slice_from(start_pos)); + return tokenizer.slice_from(start_pos).into(); } tokenizer.advance(1); } @@ -744,10 +748,8 @@ fn consume_name<'a>(tokenizer: &mut Tokenizer<'a>) -> Cow<'a, str> { } } } - Owned( - // string_bytes is well-formed UTF-8, see other comments. - unsafe { from_utf8_release_unchecked(value_bytes) } - ) + // string_bytes is well-formed UTF-8, see other comments. + unsafe { from_utf8_release_unchecked(value_bytes) }.into() } fn byte_to_hex_digit(b: u8) -> Option { @@ -858,30 +860,35 @@ fn consume_numeric<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> { if !tokenizer.is_eof() && tokenizer.next_byte_unchecked() == b'%' { tokenizer.advance(1); - return Percentage(PercentageValue { + return Percentage { unit_value: (value / 100.) 
as f32, int_value: int_value, has_sign: has_sign, - }) + } } - let value = NumericValue { - value: value as f32, - int_value: int_value, - has_sign: has_sign, - }; + let value = value as f32; if is_ident_start(tokenizer) { - let name = consume_name(tokenizer); + let unit = consume_name(tokenizer); if tokenizer.viewport_percentages == SeenStatus::LookingForThem { - if name.eq_ignore_ascii_case("vh") || - name.eq_ignore_ascii_case("vw") || - name.eq_ignore_ascii_case("vmin") || - name.eq_ignore_ascii_case("vmax") { + if unit.eq_ignore_ascii_case("vh") || + unit.eq_ignore_ascii_case("vw") || + unit.eq_ignore_ascii_case("vmin") || + unit.eq_ignore_ascii_case("vmax") { tokenizer.viewport_percentages = SeenStatus::SeenAtLeastOne; } } - Dimension(value, name) + Dimension { + value: value, + int_value: int_value, + has_sign: has_sign, + unit: unit, + } } else { - Number(value) + Number { + value: value, + int_value: int_value, + has_sign: has_sign, + } } } @@ -903,7 +910,7 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result, b'"' | b'\'' => { return Err(()) }, // Do not advance b')' => { tokenizer.advance(offset + 1); - return Ok(UnquotedUrl(Borrowed(""))); + return Ok(UnquotedUrl("".into())); } _ => { tokenizer.advance(offset); @@ -914,7 +921,7 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result, } } tokenizer.position = tokenizer.input.len(); - return Ok(UnquotedUrl(Borrowed(""))); + return Ok(UnquotedUrl("".into())); fn consume_unquoted_url_internal<'a>(tokenizer: &mut Tokenizer<'a>) -> Token<'a> { // This function is only called with start_pos at a code point boundary. @@ -922,18 +929,18 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result, let mut string_bytes: Vec; loop { if tokenizer.is_eof() { - return UnquotedUrl(Borrowed(tokenizer.slice_from(start_pos))) + return UnquotedUrl(tokenizer.slice_from(start_pos).into()) } match_byte! 
{ tokenizer.next_byte_unchecked(), b' ' | b'\t' | b'\n' | b'\r' | b'\x0C' => { let value = tokenizer.slice_from(start_pos); tokenizer.advance(1); - return consume_url_end(tokenizer, Borrowed(value)) + return consume_url_end(tokenizer, value.into()) } b')' => { let value = tokenizer.slice_from(start_pos); tokenizer.advance(1); - return UnquotedUrl(Borrowed(value)) + return UnquotedUrl(value.into()) } b'\x01'...b'\x08' | b'\x0B' | b'\x0E'...b'\x1F' | b'\x7F' // non-printable | b'"' | b'\'' | b'(' => { @@ -957,10 +964,11 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result, while !tokenizer.is_eof() { match_byte! { tokenizer.consume_byte(), b' ' | b'\t' | b'\n' | b'\r' | b'\x0C' => { - return consume_url_end(tokenizer, Owned( + return consume_url_end( + tokenizer, // string_bytes is well-formed UTF-8, see other comments. - unsafe { from_utf8_release_unchecked(string_bytes) } - )) + unsafe { from_utf8_release_unchecked(string_bytes) }.into() + ) } b')' => { break; @@ -985,13 +993,13 @@ fn consume_unquoted_url<'a>(tokenizer: &mut Tokenizer<'a>) -> Result, b => { string_bytes.push(b) } } } - UnquotedUrl(Owned( + UnquotedUrl( // string_bytes is well-formed UTF-8, see other comments. - unsafe { from_utf8_release_unchecked(string_bytes) } - )) + unsafe { from_utf8_release_unchecked(string_bytes) }.into() + ) } - fn consume_url_end<'a>(tokenizer: &mut Tokenizer<'a>, string: Cow<'a, str>) -> Token<'a> { + fn consume_url_end<'a>(tokenizer: &mut Tokenizer<'a>, string: CompactCowStr<'a>) -> Token<'a> { while !tokenizer.is_eof() { match_byte! { tokenizer.consume_byte(), b' ' | b'\t' | b'\n' | b'\r' | b'\x0C' => {}, diff --git a/src/unicode_range.rs b/src/unicode_range.rs index 64030358..3fb54a84 100644 --- a/src/unicode_range.rs +++ b/src/unicode_range.rs @@ -64,15 +64,15 @@ fn parse_tokens<'i, 't>(input: &mut Parser<'i, 't>) -> Result<(), BasicParseErro } parse_question_marks(input) } - Token::Dimension(..) => { + Token::Dimension { .. 
} => { parse_question_marks(input) } - Token::Number(_) => { + Token::Number { .. } => { let after_number = input.position(); match input.next_including_whitespace() { Ok(Token::Delim('?')) => parse_question_marks(input), - Ok(Token::Dimension(..)) => {} - Ok(Token::Number(_)) => {} + Ok(Token::Dimension { .. }) => {} + Ok(Token::Number { .. }) => {} _ => input.reset(after_number) } }