Skip to content

Commit

Permalink
temp
Browse files Browse the repository at this point in the history
  • Loading branch information
dralley committed Jun 14, 2024
1 parent 826f75c commit d3f5fbe
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 13 deletions.
7 changes: 2 additions & 5 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
### Misc Changes

- [#379]: Added tests for attribute value normalization
- [#379]: Fixed error messages for some variants of `EscapeError`
which were incorrectly described as occurring during escaping (rather than unescaping).
- [#650]: Change the type of `Event::PI` to a new dedicated `BytesPI` type.

[#379]: https://github.com/tafia/quick-xml/pull/379
Expand Down Expand Up @@ -293,11 +295,6 @@ serde >= 1.0.181
- [#565]: Correctly set minimum required version of tokio dependency to 1.10
- [#565]: Fix compilation error when build with serde <1.0.139


### New Tests

- [#379]: Added tests for attribute value normalization

[externally tagged]: https://serde.rs/enum-representations.html#externally-tagged
[#490]: https://github.com/tafia/quick-xml/pull/490
[#510]: https://github.com/tafia/quick-xml/issues/510
Expand Down
27 changes: 19 additions & 8 deletions src/escape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,24 +27,30 @@ pub enum EscapeError {
InvalidDecimal(char),
/// Not a valid unicode codepoint
InvalidCodepoint(u32),
/// The recursion limit was reached while attempting to unescape XML entities,
/// or normalize an attribute value. This could indicate an entity loop.
///
/// Limiting recursion prevents Denial-of-Service type attacks
/// such as the "billion laughs" [attack](https://en.wikipedia.org/wiki/Billion_laughs_attack).
ReachedRecursionLimit(Range<usize>),
}

impl std::fmt::Display for EscapeError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
EscapeError::EntityWithNull(e) => write!(
f,
"Error while escaping character at range {:?}: Null character entity not allowed",
"Error while unescaping character at range {:?}: Null character entity not allowed",
e
),
EscapeError::UnrecognizedSymbol(rge, res) => write!(
f,
"Error while escaping character at range {:?}: Unrecognized escape symbol: {:?}",
"Error while unescaping character at range {:?}: Unrecognized escape symbol: {:?}",
rge, res
),
EscapeError::UnterminatedEntity(e) => write!(
f,
"Error while escaping character at range {:?}: Cannot find ';' after '&'",
"Error while unescaping character at range {:?}: Cannot find ';' after '&'",
e
),
EscapeError::TooLongHexadecimal => write!(f, "Cannot convert hexadecimal to utf8"),
Expand All @@ -54,6 +60,10 @@ impl std::fmt::Display for EscapeError {
EscapeError::TooLongDecimal => write!(f, "Cannot convert decimal to utf8"),
EscapeError::InvalidDecimal(e) => write!(f, "'{}' is not a valid decimal character", e),
EscapeError::InvalidCodepoint(n) => write!(f, "'{}' is not a valid codepoint", n),
EscapeError::ReachedRecursionLimit(e) => write!(
f,
"Error while unescaping entity at range {:?}: Recursion limit reached"
),
}
}
}
Expand Down Expand Up @@ -310,7 +320,7 @@ where
let mut iter = bytes.iter();

if let Some(i) = iter.position(is_normalization_char) {
let mut normalized = String::new();
let mut normalized = String::with_capacity(value.len());
let pos = normalize_step(
&mut normalized,
&mut iter,
Expand Down Expand Up @@ -405,7 +415,7 @@ where
let pat = &input[start..end];
// 1. For a character reference, append the referenced character
// to the normalized value.
if pat.starts_with('#') {
if let Some(entity) = pat.strip_prefix('#') {
let entity = &pat[1..]; // starts after the #
let codepoint = parse_number(entity, start..end)?;
normalized.push_str(codepoint.encode_utf8(&mut [0u8; 4]));
Expand All @@ -432,6 +442,7 @@ where
Ok(index + 1) // +1 - skip character
}

// SAFETY: We call normalize_step only when is_normalization_char() return true
_ => unreachable!("Only '\\t', '\\n', '\\r', ' ', and '&' are possible here"),
}
}
Expand Down Expand Up @@ -2160,14 +2171,14 @@ mod normalization {
fn unclosed_entity() {
assert_eq!(
normalize_attribute_value("string with unclosed &entity reference", |_| {
// 0 ^ = 21 ^ = 38
// 0 ^ = 21 ^ = 38
Some("replacement")
}),
Err(EscapeError::UnterminatedEntity(21..38))
);
assert_eq!(
normalize_attribute_value("string with unclosed &#32 (character) reference", |_| {
// 0 ^ = 21 ^ = 47
// 0 ^ = 21 ^ = 47
None
}),
Err(EscapeError::UnterminatedEntity(21..47))
Expand All @@ -2178,7 +2189,7 @@ mod normalization {
fn unknown_entity() {
assert_eq!(
normalize_attribute_value("string with unknown &entity; reference", |_| { None }),
// 0 ^ ^ = 21..27
// 0 ^ 21 ^ 27
Err(EscapeError::UnrecognizedSymbol(
21..27,
"entity".to_string()
Expand Down
39 changes: 39 additions & 0 deletions src/events/attributes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -967,6 +967,45 @@ mod xml {
);
}

/// An obvious loop should be detected immediately
#[test]
#[ignore]
fn test_entity_loop() {
let raw_value = "&d;".as_bytes();
let attr = Attribute::from(("foo".as_bytes(), raw_value));
fn custom_resolver(ent: &str) -> Option<&'static str> {
match ent {
"d" => Some("&d;"),
}
}
assert_eq!(
attr.normalized_value_with(&custom_resolver),
Err(EscapeError::ReachedRecursionLimit(1..2))
);
}

/// A more complex entity loop should hit the recursion limit.
#[test]
#[ignore]
fn test_recursion_limit() {
let raw_value = "&a;".as_bytes();
let attr = Attribute::from(("foo".as_bytes(), raw_value));
fn custom_resolver(ent: &str) -> Option<&'static str> {
match ent {
"a" => Some("&b;"),
"b" => Some("&c;"),
"c" => Some("&d;"),
"d" => Some("&e;"),
"e" => Some("&f;"),
"f" => Some("&a;"),
}
}
assert_eq!(
attr.normalized_value_with(&custom_resolver),
Err(EscapeError::ReachedRecursionLimit(1..2))
);
}

#[test]
fn test_char_references() {
// character literal references are substituted without being replaced by spaces
Expand Down

0 comments on commit d3f5fbe

Please sign in to comment.