diff --git a/src/errors.rs b/src/errors.rs index 5a22229f..1b3a95c8 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -12,9 +12,9 @@ use std::sync::Arc; /// The error type used by this crate. #[derive(Clone, Debug)] pub enum Error { - /// IO error. + /// XML document cannot be read from or written to underlying source. /// - /// `Arc` instead of `IoError` since `IoError` is not `Clone`. + /// Contains the reference-counted I/O error to make the error type `Clone`able. Io(Arc), /// Input decoding error. If [`encoding`] feature is disabled, contains `None`, /// otherwise contains the UTF-8 decoding error diff --git a/src/reader/mod.rs b/src/reader/mod.rs index d024fe18..b49c3650 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -213,7 +213,7 @@ macro_rules! read_event_impl { } /// Read bytes up to `<` and skip it. If current byte (after skipping all space -/// characters if [`Parser::trim_text_start`] is `true`) is already `<`, then +/// characters if [`ReaderState::trim_text_start`] is `true`) is already `<`, then /// returns the next event, otherwise stay at position just after the `<` symbol. /// /// Moves parser to the `OpenedTag` state. @@ -409,7 +409,7 @@ enum ParseState { /// [`Event::Start`] event. The next event emitted will be an [`Event::End`], /// after which reader returned to the `ClosedTag` state. /// - /// [`expand_empty_elements`]: Parser::expand_empty_elements + /// [`expand_empty_elements`]: ReaderState::expand_empty_elements Empty, /// Reader enters this state when `Eof` event generated or an error occurred. /// This is the last state, the reader stay in it forever. diff --git a/src/reader/state.rs b/src/reader/state.rs index 6108a51b..c145e197 100644 --- a/src/reader/state.rs +++ b/src/reader/state.rs @@ -132,17 +132,18 @@ impl ReaderState { /// Wraps content of `buf` into the [`Event::End`] event. Does the check that /// end name matches the last opened start name if `self.check_end_names` is set. 
pub fn emit_end<'b>(&mut self, buf: &'b [u8]) -> Result> { + // Strip the `/` character. `content` contains data between `</` and `>` + let content = &buf[1..]; // XML standard permits whitespaces after the markup name in closing tags. // Let's strip them from the buffer before comparing tag names. let name = if self.trim_markup_names_in_closing_tags { - if let Some(pos_end_name) = buf[1..].iter().rposition(|&b| !is_whitespace(b)) { - let (name, _) = buf[1..].split_at(pos_end_name + 1); - name + if let Some(pos_end_name) = content.iter().rposition(|&b| !is_whitespace(b)) { + &content[..pos_end_name + 1] } else { - &buf[1..] + content } } else { - &buf[1..] + content }; let decoder = self.decoder(); diff --git a/test-gen/src/main.rs b/test-gen/src/main.rs index 65b77d7d..92a1e89f 100644 --- a/test-gen/src/main.rs +++ b/test-gen/src/main.rs @@ -284,6 +284,15 @@ fn main() { .expect(&format!("label `{}` is unsupported", label)); process_index(enc, &codepoints); + if enc == ISO_8859_8 { + // ISO_8859_8_I does not have its own index in encoding/indexes.json, + // but it has the same mapping as ISO_8859_8.
+ // + // Wikipedia (https://en.wikipedia.org/wiki/ISO-8859-8-I): + // The WHATWG Encoding Standard used by HTML5 treats ISO-8859-8 and ISO-8859-8-I + // as distinct encodings with the same mapping due to influence on the layout direction + process_index(ISO_8859_8_I, &codepoints); + } } // https://encoding.spec.whatwg.org/#x-user-defined-decoder make_xml(X_USER_DEFINED, '\u{F780}'..='\u{F7FF}'); diff --git a/tests/documents/encoding/ISO-8859-8-I.xml b/tests/documents/encoding/ISO-8859-8-I.xml index af5aadbb..9ee16eef 100644 --- a/tests/documents/encoding/ISO-8859-8-I.xml +++ b/tests/documents/encoding/ISO-8859-8-I.xml @@ -1,16 +1,17 @@ - - + + - - -  -  !"#$%'()*+,-./0123456789:;=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ - - <[[CDATA[[ -  !"#$%'()*+,-./0123456789:;=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~]]> + + + + !"#$%'()*+,-./0123456789:;=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ + + ?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~]]> \ No newline at end of file diff --git a/tests/issues.rs b/tests/issues.rs index 90efc732..e6d52758 100644 --- a/tests/issues.rs +++ b/tests/issues.rs @@ -9,6 +9,22 @@ use quick_xml::name::QName; use quick_xml::reader::Reader; use quick_xml::Error; +/// Regression test for https://github.com/tafia/quick-xml/issues/94 +#[test] +fn issue94() { + let data = br#" + +"#; + let mut reader = Reader::from_reader(&data[..]); + reader.trim_text(true); + loop { + match reader.read_event() { + Ok(Event::Eof) | Err(..) => break, + _ => (), + } + } +} + /// Regression test for https://github.com/tafia/quick-xml/issues/115 #[test] fn issue115() { @@ -22,6 +38,41 @@ fn issue115() { } } +/// Regression test for https://github.com/tafia/quick-xml/issues/299 +#[test] +fn issue299() -> Result<(), Error> { + let xml = r#" + + + + + + +"#; + let mut reader = Reader::from_str(xml); + loop { + match reader.read_event()? 
{ + Event::Start(e) | Event::Empty(e) => { + let attr_count = match e.name().as_ref() { + b"MICEX_DOC" => 1, + b"SECURITY" => 4, + b"RECORDS" => 26, + _ => unreachable!(), + }; + assert_eq!( + attr_count, + e.attributes().filter(Result::is_ok).count(), + "mismatch att count on '{:?}'", + reader.decoder().decode(e.name().as_ref()) + ); + } + Event::Eof => break, + _ => (), + } + } + Ok(()) +} + /// Regression test for https://github.com/tafia/quick-xml/issues/360 #[test] fn issue360() { diff --git a/tests/test.rs b/tests/test.rs index b068079f..3104e6dd 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -2,7 +2,6 @@ use quick_xml::events::attributes::Attribute; use quick_xml::events::Event::*; use quick_xml::name::QName; use quick_xml::reader::Reader; -use quick_xml::Error; use std::borrow::Cow; use pretty_assertions::assert_eq; @@ -89,21 +88,6 @@ fn test_comment_starting_with_gt() { } } -#[test] -fn test_issue94() { - let data = br#" - -"#; - let mut reader = Reader::from_reader(&data[..]); - reader.trim_text(true); - loop { - match reader.read_event() { - Ok(Eof) | Err(..) => break, - _ => (), - } - } -} - #[test] fn test_no_trim() { let mut reader = Reader::from_str(" text "); @@ -151,37 +135,3 @@ fn test_clone_reader() { assert!(matches!(cloned.read_event().unwrap(), Text(_))); assert!(matches!(cloned.read_event().unwrap(), End(_))); } - -#[test] -fn test_issue299() -> Result<(), Error> { - let xml = r#" - - - - - - -"#; - let mut reader = Reader::from_str(xml); - loop { - match reader.read_event()? 
{ - Start(e) | Empty(e) => { - let attr_count = match e.name().as_ref() { - b"MICEX_DOC" => 1, - b"SECURITY" => 4, - b"RECORDS" => 26, - _ => unreachable!(), - }; - assert_eq!( - attr_count, - e.attributes().filter(Result::is_ok).count(), - "mismatch att count on '{:?}'", - reader.decoder().decode(e.name().as_ref()) - ); - } - Eof => break, - _ => (), - } - } - Ok(()) -} diff --git a/tests/unit_tests.rs b/tests/unit_tests.rs index e0438c9b..a5e8c2db 100644 --- a/tests/unit_tests.rs +++ b/tests/unit_tests.rs @@ -435,27 +435,6 @@ fn test_offset_err_comment() { } } -#[test] -fn test_offset_err_comment_2_buf() { - let mut r = Reader::from_str(" tag found - Err(e) => assert_eq!( - r.buffer_position(), - 4, - "expecting buf_pos = 4, found {}, err {:?}", - r.buffer_position(), - e - ), - e => panic!("expecting error, found {:?}", e), - } -} - #[test] fn test_offset_err_comment_trim_text() { let mut r = Reader::from_str("\r\n