diff --git a/Changelog.md b/Changelog.md index 0ba5a368..080d475b 100644 --- a/Changelog.md +++ b/Changelog.md @@ -14,11 +14,15 @@ ### Bug Fixes +- [#755]: Fix incorrect missing of trimming all-space text events when + `trim_text_start = false` and `trim_text_end = true`. + ### Misc Changes - [#650]: Change the type of `Event::PI` to a new dedicated `BytesPI` type. [#650]: https://github.com/tafia/quick-xml/issues/650 +[#755]: https://github.com/tafia/quick-xml/pull/755 ## 0.32.0 -- 2024-06-10 diff --git a/src/reader/async_tokio.rs b/src/reader/async_tokio.rs index 8e56346b..19ca613a 100644 --- a/src/reader/async_tokio.rs +++ b/src/reader/async_tokio.rs @@ -9,7 +9,8 @@ use crate::events::Event; use crate::name::{QName, ResolveResult}; use crate::reader::buffered_reader::impl_buffered_source; use crate::reader::{ - is_whitespace, BangType, ElementParser, NsReader, ParseState, Parser, PiParser, Reader, Span, + is_whitespace, BangType, ElementParser, NsReader, ParseState, Parser, PiParser, ReadTextResult, + Reader, Span, }; /// A struct for read XML asynchronously from an [`AsyncBufRead`]. @@ -77,7 +78,6 @@ impl Reader { read_event_impl!( self, buf, TokioAdapter(&mut self.reader), - read_until_open_async, read_until_close_async, await ) @@ -141,17 +141,6 @@ impl Reader { Ok(read_to_end!(self, end, buf, read_event_into_async, { buf.clear(); }, await)) } - /// Read until '<' is found, moves reader to an `OpenedTag` state and returns a `Text` event. - /// - /// Returns inner `Ok` if the loop should be broken and an event returned. - /// Returns inner `Err` with the same `buf` because Rust borrowck stumbles upon this case in particular. - async fn read_until_open_async<'b>( - &mut self, - buf: &'b mut Vec, - ) -> Result, &'b mut Vec>> { - read_until_open!(self, buf, TokioAdapter(&mut self.reader), read_event_into_async, await) - } - /// Private function to read until `>` is found. This function expects that /// it was called just after encounter a `<` symbol. async fn read_until_close_async<'b>(&mut self, buf: &'b mut Vec) -> Result> { diff --git a/src/reader/buffered_reader.rs b/src/reader/buffered_reader.rs index 1cbe3681..a231956b 100644 --- a/src/reader/buffered_reader.rs +++ b/src/reader/buffered_reader.rs @@ -8,13 +8,13 @@ use std::path::Path; use crate::errors::{Error, Result}; use crate::events::Event; use crate::name::QName; -use crate::reader::{is_whitespace, BangType, Parser, Reader, Span, XmlSource}; +use crate::reader::{is_whitespace, BangType, Parser, ReadTextResult, Reader, Span, XmlSource}; macro_rules! impl_buffered_source { ($($lf:lifetime, $reader:tt, $async:ident, $await:ident)?) => { #[cfg(not(feature = "encoding"))] #[inline] - $($async)? fn remove_utf8_bom(&mut self) -> Result<()> { + $($async)? fn remove_utf8_bom(&mut self) -> io::Result<()> { use crate::encoding::UTF8_BOM; loop { @@ -26,14 +26,14 @@ macro_rules! impl_buffered_source { Ok(()) }, Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, - Err(e) => Err(Error::Io(e.into())), + Err(e) => Err(e), }; } } #[cfg(feature = "encoding")] #[inline] - $($async)? fn detect_encoding(&mut self) -> Result> { + $($async)? fn detect_encoding(&mut self) -> io::Result> { loop { break match self $(.$reader)? .fill_buf() $(.$await)? { Ok(n) => if let Some((enc, bom_len)) = crate::encoding::detect_encoding(n) { @@ -43,54 +43,106 @@ macro_rules! impl_buffered_source { Ok(None) }, Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, - Err(e) => Err(Error::Io(e.into())), + Err(e) => Err(e), }; } } + #[inline] + $($async)? fn read_text $(<$lf>)? ( + &mut self, + buf: &'b mut Vec, + position: &mut usize, + ) -> ReadTextResult<'b, &'b mut Vec> { + let mut read = 0; + let start = buf.len(); + loop { + let available = match self $(.$reader)? .fill_buf() $(.$await)? { + Ok(n) if n.is_empty() => break, + Ok(n) => n, + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, + Err(e) => { + *position += read; + return ReadTextResult::Err(e); + } + }; + + match memchr::memchr(b'<', available) { + Some(0) => { + self $(.$reader)? .consume(1); + *position += 1; + return ReadTextResult::Markup(buf); + } + Some(i) => { + buf.extend_from_slice(&available[..i]); + + let used = i + 1; + self $(.$reader)? .consume(used); + read += used; + + *position += read; + return ReadTextResult::UpToMarkup(&buf[start..]); + } + None => { + buf.extend_from_slice(available); + + let used = available.len(); + self $(.$reader)? .consume(used); + read += used; + } + } + } + + *position += read; + ReadTextResult::UpToEof(&buf[start..]) + } + #[inline] $($async)? fn read_bytes_until $(<$lf>)? ( &mut self, byte: u8, buf: &'b mut Vec, position: &mut usize, - ) -> Result<(&'b [u8], bool)> { + ) -> io::Result<(&'b [u8], bool)> { // search byte must be within the ascii range debug_assert!(byte.is_ascii()); let mut read = 0; - let mut done = false; let start = buf.len(); - while !done { - let used = { - let available = match self $(.$reader)? .fill_buf() $(.$await)? { - Ok(n) if n.is_empty() => break, - Ok(n) => n, - Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, - Err(e) => { - *position += read; - return Err(Error::Io(e.into())); - } - }; - - match memchr::memchr(byte, available) { - Some(i) => { - buf.extend_from_slice(&available[..i]); - done = true; - i + 1 - } - None => { - buf.extend_from_slice(available); - available.len() - } + loop { + let available = match self $(.$reader)? .fill_buf() $(.$await)? { + Ok(n) if n.is_empty() => break, + Ok(n) => n, + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, + Err(e) => { + *position += read; + return Err(e); } }; - self $(.$reader)? .consume(used); - read += used; + + match memchr::memchr(byte, available) { + Some(i) => { + buf.extend_from_slice(&available[..i]); + + let used = i + 1; + self $(.$reader)? .consume(used); + read += used; + + *position += read; + return Ok((&buf[start..], true)); + } + None => { + buf.extend_from_slice(available); + + let used = available.len(); + self $(.$reader)? .consume(used); + read += used; + } + } } - *position += read; - Ok((&buf[start..], done)) + *position += read; + Ok((&buf[start..], false)) } #[inline] @@ -188,7 +240,7 @@ macro_rules! impl_buffered_source { } #[inline] - $($async)? fn skip_whitespace(&mut self, position: &mut usize) -> Result<()> { + $($async)? fn skip_whitespace(&mut self, position: &mut usize) -> io::Result<()> { loop { break match self $(.$reader)? .fill_buf() $(.$await)? { Ok(n) => { @@ -202,32 +254,18 @@ macro_rules! impl_buffered_source { } } Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, - Err(e) => Err(Error::Io(e.into())), + Err(e) => Err(e), }; } } #[inline] - $($async)? fn skip_one(&mut self, byte: u8) -> Result { - // search byte must be within the ascii range - debug_assert!(byte.is_ascii()); - - match self.peek_one() $(.$await)? ? { - Some(b) if b == byte => { - self $(.$reader)? .consume(1); - Ok(true) - } - _ => Ok(false), - } - } - - #[inline] - $($async)? fn peek_one(&mut self) -> Result> { + $($async)? fn peek_one(&mut self) -> io::Result> { loop { break match self $(.$reader)? .fill_buf() $(.$await)? { Ok(n) => Ok(n.first().cloned()), Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, - Err(e) => Err(Error::Io(e.into())), + Err(e) => Err(e), }; } } diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 29df75cb..4f353c00 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -2,6 +2,7 @@ #[cfg(feature = "encoding")] use encoding_rs::Encoding; +use std::io; use std::ops::Range; use crate::encoding::Decoder; @@ -205,13 +206,12 @@ macro_rules! read_event_impl { ( $self:ident, $buf:ident, $reader:expr, - $read_until_open:ident, $read_until_close:ident $(, $await:ident)? ) => {{ let event = loop { - match $self.state.state { - ParseState::Init => { // Go to OpenedTag state + break match $self.state.state { + ParseState::Init => { // Go to InsideMarkup state // If encoding set explicitly, we not need to detect it. For example, // explicit UTF-8 set automatically if Reader was created using `from_str`. // But we still need to remove BOM for consistency with no encoding @@ -227,81 +227,59 @@ macro_rules! read_event_impl { #[cfg(not(feature = "encoding"))] $reader.remove_utf8_bom() $(.$await)? ?; - // Go to OpenedTag state - match $self.$read_until_open($buf) $(.$await)? { - Ok(Ok(ev)) => break Ok(ev), - Ok(Err(b)) => $buf = b, - Err(err) => break Err(err), - } + $self.state.state = ParseState::InsideText; + continue; }, - ParseState::ClosedTag => { // Go to OpenedTag state - match $self.$read_until_open($buf) $(.$await)? { - Ok(Ok(ev)) => break Ok(ev), - Ok(Err(b)) => $buf = b, - Err(err) => break Err(err), + ParseState::InsideText => { // Go to InsideMarkup or Done state + if $self.state.config.trim_text_start { + $reader.skip_whitespace(&mut $self.state.offset) $(.$await)? ?; + } + + match $reader.read_text($buf, &mut $self.state.offset) $(.$await)? { + ReadTextResult::Markup(buf) => { + $self.state.state = ParseState::InsideMarkup; + // Pass `buf` to the next next iteration of parsing loop + $buf = buf; + continue; + } + ReadTextResult::UpToMarkup(bytes) => { + $self.state.state = ParseState::InsideMarkup; + // FIXME: Can produce an empty event if: + // - event contains only spaces + // - trim_text_start = false + // - trim_text_end = true + Ok(Event::Text($self.state.emit_text(bytes))) + } + ReadTextResult::UpToEof(bytes) => { + $self.state.state = ParseState::Done; + // Trim bytes from end if required + let event = $self.state.emit_text(bytes); + if event.is_empty() { + Ok(Event::Eof) + } else { + Ok(Event::Text(event)) + } + } + ReadTextResult::Err(e) => Err(Error::Io(e.into())), } }, - // Go to ClosedTag state in next two arms - ParseState::OpenedTag => break $self.$read_until_close($buf) $(.$await)?, - ParseState::Empty => break $self.state.close_expanded_empty(), - ParseState::Exit => break Ok(Event::Eof), + // Go to InsideText state in next two arms + ParseState::InsideMarkup => $self.$read_until_close($buf) $(.$await)?, + ParseState::InsideEmpty => Ok(Event::End($self.state.close_expanded_empty())), + ParseState::Done => Ok(Event::Eof), }; }; match event { // #513: In case of ill-formed errors we already consume the wrong data // and change the state. We can continue parsing if we wish Err(Error::IllFormed(_)) => {} - Err(_) | Ok(Event::Eof) => $self.state.state = ParseState::Exit, + Err(_) | Ok(Event::Eof) => $self.state.state = ParseState::Done, _ => {} } event }}; } -/// Read bytes up to `<` and skip it. If current byte (after skipping all space -/// characters if [`Config::trim_text_start`] is `true`) is already `<`, then -/// returns the next event, otherwise stay at position just after the `<` symbol. -/// -/// Moves parser to the `OpenedTag` state. -/// -/// This code is executed in two cases: -/// - after start of parsing just after skipping BOM if it is present -/// - after parsing `` or `` -macro_rules! read_until_open { - ( - $self:ident, $buf:ident, - $reader:expr, - $read_event:ident - $(, $await:ident)? - ) => {{ - if $self.state.config.trim_text_start { - $reader.skip_whitespace(&mut $self.state.offset) $(.$await)? ?; - } - - // If we already at the `<` symbol, do not try to return an empty Text event - if $reader.skip_one(b'<') $(.$await)? ? { - $self.state.offset += 1; - $self.state.state = ParseState::OpenedTag; - // Pass $buf to the next next iteration of parsing loop - return Ok(Err($buf)); - } - - match $reader - .read_bytes_until(b'<', $buf, &mut $self.state.offset) - $(.$await)? - { - Ok((bytes, found)) => { - if found { - $self.state.state = ParseState::OpenedTag; - } - // Return Text event with `bytes` content or Eof if bytes is empty - $self.state.emit_text(bytes).map(Ok) - } - Err(e) => Err(e), - } - }}; -} - /// Read bytes up to the `>` and skip it. This method is expected to be called /// after seeing the `<` symbol and skipping it. Inspects the next (current) /// symbol and returns an appropriate [`Event`]: @@ -313,7 +291,7 @@ macro_rules! read_until_open { /// |`?` |[`PI`] /// |_other_|[`Start`] or [`Empty`] /// -/// Moves parser to the `ClosedTag` state. +/// Moves parser to the `InsideText` state. /// /// [`Comment`]: Event::Comment /// [`CData`]: Event::CData @@ -328,7 +306,7 @@ macro_rules! read_until_close { $reader:expr $(, $await:ident)? ) => {{ - $self.state.state = ParseState::ClosedTag; + $self.state.state = ParseState::InsideText; let start = $self.state.offset; match $reader.peek_one() $(.$await)? { @@ -358,7 +336,7 @@ macro_rules! read_until_close { $self.state.last_error_offset = start - 1; Err(Error::Syntax(SyntaxError::UnclosedTag)) } - Err(e) => Err(e), + Err(e) => Err(Error::Io(e.into())), }, // ` match $reader @@ -378,7 +356,7 @@ macro_rules! read_until_close { .read_with(ElementParser::default(), $buf, &mut $self.state.offset) $(.$await)? { - Ok(bytes) => $self.state.emit_start(bytes), + Ok(bytes) => Ok($self.state.emit_start(bytes)), Err(e) => Err(e), }, // `<` - syntax error, tag not closed @@ -388,7 +366,7 @@ macro_rules! read_until_close { $self.state.last_error_offset = start - 1; Err(Error::Syntax(SyntaxError::UnclosedTag)) } - Err(e) => Err(e), + Err(e) => Err(Error::Io(e.into())), } }}; } @@ -450,41 +428,41 @@ pub type Span = Range; /// subgraph _ /// direction LR /// -/// Init -- "(no event)"\n --> OpenedTag -/// OpenedTag -- Decl, DocType, PI\nComment, CData\nStart, Empty, End --> ClosedTag -/// ClosedTag -- "#lt;false#gt;\n(no event)"\nText --> OpenedTag +/// Init -- "(no event)"\n --> InsideMarkup +/// InsideMarkup -- Decl, DocType, PI\nComment, CData\nStart, Empty, End --> InsideText +/// InsideText -- "#lt;false#gt;\n(no event)"\nText --> InsideMarkup /// end -/// ClosedTag -- "#lt;true#gt;"\nStart --> Empty -/// Empty -- End --> ClosedTag -/// _ -. Eof .-> Exit +/// InsideText -- "#lt;true#gt;"\nStart --> InsideEmpty +/// InsideEmpty -- End --> InsideText +/// _ -. Eof .-> Done /// ``` #[derive(Clone, Debug)] enum ParseState { /// Initial state in which reader stay after creation. Transition from that /// state could produce a `Text`, `Decl`, `Comment` or `Start` event. The next - /// state is always `OpenedTag`. The reader will never return to this state. The - /// event emitted during transition to `OpenedTag` is a `StartEvent` if the + /// state is always `InsideMarkup`. The reader will never return to this state. The + /// event emitted during transition to `InsideMarkup` is a `StartEvent` if the /// first symbol not `<`, otherwise no event are emitted. Init, /// State after seeing the `<` symbol. Depending on the next symbol all other /// events could be generated. /// - /// After generating one event the reader moves to the `ClosedTag` state. - OpenedTag, + /// After generating one event the reader moves to the `InsideText` state. + InsideMarkup, /// State in which reader searches the `<` symbol of a markup. All bytes before /// that symbol will be returned in the [`Event::Text`] event. After that - /// the reader moves to the `OpenedTag` state. - ClosedTag, + /// the reader moves to the `InsideMarkup` state. + InsideText, /// This state is used only if option [`expand_empty_elements`] is set to `true`. - /// Reader enters to this state when it is in a `ClosedTag` state and emits an + /// Reader enters to this state when it is in a `InsideText` state and emits an /// [`Event::Start`] event. The next event emitted will be an [`Event::End`], - /// after which reader returned to the `ClosedTag` state. + /// after which reader returned to the `InsideText` state. /// /// [`expand_empty_elements`]: Config::expand_empty_elements - Empty, + InsideEmpty, /// Reader enters this state when `Eof` event generated or an error occurred. /// This is the last state, the reader stay in it forever. - Exit, + Done, } /// A reference to an encoding together with information about how it was retrieved. @@ -690,9 +668,9 @@ impl Reader { /// Gets the current byte position in the input data. pub fn buffer_position(&self) -> usize { - // when internal state is OpenedTag, we have actually read until '<', + // when internal state is InsideMarkup, we have actually read until '<', // which we don't want to show - if let ParseState::OpenedTag = self.state.state { + if let ParseState::InsideMarkup = self.state.state { self.state.offset - 1 } else { self.state.offset @@ -738,18 +716,7 @@ impl Reader { where R: XmlSource<'i, B>, { - read_event_impl!(self, buf, self.reader, read_until_open, read_until_close) - } - - /// Read until '<' is found, moves reader to an `OpenedTag` state and returns a `Text` event. - /// - /// Returns inner `Ok` if the loop should be broken and an event returned. - /// Returns inner `Err` with the same `buf` because Rust borrowck stumbles upon this case in particular. - fn read_until_open<'i, B>(&mut self, buf: B) -> Result, B>> - where - R: XmlSource<'i, B>, - { - read_until_open!(self, buf, self.reader, read_event_impl) + read_event_impl!(self, buf, self.reader, read_until_close) } /// Private function to read until `>` is found. This function expects that @@ -764,6 +731,20 @@ impl Reader { //////////////////////////////////////////////////////////////////////////////////////////////////// +/// Result of an attempt to read XML textual data from the reader. +enum ReadTextResult<'r, B> { + /// Start of markup (`<` character) was found in the first byte. + /// Contains buffer that should be returned back to the next iteration cycle + /// to satisfy borrow checker requirements. + Markup(B), + /// Contains text block up to start of markup (`<` character). + UpToMarkup(&'r [u8]), + /// Contains text block up to EOF, start of markup (`<` character) was not found. + UpToEof(&'r [u8]), + /// IO error occurred. + Err(io::Error), +} + /// Used to decouple reading of data from data source and parsing XML structure from it. /// This is a state preserved between getting chunks of bytes from the reader. /// @@ -801,11 +782,38 @@ pub trait Parser { trait XmlSource<'r, B> { /// Removes UTF-8 BOM if it is present #[cfg(not(feature = "encoding"))] - fn remove_utf8_bom(&mut self) -> Result<()>; + fn remove_utf8_bom(&mut self) -> io::Result<()>; /// Determines encoding from the start of input and removes BOM if it is present #[cfg(feature = "encoding")] - fn detect_encoding(&mut self) -> Result>; + fn detect_encoding(&mut self) -> io::Result>; + + /// Read input until start of markup (the `<`) is found or end of input is reached. + /// + /// Returns a slice of data read up to `<` (exclusive), and a flag noting whether + /// `<` was found in the input or not. + /// + /// # Example + /// + /// ```ignore + /// let mut position = 0; + /// let mut input = b"abc ReadTextResult<'r, B>; /// Read input until `byte` is found or end of input is reached. /// @@ -838,7 +846,7 @@ trait XmlSource<'r, B> { byte: u8, buf: B, position: &mut usize, - ) -> Result<(&'r [u8], bool)>; + ) -> io::Result<(&'r [u8], bool)>; /// Read input until processing instruction is finished. /// @@ -884,18 +892,11 @@ trait XmlSource<'r, B> { /// /// # Parameters /// - `position`: Will be increased by amount of bytes consumed - fn skip_whitespace(&mut self, position: &mut usize) -> Result<()>; - - /// Consume and discard one character if it matches the given byte. Return - /// `true` if it matched. - /// - /// # Parameters - /// - `byte`: Character to skip - fn skip_one(&mut self, byte: u8) -> Result; + fn skip_whitespace(&mut self, position: &mut usize) -> io::Result<()>; /// Return one character without consuming it, so that future `read_*` calls /// will still include it. On EOF, return `None`. - fn peek_one(&mut self) -> Result>; + fn peek_one(&mut self) -> io::Result>; } /// Possible elements started with ` Reader<&'a [u8]> { impl<'a> XmlSource<'a, ()> for &'a [u8] { #[cfg(not(feature = "encoding"))] #[inline] - fn remove_utf8_bom(&mut self) -> Result<()> { + fn remove_utf8_bom(&mut self) -> io::Result<()> { if self.starts_with(crate::encoding::UTF8_BOM) { *self = &self[crate::encoding::UTF8_BOM.len()..]; } @@ -247,7 +248,7 @@ impl<'a> XmlSource<'a, ()> for &'a [u8] { #[cfg(feature = "encoding")] #[inline] - fn detect_encoding(&mut self) -> Result> { + fn detect_encoding(&mut self) -> io::Result> { if let Some((enc, bom_len)) = crate::encoding::detect_encoding(self) { *self = &self[bom_len..]; return Ok(Some(enc)); @@ -255,13 +256,36 @@ impl<'a> XmlSource<'a, ()> for &'a [u8] { Ok(None) } + #[inline] + fn read_text(&mut self, _buf: (), position: &mut usize) -> ReadTextResult<'a, ()> { + match memchr::memchr(b'<', self) { + Some(0) => { + *position += 1; + *self = &self[1..]; + ReadTextResult::Markup(()) + } + Some(i) => { + *position += i + 1; + let bytes = &self[..i]; + *self = &self[i + 1..]; + ReadTextResult::UpToMarkup(bytes) + } + None => { + *position += self.len(); + let bytes = &self[..]; + *self = &[]; + ReadTextResult::UpToEof(bytes) + } + } + } + #[inline] fn read_bytes_until( &mut self, byte: u8, _buf: (), position: &mut usize, - ) -> Result<(&'a [u8], bool)> { + ) -> io::Result<(&'a [u8], bool)> { // search byte must be within the ascii range debug_assert!(byte.is_ascii()); @@ -318,7 +342,7 @@ impl<'a> XmlSource<'a, ()> for &'a [u8] { } #[inline] - fn skip_whitespace(&mut self, position: &mut usize) -> Result<()> { + fn skip_whitespace(&mut self, position: &mut usize) -> io::Result<()> { let whitespaces = self .iter() .position(|b| !is_whitespace(*b)) @@ -329,19 +353,7 @@ impl<'a> XmlSource<'a, ()> for &'a [u8] { } #[inline] - fn skip_one(&mut self, byte: u8) -> Result { - // search byte must be within the ascii range - debug_assert!(byte.is_ascii()); - if self.first() == Some(&byte) { - *self = &self[1..]; - Ok(true) - } else { - Ok(false) - } - } - - #[inline] - fn peek_one(&mut self) -> Result> { + fn peek_one(&mut self) -> io::Result> { Ok(self.first().copied()) } } diff --git a/src/reader/state.rs b/src/reader/state.rs index ffe4814a..bf56d242 100644 --- a/src/reader/state.rs +++ b/src/reader/state.rs @@ -52,15 +52,11 @@ pub(super) struct ReaderState { } impl ReaderState { - /// Trims end whitespaces from `bytes`, if required, and returns a [`Text`] - /// event or an [`Eof`] event, if text after trimming is empty. + /// Trims end whitespaces from `bytes`, if required, and returns a text event. /// /// # Parameters /// - `bytes`: data from the start of stream to the first `<` or from `>` to `<` - /// - /// [`Text`]: Event::Text - /// [`Eof`]: Event::Eof - pub fn emit_text<'b>(&mut self, bytes: &'b [u8]) -> Result> { + pub fn emit_text<'b>(&mut self, bytes: &'b [u8]) -> BytesText<'b> { let mut content = bytes; if self.config.trim_text_end { @@ -68,15 +64,10 @@ impl ReaderState { let len = bytes .iter() .rposition(|&b| !is_whitespace(b)) - .map_or_else(|| bytes.len(), |p| p + 1); + .map_or(0, |p| p + 1); content = &bytes[..len]; } - - if content.is_empty() { - Ok(Event::Eof) - } else { - Ok(Event::Text(BytesText::wrap(content, self.decoder()))) - } + BytesText::wrap(content, self.decoder()) } /// reads `BytesElement` starting with a `!`, @@ -257,18 +248,18 @@ impl ReaderState { /// /// # Parameters /// - `content`: Content of a tag between `<` and `>` - pub fn emit_start<'b>(&mut self, content: &'b [u8]) -> Result> { + pub fn emit_start<'b>(&mut self, content: &'b [u8]) -> Event<'b> { if let Some(content) = content.strip_suffix(b"/") { // This is self-closed tag `` let event = BytesStart::wrap(content, name_len(content)); if self.config.expand_empty_elements { - self.state = ParseState::Empty; + self.state = ParseState::InsideEmpty; self.opened_starts.push(self.opened_buffer.len()); self.opened_buffer.extend(event.name().as_ref()); - Ok(Event::Start(event)) + Event::Start(event) } else { - Ok(Event::Empty(event)) + Event::Empty(event) } } else { let event = BytesStart::wrap(content, name_len(content)); @@ -278,17 +269,17 @@ impl ReaderState { // enabled, we should have that information self.opened_starts.push(self.opened_buffer.len()); self.opened_buffer.extend(event.name().as_ref()); - Ok(Event::Start(event)) + Event::Start(event) } } #[inline] - pub fn close_expanded_empty(&mut self) -> Result> { - self.state = ParseState::ClosedTag; + pub fn close_expanded_empty(&mut self) -> BytesEnd<'static> { + self.state = ParseState::InsideText; let name = self .opened_buffer .split_off(self.opened_starts.pop().unwrap()); - Ok(Event::End(BytesEnd::wrap(name.into()))) + BytesEnd::wrap(name.into()) } /// Get the decoder, used to decode bytes, read by this reader, to the strings.