Skip to content

Commit

Permalink
Merge pull request #756 from Mingun/pi-type
Browse files Browse the repository at this point in the history
Introduce dedicated type for processing instruction
  • Loading branch information
Mingun committed Jun 14, 2024
2 parents 8d38e4c + 875a10f commit a44792f
Show file tree
Hide file tree
Showing 10 changed files with 204 additions and 43 deletions.
4 changes: 4 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@

### Misc Changes

- [#650]: Change the type of `Event::PI` to a new dedicated `BytesPI` type.

[#650]: https://github.com/tafia/quick-xml/issues/650


## 0.32.0 -- 2024-06-10

Expand Down
4 changes: 3 additions & 1 deletion fuzz/fuzz_targets/fuzz_target_1.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ where
}
Ok(Event::Text(ref e))
| Ok(Event::Comment(ref e))
| Ok(Event::PI(ref e))
| Ok(Event::DocType(ref e)) => {
debug_format!(e);
if let Err(err) = e.unescape() {
Expand All @@ -56,6 +55,9 @@ where
break;
}
}
Ok(Event::PI(ref e)) => {
debug_format!(e);
}
Ok(Event::Decl(ref e)) => {
debug_format!(e);
let _ = black_box(e.version());
Expand Down
4 changes: 2 additions & 2 deletions fuzz/fuzz_targets/structured_roundtrip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

use arbitrary::{Arbitrary, Unstructured};
use libfuzzer_sys::fuzz_target;
use quick_xml::events::{BytesCData, BytesText, Event};
use quick_xml::events::{BytesCData, BytesPI, BytesText, Event};
use quick_xml::reader::{Config, NsReader, Reader};
use quick_xml::writer::Writer;
use std::{hint::black_box, io::Cursor};
Expand Down Expand Up @@ -71,7 +71,7 @@ fn fuzz_round_trip(driver: Driver) -> quick_xml::Result<()> {
_ = element_writer.write_cdata_content(BytesCData::new(*text))?;
}
WritePiContent(text) => {
_ = element_writer.write_pi_content(BytesText::from_escaped(*text))?;
_ = element_writer.write_pi_content(BytesPI::new(*text))?;
}
WriteEmpty => {
_ = element_writer.write_empty()?;
Expand Down
155 changes: 153 additions & 2 deletions src/events/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ use crate::escape::{
escape, minimal_escape, partial_escape, resolve_predefined_entity, unescape_with,
};
use crate::name::{LocalName, QName};
use crate::reader::is_whitespace;
use crate::reader::{is_whitespace, name_len};
use crate::utils::write_cow_string;
#[cfg(feature = "serialize")]
use crate::utils::CowRef;
Expand Down Expand Up @@ -992,6 +992,157 @@ impl<'a> arbitrary::Arbitrary<'a> for BytesCData<'a> {

////////////////////////////////////////////////////////////////////////////////////////////////////

/// [Processing instructions][PI] (PIs) allow documents to contain instructions for applications.
///
/// The event is a thin wrapper around a [`BytesStart`]: the name of the inner
/// `BytesStart` is exposed as the [target](Self::target) of the instruction and
/// its raw attributes part is exposed as the [content](Self::content).
///
/// [PI]: https://www.w3.org/TR/xml11/#sec-pi
#[derive(Clone, Eq, PartialEq)]
pub struct BytesPI<'a> {
// Buffer with the instruction plus the length of the target, stored the same
// way as the name of a start tag.
content: BytesStart<'a>,
}

impl<'a> BytesPI<'a> {
    /// Wraps the raw bytes of a processing instruction without copying them.
    ///
    /// `target_len` is the number of leading bytes of `content` that form the
    /// [target](Self::target) of the instruction; the remaining bytes are the
    /// [content](Self::content).
    #[inline]
    pub(crate) fn wrap(content: &'a [u8], target_len: usize) -> Self {
        Self {
            content: BytesStart::wrap(content, target_len),
        }
    }

    /// Creates a new `BytesPI` from a string.
    ///
    /// The target is the part of `content` before the first XML whitespace
    /// character (space, `\r`, `\n` or `\t`), or the whole `content` if it
    /// does not contain any whitespace.
    ///
    /// # Warning
    ///
    /// `content` must not contain the `?>` sequence.
    #[inline]
    pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
        let buf = str_cow_to_bytes(content);
        let name_len = name_len(&buf);
        Self {
            content: BytesStart { buf, name_len },
        }
    }

    /// Ensures that all data is owned to extend the object's lifetime if
    /// necessary.
    #[inline]
    pub fn into_owned(self) -> BytesPI<'static> {
        BytesPI {
            content: self.content.into_owned(),
        }
    }

    /// Extracts the inner `Cow` from the `BytesPI` event container.
    #[inline]
    pub fn into_inner(self) -> Cow<'a, [u8]> {
        self.content.buf
    }

    /// Converts the event into a borrowed event.
    #[inline]
    pub fn borrow(&self) -> BytesPI<'_> {
        BytesPI {
            content: self.content.borrow(),
        }
    }

    /// A target used to identify the application to which the instruction is directed.
    ///
    /// # Example
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use quick_xml::events::BytesPI;
    ///
    /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
    /// assert_eq!(instruction.target(), b"xml-stylesheet");
    /// ```
    #[inline]
    pub fn target(&self) -> &[u8] {
        self.content.name().0
    }

    /// Content of the processing instruction. Contains everything between target
    /// name and the end of the instruction.
    ///
    /// Because the target ends at the first XML whitespace character, a non-empty
    /// content always starts with that whitespace character (a space, `\r`, `\n`
    /// or `\t`). The content is empty when the instruction consists of a target only.
    ///
    /// # Example
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use quick_xml::events::BytesPI;
    ///
    /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
    /// assert_eq!(instruction.content(), br#" href="style.css""#);
    /// ```
    #[inline]
    pub fn content(&self) -> &[u8] {
        self.content.attributes_raw()
    }

    /// A view of the processing instructions' content as a list of key-value pairs.
    ///
    /// Key-value pairs are used in some processing instructions, for example in
    /// `<?xml-stylesheet?>`.
    ///
    /// Returned iterator does not validate attribute values as may be required by
    /// target's rules. For example, it doesn't check that substring `?>` is not
    /// present in the attribute value. That shouldn't be the problem when event
    /// is produced by the reader, because reader detects end of processing instruction
    /// by the first `?>` sequence, as required by the specification, and therefore
    /// this sequence cannot appear inside it.
    ///
    /// # Example
    ///
    /// ```
    /// # use pretty_assertions::assert_eq;
    /// use std::borrow::Cow;
    /// use quick_xml::events::attributes::Attribute;
    /// use quick_xml::events::BytesPI;
    /// use quick_xml::name::QName;
    ///
    /// let instruction = BytesPI::new(r#"xml-stylesheet href="style.css""#);
    /// for attr in instruction.attributes() {
    ///     assert_eq!(attr, Ok(Attribute {
    ///         key: QName(b"href"),
    ///         value: Cow::Borrowed(b"style.css"),
    ///     }));
    /// }
    /// ```
    #[inline]
    pub fn attributes(&self) -> Attributes<'_> {
        self.content.attributes()
    }
}

impl<'a> Debug for BytesPI<'a> {
    /// Prints the event as `BytesPI { content: ... }`, delegating the rendering
    /// of the underlying byte buffer to `write_cow_string`.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        f.write_str("BytesPI { content: ")?;
        write_cow_string(f, &self.content.buf)?;
        f.write_str(" }")
    }
}

impl<'a> Deref for BytesPI<'a> {
    type Target = [u8];

    /// Exposes the event as a byte slice by dereferencing the inner `BytesStart`.
    // NOTE(review): presumably this is the whole buffer (target followed by
    // content) -- confirm against the `Deref` implementation of `BytesStart`.
    fn deref(&self) -> &[u8] {
        &*self.content
    }
}

#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesPI<'a> {
    /// Generates an arbitrary string slice and turns it into a `BytesPI`
    /// with [`BytesPI::new`].
    ///
    /// The generated string is passed on as is; it is not checked for the
    /// `?>` sequence.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let content = <&str>::arbitrary(u)?;
        Ok(Self::new(content))
    }

    /// The size hint is the same as for the string slice the event is built from.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// Event emitted by [`Reader::read_event_into`].
///
/// [`Reader::read_event_into`]: crate::reader::Reader::read_event_into
Expand All @@ -1013,7 +1164,7 @@ pub enum Event<'a> {
/// XML declaration `<?xml ...?>`.
Decl(BytesDecl<'a>),
/// Processing instruction `<?...?>`.
PI(BytesText<'a>),
PI(BytesPI<'a>),
/// Document type definition data (DTD) stored in `<!DOCTYPE ...>`.
DocType(BytesText<'a>),
/// End of XML document.
Expand Down
18 changes: 14 additions & 4 deletions src/reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -996,6 +996,16 @@ pub(crate) const fn is_whitespace(b: u8) -> bool {
matches!(b, b' ' | b'\r' | b'\n' | b'\t')
}

/// Returns the length of the name in an element-like content.
///
/// The name is the first word of `content`, i.e. all bytes that precede the
/// first XML whitespace character. If `content` contains no whitespace at all,
/// the name spans the whole `content`.
#[inline]
pub(crate) fn name_len(content: &[u8]) -> usize {
    content
        .iter()
        .take_while(|&&byte| !is_whitespace(byte))
        .count()
}

////////////////////////////////////////////////////////////////////////////////////////////////////

#[cfg(test)]
Expand Down Expand Up @@ -1687,7 +1697,7 @@ mod test {

/// Ensures that no empty `Text` events are generated
mod $read_event {
use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event};
use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesPI, BytesStart, BytesText, Event};
use crate::reader::Reader;
use pretty_assertions::assert_eq;

Expand Down Expand Up @@ -1757,7 +1767,7 @@ mod test {

assert_eq!(
reader.$read_event($buf) $(.$await)? .unwrap(),
Event::PI(BytesText::from_escaped("xml-stylesheet '? >\" "))
Event::PI(BytesPI::new("xml-stylesheet '? >\" "))
);
}

Expand Down Expand Up @@ -1838,7 +1848,7 @@ mod test {
$(, $async:ident, $await:ident)?
) => {
mod small_buffers {
use crate::events::{BytesCData, BytesDecl, BytesStart, BytesText, Event};
use crate::events::{BytesCData, BytesDecl, BytesPI, BytesStart, BytesText, Event};
use crate::reader::Reader;
use pretty_assertions::assert_eq;

Expand Down Expand Up @@ -1872,7 +1882,7 @@ mod test {

assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::PI(BytesText::new("pi"))
Event::PI(BytesPI::new("pi"))
);
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Expand Down
24 changes: 10 additions & 14 deletions src/reader/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ use encoding_rs::UTF_8;

use crate::encoding::Decoder;
use crate::errors::{Error, IllFormedError, Result, SyntaxError};
use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event};
use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesPI, BytesStart, BytesText, Event};
#[cfg(feature = "encoding")]
use crate::reader::EncodingRef;
use crate::reader::{is_whitespace, BangType, Config, ParseState};
use crate::reader::{is_whitespace, name_len, BangType, Config, ParseState};

/// A struct that holds a current reader state and a parser configuration.
/// It is independent of the way data is read: the reader feeds data into it and
Expand Down Expand Up @@ -242,7 +242,7 @@ impl ReaderState {

Ok(Event::Decl(event))
} else {
Ok(Event::PI(BytesText::wrap(content, self.decoder())))
Ok(Event::PI(BytesPI::wrap(content, name_len(content))))
}
} else {
// <?....EOF
Expand All @@ -258,31 +258,27 @@ impl ReaderState {
/// # Parameters
/// - `content`: Content of a tag between `<` and `>`
pub fn emit_start<'b>(&mut self, content: &'b [u8]) -> Result<Event<'b>> {
let len = content.len();
let name_end = content
.iter()
.position(|&b| is_whitespace(b))
.unwrap_or(len);
if let Some(&b'/') = content.last() {
if let Some(content) = content.strip_suffix(b"/") {
// This is self-closed tag `<something/>`
let name_len = if name_end < len { name_end } else { len - 1 };
let event = BytesStart::wrap(&content[..len - 1], name_len);
let event = BytesStart::wrap(content, name_len(content));

if self.config.expand_empty_elements {
self.state = ParseState::Empty;
self.opened_starts.push(self.opened_buffer.len());
self.opened_buffer.extend(&content[..name_len]);
self.opened_buffer.extend(event.name().as_ref());
Ok(Event::Start(event))
} else {
Ok(Event::Empty(event))
}
} else {
let event = BytesStart::wrap(content, name_len(content));

// #514: Always store names even when .check_end_names == false,
// because checks can be temporarily disabled and when they would be
// enabled, we should have that information
self.opened_starts.push(self.opened_buffer.len());
self.opened_buffer.extend(&content[..name_end]);
Ok(Event::Start(BytesStart::wrap(content, name_end)))
self.opened_buffer.extend(event.name().as_ref());
Ok(Event::Start(event))
}
}

Expand Down
6 changes: 3 additions & 3 deletions src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::result::Result as StdResult;

use crate::encoding::UTF8_BOM;
use crate::errors::{Error, Result};
use crate::events::{attributes::Attribute, BytesCData, BytesStart, BytesText, Event};
use crate::events::{attributes::Attribute, BytesCData, BytesPI, BytesStart, BytesText, Event};

#[cfg(feature = "async-tokio")]
mod async_tokio;
Expand Down Expand Up @@ -551,10 +551,10 @@ impl<'a, W: Write> ElementWriter<'a, W> {
}

/// Write a processing instruction `<?...?>` inside the current element.
pub fn write_pi_content(self, text: BytesText) -> Result<&'a mut Writer<W>> {
pub fn write_pi_content(self, pi: BytesPI) -> Result<&'a mut Writer<W>> {
self.writer
.write_event(Event::Start(self.start_tag.borrow()))?;
self.writer.write_event(Event::PI(text))?;
self.writer.write_event(Event::PI(pi))?;
self.writer
.write_event(Event::End(self.start_tag.to_end()))?;
Ok(self.writer)
Expand Down
Loading

0 comments on commit a44792f

Please sign in to comment.