Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@
- [#393]: New module `name` with `QName`, `LocalName`, `Namespace`, `Prefix`
and `PrefixDeclaration` wrappers around byte arrays and `ResolveResult` with
the result of namespace resolution
- [#180]: Make `Decoder` struct public. You already had access to it via the
`Reader::decoder()` method, but could not name it in the code. Now the preferred
way to access decoding functionality is via this struct
- [#191]: New event variant `StartText` emitted for bytes before the XML declaration
or a start comment or a tag. For streams with BOM this event will contain a BOM

### Bug Fixes

Expand Down Expand Up @@ -56,6 +61,33 @@
- [#393]: Now `BytesStart::name()` and `BytesEnd::name()` returns `QName`, and
`BytesStart::local_name()` and `BytesEnd::local_name()` returns `LocalName`

- [#191]: Remove unused `reader.decoder().decode_owned()`. If you ever used it,
use `String::from_utf8` instead (which that function did)
- [#191]: Remove `*_without_bom` methods from the `Attributes` struct because they are useless.
Use the same-named methods without that suffix instead. Attribute values cannot contain BOM
- [#191]: Remove `Reader::decode()` and `Reader::decode_without_bom()`, they are replaced by
`Decoder::decode()` and `Decoder::decode_with_bom_removal()`.
Use `reader.decoder().decode_*(...)` instead of `reader.decode_*(...)` for now.
`Reader::encoding()` is replaced by `Decoder::encoding()` as well
- [#191]: Remove poorly designed `BytesText::unescape_and_decode_without_bom()` and
`BytesText::unescape_and_decode_without_bom_with_custom_entities()`. Although these methods worked
as expected, this was only due to good luck. They was replaced by the
`BytesStartText::decode_with_bom_removal()`:
- conceptually, you should decode BOM only for the first `Text` event from the
reader (since now `StartText` event is emitted instead for this)
- text before the first tag is not an XML content at all, so it is meaningless
to try to unescape something in it

- [#180]: Eliminated the differences in the decoding API when feature `encoding` enabled and when it is
disabled. Signatures of functions are now the same regardless of whether or not the feature is
enabled, and an error will be returned instead of performing replacements for invalid characters
in both cases.

Previously, if the `encoding` feature was enabled, decoding functions would return `Result<Cow<&str>>`
while without this feature they would return `Result<&str>`. With this change, only `Result<Cow<&str>>`
is returned regardless of the status of the feature.
- [#180]: Error variant `Error::Utf8` replaced by `Error::NonDecodable`

### New Tests

- [#9]: Added tests for incorrect nested tags in input
Expand All @@ -66,6 +98,8 @@

[#8]: https://github.com/Mingun/fast-xml/pull/8
[#9]: https://github.com/Mingun/fast-xml/pull/9
[#180]: https://github.com/tafia/quick-xml/issues/180
[#191]: https://github.com/tafia/quick-xml/issues/191
[#363]: https://github.com/tafia/quick-xml/issues/363
[#387]: https://github.com/tafia/quick-xml/pull/387
[#391]: https://github.com/tafia/quick-xml/pull/391
Expand Down
7 changes: 4 additions & 3 deletions benches/bench.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use criterion::{self, criterion_group, criterion_main, Criterion};
use pretty_assertions::assert_eq;
use quick_xml::events::Event;
use quick_xml::name::QName;
use quick_xml::Reader;

static SAMPLE: &[u8] = include_bytes!("../tests/sample_rss.xml");
Expand Down Expand Up @@ -173,15 +174,15 @@ fn bytes_text_unescaped(c: &mut Criterion) {
/// Benchmarks, how fast individual event parsed
fn one_event(c: &mut Criterion) {
let mut group = c.benchmark_group("One event");
group.bench_function("Text", |b| {
group.bench_function("StartText", |b| {
let src = "Hello world!".repeat(512 / 12).into_bytes();
let mut buf = Vec::with_capacity(1024);
b.iter(|| {
let mut r = Reader::from_reader(src.as_ref());
let mut nbtxt = criterion::black_box(0);
r.check_end_names(false).check_comments(false);
match r.read_event(&mut buf) {
Ok(Event::Text(ref e)) => nbtxt += e.unescaped().unwrap().len(),
Ok(Event::StartText(e)) => nbtxt += e.unescaped().unwrap().len(),
something_else => panic!("Did not expect {:?}", something_else),
};

Expand Down Expand Up @@ -310,7 +311,7 @@ fn attributes(c: &mut Criterion) {
let mut buf = Vec::new();
loop {
match r.read_event(&mut buf) {
Ok(Event::Empty(e)) if e.name() == b"player" => {
Ok(Event::Empty(e)) if e.name() == QName(b"player") => {
for name in ["num", "status", "avg"] {
if let Some(_attr) = e.try_get_attribute(name).unwrap() {
count += 1
Expand Down
7 changes: 0 additions & 7 deletions src/de/escape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,8 @@ macro_rules! deserialize_num {
where
V: Visitor<'de>,
{
#[cfg(not(feature = "encoding"))]
let value = self.decoder.decode(self.escaped_value.as_ref())?.parse()?;

#[cfg(feature = "encoding")]
let value = self.decoder.decode(self.escaped_value.as_ref()).parse()?;

visitor.$visit(value)
}
};
Expand All @@ -71,11 +67,8 @@ impl<'de, 'a> serde::Deserializer<'de> for EscapedDeserializer<'a> {
V: Visitor<'de>,
{
let unescaped = self.unescaped()?;
#[cfg(not(feature = "encoding"))]
let value = self.decoder.decode(&unescaped)?;

#[cfg(feature = "encoding")]
let value = self.decoder.decode(&unescaped);
visitor.visit_str(&value)
}

Expand Down
12 changes: 10 additions & 2 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ where
{
#[cfg(feature = "encoding")]
{
let value = decoder.decode(value);
let value = decoder.decode(value)?;
// No need to unescape because valid boolean representations cannot be escaped
match value.as_ref() {
"true" | "1" | "True" | "TRUE" | "t" | "Yes" | "YES" | "yes" | "y" => {
Expand Down Expand Up @@ -624,7 +624,7 @@ where
allow_start: bool,
) -> Result<BytesCData<'de>, DeError> {
match self.next()? {
DeEvent::Text(e) if unescape => e.unescape().map_err(|e| DeError::InvalidXml(e.into())),
DeEvent::Text(e) if unescape => e.unescape().map_err(Into::into),
DeEvent::Text(e) => Ok(BytesCData::new(e.into_inner())),
DeEvent::CData(e) => Ok(e),
DeEvent::Start(e) if allow_start => {
Expand Down Expand Up @@ -952,6 +952,10 @@ impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
let event = loop {
let e = self.reader.read_event(&mut self.buf)?;
match e {
//TODO: Probably not the best idea treat StartText as usual text
// Usually this event will represent a BOM
// Changing this requires review of the serde-de::top_level::one_element test
Event::StartText(e) => break Ok(DeEvent::Text(e.into_owned().into())),
Event::Start(e) => break Ok(DeEvent::Start(e.into_owned())),
Event::End(e) => break Ok(DeEvent::End(e.into_owned())),
Event::Text(e) => break Ok(DeEvent::Text(e.into_owned())),
Expand Down Expand Up @@ -992,6 +996,10 @@ impl<'de> XmlRead<'de> for SliceReader<'de> {
loop {
let e = self.reader.read_event_unbuffered()?;
match e {
//TODO: Probably not the best idea treat StartText as usual text
// Usually this event will represent a BOM
// Changing this requires review of the serde-de::top_level::one_element test
Event::StartText(e) => break Ok(DeEvent::Text(e.into())),
Event::Start(e) => break Ok(DeEvent::Start(e)),
Event::End(e) => break Ok(DeEvent::End(e)),
Event::Text(e) => break Ok(DeEvent::Text(e)),
Expand Down
8 changes: 1 addition & 7 deletions src/de/seq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@ use crate::de::{DeError, DeEvent, Deserializer, XmlRead};
use crate::events::BytesStart;
use crate::reader::Decoder;
use serde::de::{DeserializeSeed, SeqAccess};
#[cfg(not(feature = "encoding"))]
use std::borrow::Cow;

/// Check if tag `start` is included in the `fields` list. `decoder` is used to
/// get a string representation of a tag.
Expand All @@ -14,11 +12,7 @@ pub fn not_in(
start: &BytesStart,
decoder: Decoder,
) -> Result<bool, DeError> {
#[cfg(not(feature = "encoding"))]
let tag = Cow::Borrowed(decoder.decode(start.name().into_inner())?);

#[cfg(feature = "encoding")]
let tag = decoder.decode(start.name().into_inner());
let tag = decoder.decode(start.name().into_inner())?;

Ok(fields.iter().all(|&field| field != tag.as_ref()))
}
Expand Down
52 changes: 40 additions & 12 deletions src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,16 @@ use crate::escape::EscapeError;
use crate::events::attributes::AttrError;
use crate::utils::write_byte_string;
use std::str::Utf8Error;
use std::string::FromUtf8Error;

/// The error type used by this crate.
#[derive(Debug)]
pub enum Error {
/// IO error
Io(::std::io::Error),
/// Utf8 error
Utf8(Utf8Error),
/// Input decoding error. If `encoding` feature is disabled, contains `None`,
/// otherwise contains the UTF-8 decoding error
NonDecodable(Option<Utf8Error>),
/// Unexpected End of File
UnexpectedEof(String),
/// End event mismatch
Expand Down Expand Up @@ -46,10 +48,18 @@ impl From<::std::io::Error> for Error {
}

impl From<Utf8Error> for Error {
/// Creates a new `Error::Utf8` from the given error
/// Creates a new `Error::NonDecodable` from the given error
#[inline]
fn from(error: Utf8Error) -> Error {
Error::Utf8(error)
Error::NonDecodable(Some(error))
}
}

impl From<FromUtf8Error> for Error {
/// Creates a new `Error::Utf8` from the given error
#[inline]
fn from(error: FromUtf8Error) -> Error {
error.utf8_error().into()
}
}

Expand Down Expand Up @@ -77,7 +87,8 @@ impl std::fmt::Display for Error {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Error::Io(e) => write!(f, "I/O error: {}", e),
Error::Utf8(e) => write!(f, "UTF8 error: {}", e),
Error::NonDecodable(None) => write!(f, "Malformed input, decoding impossible"),
Error::NonDecodable(Some(e)) => write!(f, "Malformed UTF-8 input: {}", e),
Error::UnexpectedEof(e) => write!(f, "Unexpected EOF during reading {}", e),
Error::EndEventMismatch { expected, found } => {
write!(f, "Expecting </{}> found </{}>", expected, found)
Expand Down Expand Up @@ -109,7 +120,7 @@ impl std::error::Error for Error {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
Error::Io(e) => Some(e),
Error::Utf8(e) => Some(e),
Error::NonDecodable(Some(e)) => Some(e),
Error::InvalidAttr(e) => Some(e),
Error::EscapeError(e) => Some(e),
_ => None,
Expand Down Expand Up @@ -227,6 +238,7 @@ pub mod serialize {
}

impl From<Error> for DeError {
#[inline]
fn from(e: Error) -> Self {
Self::InvalidXml(e)
}
Expand All @@ -239,15 +251,17 @@ pub mod serialize {
}
}

impl From<ParseIntError> for DeError {
fn from(e: ParseIntError) -> Self {
Self::InvalidInt(e)
impl From<Utf8Error> for DeError {
#[inline]
fn from(e: Utf8Error) -> Self {
Self::InvalidXml(e.into())
}
}

impl From<ParseFloatError> for DeError {
fn from(e: ParseFloatError) -> Self {
Self::InvalidFloat(e)
impl From<FromUtf8Error> for DeError {
#[inline]
fn from(e: FromUtf8Error) -> Self {
Self::InvalidXml(e.into())
}
}

Expand All @@ -257,4 +271,18 @@ pub mod serialize {
Self::InvalidXml(e.into())
}
}

impl From<ParseIntError> for DeError {
#[inline]
fn from(e: ParseIntError) -> Self {
Self::InvalidInt(e)
}
}

impl From<ParseFloatError> for DeError {
#[inline]
fn from(e: ParseFloatError) -> Self {
Self::InvalidFloat(e)
}
}
}
Loading