From daa10924d3e01cddbbc9e5ac85621fb62e9ca1ee Mon Sep 17 00:00:00 2001 From: Johann Tuffe Date: Mon, 12 Jun 2017 16:40:26 +0800 Subject: [PATCH 01/11] Have read_namespace_event borrow the ns buffer The current situation, where the reader owns the namespace buffer leads to a bad api. Indeed in the inner loop of the `read_namespaced_event`, the reader, mutably borrowed cannot be used. Making the caller own the buffer fixes the situation, while still avoiding extra allocation. --- src/reader.rs | 82 +++++++++++++++++++++++++++++---------------------- 1 file changed, 47 insertions(+), 35 deletions(-) diff --git a/src/reader.rs b/src/reader.rs index 4f4c21ee..ed7e896d 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -77,7 +77,7 @@ pub struct Reader { /// opened name start indexes opened_starts: Vec, /// a buffer to manage namespaces - ns_buffer: NamespaceBuffer, + ns_buffer: NamespaceBufferIndex, /// the encoding specified in the xml, defaults to utf8 encoding: &'static Encoding, } @@ -96,7 +96,7 @@ impl Reader { check_end_names: true, buf_position: 0, check_comments: false, - ns_buffer: NamespaceBuffer::default(), + ns_buffer: NamespaceBufferIndex::default(), encoding: ::encoding_rs::UTF_8, } } @@ -469,20 +469,26 @@ impl Reader { /// /// *Unqualified* attribute names do *not* inherit the current *default namespace*. #[inline] - pub fn resolve_namespace<'a, 'b>(&'a self, qname: &'b [u8]) -> (Option<&'a [u8]>, &'b [u8]) { - self.ns_buffer.resolve_namespace(qname) + pub fn resolve_namespace<'a, 'b, 'c>(&'a self, + qname: &'b [u8], + namespace_buffer: &'c [u8]) + -> (Option<&'c [u8]>, &'b [u8]) { + self.ns_buffer.resolve_namespace(qname, namespace_buffer) } /// Reads the next event and resolve its namespace - pub fn read_namespaced_event<'a, 'b>(&'a mut self, - buf: &'b mut Vec) - -> Result<(Option<&'a [u8]>, Event<'b>)> { - self.ns_buffer.pop_empty_namespaces(); + pub fn read_namespaced_event<'a, 'b, 'c>(&'a mut self, + buf: &'b mut Vec, + namespace_buffer: &'c mut Vec) + -> Result<(Option<&'c [u8]>, Event<'b>)> { + self.ns_buffer.pop_empty_namespaces(namespace_buffer); match self.read_event(buf) { Ok(Event::Eof) => Ok((None, Event::Eof)), Ok(Event::Start(e)) => { - self.ns_buffer.push_new_namespaces(&e); - Ok((self.ns_buffer.find_namespace_value(e.name()), Event::Start(e))) + self.ns_buffer.push_new_namespaces(&e, namespace_buffer); + Ok((self.ns_buffer + .find_namespace_value(e.name(), &**namespace_buffer), + Event::Start(e))) } Ok(Event::Empty(e)) => { // For empty elements we need to 'artificially' keep the namespace scope on the @@ -490,17 +496,21 @@ impl Reader { // Otherwise the caller has no chance to use `resolve` in the context of the // namespace declarations that are 'in scope' for the empty element alone. // Ex: - self.ns_buffer.push_new_namespaces(&e); + self.ns_buffer.push_new_namespaces(&e, namespace_buffer); // notify next `read_namespaced_event()` invocation that it needs to pop this // namespace scope self.ns_buffer.pending_pop = true; - Ok((self.ns_buffer.find_namespace_value(e.name()), Event::Empty(e))) + Ok((self.ns_buffer + .find_namespace_value(e.name(), &**namespace_buffer), + Event::Empty(e))) } Ok(Event::End(e)) => { // notify next `read_namespaced_event()` invocation that it needs to pop this // namespace scope self.ns_buffer.pending_pop = true; - Ok((self.ns_buffer.find_namespace_value(e.name()), Event::End(e))) + Ok((self.ns_buffer + .find_namespace_value(e.name(), &**namespace_buffer), + Event::End(e))) } Ok(e) => Ok((None, e)), Err(e) => Err(e), @@ -517,9 +527,7 @@ impl Reader { pub fn decode<'b, 'c>(&'b self, bytes: &'c [u8]) -> Cow<'c, str> { self.encoding.decode(bytes).0 } -} - -impl Reader { + /// Reads until end element is found /// /// Manages nested cases where parent and child elements have the same name @@ -767,11 +775,9 @@ impl Namespace { /// /// Holds all internal logic to push/pop namespaces with their levels. #[derive(Debug, Default)] -struct NamespaceBuffer { +struct NamespaceBufferIndex { /// a buffer of namespace ranges slices: Vec, - /// a buffer of existing namespaces - buffer: Vec, /// The number of open tags at the moment. We need to keep track of this to know which namespace /// declarations to remove when we encounter an `End` event. nesting_level: i32, @@ -781,22 +787,25 @@ struct NamespaceBuffer { pending_pop: bool, } -impl NamespaceBuffer { +impl NamespaceBufferIndex { #[inline] - fn find_namespace_value(&self, element_name: &[u8]) -> Option<&[u8]> { + fn find_namespace_value<'a, 'b, 'c>(&'a self, + element_name: &'b [u8], + buffer: &'c [u8]) + -> Option<&'c [u8]> { let ns = match element_name.iter().position(|b| *b == b':') { None => self.slices.iter().rev().find(|n| n.prefix_len == 0), Some(len) => { self.slices .iter() .rev() - .find(|n| n.prefix(&self.buffer) == &element_name[..len]) + .find(|n| n.prefix(buffer) == &element_name[..len]) } }; - ns.and_then(|n| n.opt_value(&self.buffer)) + ns.and_then(|n| n.opt_value(buffer)) } - fn pop_empty_namespaces(&mut self) { + fn pop_empty_namespaces(&mut self, buffer: &mut Vec) { if !self.pending_pop { return; } @@ -809,20 +818,20 @@ impl NamespaceBuffer { .rposition(|n| n.level <= current_level) { // none of the namespaces are valid, remove all of them None => { - self.buffer.clear(); + buffer.clear(); self.slices.clear(); } // drop all namespaces past the last valid namespace Some(last_valid_pos) => { if let Some(len) = self.slices.get(last_valid_pos + 1).map(|n| n.start) { - self.buffer.truncate(len); + buffer.truncate(len); self.slices.truncate(last_valid_pos + 1); } } } } - fn push_new_namespaces(&mut self, e: &BytesStart) { + fn push_new_namespaces(&mut self, e: &BytesStart, buffer: &mut Vec) { self.nesting_level += 1; let level = self.nesting_level; // adds new namespaces for attributes starting with 'xmlns:' and for the 'xmlns' @@ -832,8 +841,8 @@ impl NamespaceBuffer { if k.starts_with(b"xmlns") { match k.get(5) { None => { - let start = self.buffer.len(); - self.buffer.extend_from_slice(v); + let start = buffer.len(); + buffer.extend_from_slice(v); self.slices .push(Namespace { start: start, @@ -843,9 +852,9 @@ impl NamespaceBuffer { }); } Some(&b':') => { - let start = self.buffer.len(); - self.buffer.extend_from_slice(&k[6..]); - self.buffer.extend_from_slice(v); + let start = buffer.len(); + buffer.extend_from_slice(&k[6..]); + buffer.extend_from_slice(v); self.slices .push(Namespace { start: start, @@ -871,7 +880,10 @@ impl NamespaceBuffer { /// /// *Unqualified* attribute names do *not* inherit the current *default namespace*. #[inline] - fn resolve_namespace<'a, 'b>(&'a self, qname: &'b [u8]) -> (Option<&'a [u8]>, &'b [u8]) { + fn resolve_namespace<'a, 'b, 'c>(&'a self, + qname: &'b [u8], + buffer: &'c [u8]) + -> (Option<&'c [u8]>, &'b [u8]) { qname .iter() .position(|b| *b == b':') @@ -880,8 +892,8 @@ impl NamespaceBuffer { self.slices .iter() .rev() - .find(|n| n.prefix(&self.buffer) == prefix) - .map(|ns| (ns.opt_value(&self.buffer), &value[1..])) + .find(|n| n.prefix(buffer) == prefix) + .map(|ns| (ns.opt_value(buffer), &value[1..])) }) .unwrap_or((None, qname)) } From 1845abdf9830fb8df2b071baa622c79bfdac99ac Mon Sep 17 00:00:00 2001 From: Johann Tuffe Date: Mon, 12 Jun 2017 16:45:28 +0800 Subject: [PATCH 02/11] update tests --- tests/test.rs | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/tests/test.rs b/tests/test.rs index c3fcaa46..fd645836 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -90,8 +90,9 @@ fn test_attributes_empty_ns() { let mut r = Reader::from_reader(src as &[u8]); r.trim_text(true).expand_empty_elements(false); let mut buf = Vec::new(); + let mut ns_buf = Vec::new(); - let e = match r.read_namespaced_event(&mut buf) { + let e = match r.read_namespaced_event(&mut buf, &mut ns_buf) { Ok((None, Empty(e))) => e, e => panic!("Expecting Empty event, got {:?}", e), }; @@ -101,7 +102,7 @@ fn test_attributes_empty_ns() { // we don't care about xmlns attributes for this test .filter(|kv| !kv.key.starts_with(b"xmlns")) .map(|Attribute { key: name, value }| { - let (opt_ns, local_name) = r.resolve_namespace(name); + let (opt_ns, local_name) = r.resolve_namespace(name, &ns_buf); (opt_ns, local_name, value) }); match atts.next() { @@ -133,8 +134,9 @@ fn test_attributes_empty_ns_expanded() { let mut r = Reader::from_reader(src as &[u8]); r.trim_text(true).expand_empty_elements(true); let mut buf = Vec::new(); + let mut ns_buf = Vec::new(); { - let e = match r.read_namespaced_event(&mut buf) { + let e = match r.read_namespaced_event(&mut buf, &mut ns_buf) { Ok((None, Start(e))) => e, e => panic!("Expecting Empty event, got {:?}", e), }; @@ -144,7 +146,7 @@ fn test_attributes_empty_ns_expanded() { // we don't care about xmlns attributes for this test .filter(|kv| !kv.key.starts_with(b"xmlns")) .map(|Attribute { key: name, value }| { - let (opt_ns, local_name) = r.resolve_namespace(name); + let (opt_ns, local_name) = r.resolve_namespace(name, &ns_buf); (opt_ns, local_name, value) }); match atts.next() { @@ -166,7 +168,7 @@ fn test_attributes_empty_ns_expanded() { } } - match r.read_namespaced_event(&mut buf) { + match r.read_namespaced_event(&mut buf, &mut ns_buf) { Ok((None, End(e))) => assert_eq!(b"a", e.name()), e => panic!("Expecting End event, got {:?}", e), } @@ -179,10 +181,11 @@ fn test_default_ns_shadowing_empty() { let mut r = Reader::from_reader(src as &[u8]); r.trim_text(true).expand_empty_elements(false); let mut buf = Vec::new(); + let mut ns_buf = Vec::new(); // { - match r.read_namespaced_event(&mut buf) { + match r.read_namespaced_event(&mut buf, &mut ns_buf) { Ok((Some(ns), Start(e))) => { assert_eq!(&ns[..], b"urn:example:o"); assert_eq!(e.name(), b"e"); @@ -193,7 +196,7 @@ fn test_default_ns_shadowing_empty() { // { - let e = match r.read_namespaced_event(&mut buf) { + let e = match r.read_namespaced_event(&mut buf, &mut ns_buf) { Ok((Some(ns), Empty(e))) => { assert_eq!(::std::str::from_utf8(ns).unwrap(), "urn:example:i"); assert_eq!(e.name(), b"e"); @@ -207,7 +210,7 @@ fn test_default_ns_shadowing_empty() { // we don't care about xmlns attributes for this test .filter(|kv| !kv.key.starts_with(b"xmlns")) .map(|Attribute { key: name, value }| { - let (opt_ns, local_name) = r.resolve_namespace(name); + let (opt_ns, local_name) = r.resolve_namespace(name, &ns_buf); (opt_ns, local_name, value) }); // the attribute should _not_ have a namespace name. The default namespace does not @@ -223,7 +226,7 @@ fn test_default_ns_shadowing_empty() { } // - match r.read_namespaced_event(&mut buf) { + match r.read_namespaced_event(&mut buf, &mut ns_buf) { Ok((Some(ns), End(e))) => { assert_eq!(&ns[..], b"urn:example:o"); assert_eq!(e.name(), b"e"); @@ -239,10 +242,11 @@ fn test_default_ns_shadowing_expanded() { let mut r = Reader::from_reader(src as &[u8]); r.trim_text(true).expand_empty_elements(true); let mut buf = Vec::new(); + let mut ns_buf = Vec::new(); // { - match r.read_namespaced_event(&mut buf) { + match r.read_namespaced_event(&mut buf, &mut ns_buf) { Ok((Some(ns), Start(e))) => { assert_eq!(&ns[..], b"urn:example:o"); assert_eq!(e.name(), b"e"); @@ -254,7 +258,7 @@ fn test_default_ns_shadowing_expanded() { // { - let e = match r.read_namespaced_event(&mut buf) { + let e = match r.read_namespaced_event(&mut buf, &mut ns_buf) { Ok((Some(ns), Start(e))) => { assert_eq!(&ns[..], b"urn:example:i"); assert_eq!(e.name(), b"e"); @@ -267,7 +271,7 @@ fn test_default_ns_shadowing_expanded() { // we don't care about xmlns attributes for this test .filter(|kv| !kv.key.starts_with(b"xmlns")) .map(|Attribute { key: name, value }| { - let (opt_ns, local_name) = r.resolve_namespace(name); + let (opt_ns, local_name) = r.resolve_namespace(name, &ns_buf); (opt_ns, local_name, value) }); // the attribute should _not_ have a namespace name. The default namespace does not @@ -283,7 +287,7 @@ fn test_default_ns_shadowing_expanded() { } // virtual - match r.read_namespaced_event(&mut buf) { + match r.read_namespaced_event(&mut buf, &mut ns_buf) { Ok((Some(ns), End(e))) => { assert_eq!(&ns[..], b"urn:example:i"); assert_eq!(e.name(), b"e"); @@ -291,7 +295,7 @@ fn test_default_ns_shadowing_expanded() { e => panic!("Expected End event (), got {:?}", e), } // - match r.read_namespaced_event(&mut buf) { + match r.read_namespaced_event(&mut buf, &mut ns_buf) { Ok((Some(ns), End(e))) => { assert_eq!(&ns[..], b"urn:example:o"); assert_eq!(e.name(), b"e"); From 6bf416302165cbee97f182f3c45643e5511b7d03 Mon Sep 17 00:00:00 2001 From: Johann Tuffe Date: Mon, 12 Jun 2017 16:46:34 +0800 Subject: [PATCH 03/11] update unit_tests --- tests/unit_tests.rs | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/tests/unit_tests.rs b/tests/unit_tests.rs index 79552762..51a8d151 100644 --- a/tests/unit_tests.rs +++ b/tests/unit_tests.rs @@ -400,12 +400,13 @@ fn test_namespace() { r.trim_text(true);; let mut buf = Vec::new(); - if let Ok((None, Start(_))) = r.read_namespaced_event(&mut buf) { + let mut ns_buf = Vec::new(); + if let Ok((None, Start(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { } else { assert!(false, "expecting start element with no namespace"); } - if let Ok((Some(a), Start(_))) = r.read_namespaced_event(&mut buf) { + if let Ok((Some(a), Start(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { if &*a == b"www1" { assert!(true); } else { @@ -423,13 +424,14 @@ fn test_default_namespace() { // let mut buf = Vec::new(); - if let Ok((None, Start(_))) = r.read_namespaced_event(&mut buf) { + let mut ns_buf = Vec::new(); + if let Ok((None, Start(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { } else { assert!(false, "expecting outer start element with no namespace"); } // - if let Ok((Some(a), Start(_))) = r.read_namespaced_event(&mut buf) { + if let Ok((Some(a), Start(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { if &*a == b"www1" { assert!(true); } else { @@ -440,7 +442,7 @@ fn test_default_namespace() { } // - if let Ok((Some(a), End(_))) = r.read_namespaced_event(&mut buf) { + if let Ok((Some(a), End(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { if &*a == b"www1" { assert!(true); } else { @@ -452,7 +454,7 @@ fn test_default_namespace() { // very important: a should not be in any namespace. The default namespace only applies to // the sub-document it is defined on. - if let Ok((None, End(_))) = r.read_namespaced_event(&mut buf) { + if let Ok((None, End(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { } else { assert!(false, "expecting outer end element with no namespace"); } @@ -464,7 +466,8 @@ fn test_default_namespace_reset() { r.trim_text(true);; let mut buf = Vec::new(); - if let Ok((Some(a), Start(_))) = r.read_namespaced_event(&mut buf) { + let mut ns_buf = Vec::new(); + if let Ok((Some(a), Start(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { assert_eq!(&a[..], b"www1", "expecting outer start element with to resolve to 'www1'"); @@ -473,16 +476,16 @@ fn test_default_namespace_reset() { "expecting outer start element with to resolve to 'www1'"); } - if let Ok((None, Start(_))) = r.read_namespaced_event(&mut buf) { + if let Ok((None, Start(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { } else { assert!(false, "expecting inner start element"); } - if let Ok((None, End(_))) = r.read_namespaced_event(&mut buf) { + if let Ok((None, End(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { } else { assert!(false, "expecting inner end element"); } - if let Ok((Some(a), End(_))) = r.read_namespaced_event(&mut buf) { + if let Ok((Some(a), End(_))) = r.read_namespaced_event(&mut buf, &mut ns_buf) { assert_eq!(&a[..], b"www1", "expecting outer end element with to resolve to 'www1'"); From bde611aa70bad5c98ce0e9867c23dee30a1c44fc Mon Sep 17 00:00:00 2001 From: Johann Tuffe Date: Mon, 12 Jun 2017 16:47:18 +0800 Subject: [PATCH 04/11] update xmlns_reader --- tests/xmlrs_reader_tests.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/xmlrs_reader_tests.rs b/tests/xmlrs_reader_tests.rs index fbe52c86..a6532560 100644 --- a/tests/xmlrs_reader_tests.rs +++ b/tests/xmlrs_reader_tests.rs @@ -134,7 +134,7 @@ fn issue_83_duplicate_attributes() { #[test] fn issue_93_large_characters_in_entity_references() { - test(r#"&𤶼;"#.as_bytes(), + test(r#"&𤶼;"#.as_bytes(), br#" |StartElement(hello) |1:10 Error while escaping character at range 0..5: @@ -270,6 +270,7 @@ fn test(input: &[u8], output: &[u8], is_short: bool) { .enumerate(); let mut buf = Vec::new(); + let mut ns_buffer = Vec::new(); if !is_short { reader.read_event(&mut buf).unwrap(); @@ -278,7 +279,7 @@ fn test(input: &[u8], output: &[u8], is_short: bool) { loop { { let line = { - let e = reader.read_namespaced_event(&mut buf); + let e = reader.read_namespaced_event(&mut buf, &mut ns_buffer); format!("{}", OptEvent(e)) }; if let Some((n, spec)) = spec_lines.next() { From 5accc184a0c2b946a93f59cecac40c35452dfaf8 Mon Sep 17 00:00:00 2001 From: Johann Tuffe Date: Mon, 12 Jun 2017 17:20:32 +0800 Subject: [PATCH 05/11] Add issue68 example --- examples/issue68.rs | 138 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 examples/issue68.rs diff --git a/examples/issue68.rs b/examples/issue68.rs new file mode 100644 index 00000000..cbb9f597 --- /dev/null +++ b/examples/issue68.rs @@ -0,0 +1,138 @@ +extern crate quick_xml; + +use quick_xml::reader::Reader; +use quick_xml::events::Event; +use std::io::Read; + +struct Resource { + etag: String, + calendar_data: String, +} + +struct Prop { + namespace: String, + local_name: String, + value: String, +} + +impl Prop { + fn new() -> Prop { + Prop { + namespace: String::new(), + local_name: String::new(), + value: String::new(), + } + } +} + +struct PropStat { + status: String, + props: Vec, +} + +impl PropStat { + fn new() -> PropStat { + PropStat { + status: String::new(), + props: Vec::::new(), + } + } +} + +struct Response { + href: String, + propstats: Vec, +} + +impl Response { + fn new() -> Response { + Response { + href: String::new(), + propstats: Vec::::new(), + } + } +} + +fn parse_report(xml_data: &str) -> Vec { + let result = Vec::::new(); + + let mut reader = Reader::from_str(xml_data); + reader.trim_text(true); + + let mut count = 0; + let mut buf = Vec::new(); + let mut ns_buffer = Vec::new(); + + #[derive(Clone, Copy)] + enum State { + Root, + MultiStatus, + Response, + Success, + Error, + }; + + let mut responses = Vec::::new(); + let mut current_response = Response::new(); + let mut current_prop_stat = PropStat::new(); + let mut current_prop = Prop::new(); + + let mut depth = 0; + let mut state = State::MultiStatus; + + loop { + + match reader.read_namespaced_event(&mut buf, &mut ns_buffer) { + Ok((namespace_value, Event::Start(e))) => { + let namespace_value = namespace_value.unwrap_or_default(); + match (depth, state, namespace_value, e.local_name()) { + (0, State::Root, b"DAV:", b"multistatus") => state = State::MultiStatus, + (1, State::MultiStatus, b"DAV:", b"response") => { + state = State::Response; + current_response = Response::new(); + } + (2, State::Response, b"DAV:", b"href") => { + current_response.href = e.unescape_and_decode(&reader).unwrap(); + } + _ => {} + } + depth += 1; + } + Ok((namespace_value, Event::End(e))) => { + let namespace_value = namespace_value.unwrap_or_default(); + let local_name = e.local_name(); + match (depth, state, &*namespace_value, local_name) { + (1, State::MultiStatus, b"DAV:", b"multistatus") => state = State::Root, + (2, State::MultiStatus, b"DAV:", b"multistatus") => state = State::MultiStatus, + _ => {} + } + depth -= 1; + } + Ok((_, Event::Eof)) => break, + Err(e) => break, + _ => (), + } + + } + result +} + +fn main() { + let test_data = r#" + + + + /caldav/v2/johndoh%40gmail.com/events/07b7it7uonpnlnvjldr0l1ckg8%40google.com.ics + + HTTP/1.1 200 OK + + "63576798396" + BEGIN:VCALENDAR + + + + +"#; + + parse_report(test_data); +} From 8a8661205c136d2e6ae1f0e7651fb57aa62a02ae Mon Sep 17 00:00:00 2001 From: Johann Tuffe Date: Tue, 13 Jun 2017 15:41:06 +0800 Subject: [PATCH 06/11] allow unused in issue example --- examples/issue68.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/issue68.rs b/examples/issue68.rs index cbb9f597..0f1acefe 100644 --- a/examples/issue68.rs +++ b/examples/issue68.rs @@ -1,3 +1,5 @@ +#![allow(unused)] + extern crate quick_xml; use quick_xml::reader::Reader; @@ -74,7 +76,6 @@ fn parse_report(xml_data: &str) -> Vec { let mut responses = Vec::::new(); let mut current_response = Response::new(); - let mut current_prop_stat = PropStat::new(); let mut current_prop = Prop::new(); let mut depth = 0; From 8264538f5851274ccf662a1b4ac89e424d8e7074 Mon Sep 17 00:00:00 2001 From: Johann Tuffe Date: Tue, 13 Jun 2017 15:49:29 +0800 Subject: [PATCH 07/11] rust fmt and fix test --- examples/issue68.rs | 10 +++++++--- src/events/mod.rs | 3 +-- src/reader.rs | 19 +++++-------------- tests/xmlrs_reader_tests.rs | 2 +- 4 files changed, 14 insertions(+), 20 deletions(-) diff --git a/examples/issue68.rs b/examples/issue68.rs index 0f1acefe..8669b573 100644 --- a/examples/issue68.rs +++ b/examples/issue68.rs @@ -121,9 +121,13 @@ fn parse_report(xml_data: &str) -> Vec { fn main() { let test_data = r#" - - - /caldav/v2/johndoh%40gmail.com/events/07b7it7uonpnlnvjldr0l1ckg8%40google.com.ics + + + + /caldav/v2/johndoh%40gmail.com/events/07b7it7uonpnlnvjldr0l1ckg8%40google.com.ics + HTTP/1.1 200 OK diff --git a/src/events/mod.rs b/src/events/mod.rs index eb39af17..1f839fc4 100644 --- a/src/events/mod.rs +++ b/src/events/mod.rs @@ -406,8 +406,7 @@ fn local_name() { let mut buf = Vec::new(); let mut parsed_local_names = Vec::new(); loop { - match rdr.read_event(&mut buf) - .expect("unable to read xml event") { + match rdr.read_event(&mut buf).expect("unable to read xml event") { Event::Start(ref e) => { parsed_local_names.push(from_utf8(e.local_name()) .expect("unable to build str from local_name") diff --git a/src/reader.rs b/src/reader.rs index ed7e896d..235cc255 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -155,9 +155,7 @@ impl Reader { Ok(n) => { self.buf_position += n; let (start, len) = if self.trim_text { - match buf.iter() - .skip(buf_start) - .position(|&b| !is_whitespace(b)) { + match buf.iter().skip(buf_start).position(|&b| !is_whitespace(b)) { Some(start) => { (buf_start + start, buf.iter() @@ -319,10 +317,7 @@ impl Reader { Ok(Event::CData(BytesText::borrowed(&buf[buf_start + 8..len - 2]))) } b"DOCTYPE" => { - let mut count = buf.iter() - .skip(buf_start) - .filter(|&&b| b == b'<') - .count(); + let mut count = buf.iter().skip(buf_start).filter(|&&b| b == b'<').count(); while count > 0 { buf.push(b'>'); match read_until(&mut self.reader, b'>', buf) { @@ -384,9 +379,7 @@ impl Reader { fn read_start<'a, 'b>(&'a mut self, buf: &'b [u8]) -> Result> { // TODO: do this directly when reading bufreader ... let len = buf.len(); - let name_end = buf.iter() - .position(|&b| is_whitespace(b)) - .unwrap_or(len); + let name_end = buf.iter().position(|&b| is_whitespace(b)).unwrap_or(len); if let Some(&b'/') = buf.last() { let end = if name_end < len { name_end } else { len - 1 }; if self.expand_empty_elements { @@ -527,7 +520,7 @@ impl Reader { pub fn decode<'b, 'c>(&'b self, bytes: &'c [u8]) -> Cow<'c, str> { self.encoding.decode(bytes).0 } - + /// Reads until end element is found /// /// Manages nested cases where parent and child elements have the same name @@ -813,9 +806,7 @@ impl NamespaceBufferIndex { self.nesting_level -= 1; let current_level = self.nesting_level; // from the back (most deeply nested scope), look for the first scope that is still valid - match self.slices - .iter() - .rposition(|n| n.level <= current_level) { + match self.slices.iter().rposition(|n| n.level <= current_level) { // none of the namespaces are valid, remove all of them None => { buffer.clear(); diff --git a/tests/xmlrs_reader_tests.rs b/tests/xmlrs_reader_tests.rs index a6532560..d2ac4c77 100644 --- a/tests/xmlrs_reader_tests.rs +++ b/tests/xmlrs_reader_tests.rs @@ -134,7 +134,7 @@ fn issue_83_duplicate_attributes() { #[test] fn issue_93_large_characters_in_entity_references() { - test(r#"&𤶼;"#.as_bytes(), + test(r#"&𤶼;"#.as_bytes(), br#" |StartElement(hello) |1:10 Error while escaping character at range 0..5: From 0b2d0454a6f59f842463a5f5b5ef14dbac85e977 Mon Sep 17 00:00:00 2001 From: Johann Tuffe Date: Tue, 13 Jun 2017 16:22:11 +0800 Subject: [PATCH 08/11] cargo fmt new version --- examples/issue68.rs | 6 +++--- src/events/mod.rs | 2 +- src/reader.rs | 30 ++++++++++++++---------------- tests/xmlrs_reader_tests.rs | 9 ++++----- 4 files changed, 22 insertions(+), 25 deletions(-) diff --git a/examples/issue68.rs b/examples/issue68.rs index 8669b573..6671728b 100644 --- a/examples/issue68.rs +++ b/examples/issue68.rs @@ -119,11 +119,11 @@ fn parse_report(xml_data: &str) -> Vec { } fn main() { - let test_data = r#" + let test_data = r#" - - /caldav/v2/johndoh%40gmail.com/events/07b7it7uonpnlnvjldr0l1ckg8%40google.com.ics diff --git a/src/events/mod.rs b/src/events/mod.rs index 1f839fc4..e5c8c50e 100644 --- a/src/events/mod.rs +++ b/src/events/mod.rs @@ -157,7 +157,7 @@ impl<'a> BytesDecl<'a> { Some(Ok(a)) => { Err(format!("XmlDecl must start with 'version' attribute, found {:?}", from_utf8(a.key)) - .into()) + .into()) } None => Err("XmlDecl must start with 'version' attribute, found none".into()), } diff --git a/src/reader.rs b/src/reader.rs index 235cc255..1a081758 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -651,7 +651,7 @@ fn read_until(r: &mut R, byte: u8, buf: &mut Vec) -> Result(r: &mut R, end_byte: u8, buf: &mut Vec) -> Result { - #[derive(Debug,Clone,Copy,PartialEq,Eq)] + #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum ElemReadState { /// The initial state (inside element, but outside of attribute value) Elem, @@ -759,7 +759,7 @@ impl Namespace { None } else { Some(&ns_buffer[self.start + self.prefix_len.. - self.start + self.prefix_len + self.value_len]) + self.start + self.prefix_len + self.value_len]) } } } @@ -834,25 +834,23 @@ impl NamespaceBufferIndex { None => { let start = buffer.len(); buffer.extend_from_slice(v); - self.slices - .push(Namespace { - start: start, - prefix_len: 0, - value_len: v.len(), - level: level, - }); + self.slices.push(Namespace { + start: start, + prefix_len: 0, + value_len: v.len(), + level: level, + }); } Some(&b':') => { let start = buffer.len(); buffer.extend_from_slice(&k[6..]); buffer.extend_from_slice(v); - self.slices - .push(Namespace { - start: start, - prefix_len: k.len() - 6, - value_len: v.len(), - level: level, - }); + self.slices.push(Namespace { + start: start, + prefix_len: k.len() - 6, + value_len: v.len(), + level: level, + }); } _ => break, } diff --git a/tests/xmlrs_reader_tests.rs b/tests/xmlrs_reader_tests.rs index d2ac4c77..b1117abd 100644 --- a/tests/xmlrs_reader_tests.rs +++ b/tests/xmlrs_reader_tests.rs @@ -305,11 +305,10 @@ fn test(input: &[u8], output: &[u8], is_short: bool) { if !is_short && line.starts_with("StartDocument") { // advance next Characters(empty space) ... if let Ok(Event::Text(ref e)) = reader.read_event(&mut buf) { - if e.iter() - .any(|b| match *b { - b' ' | b'\r' | b'\n' | b'\t' => false, - _ => true, - }) { + if e.iter().any(|b| match *b { + b' ' | b'\r' | b'\n' | b'\t' => false, + _ => true, + }) { panic!("Reader expects empty Text event after a StartDocument"); } } else { From 8883f1bf0f82002a9632a707e72a069513909242 Mon Sep 17 00:00:00 2001 From: Johann Tuffe Date: Tue, 13 Jun 2017 17:04:35 +0800 Subject: [PATCH 09/11] add read_namespaced_event example and bump version --- Cargo.toml | 6 +++--- Changelog.md | 3 +++ README.md | 6 +----- examples/issue68.rs | 2 +- src/reader.rs | 40 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 48 insertions(+), 9 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 487879af..8c087532 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "quick-xml" -version = "0.7.3" +version = "0.8.0" authors = ["Johann Tuffe "] description = "High performance xml reader and writer" @@ -16,11 +16,11 @@ license-file = "LICENSE-MIT.md" travis-ci = { repository = "tafia/quick-xml" } [dependencies] -encoding_rs = "0.6.6" +encoding_rs = "0.6.11" error-chain = "0.10.0" [dev-dependencies] -xml-rs = "0.4.1" +xml-rs = "0.6.0" [lib] bench = false diff --git a/Changelog.md b/Changelog.md index d8955b34..0a623d46 100644 --- a/Changelog.md +++ b/Changelog.md @@ -8,6 +8,9 @@ - test: Adding missing tests - chore: Changes to the build process or auxiliary tools/libraries/documentation +## 0.8.0 +- fix: make the reader borrow the namespace buffer so it can be used repetitively + ## 0.7.3 - fix: fix Event::Text slice always starting at the beginning of the buffer diff --git a/README.md b/README.md index dbf8c2c0..f25460ed 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ Syntax is inspired by [xml-rs](https://github.com/netvl/xml-rs). ```toml [dependencies] -quick-xml = "0.7.0" +quick-xml = "0.8.0" ``` ``` rust extern crate quick_xml; @@ -49,11 +49,7 @@ let mut buf = Vec::new(); // The `Reader` does not implement `Iterator` because it outputs borrowed data (`Cow`s) loop { match reader.read_event(&mut buf) { - // for triggering namespaced events, use this instead: - // match reader.read_namespaced_event(&mut buf) { Ok(Event::Start(ref e)) => { - // for namespaced: - // Ok((ref namespace_value, Event::Start(ref e))) match e.name() { b"tag1" => println!("attributes values: {:?}", e.attributes().map(|a| a.unwrap().value).collect::>()), diff --git a/examples/issue68.rs b/examples/issue68.rs index 6671728b..d70b93c7 100644 --- a/examples/issue68.rs +++ b/examples/issue68.rs @@ -121,7 +121,7 @@ fn parse_report(xml_data: &str) -> Vec { fn main() { let test_data = r#" - diff --git a/src/reader.rs b/src/reader.rs index 1a081758..1fabf3bb 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -470,6 +470,46 @@ impl Reader { } /// Reads the next event and resolve its namespace + /// + /// # Examples + /// ``` + /// use std::str::from_utf8; + /// use quick_xml::reader::Reader; + /// use quick_xml::events::Event; + /// + /// let xml = r#" + /// Test + /// Test 2 + /// "#; + /// let mut reader = Reader::from_str(xml); + /// reader.trim_text(true); + /// let mut count = 0; + /// let mut buf = Vec::new(); + /// let mut ns_buf = Vec::new(); + /// let mut txt = Vec::new(); + /// loop { + /// match reader.read_namespaced_event(&mut buf, &mut ns_buf) { + /// Ok((ref ns, Event::Start(ref e))) => { + /// count += 1; + /// match (*ns, e.local_name()) { + /// (Some(b"www.xxxx"), b"tag1") => (), + /// (Some(b"www.yyyy"), b"tag2") => (), + /// (ns, n) => panic!("Namespace and local name mismatch"), + /// } + /// println!("Resolved namespace: {:?}", ns.and_then(|ns| from_utf8(ns).ok())); + /// } + /// Ok((_, Event::Text(e))) => { + /// txt.push(e.unescape_and_decode(&reader).expect("Error!")) + /// }, + /// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e), + /// Ok((_, Event::Eof)) => break, + /// _ => (), + /// } + /// buf.clear(); + /// } + /// println!("Found {} start events", count); + /// println!("Text events: {:?}", txt); + /// ``` pub fn read_namespaced_event<'a, 'b, 'c>(&'a mut self, buf: &'b mut Vec, namespace_buffer: &'c mut Vec) From 754c3d570d93fa8e457e2089b8f0dda81b14c776 Mon Sep 17 00:00:00 2001 From: Johann Tuffe Date: Tue, 13 Jun 2017 17:08:58 +0800 Subject: [PATCH 10/11] fix bench --- benches/bench.rs | 3 ++- examples/issue68.rs | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/benches/bench.rs b/benches/bench.rs index a3d40649..7207725e 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -36,8 +36,9 @@ fn bench_quick_xml_namespaced(b: &mut Bencher) { r.check_end_names(false).check_comments(false); let mut count = test::black_box(0); let mut buf = Vec::new(); + let mut ns_buf = Vec::new(); loop { - match r.read_namespaced_event(&mut buf) { + match r.read_namespaced_event(&mut buf, &mut ns_buf) { Ok((_, Event::Start(_))) | Ok((_, Event::Empty(_))) => count += 1, Ok((_, Event::Eof)) => break, diff --git a/examples/issue68.rs b/examples/issue68.rs index d70b93c7..6671728b 100644 --- a/examples/issue68.rs +++ b/examples/issue68.rs @@ -121,7 +121,7 @@ fn parse_report(xml_data: &str) -> Vec { fn main() { let test_data = r#" - From 6b682f0a4fe7c1b9b076bc5f997104eeed0e7082 Mon Sep 17 00:00:00 2001 From: Johann Tuffe Date: Tue, 13 Jun 2017 17:17:15 +0800 Subject: [PATCH 11/11] update changelog --- Changelog.md | 1 + 1 file changed, 1 insertion(+) diff --git a/Changelog.md b/Changelog.md index 0a623d46..804c8039 100644 --- a/Changelog.md +++ b/Changelog.md @@ -10,6 +10,7 @@ ## 0.8.0 - fix: make the reader borrow the namespace buffer so it can be used repetitively +- refactor: bump dependencies ## 0.7.3 - fix: fix Event::Text slice always starting at the beginning of the buffer