From cddca3fa3808ba0e4ea10e9a8be8594e22902086 Mon Sep 17 00:00:00 2001 From: Chris Paris Date: Tue, 10 Feb 2015 02:27:50 -1000 Subject: [PATCH 1/3] Implement fragment parsing --- src/driver.rs | 51 ++++++++++++++++++++++ src/lib.rs | 2 +- src/tree_builder/actions.rs | 9 ++-- src/tree_builder/mod.rs | 85 ++++++++++++++++++++++++++++++++++--- src/tree_builder/rules.rs | 6 +-- tests/tree_builder.rs | 34 ++++++++++----- 6 files changed, 164 insertions(+), 23 deletions(-) diff --git a/src/driver.rs b/src/driver.rs index 2b785d87..951c1625 100644 --- a/src/driver.rs +++ b/src/driver.rs @@ -18,6 +18,8 @@ use core::default::Default; use core::option; use collections::string::String; +use string_cache::{Atom, QualName}; + /// Convenience function to turn a single `String` into an iterator. pub fn one_input(x: String) -> option::IntoIter { Some(x).into_iter() @@ -82,6 +84,40 @@ pub fn parse_to< tok.unwrap().unwrap() } +/// Parse an HTML fragment and send results to a `TreeSink`. +/// +/// ## Example +/// +/// ```ignore +/// let mut sink = MySink; +/// parse_fragment_to(&mut sink, one_input(my_str), context_token, Default::default()); +/// ``` +pub fn parse_fragment_to< + Sink: TreeSink, + It: Iterator + >( + sink: Sink, + input: It, + context: String, + opts: ParseOpts) -> Sink { + + let mut sink = sink; + let context_elem = sink.create_element(QualName::new(ns!(HTML), + Atom::from_slice(context.as_slice())), + vec!()); + let tb = TreeBuilder::new_for_fragment(sink, context_elem, None, opts.tree_builder); + let tok_opts = TokenizerOpts { + initial_state: Some(tb.tokenizer_state_for_context_elem()), + .. opts.tokenizer + }; + let mut tok = Tokenizer::new(tb, tok_opts); + for s in input { + tok.feed(s); + } + tok.end(); + tok.unwrap().unwrap() +} + /// Results which can be extracted from a `TreeSink`. /// /// Implement this for your parse tree data type so that it @@ -105,3 +141,18 @@ pub fn parse(input: It, opts: ParseOpts) -> Output let sink = parse_to(Default::default(), input, opts); ParseResult::get_result(sink) } + +/// Parse an HTML fragment into a type which implements `ParseResult`. +/// +/// ## Example +/// +/// ```ignore +/// let dom: RcDom = parse_fragment(one_input(my_str), context_token, Default::default()); +/// ``` +pub fn parse_fragment(input: It, context: String, opts: ParseOpts) -> Output + where Output: ParseResult, + It: Iterator, +{ + let sink = parse_fragment_to(Default::default(), input, context, opts); + ParseResult::get_result(sink) +} diff --git a/src/lib.rs b/src/lib.rs index faee352e..c2cec64c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -49,7 +49,7 @@ extern crate phf; extern crate time; pub use tokenizer::Attribute; -pub use driver::{one_input, ParseOpts, parse_to, parse}; +pub use driver::{one_input, ParseOpts, parse_to, parse_fragment_to, parse, parse_fragment}; #[cfg(not(for_c))] pub use serialize::serialize; diff --git a/src/tree_builder/actions.rs b/src/tree_builder/actions.rs index 9378de01..34faa630 100644 --- a/src/tree_builder/actions.rs +++ b/src/tree_builder/actions.rs @@ -665,14 +665,17 @@ impl TreeBuilderActions } } + // https://html.spec.whatwg.org/multipage/syntax.html#reset-the-insertion-mode-appropriately fn reset_insertion_mode(&mut self) -> InsertionMode { - for (i, node) in self.open_elems.iter().enumerate().rev() { + for (i, mut node) in self.open_elems.iter().enumerate().rev() { + let last = i == 0u; + if last && self.is_fragment() { + node = self.context_elem.as_ref().expect("no context element"); + } let name = match self.sink.elem_name(node.clone()) { QualName { ns: ns!(HTML), local } => local, _ => continue, }; - let last = i == 0u; - // FIXME: fragment case context element match name { // FIXME: