New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement fragment parsing #91
Merged
+189
−23
Merged
Changes from 1 commit
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter...
Filter file types
Jump to…
Jump to file
Failed to load files.
Loading status checks…
Implement fragment parsing
- Loading branch information
commit cddca3fa3808ba0e4ea10e9a8be8594e22902086
| @@ -665,14 +665,17 @@ impl<Handle, Sink> TreeBuilderActions<Handle> | ||
| } | ||
| } | ||
|
|
||
| // https://html.spec.whatwg.org/multipage/syntax.html#reset-the-insertion-mode-appropriately | ||
| fn reset_insertion_mode(&mut self) -> InsertionMode { | ||
| for (i, node) in self.open_elems.iter().enumerate().rev() { | ||
| for (i, mut node) in self.open_elems.iter().enumerate().rev() { | ||
| let last = i == 0u; | ||
| if last && self.is_fragment() { | ||
| node = self.context_elem.as_ref().expect("no context element"); | ||
kmcallister
Contributor
|
||
| } | ||
| let name = match self.sink.elem_name(node.clone()) { | ||
| QualName { ns: ns!(HTML), local } => local, | ||
| _ => continue, | ||
| }; | ||
| let last = i == 0u; | ||
| // FIXME: fragment case context element | ||
| match name { | ||
| // FIXME: <select> sub-steps | ||
| atom!(select) => return InSelect, | ||
| @@ -19,9 +19,12 @@ use self::types::*; | ||
| use self::actions::TreeBuilderActions; | ||
| use self::rules::TreeBuilderStep; | ||
|
|
||
| use string_cache::QualName; | ||
|
|
||
| use tokenizer; | ||
| use tokenizer::{Doctype, Tag}; | ||
| use tokenizer::TokenSink; | ||
| use tokenizer::states::{State, RawData, Rcdata, Rawtext, ScriptData, Data, Plaintext}; | ||
kmcallister
Contributor
|
||
|
|
||
| use util::str::{is_ascii_whitespace, char_run}; | ||
|
|
||
| @@ -33,7 +36,8 @@ use std::borrow::Cow::Borrowed; | ||
| use collections::RingBuf; | ||
|
|
||
| #[macro_use] mod tag_sets; | ||
| mod interface; | ||
| // "pub" is a workaround for rust#18241 (?) | ||
| pub mod interface; | ||
| mod data; | ||
| mod types; | ||
| mod actions; | ||
| @@ -52,9 +56,6 @@ pub struct TreeBuilderOpts { | ||
| /// Is this an `iframe srcdoc` document? | ||
| pub iframe_srcdoc: bool, | ||
|
|
||
| /// Are we parsing a HTML fragment? | ||
| pub fragment: bool, | ||
|
|
||
| /// Should we drop the DOCTYPE (if any) from the tree? | ||
| pub drop_doctype: bool, | ||
|
|
||
| @@ -75,7 +76,6 @@ impl Default for TreeBuilderOpts { | ||
| exact_errors: false, | ||
| scripting_enabled: true, | ||
| iframe_srcdoc: false, | ||
| fragment: false, | ||
| drop_doctype: false, | ||
| ignore_missing_rules: false, | ||
| } | ||
| @@ -135,6 +135,9 @@ pub struct TreeBuilder<Handle, Sink> { | ||
| /// Is foster parenting enabled? | ||
| foster_parenting: bool, | ||
|
|
||
| /// The context element for the fragment parsing algorithm. | ||
| context_elem: Option<Handle>, | ||
|
|
||
| // WARNING: If you add new fields that contain Handles, you | ||
| // must add them to trace_handles() below to preserve memory | ||
| // safety! | ||
| @@ -168,6 +171,72 @@ impl<Handle, Sink> TreeBuilder<Handle, Sink> | ||
| frameset_ok: true, | ||
| ignore_lf: false, | ||
| foster_parenting: false, | ||
| context_elem: None, | ||
| } | ||
| } | ||
|
|
||
| /// Create a new tree builder which sends tree modifications to a particular `TreeSink`. | ||
| /// This is for parsing fragments: `context_elem` is recorded as the context element, `form_elem` becomes the builder's initial `form_elem`, and the template mode stack starts with `InTemplate` when the context element is an HTML `template`. | ||
| /// | ||
| /// The tree builder is also a `TokenSink`. | ||
| pub fn new_for_fragment(mut sink: Sink, | ||
| context_elem: Handle, | ||
| form_elem: Option<Handle>, | ||
| opts: TreeBuilderOpts) -> TreeBuilder<Handle, Sink> { | ||
| let doc_handle = sink.get_document(); | ||
| let context_is_template = | ||
| sink.elem_name(context_elem.clone()) == QualName::new(ns!(HTML), atom!(template)); | ||
|
||
| let mut tb = TreeBuilder { | ||
| opts: opts, | ||
| sink: sink, | ||
| mode: Initial, | ||
| orig_mode: None, | ||
| template_modes: if context_is_template { vec![InTemplate] } else { vec![] }, | ||
| pending_table_text: vec!(), | ||
| quirks_mode: NoQuirks, // XXX hard-coded for now; set this to match the sink's document | ||
|
||
| doc_handle: doc_handle, | ||
| open_elems: vec!(), | ||
| active_formatting: vec!(), | ||
| head_elem: None, | ||
| form_elem: form_elem, | ||
| next_tokenizer_state: None, | ||
| frameset_ok: true, | ||
| ignore_lf: false, | ||
| foster_parenting: false, | ||
| context_elem: Some(context_elem), | ||
| }; | ||
|
|
||
| // https://html.spec.whatwg.org/multipage/syntax.html#parsing-html-fragments | ||
| // 5. Let root be a new html element with no attributes. | ||
| // 6. Append the element root to the Document node created above. | ||
| // 7. Set up the parser's stack of open elements so that it contains just the single element root. | ||
| tb.create_root(vec!()); | ||
| // 10. Reset the parser's insertion mode appropriately (the initial `mode: Initial` above is overwritten here). | ||
| tb.mode = tb.reset_insertion_mode(); | ||
|
|
||
| tb | ||
| } | ||
|
|
||
| // https://html.spec.whatwg.org/multipage/syntax.html#concept-frag-parse-context | ||
| // Step 4. Set the state of the HTML parser's tokenization stage as follows: choose the state from the context element's name. Panics with "no context element" if no context element is set; any name not listed below, and any element outside the HTML namespace, yields `Data`. | ||
| pub fn tokenizer_state_for_context_elem(&self) -> State { | ||
| let elem = self.context_elem.clone().expect("no context element"); | ||
kmcallister
Contributor
|
||
| match self.sink.elem_name(elem) { | ||
| QualName { ns: ns!(HTML), local } => match local { | ||
| atom!(title) | atom!(textarea) => RawData(Rcdata), | ||
|
|
||
| atom!(style) | atom!(xmp) | atom!(iframe) | ||
| | atom!(noembed) | atom!(noframes) => RawData(Rawtext), | ||
|
|
||
| atom!(script) => RawData(ScriptData), | ||
|
|
||
| atom!(noscript) => if self.opts.scripting_enabled { RawData(Rawtext) } else { Data }, | ||
|
|
||
| atom!(plaintext) => Plaintext, | ||
|
|
||
| _ => Data | ||
| }, | ||
| _ => Data | ||
| } | ||
| } | ||
|
|
||
| @@ -198,6 +267,7 @@ impl<Handle, Sink> TreeBuilder<Handle, Sink> | ||
| } | ||
| self.head_elem.as_ref().map(|h| tracer.trace_handle(h.clone())); | ||
| self.form_elem.as_ref().map(|h| tracer.trace_handle(h.clone())); | ||
| self.context_elem.as_ref().map(|h| tracer.trace_handle(h.clone())); | ||
| } | ||
|
|
||
| // Debug helper | ||
| @@ -285,6 +355,11 @@ impl<Handle, Sink> TreeBuilder<Handle, Sink> | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /// Are we parsing an HTML fragment? True exactly when a context element is set. | ||
| pub fn is_fragment(&self) -> bool { | ||
| self.context_elem.is_some() | ||
| } | ||
| } | ||
|
|
||
| impl<Handle, Sink> TokenSink | ||
ProTip!
Use n and p to navigate between commits in a pull request.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
I think this should be `Atom`, to match the tree builder APIs that talk about node names. Also, that will avoid a heap allocation in the common case.