Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement fragment parsing #91

Merged
merged 3 commits into from Feb 19, 2015
Merged
Changes from 1 commit
Commits
File filter...
Filter file types
Jump to…
Jump to file
Failed to load files.

Always

Just for now

Prev

Address review comments

  • Loading branch information
ChrisParis committed Feb 19, 2015
commit c27cdde75f3fc5655f6c768473df597a41ae4009
@@ -98,13 +98,11 @@ pub fn parse_fragment_to<
>(
sink: Sink,
input: It,
context: String,
context: Atom,
opts: ParseOpts) -> Sink {

let mut sink = sink;
let context_elem = sink.create_element(QualName::new(ns!(HTML),
Atom::from_slice(context.as_slice())),
vec!());
let context_elem = sink.create_element(QualName::new(ns!(HTML), context), vec!());
let tb = TreeBuilder::new_for_fragment(sink, context_elem, None, opts.tree_builder);
let tok_opts = TokenizerOpts {
initial_state: Some(tb.tokenizer_state_for_context_elem()),
@@ -149,7 +147,7 @@ pub fn parse<Output, It>(input: It, opts: ParseOpts) -> Output
/// ```ignore
/// let dom: RcDom = parse_fragment(one_input(my_str), context_token, Default::default());
/// ```
pub fn parse_fragment<Output, It>(input: It, context: String, opts: ParseOpts) -> Output
pub fn parse_fragment<Output, It>(input: It, context: Atom, opts: ParseOpts) -> Output
where Output: ParseResult,
It: Iterator<Item=String>,
{
@@ -669,8 +669,8 @@ impl<Handle, Sink> TreeBuilderActions<Handle>
fn reset_insertion_mode(&mut self) -> InsertionMode {
for (i, mut node) in self.open_elems.iter().enumerate().rev() {
let last = i == 0u;
if last && self.is_fragment() {
node = self.context_elem.as_ref().expect("no context element");
if let (true, Some(ctx)) = (last, self.context_elem.as_ref()) {
node = ctx;
}
let name = match self.sink.elem_name(node.clone()) {
QualName { ns: ns!(HTML), local } => local,
@@ -24,7 +24,7 @@ use string_cache::QualName;
use tokenizer;
use tokenizer::{Doctype, Tag};
use tokenizer::TokenSink;
use tokenizer::states::{State, RawData, Rcdata, Rawtext, ScriptData, Data, Plaintext};
use tokenizer::states as tok_state;

use util::str::{is_ascii_whitespace, char_run};

@@ -185,15 +185,15 @@ impl<Handle, Sink> TreeBuilder<Handle, Sink>
opts: TreeBuilderOpts) -> TreeBuilder<Handle, Sink> {
let doc_handle = sink.get_document();
let context_is_template =
sink.elem_name(context_elem.clone()) == QualName::new(ns!(HTML), atom!(template));
sink.elem_name(context_elem.clone()) == qualname!(HTML, template);
let mut tb = TreeBuilder {
opts: opts,
sink: sink,
mode: Initial,
orig_mode: None,
template_modes: if context_is_template { vec![InTemplate] } else { vec![] },
pending_table_text: vec!(),
quirks_mode: NoQuirks, // XXX set this to match the sink's document
quirks_mode: NoQuirks, // FIXME(#96) set this to match the sink's document
doc_handle: doc_handle,
open_elems: vec!(),
active_formatting: vec!(),
@@ -219,24 +219,29 @@ impl<Handle, Sink> TreeBuilder<Handle, Sink>

// https://html.spec.whatwg.org/multipage/syntax.html#concept-frag-parse-context
// Step 4. Set the state of the HTML parser's tokenization stage as follows:
pub fn tokenizer_state_for_context_elem(&self) -> State {
pub fn tokenizer_state_for_context_elem(&self) -> tok_state::State {
let elem = self.context_elem.clone().expect("no context element");

This comment has been minimized.

Copy link
@kmcallister

kmcallister Feb 18, 2015

Contributor

I'd prefer to handle non-HTML elements with an early return, to avoid rightward drift and for consistency with other code.

match self.sink.elem_name(elem) {
QualName { ns: ns!(HTML), local } => match local {
atom!(title) | atom!(textarea) => RawData(Rcdata),
let name = match self.sink.elem_name(elem) {
QualName { ns: ns!(HTML), local } => local,
_ => return tok_state::Data
};
match name {
atom!(title) | atom!(textarea) => tok_state::RawData(tok_state::Rcdata),

atom!(style) | atom!(xmp) | atom!(iframe)
| atom!(noembed) | atom!(noframes) => RawData(Rawtext),
atom!(style) | atom!(xmp) | atom!(iframe)
| atom!(noembed) | atom!(noframes) => tok_state::RawData(tok_state::Rawtext),

atom!(script) => RawData(ScriptData),
atom!(script) => tok_state::RawData(tok_state::ScriptData),

atom!(noscript) => if self.opts.scripting_enabled { RawData(Rawtext) } else { Data },
atom!(noscript) => if self.opts.scripting_enabled {
tok_state::RawData(tok_state::Rawtext)
} else {
tok_state::Data
},

atom!(plaintext) => Plaintext,
atom!(plaintext) => tok_state::Plaintext,

_ => Data
},
_ => Data
_ => tok_state::Data
}
}

@@ -34,6 +34,8 @@ use html5ever::sink::common::{Document, Doctype, Text, Comment, Element};
use html5ever::sink::rcdom::{RcDom, Handle};
use html5ever::{parse, parse_fragment, one_input};

use string_cache::Atom;

fn parse_tests<It: Iterator<Item=String>>(mut lines: It) -> Vec<HashMap<String, String>> {
let mut tests = vec!();
let mut test = HashMap::new();
@@ -148,7 +150,7 @@ fn make_test(
let data = get_field("data");
let expected = get_field("document");
let context = fields.get("document-fragment")
.map(|field| field.as_slice().trim_right_matches('\n').to_string());
.map(|field| Atom::from_slice(field.as_slice().trim_right_matches('\n')));
let name = format!("tb: {}-{}", path_str, idx);
let ignore = ignores.contains(&name)
|| IGNORE_SUBSTRS.iter().any(|&ig| data.as_slice().contains(ig));
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.