Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use html5ever’s BytesParser. #9677

Closed
wants to merge 4 commits into from
Closed
Changes from all commits
Commits
File filter...
Filter file types
Jump to…
Jump to file
Failed to load files.

Always

Just for now

@@ -70,9 +70,9 @@ cssparser = {version = "0.5.3", features = ["heap_size", "serde-serialization"]}
encoding = "0.2"
euclid = {version = "0.6.2", features = ["plugins"]}
fnv = "1.0"
heapsize = "0.3.0"
heapsize = "0.3.2"
heapsize_plugin = "0.1.2"
html5ever = {version = "0.5.1", features = ["heap_size", "unstable"]}
html5ever = {version = "0.5.2", features = ["heap_size", "unstable"]}
hyper = { version = "0.7", features = [ "serde-serialization" ] }
image = "0.5.0"
libc = "0.2"
@@ -149,4 +149,9 @@ impl<T> DOMRefCell<T> {
pub fn borrow_mut(&self) -> RefMut<T> {
self.try_borrow_mut().expect("DOMRefCell<T> already borrowed")
}

/// Consumes the `DOMRefCell`, returning the wrapped value.
///
/// Takes `self` by value, so the cell cannot be used afterwards; the result
/// is whatever `into_inner()` on the inner `value` field yields.
pub fn into_inner(self) -> T {
self.value.into_inner()
}
}
@@ -70,7 +70,7 @@ use serde::{Deserialize, Serialize};
use smallvec::SmallVec;
use std::boxed::FnBox;
use std::cell::{Cell, UnsafeCell};
use std::collections::{HashMap, HashSet};
use std::collections::{HashMap, HashSet, VecDeque};
use std::ffi::CString;
use std::hash::{BuildHasher, Hash};
use std::intrinsics::return_address;
@@ -204,6 +204,17 @@ impl<T: JSTraceable> JSTraceable for Vec<T> {
}
}

// XXXManishearth Check whether this loop is optimized to a no-op when
// e.trace() is itself a no-op (e.g. for a no_jsmanaged_fields type).
impl<T: JSTraceable> JSTraceable for VecDeque<T> {
    /// Traces every element of the deque, front to back, with the given tracer.
    #[inline]
    fn trace(&self, trc: *mut JSTracer) {
        for item in self.iter() {
            item.trace(trc);
        }
    }
}

// XXXManishearth Check if the following three are optimized to no-ops
// if e.trace() is a no-op (e.g. it is a no_jsmanaged_fields type)
impl<T: JSTraceable + 'static> JSTraceable for SmallVec<[T; 1]> {
@@ -17,21 +17,20 @@ use dom::document::Document;
use dom::node::Node;
use dom::servoxmlparser::ServoXMLParser;
use dom::window::Window;
use encoding::all::UTF_8;
use encoding::types::{DecoderTrap, Encoding};
use html5ever::tokenizer;
use html5ever::driver::{BytesParser, BytesOpts, parse_document, parse_fragment_for_element};
use html5ever::tendril::TendrilSink;
use html5ever::tree_builder;
use html5ever::tree_builder::{TreeBuilder, TreeBuilderOpts};
use hyper::header::ContentType;
use hyper::mime::{Mime, SubLevel, TopLevel};
use js::jsapi::JSTracer;
use msg::constellation_msg::{PipelineId, SubpageId};
use net_traits::{AsyncResponseListener, Metadata};
use network_listener::PreInvoke;
use parse::Parser;
use parse::{Parser, Chunk};
use script_thread::{ScriptChan, ScriptThread};
use std::cell::Cell;
use std::cell::UnsafeCell;
use std::collections::VecDeque;
use std::default::Default;
use std::ptr;
use url::Url;
@@ -51,8 +50,6 @@ pub struct FragmentContext<'a> {
pub form_elem: Option<&'a Node>,
}

pub type Tokenizer = tokenizer::Tokenizer<TreeBuilder<JS<Node>, Sink>>;

#[must_root]
#[derive(JSTraceable, HeapSizeOf)]
pub enum ParserField {
@@ -136,7 +133,7 @@ pub enum ParserRef<'a> {
}

impl<'a> ParserRef<'a> {
fn parse_chunk(&self, input: String) {
fn parse_chunk(&self, input: Chunk) {
match *self {
ParserRef::HTML(parser) => parser.parse_chunk(input),
ParserRef::XML(parser) => parser.parse_chunk(input),
@@ -171,7 +168,7 @@ impl<'a> ParserRef<'a> {
}
}

pub fn pending_input(&self) -> &DOMRefCell<Vec<String>> {
pub fn pending_input(&self) -> &DOMRefCell<VecDeque<Chunk>> {
match *self {
ParserRef::HTML(parser) => parser.pending_input(),
ParserRef::XML(parser) => parser.pending_input(),
@@ -263,13 +260,13 @@ impl AsyncResponseListener for ParserContext {
self.is_synthesized_document = true;
let page = format!("<html><body><img src='{}' /></body></html>",
self.url.serialize());
parser.pending_input().borrow_mut().push(page);
parser.pending_input().borrow_mut().push_back(Chunk::Dom(page.into()));
parser.parse_sync();
},
Some(ContentType(Mime(TopLevel::Text, SubLevel::Plain, _))) => {
// https://html.spec.whatwg.org/multipage/#read-text
let page = format!("<pre>\n");
parser.pending_input().borrow_mut().push(page);
let page = "<pre>\n";
parser.pending_input().borrow_mut().push_back(Chunk::Dom(page.into()));
parser.parse_sync();
parser.set_plaintext_state();
},
@@ -285,7 +282,7 @@ impl AsyncResponseListener for ParserContext {
let page = format!("<html><body><p>Unknown content type ({}/{}).</p></body></html>",
toplevel.as_str(), sublevel.as_str());
self.is_synthesized_document = true;
parser.pending_input().borrow_mut().push(page);
parser.pending_input().borrow_mut().push_back(Chunk::Dom(page.into()));
parser.parse_sync();
},
None => {
@@ -297,13 +294,11 @@ impl AsyncResponseListener for ParserContext {

fn data_available(&mut self, payload: Vec<u8>) {
if !self.is_synthesized_document {
// FIXME: use Vec<u8> (html5ever #34)
let data = UTF_8.decode(&payload, DecoderTrap::Replace).unwrap();
let parser = match self.parser.as_ref() {
Some(parser) => parser.root(),
None => return,
};
parser.r().parse_chunk(data);
parser.r().parse_chunk(Chunk::Bytes(payload));
}
}

@@ -333,9 +328,9 @@ impl PreInvoke for ParserContext {
pub struct ServoHTMLParser {
reflector_: Reflector,
#[ignore_heap_size_of = "Defined in html5ever"]
tokenizer: DOMRefCell<Tokenizer>,
html5ever_parser: DOMRefCell<Option<BytesParser<Sink>>>,
/// Input chunks received but not yet passed to the parser.
pending_input: DOMRefCell<Vec<String>>,
pending_input: DOMRefCell<VecDeque<Chunk>>,
/// The document associated with this parser.
document: JS<Document>,
/// True if this parser should avoid passing any further data to the tokenizer.
@@ -348,9 +343,9 @@ pub struct ServoHTMLParser {
}

impl<'a> Parser for &'a ServoHTMLParser {
fn parse_chunk(self, input: String) {
fn parse_chunk(self, input: Chunk) {
self.document.set_current_parser(Some(ParserRef::HTML(self)));
self.pending_input.borrow_mut().push(input);
self.pending_input.borrow_mut().push_back(input);
if !self.is_suspended() {
self.parse_sync();
}
@@ -360,7 +355,7 @@ impl<'a> Parser for &'a ServoHTMLParser {
assert!(!self.suspended.get());
assert!(self.pending_input.borrow().is_empty());

self.tokenizer.borrow_mut().end();
self.html5ever_parser.borrow_mut().take().unwrap().finish();
debug!("finished parsing");

self.document.set_current_parser(None);
@@ -380,17 +375,15 @@ impl ServoHTMLParser {
document: JS::from_ref(document),
};

let tb = TreeBuilder::new(sink, TreeBuilderOpts {
ignore_missing_rules: true,
.. Default::default()
let html5ever_parser = parse_document(sink, Default::default()).from_bytes(BytesOpts {
// FIXME: get this from Hyper

This comment has been minimized.

Copy link
@SimonSapin
transport_layer_encoding: None,
});

let tok = tokenizer::Tokenizer::new(tb, Default::default());

let parser = ServoHTMLParser {
reflector_: Reflector::new(),

This comment has been minimized.

Copy link
@SimonSapin

SimonSapin Feb 17, 2016

Author Member

This isn’t necessary anymore since all rules are implemented in html5ever’s tree builder now: servo/html5ever@c40cf9a

tokenizer: DOMRefCell::new(tok),
pending_input: DOMRefCell::new(vec!()),
html5ever_parser: DOMRefCell::new(Some(html5ever_parser)),
pending_input: DOMRefCell::new(VecDeque::new()),
document: JS::from_ref(document),
suspended: Cell::new(false),
last_chunk_received: Cell::new(false),
@@ -409,25 +402,20 @@ impl ServoHTMLParser {
document: JS::from_ref(document),
};

let tb_opts = TreeBuilderOpts {
ignore_missing_rules: true,
.. Default::default()
};
let tb = TreeBuilder::new_for_fragment(sink,
JS::from_ref(fragment_context.context_elem),
fragment_context.form_elem.map(|n| JS::from_ref(n)),
tb_opts);

let tok_opts = tokenizer::TokenizerOpts {
initial_state: Some(tb.tokenizer_state_for_context_elem()),
.. Default::default()
};
let tok = tokenizer::Tokenizer::new(tb, tok_opts);
let html5ever_parser = parse_fragment_for_element(
sink,
Default::default(),
JS::from_ref(fragment_context.context_elem),
fragment_context.form_elem.map(|n| JS::from_ref(n))
).from_bytes(BytesOpts {
// FIXME: get this from Hyper
transport_layer_encoding: None,
});

let parser = ServoHTMLParser {
reflector_: Reflector::new(),
tokenizer: DOMRefCell::new(tok),
pending_input: DOMRefCell::new(vec!()),
html5ever_parser: DOMRefCell::new(Some(html5ever_parser)),
pending_input: DOMRefCell::new(VecDeque::new()),
document: JS::from_ref(document),
suspended: Cell::new(false),
last_chunk_received: Cell::new(true),
@@ -438,23 +426,14 @@ impl ServoHTMLParser {
ServoHTMLParserBinding::Wrap)
}

#[inline]
pub fn tokenizer(&self) -> &DOMRefCell<Tokenizer> {
&self.tokenizer
}

pub fn set_plaintext_state(&self) {
self.tokenizer.borrow_mut().set_plaintext_state()
self.html5ever_parser.borrow_mut().as_mut().unwrap()
.str_parser_mut().tokenizer.set_plaintext_state()
}

pub fn end_tokenizer(&self) {
self.tokenizer.borrow_mut().end()
}

pub fn pending_input(&self) -> &DOMRefCell<Vec<String>> {
pub fn pending_input(&self) -> &DOMRefCell<VecDeque<Chunk>> {
&self.pending_input
}

}


@@ -463,13 +442,20 @@ impl ServoHTMLParser {
// This parser will continue to parse while there is either pending input or
// the parser remains unsuspended.
loop {
self.document.reflow_if_reflow_timer_expired();
self.document.reflow_if_reflow_timer_expired();
let mut pending_input = self.pending_input.borrow_mut();
if !pending_input.is_empty() {
let chunk = pending_input.remove(0);
self.tokenizer.borrow_mut().feed(chunk.into());
} else {
self.tokenizer.borrow_mut().run();
let mut html5ever_parser = self.html5ever_parser.borrow_mut();
let html5ever_parser = html5ever_parser.as_mut().unwrap();
match pending_input.pop_front() {
Some(Chunk::Bytes(bytes)) => {
html5ever_parser.process((&*bytes).into());
}
Some(Chunk::Dom(domstring)) => {
html5ever_parser.process_unicode(String::from(domstring).into())
}
None => {
html5ever_parser.str_parser_mut().tokenizer.run()
}
}

// Document parsing is blocked on an external resource.
@@ -527,14 +513,14 @@ impl tree_builder::Tracer for Tracer {
}
}

impl JSTraceable for Tokenizer {
impl JSTraceable for BytesParser<Sink> {
fn trace(&self, trc: *mut JSTracer) {
let tracer = Tracer {
trc: trc,
};
let tracer = &tracer as &tree_builder::Tracer<Handle=JS<Node>>;

let tree_builder = self.sink();
let tree_builder = self.str_parser().tokenizer.sink();
tree_builder.trace_handles(tracer);
tree_builder.sink().trace(trc);
}
@@ -14,9 +14,10 @@ use dom::servohtmlparser::ParserRef;
use dom::window::Window;
use js::jsapi::JSTracer;
use msg::constellation_msg::PipelineId;
use parse::Parser;
use parse::{Parser, Chunk};
use script_thread::ScriptThread;
use std::cell::Cell;
use std::collections::VecDeque;
use url::Url;
use xml5ever::tokenizer;
use xml5ever::tree_builder::{self, XmlTreeBuilder};
@@ -37,7 +38,7 @@ pub struct ServoXMLParser {
#[ignore_heap_size_of = "Defined in xml5ever"]
tokenizer: DOMRefCell<Tokenizer>,
/// Input chunks received but not yet passed to the parser.
pending_input: DOMRefCell<Vec<String>>,
pending_input: DOMRefCell<VecDeque<Chunk>>,
/// The document associated with this parser.
document: JS<Document>,
/// True if this parser should avoid passing any further data to the tokenizer.
@@ -50,9 +51,9 @@ pub struct ServoXMLParser {
}

impl<'a> Parser for &'a ServoXMLParser {
fn parse_chunk(self, input: String) {
fn parse_chunk(self, input: Chunk) {
self.document.set_current_parser(Some(ParserRef::XML(self)));
self.pending_input.borrow_mut().push(input);
self.pending_input.borrow_mut().push_back(input);
if !self.is_suspended() {
self.parse_sync();
}
@@ -89,7 +90,7 @@ impl ServoXMLParser {
let parser = ServoXMLParser {
reflector_: Reflector::new(),
tokenizer: DOMRefCell::new(tok),
pending_input: DOMRefCell::new(vec!()),
pending_input: DOMRefCell::new(VecDeque::new()),
document: JS::from_ref(document),
suspended: Cell::new(false),
last_chunk_received: Cell::new(false),
@@ -125,9 +126,13 @@ impl ServoXMLParser {
loop {
self.document.reflow_if_reflow_timer_expired();
let mut pending_input = self.pending_input.borrow_mut();
if !pending_input.is_empty() {
let chunk = pending_input.remove(0);
self.tokenizer.borrow_mut().feed(chunk.into());
if let Some(chunk) = pending_input.pop_front() {
// FIXME: use xml5ever’s bytes API when there is one.
let string = match chunk {
Chunk::Bytes(bytes) => String::from_utf8_lossy(&bytes).into_owned(),
Chunk::Dom(domstring) => String::from(domstring),
};
self.tokenizer.borrow_mut().feed(string.into());
}

// Document parsing is blocked on an external resource.
@@ -145,29 +150,21 @@ impl ServoXMLParser {
}
}

pub fn pending_input(&self) -> &DOMRefCell<Vec<String>> {
pub fn pending_input(&self) -> &DOMRefCell<VecDeque<Chunk>> {
&self.pending_input
}

/// Currently a no-op: the only statement in the body is commented out, so
/// calling this leaves the tokenizer untouched.
pub fn set_plaintext_state(&self) {
// NOTE(review): presumably kept so this parser offers the same method as
// the HTML parser; confirm against callers before removing.
//self.tokenizer.borrow_mut().set_plaintext_state()
}

pub fn end_tokenizer(&self) {
self.tokenizer.borrow_mut().end()
}

/// Returns the document associated with this parser.
pub fn document(&self) -> &Document {
&self.document
}

/// Returns a reference to the `last_chunk_received` flag cell, letting
/// callers read or update the flag in place.
// NOTE(review): presumably records whether the final chunk of input has
// arrived; confirm against the code that sets it.
pub fn last_chunk_received(&self) -> &Cell<bool> {
&self.last_chunk_received
}

pub fn tokenizer(&self) -> &DOMRefCell<Tokenizer> {
&self.tokenizer
}
}

struct Tracer {
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.