Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use html5ever’s BytesParser. #9677

Closed
wants to merge 4 commits into from
Closed
Changes from 1 commit
Commits
File filter...
Filter file types
Jump to…
Jump to file
Failed to load files.

Always

Just for now

Prev

Use html5ever’s BytesParser.

This fixes the encoding issue in #9670, though it uncovers a font selection issue (at least for me).
  • Loading branch information
SimonSapin committed Feb 18, 2016
commit fdbb9a07580d2a137cc2f417e1076bbe01621e90
@@ -17,9 +17,7 @@ use dom::document::Document;
use dom::node::Node;
use dom::servoxmlparser::ServoXMLParser;
use dom::window::Window;
use encoding::all::UTF_8;
use encoding::types::{DecoderTrap, Encoding};
use html5ever::driver::{Parser as H5eParser, parse_document, parse_fragment_for_element};
use html5ever::driver::{BytesParser, BytesOpts, parse_document, parse_fragment_for_element};
use html5ever::tendril::TendrilSink;
use html5ever::tree_builder;
use hyper::header::ContentType;
@@ -28,7 +26,7 @@ use js::jsapi::JSTracer;
use msg::constellation_msg::{PipelineId, SubpageId};
use net_traits::{AsyncResponseListener, Metadata};
use network_listener::PreInvoke;
use parse::Parser;
use parse::{Parser, Chunk};
use script_thread::{ScriptChan, ScriptThread};
use std::cell::Cell;
use std::cell::UnsafeCell;
@@ -135,7 +133,7 @@ pub enum ParserRef<'a> {
}

impl<'a> ParserRef<'a> {
fn parse_chunk(&self, input: String) {
fn parse_chunk(&self, input: Chunk) {
match *self {
ParserRef::HTML(parser) => parser.parse_chunk(input),
ParserRef::XML(parser) => parser.parse_chunk(input),
@@ -170,7 +168,7 @@ impl<'a> ParserRef<'a> {
}
}

pub fn pending_input(&self) -> &DOMRefCell<VecDeque<String>> {
pub fn pending_input(&self) -> &DOMRefCell<VecDeque<Chunk>> {
match *self {
ParserRef::HTML(parser) => parser.pending_input(),
ParserRef::XML(parser) => parser.pending_input(),
@@ -262,13 +260,13 @@ impl AsyncResponseListener for ParserContext {
self.is_synthesized_document = true;
let page = format!("<html><body><img src='{}' /></body></html>",
self.url.serialize());
parser.pending_input().borrow_mut().push_back(page);
parser.pending_input().borrow_mut().push_back(Chunk::Dom(page.into()));
parser.parse_sync();
},
Some(ContentType(Mime(TopLevel::Text, SubLevel::Plain, _))) => {
// https://html.spec.whatwg.org/multipage/#read-text
let page = format!("<pre>\n");
parser.pending_input().borrow_mut().push_back(page);
let page = "<pre>\n";
parser.pending_input().borrow_mut().push_back(Chunk::Dom(page.into()));
parser.parse_sync();
parser.set_plaintext_state();
},
@@ -284,7 +282,7 @@ impl AsyncResponseListener for ParserContext {
let page = format!("<html><body><p>Unknown content type ({}/{}).</p></body></html>",
toplevel.as_str(), sublevel.as_str());
self.is_synthesized_document = true;
parser.pending_input().borrow_mut().push_back(page);
parser.pending_input().borrow_mut().push_back(Chunk::Dom(page.into()));
parser.parse_sync();
},
None => {
@@ -296,13 +294,11 @@ impl AsyncResponseListener for ParserContext {

fn data_available(&mut self, payload: Vec<u8>) {
if !self.is_synthesized_document {
// FIXME: use Vec<u8> (html5ever #34)
let data = UTF_8.decode(&payload, DecoderTrap::Replace).unwrap();
let parser = match self.parser.as_ref() {
Some(parser) => parser.root(),
None => return,
};
parser.r().parse_chunk(data);
parser.r().parse_chunk(Chunk::Bytes(payload));
}
}

@@ -332,9 +328,9 @@ impl PreInvoke for ParserContext {
pub struct ServoHTMLParser {
reflector_: Reflector,
#[ignore_heap_size_of = "Defined in html5ever"]
html5ever_parser: DOMRefCell<Option<H5eParser<Sink>>>,
html5ever_parser: DOMRefCell<Option<BytesParser<Sink>>>,
/// Input chunks received but not yet passed to the parser.
pending_input: DOMRefCell<VecDeque<String>>,
pending_input: DOMRefCell<VecDeque<Chunk>>,
/// The document associated with this parser.
document: JS<Document>,
/// True if this parser should avoid passing any further data to the tokenizer.
@@ -347,7 +343,7 @@ pub struct ServoHTMLParser {
}

impl<'a> Parser for &'a ServoHTMLParser {
fn parse_chunk(self, input: String) {
fn parse_chunk(self, input: Chunk) {
self.document.set_current_parser(Some(ParserRef::HTML(self)));
self.pending_input.borrow_mut().push_back(input);
if !self.is_suspended() {
@@ -379,9 +375,14 @@ impl ServoHTMLParser {
document: JS::from_ref(document),
};

let html5ever_parser = parse_document(sink, Default::default()).from_bytes(BytesOpts {
// FIXME: get this from Hyper

This comment has been minimized.

Copy link
@SimonSapin
transport_layer_encoding: None,
});

let parser = ServoHTMLParser {
reflector_: Reflector::new(),
html5ever_parser: DOMRefCell::new(Some(parse_document(sink, Default::default()))),
html5ever_parser: DOMRefCell::new(Some(html5ever_parser)),
pending_input: DOMRefCell::new(VecDeque::new()),
document: JS::from_ref(document),
suspended: Cell::new(false),
@@ -405,7 +406,11 @@ impl ServoHTMLParser {
sink,
Default::default(),
JS::from_ref(fragment_context.context_elem),
fragment_context.form_elem.map(|n| JS::from_ref(n)));
fragment_context.form_elem.map(|n| JS::from_ref(n))
).from_bytes(BytesOpts {
// FIXME: get this from Hyper
transport_layer_encoding: None,
});

let parser = ServoHTMLParser {
reflector_: Reflector::new(),
@@ -422,10 +427,11 @@ impl ServoHTMLParser {
}

pub fn set_plaintext_state(&self) {
self.html5ever_parser.borrow_mut().as_mut().unwrap().tokenizer.set_plaintext_state()
self.html5ever_parser.borrow_mut().as_mut().unwrap()
.str_parser_mut().tokenizer.set_plaintext_state()
}

pub fn pending_input(&self) -> &DOMRefCell<VecDeque<String>> {
pub fn pending_input(&self) -> &DOMRefCell<VecDeque<Chunk>> {
&self.pending_input
}
}
@@ -436,12 +442,20 @@ impl ServoHTMLParser {
// This parser will continue to parse while there is either pending input or
// the parser remains unsuspended.
loop {
self.document.reflow_if_reflow_timer_expired();
self.document.reflow_if_reflow_timer_expired();
let mut pending_input = self.pending_input.borrow_mut();
if let Some(chunk) = pending_input.pop_front() {
self.html5ever_parser.borrow_mut().as_mut().unwrap().process(chunk.into());
} else {
self.html5ever_parser.borrow_mut().as_mut().unwrap().tokenizer.run();
let mut html5ever_parser = self.html5ever_parser.borrow_mut();
let html5ever_parser = html5ever_parser.as_mut().unwrap();
match pending_input.pop_front() {
Some(Chunk::Bytes(bytes)) => {
html5ever_parser.process((&*bytes).into());
}
Some(Chunk::Dom(domstring)) => {
html5ever_parser.process_unicode(String::from(domstring).into())
}
None => {
html5ever_parser.str_parser_mut().tokenizer.run()
}
}

// Document parsing is blocked on an external resource.
@@ -499,14 +513,14 @@ impl tree_builder::Tracer for Tracer {
}
}

impl JSTraceable for H5eParser<Sink> {
impl JSTraceable for BytesParser<Sink> {
fn trace(&self, trc: *mut JSTracer) {
let tracer = Tracer {
trc: trc,
};
let tracer = &tracer as &tree_builder::Tracer<Handle=JS<Node>>;

let tree_builder = self.tokenizer.sink();
let tree_builder = self.str_parser().tokenizer.sink();
tree_builder.trace_handles(tracer);
tree_builder.sink().trace(trc);
}
@@ -14,7 +14,7 @@ use dom::servohtmlparser::ParserRef;
use dom::window::Window;
use js::jsapi::JSTracer;
use msg::constellation_msg::PipelineId;
use parse::Parser;
use parse::{Parser, Chunk};
use script_thread::ScriptThread;
use std::cell::Cell;
use std::collections::VecDeque;
@@ -38,7 +38,7 @@ pub struct ServoXMLParser {
#[ignore_heap_size_of = "Defined in xml5ever"]
tokenizer: DOMRefCell<Tokenizer>,
/// Input chunks received but not yet passed to the parser.
pending_input: DOMRefCell<VecDeque<String>>,
pending_input: DOMRefCell<VecDeque<Chunk>>,
/// The document associated with this parser.
document: JS<Document>,
/// True if this parser should avoid passing any further data to the tokenizer.
@@ -51,7 +51,7 @@ pub struct ServoXMLParser {
}

impl<'a> Parser for &'a ServoXMLParser {
fn parse_chunk(self, input: String) {
fn parse_chunk(self, input: Chunk) {
self.document.set_current_parser(Some(ParserRef::XML(self)));
self.pending_input.borrow_mut().push_back(input);
if !self.is_suspended() {
@@ -127,7 +127,12 @@ impl ServoXMLParser {
self.document.reflow_if_reflow_timer_expired();
let mut pending_input = self.pending_input.borrow_mut();
if let Some(chunk) = pending_input.pop_front() {
self.tokenizer.borrow_mut().feed(chunk.into());
// FIXME: use xml5ever’s bytes API when there is one.
let string = match chunk {
Chunk::Bytes(bytes) => String::from_utf8_lossy(&bytes).into_owned(),
Chunk::Dom(domstring) => String::from(domstring),
};
self.tokenizer.borrow_mut().feed(string.into());
}

// Document parsing is blocked on an external resource.
@@ -145,7 +150,7 @@ impl ServoXMLParser {
}
}

pub fn pending_input(&self) -> &DOMRefCell<VecDeque<String>> {
pub fn pending_input(&self) -> &DOMRefCell<VecDeque<Chunk>> {
&self.pending_input
}

@@ -32,7 +32,7 @@ use html5ever::serialize::{AttrRef, Serializable, Serializer};
use html5ever::tendril::StrTendril;
use html5ever::tree_builder::{NextParserState, NodeOrText, QuirksMode, TreeSink};
use msg::constellation_msg::PipelineId;
use parse::Parser;
use parse::{Parser, Chunk};
use std::borrow::Cow;
use std::io::{self, Write};
use string_cache::QualName;
@@ -261,7 +261,7 @@ pub fn parse_html(document: &Document,
ParseContext::Fragment(fc) =>
ServoHTMLParser::new_for_fragment(Some(url), document, fc),
};
parser.parse_chunk(String::from(input));
parser.parse_chunk(Chunk::Dom(input));
}

// https://html.spec.whatwg.org/multipage/#parsing-html-fragments
@@ -2,10 +2,18 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

use util::str::DOMString;

pub mod html;
pub mod xml;

/// Interface shared by the script-side document parsers; in this change it is
/// implemented for `&ServoHTMLParser` and `&ServoXMLParser`.
///
/// `parse_chunk` hands one piece of input to the parser. In both visible
/// implementations it registers the parser as the document's current parser
/// and appends the input to the parser's `pending_input` queue before
/// checking whether the parser is suspended.
///
/// `finish` presumably signals that no more input will arrive; its
/// implementations are not visible in this diff. NOTE(review): confirm.
pub trait Parser {
fn parse_chunk(self, input: String);
fn parse_chunk(self, input: Chunk);
fn finish(self);
}

/// One unit of parser input, as accepted by `Parser::parse_chunk` and queued
/// in the parsers' `pending_input` deques.
#[derive(JSTraceable, HeapSizeOf, Debug)]
pub enum Chunk {
/// Raw, not-yet-decoded bytes, e.g. a network payload passed through from
/// `data_available`. The HTML parser feeds these to html5ever's
/// `BytesParser`; the XML parser decodes them with `String::from_utf8_lossy`.
Bytes(Vec<u8>),
/// Text that is already a `DOMString` (synthesized pages, or input given
/// to `parse_html` / `parse_xml`); it is converted to a `String` and fed to
/// the parser without any byte-level decoding step.
Dom(DOMString),
}
@@ -17,7 +17,7 @@ use dom::servoxmlparser;
use dom::servoxmlparser::ServoXMLParser;
use dom::text::Text;
use msg::constellation_msg::PipelineId;
use parse::Parser;
use parse::{Parser, Chunk};
use std::borrow::Cow;
use string_cache::{Atom, QualName, Namespace};
use url::Url;
@@ -117,6 +117,6 @@ pub fn parse_xml(document: &Document,
ParseContext::Owner(owner) =>
ServoXMLParser::new(Some(url), document, owner),
};
parser.parse_chunk(String::from(input));
parser.parse_chunk(Chunk::Dom(input));
}

This file was deleted.

This file was deleted.

ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.