Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clean up the parsers into a single interface #13675

Merged
merged 9 commits into from Oct 11, 2016

Move ParserContext to script::parse

  • Loading branch information
nox committed Oct 11, 2016
commit 02162a8bdae61430a1b9f4529330fad699072ff3
@@ -5,44 +5,29 @@
//! The bulk of the HTML parser integration is in `script::parse::html`.
//! This module is mostly about its interaction with DOM memory management.

use document_loader::LoadType;
use dom::bindings::cell::DOMRefCell;
use dom::bindings::codegen::Bindings::DocumentBinding::DocumentMethods;
use dom::bindings::codegen::Bindings::HTMLImageElementBinding::HTMLImageElementMethods;
use dom::bindings::codegen::Bindings::NodeBinding::NodeMethods;
use dom::bindings::codegen::Bindings::ServoHTMLParserBinding;
use dom::bindings::inheritance::Castable;
use dom::bindings::js::{JS, Root};
use dom::bindings::refcounted::Trusted;
use dom::bindings::reflector::reflect_dom_object;
use dom::bindings::str::DOMString;
use dom::bindings::trace::JSTraceable;
use dom::document::Document;
use dom::globalscope::GlobalScope;
use dom::htmlimageelement::HTMLImageElement;
use dom::node::Node;
use dom::servoparser::ServoParser;
use dom::window::Window;
use encoding::all::UTF_8;
use encoding::types::{DecoderTrap, Encoding};
use html5ever::tokenizer;
use html5ever::tree_builder;
use html5ever::tree_builder::{TreeBuilder, TreeBuilderOpts};
use hyper::header::ContentType;
use hyper::mime::{Mime, SubLevel, TopLevel};
use hyper_serde::Serde;
use js::jsapi::JSTracer;
use msg::constellation_msg::PipelineId;
use net_traits::{AsyncResponseListener, Metadata, NetworkError};
use network_listener::PreInvoke;
use parse::{Parser, ParserRef, TrustedParser};
use parse::{Parser, ParserRef};
use profile_traits::time::{TimerMetadata, TimerMetadataFrameType, TimerMetadataReflowType, profile};
use profile_traits::time::ProfilerCategory;
use script_thread::ScriptThread;
use std::cell::Cell;
use std::default::Default;
use url::Url;
use util::resource_files::read_resource_file;

#[must_root]
#[derive(JSTraceable, HeapSizeOf)]
@@ -61,161 +46,6 @@ pub struct FragmentContext<'a> {

pub type Tokenizer = tokenizer::Tokenizer<TreeBuilder<JS<Node>, Sink>>;

/// The context required for asynchronously fetching a document and parsing it progressively.
pub struct ParserContext {
/// The parser that initiated the request.
/// Starts as `None`; `headers_available` stores a trusted handle here.
parser: Option<TrustedParser>,
/// Whether the document content is synthesized by this listener (image
/// wrapper, error page or unknown-type warning) instead of being parsed from
/// the response body. While set, `data_available` ignores incoming payloads.
is_synthesized_document: bool,
/// The pipeline associated with this document.
id: PipelineId,
/// The URL for this document.
url: Url,
}

impl ParserContext {
    /// Builds the listener context for the document at `url` that is loading
    /// in pipeline `id`.
    ///
    /// The new context has no parser attached yet and is not flagged as a
    /// synthesized document.
    pub fn new(id: PipelineId, url: Url) -> ParserContext {
        ParserContext {
            url: url,
            id: id,
            is_synthesized_document: false,
            parser: None,
        }
    }
}

// Feeds the document's parser from the network events of its load:
// response headers, body chunks, and completion.
impl AsyncResponseListener for ParserContext {
/// Handles the response headers: obtains the parser from the script thread,
/// stores a trusted handle to it, and pre-feeds synthesized markup depending
/// on the response content type.
fn headers_available(&mut self, meta_result: Result<Metadata, NetworkError>) {
// An SSL validation failure still produces metadata: default metadata for
// the URL, typed as text/html, with the failure reason kept in `ssl_error`.
let mut ssl_error = None;
let metadata = match meta_result {
Ok(meta) => Some(meta),
Err(NetworkError::SslValidation(url, reason)) => {
ssl_error = Some(reason);
let mut meta = Metadata::default(url);
let mime: Option<Mime> = "text/html".parse().ok();
meta.set_content_type(mime.as_ref());
Some(meta)
},
Err(_) => None,
};
// Extract the content type before `metadata` is moved into the script thread call.
let content_type =
metadata.clone().and_then(|meta| meta.content_type).map(Serde::into_inner);
// Nothing more to do when the script thread provides no parser for this pipeline.
let parser = match ScriptThread::page_headers_available(&self.id,
metadata) {
Some(parser) => parser,
None => return,
};

let parser = parser.r();
let servo_parser = parser.as_servo_parser();
// Keep a trusted handle so `data_available` and `response_complete` can root the parser.
self.parser = Some(match parser {
ParserRef::HTML(parser) => TrustedParser::HTML(
Trusted::new(parser)),
ParserRef::XML(parser) => TrustedParser::XML(
Trusted::new(parser)),
});

match content_type {
Some(ContentType(Mime(TopLevel::Image, _, _))) => {
// Image response: parse an empty HTML document, then append an <img>
// element whose src is the document URL to its body.
self.is_synthesized_document = true;
let page = "<html><body></body></html>".into();
servo_parser.push_input_chunk(page);
parser.parse_sync();

let doc = servo_parser.document();
let doc_body = Root::upcast::<Node>(doc.GetBody().unwrap());
let img = HTMLImageElement::new(atom!("img"), None, doc);
img.SetSrc(DOMString::from(self.url.to_string()));
doc_body.AppendChild(&Root::upcast::<Node>(img)).expect("Appending failed");

},
Some(ContentType(Mime(TopLevel::Text, SubLevel::Plain, _))) => {
// https://html.spec.whatwg.org/multipage/#read-text
let page = "<pre>\n".into();
servo_parser.push_input_chunk(page);
parser.parse_sync();
parser.set_plaintext_state();
},
Some(ContentType(Mime(TopLevel::Text, SubLevel::Html, _))) => { // Handle text/html
// Only a certificate failure replaces the document: the badcert.html
// resource is parsed with its ${reason} placeholder filled in.
if let Some(reason) = ssl_error {
self.is_synthesized_document = true;
let page_bytes = read_resource_file("badcert.html").unwrap();
let page = String::from_utf8(page_bytes).unwrap();
let page = page.replace("${reason}", &reason);
servo_parser.push_input_chunk(page);
parser.parse_sync();
}
},
Some(ContentType(Mime(TopLevel::Text, SubLevel::Xml, _))) => {}, // Handle text/xml
Some(ContentType(Mime(toplevel, sublevel, _))) => {
if toplevel.as_str() == "application" && sublevel.as_str() == "xhtml+xml" {
// Handle xhtml (application/xhtml+xml).
return;
}

// Show warning page for unknown mime types.
let page = format!("<html><body><p>Unknown content type ({}/{}).</p></body></html>",
toplevel.as_str(), sublevel.as_str());
self.is_synthesized_document = true;
servo_parser.push_input_chunk(page);
parser.parse_sync();
},
None => {
// No content-type header.
// Merge with #4212 when fixed.
}
}
}

/// Decodes a body chunk as UTF-8 (with `DecoderTrap::Replace`) and passes it
/// to the stored parser. Does nothing for synthesized documents or when no
/// parser has been stored.
fn data_available(&mut self, payload: Vec<u8>) {
if !self.is_synthesized_document {
// FIXME: use Vec<u8> (html5ever #34)
let data = UTF_8.decode(&payload, DecoderTrap::Replace).unwrap();
let parser = match self.parser.as_ref() {
Some(parser) => parser.root(),
None => return,
};
parser.r().parse_chunk(data);
}
}

/// Finishes the load. A `NetworkError::Internal` parses the neterror.html
/// page with the reason filled in; any other error is only logged. The
/// document's page-source load is then finished, the last chunk is marked
/// as received, and a final `parse_sync` runs unless the parser is suspended.
/// Returns immediately when no parser has been stored.
fn response_complete(&mut self, status: Result<(), NetworkError>) {
let parser = match self.parser.as_ref() {
Some(parser) => parser.root(),
None => return,
};

if let Err(NetworkError::Internal(ref reason)) = status {
// Show an error page for network errors,
// certificate errors are handled earlier.
self.is_synthesized_document = true;
let parser = parser.r();
let page_bytes = read_resource_file("neterror.html").unwrap();
let page = String::from_utf8(page_bytes).unwrap();
let page = page.replace("${reason}", reason);
parser.as_servo_parser().push_input_chunk(page);
parser.parse_sync();
} else if let Err(err) = status {
// TODO(Savago): we should send a notification to callers #5463.
debug!("Failed to load page URL {}, error: {:?}", self.url, err);
}

let parser = parser.r();
let servo_parser = parser.as_servo_parser();

servo_parser.document()
.finish_load(LoadType::PageSource(self.url.clone()));

servo_parser.mark_last_chunk_received();
if !parser.is_suspended() {
parser.parse_sync();
}
}
}

// Opts `ParserContext` into `PreInvoke` without overriding any trait item.
impl PreInvoke for ParserContext {
}

#[dom_struct]
pub struct ServoHTMLParser {
servoparser: ServoParser,
@@ -2,15 +2,33 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

use document_loader::LoadType;
use dom::bindings::codegen::Bindings::DocumentBinding::DocumentMethods;
use dom::bindings::codegen::Bindings::HTMLImageElementBinding::HTMLImageElementMethods;
use dom::bindings::codegen::Bindings::NodeBinding::NodeMethods;
use dom::bindings::inheritance::Castable;
use dom::bindings::js::{JS, Root};
use dom::bindings::refcounted::Trusted;
use dom::bindings::str::DOMString;
use dom::htmlimageelement::HTMLImageElement;
use dom::node::Node;
use dom::servohtmlparser::ServoHTMLParser;
use dom::servoparser::ServoParser;
use dom::servoxmlparser::ServoXMLParser;
use dom::window::Window;
use encoding::all::UTF_8;
use encoding::types::{DecoderTrap, Encoding};
use hyper::header::ContentType;
use hyper::mime::{Mime, SubLevel, TopLevel};
use hyper_serde::Serde;
use msg::constellation_msg::PipelineId;
use net_traits::{AsyncResponseListener, Metadata, NetworkError};
use network_listener::PreInvoke;
use script_thread::ScriptThread;
use std::cell::UnsafeCell;
use std::ptr;
use url::Url;
use util::resource_files::read_resource_file;

pub mod html;
pub mod xml;
@@ -160,3 +178,157 @@ impl<'a> ParserRef<'a> {
}
}

/// The context required for asynchronously fetching a document
/// and parsing it progressively.
pub struct ParserContext {
/// The parser that initiated the request.
/// Starts as `None`; `headers_available` stores a trusted handle here.
parser: Option<TrustedParser>,
/// Whether the document content is synthesized by this listener (image
/// wrapper, error page or unknown-type warning) instead of being parsed from
/// the response body. While set, `data_available` ignores incoming payloads.
is_synthesized_document: bool,
/// The pipeline associated with this document.
id: PipelineId,
/// The URL for this document.
url: Url,
}

impl ParserContext {
    /// Builds the listener context for the document at `url` that is loading
    /// in pipeline `id`.
    ///
    /// The new context has no parser attached yet and is not flagged as a
    /// synthesized document.
    pub fn new(id: PipelineId, url: Url) -> ParserContext {
        ParserContext {
            url: url,
            id: id,
            is_synthesized_document: false,
            parser: None,
        }
    }
}

/// Feeds the document's parser from the network events of its load:
/// response headers, body chunks, and completion.
impl AsyncResponseListener for ParserContext {
    /// Handles the response headers.
    ///
    /// Obtains the parser for this pipeline from the script thread, stores a
    /// trusted handle to it in `self.parser`, and, depending on the response
    /// content type, pre-feeds the parser with synthesized markup (image
    /// wrapper, `<pre>` prefix for plain text, certificate error page, or an
    /// "unknown content type" warning). Returns early when the script thread
    /// provides no parser.
    fn headers_available(&mut self, meta_result: Result<Metadata, NetworkError>) {
        // An SSL validation failure still produces metadata: default metadata
        // for the URL, typed as text/html, with the reason kept in `ssl_error`.
        let mut ssl_error = None;
        let metadata = match meta_result {
            Ok(meta) => Some(meta),
            Err(NetworkError::SslValidation(url, reason)) => {
                ssl_error = Some(reason);
                let mut meta = Metadata::default(url);
                let mime: Option<Mime> = "text/html".parse().ok();
                meta.set_content_type(mime.as_ref());
                Some(meta)
            },
            Err(_) => None,
        };
        // Clone only the content type; `metadata` itself is moved into the
        // script thread call right below.
        let content_type = metadata.as_ref()
                                   .and_then(|meta| meta.content_type.clone())
                                   .map(Serde::into_inner);
        let parser = match ScriptThread::page_headers_available(&self.id, metadata) {
            Some(parser) => parser,
            None => return,
        };

        let parser = parser.r();
        let servo_parser = parser.as_servo_parser();
        // Keep a trusted handle so the later callbacks can root the parser.
        self.parser = Some(match parser {
            ParserRef::HTML(parser) => TrustedParser::HTML(Trusted::new(parser)),
            ParserRef::XML(parser) => TrustedParser::XML(Trusted::new(parser)),
        });

        match content_type {
            Some(ContentType(Mime(TopLevel::Image, _, _))) => {
                // Image response: parse an empty HTML document, then append an
                // <img> element whose src is the document URL to its body.
                self.is_synthesized_document = true;
                let page = "<html><body></body></html>".into();
                servo_parser.push_input_chunk(page);
                parser.parse_sync();

                let doc = servo_parser.document();
                let doc_body = Root::upcast::<Node>(doc.GetBody().unwrap());
                let img = HTMLImageElement::new(atom!("img"), None, doc);
                img.SetSrc(DOMString::from(self.url.to_string()));
                doc_body.AppendChild(&Root::upcast::<Node>(img)).expect("Appending failed");
            },
            Some(ContentType(Mime(TopLevel::Text, SubLevel::Plain, _))) => {
                // https://html.spec.whatwg.org/multipage/#read-text
                let page = "<pre>\n".into();
                servo_parser.push_input_chunk(page);
                parser.parse_sync();
                parser.set_plaintext_state();
            },
            Some(ContentType(Mime(TopLevel::Text, SubLevel::Html, _))) => {
                // Handle text/html: only a certificate failure replaces the
                // document, with the bundled certificate error page.
                if let Some(reason) = ssl_error {
                    self.is_synthesized_document = true;
                    let page = synthesized_error_page("badcert.html", &reason);
                    servo_parser.push_input_chunk(page);
                    parser.parse_sync();
                }
            },
            // Handle text/xml: nothing to pre-feed.
            Some(ContentType(Mime(TopLevel::Text, SubLevel::Xml, _))) => {},
            Some(ContentType(Mime(toplevel, sublevel, _))) => {
                if toplevel.as_str() == "application" && sublevel.as_str() == "xhtml+xml" {
                    // Handle xhtml (application/xhtml+xml).
                    return;
                }

                // Show warning page for unknown mime types.
                let page = format!("<html><body><p>Unknown content type ({}/{}).</p></body></html>",
                                   toplevel.as_str(), sublevel.as_str());
                self.is_synthesized_document = true;
                servo_parser.push_input_chunk(page);
                parser.parse_sync();
            },
            None => {
                // No content-type header.
                // Merge with #4212 when fixed.
            }
        }
    }

    /// Decodes a body chunk as UTF-8 (with `DecoderTrap::Replace`) and passes
    /// it to the stored parser.
    ///
    /// Does nothing for synthesized documents or when no parser has been
    /// stored.
    fn data_available(&mut self, payload: Vec<u8>) {
        if !self.is_synthesized_document {
            // FIXME: use Vec<u8> (html5ever #34)
            let data = UTF_8.decode(&payload, DecoderTrap::Replace).unwrap();
            let parser = match self.parser.as_ref() {
                Some(parser) => parser.root(),
                None => return,
            };
            parser.r().parse_chunk(data);
        }
    }

    /// Finishes the load.
    ///
    /// A `NetworkError::Internal` parses the bundled network error page with
    /// the reason filled in; any other error is only logged. The document's
    /// page-source load is then finished, the last chunk is marked as
    /// received, and a final `parse_sync` runs unless the parser is suspended.
    /// Returns immediately when no parser has been stored.
    fn response_complete(&mut self, status: Result<(), NetworkError>) {
        let parser = match self.parser.as_ref() {
            Some(parser) => parser.root(),
            None => return,
        };

        if let Err(NetworkError::Internal(ref reason)) = status {
            // Show an error page for network errors,
            // certificate errors are handled earlier.
            self.is_synthesized_document = true;
            let parser = parser.r();
            let page = synthesized_error_page("neterror.html", reason);
            parser.as_servo_parser().push_input_chunk(page);
            parser.parse_sync();
        } else if let Err(err) = status {
            // TODO(Savago): we should send a notification to callers #5463.
            debug!("Failed to load page URL {}, error: {:?}", self.url, err);
        }

        let parser = parser.r();
        let servo_parser = parser.as_servo_parser();

        servo_parser.document()
                    .finish_load(LoadType::PageSource(self.url.clone()));

        servo_parser.mark_last_chunk_received();
        if !parser.is_suspended() {
            parser.parse_sync();
        }
    }
}

/// Reads the bundled resource page `name` and substitutes `reason` for every
/// `${reason}` placeholder in it.
///
/// Panics if the resource cannot be read or is not valid UTF-8.
fn synthesized_error_page(name: &str, reason: &str) -> String {
    let page_bytes = read_resource_file(name).unwrap();
    let page = String::from_utf8(page_bytes).unwrap();
    page.replace("${reason}", reason)
}

// Opts `ParserContext` into `PreInvoke` without overriding any trait item.
impl PreInvoke for ParserContext {}
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.