Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make the parser decode input from the document's encoding #22432

Merged
merged 4 commits into from
Dec 12, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions components/script/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ smallvec = { version = "0.6", features = ["std", "union"] }
style = {path = "../style", features = ["servo"]}
style_traits = {path = "../style_traits"}
swapper = "0.1"
tendril = {version = "0.4.1", features = ["encoding_rs"]}
time = "0.1.12"
unicode-segmentation = "1.1.0"
url = "1.6"
Expand Down
10 changes: 5 additions & 5 deletions components/script/dom/bindings/trace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,6 @@ use encoding_rs::{Decoder, Encoding};
use euclid::Length as EuclidLength;
use euclid::{Point2D, Rect, Transform2D, Transform3D, TypedScale, TypedSize2D, Vector2D};
use html5ever::buffer_queue::BufferQueue;
use html5ever::tendril::fmt::UTF8;
use html5ever::tendril::stream::Utf8LossyDecoder;
use html5ever::tendril::{StrTendril, TendrilSink};
use html5ever::{LocalName, Namespace, Prefix, QualName};
use http::header::HeaderMap;
use hyper::Method;
Expand Down Expand Up @@ -128,6 +125,9 @@ use style::stylesheets::keyframes_rule::Keyframe;
use style::stylesheets::{CssRules, FontFaceRule, KeyframesRule, MediaRule, Stylesheet};
use style::stylesheets::{ImportRule, NamespaceRule, StyleRule, SupportsRule, ViewportRule};
use style::values::specified::Length;
use tendril::fmt::UTF8;
use tendril::stream::LossyDecoder;
use tendril::{StrTendril, TendrilSink};
use time::Duration;
use uuid::Uuid;
use webrender_api::{DocumentId, ImageKey, RenderApiSender};
Expand Down Expand Up @@ -736,12 +736,12 @@ where
}
}

unsafe impl<Sink> JSTraceable for Utf8LossyDecoder<Sink>
unsafe impl<Sink> JSTraceable for LossyDecoder<Sink>
where
Sink: JSTraceable + TendrilSink<UTF8>,
{
unsafe fn trace(&self, tracer: *mut JSTracer) {
self.inner_sink.trace(tracer);
self.inner_sink().trace(tracer);
}
}

Expand Down
26 changes: 16 additions & 10 deletions components/script/dom/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2586,26 +2586,32 @@ impl Document {
let interactive_time =
InteractiveMetrics::new(window.time_profiler_chan().clone(), url.clone());

let content_type = content_type.unwrap_or_else(|| {
match is_html_document {
// https://dom.spec.whatwg.org/#dom-domimplementation-createhtmldocument
IsHTMLDocument::HTMLDocument => mime::TEXT_HTML,
// https://dom.spec.whatwg.org/#concept-document-content-type
IsHTMLDocument::NonHTMLDocument => "application/xml".parse().unwrap(),
}
});

let encoding = content_type
.get_param(mime::CHARSET)
.and_then(|charset| Encoding::for_label(charset.as_str().as_bytes()))
.unwrap_or(UTF_8);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this fallback is incorrect per spec, but it’s likely better until we implement `<meta charset>` sniffing.


Document {
node: Node::new_document_node(),
window: Dom::from_ref(window),
has_browsing_context: has_browsing_context == HasBrowsingContext::Yes,
implementation: Default::default(),
content_type: match content_type {
Some(mime_data) => mime_data,
None => match is_html_document {
// https://dom.spec.whatwg.org/#dom-domimplementation-createhtmldocument
IsHTMLDocument::HTMLDocument => mime::TEXT_HTML,
// https://dom.spec.whatwg.org/#concept-document-content-type
IsHTMLDocument::NonHTMLDocument => "application/xml".parse().unwrap(),
},
},
content_type,
last_modified: last_modified,
url: DomRefCell::new(url),
// https://dom.spec.whatwg.org/#concept-document-quirks
quirks_mode: Cell::new(QuirksMode::NoQuirks),
// https://dom.spec.whatwg.org/#concept-document-encoding
encoding: Cell::new(UTF_8),
encoding: Cell::new(encoding),
is_html_document: is_html_document == IsHTMLDocument::HTMLDocument,
activity: Cell::new(activity),
id_map: DomRefCell::new(HashMap::new()),
Expand Down
16 changes: 10 additions & 6 deletions components/script/dom/servoparser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ use crate::network_listener::PreInvoke;
use crate::script_thread::ScriptThread;
use dom_struct::dom_struct;
use embedder_traits::resources::{self, Resource};
use encoding_rs::Encoding;
use html5ever::buffer_queue::BufferQueue;
use html5ever::tendril::fmt::UTF8;
use html5ever::tendril::stream::Utf8LossyDecoder;
use html5ever::tendril::{ByteTendril, StrTendril, TendrilSink};
use html5ever::tree_builder::{ElementFlags, NextParserState, NodeOrText, QuirksMode, TreeSink};
use html5ever::{Attribute, ExpandedName, LocalName, QualName};
Expand All @@ -58,6 +58,7 @@ use std::borrow::Cow;
use std::cell::Cell;
use std::mem;
use style::context::QuirksMode as ServoQuirksMode;
use tendril::stream::LossyDecoder;

mod async_html;
mod html;
Expand Down Expand Up @@ -398,7 +399,7 @@ impl ServoParser {
ServoParser {
reflector: Reflector::new(),
document: Dom::from_ref(document),
network_decoder: DomRefCell::new(Some(NetworkDecoder::new())),
network_decoder: DomRefCell::new(Some(NetworkDecoder::new(document.encoding()))),
network_input: DomRefCell::new(BufferQueue::new()),
script_input: DomRefCell::new(BufferQueue::new()),
tokenizer: DomRefCell::new(tokenizer),
Expand Down Expand Up @@ -1195,19 +1196,22 @@ fn create_element_for_token(
#[derive(JSTraceable, MallocSizeOf)]
struct NetworkDecoder {
#[ignore_malloc_size_of = "Defined in tendril"]
decoder: Utf8LossyDecoder<NetworkSink>,
decoder: LossyDecoder<NetworkSink>,
}

impl NetworkDecoder {
fn new() -> Self {
fn new(encoding: &'static Encoding) -> Self {
Self {
decoder: Utf8LossyDecoder::new(Default::default()),
decoder: LossyDecoder::new_encoding_rs(encoding, Default::default()),
}
}

fn decode(&mut self, chunk: Vec<u8>) -> StrTendril {
self.decoder.process(ByteTendril::from(&*chunk));
mem::replace(&mut self.decoder.inner_sink.output, Default::default())
mem::replace(
&mut self.decoder.inner_sink_mut().output,
Default::default(),
)
}

fn finish(self) -> StrTendril {
Expand Down

This file was deleted.

This file was deleted.

This file was deleted.

4 changes: 0 additions & 4 deletions tests/wpt/metadata/xhr/send-entity-body-document.htm.ini
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,3 @@
expected: FAIL
bug: https://github.com/servo/servo/issues/14912

[HTML document, shift-jis]
expected: FAIL
bug: https://github.com/servo/servo/issues/6414