Permalink
Browse files

Use Utf8LossyDecoder instead of IncompleteUtf8

  • Loading branch information...
nox committed Dec 5, 2018
1 parent 08bbf4f commit 555226c1516e0ce40f6c43579697e81a3ff4d709
Showing with 74 additions and 27 deletions.
  1. +13 −2 components/script/dom/bindings/trace.rs
  2. +61 −25 components/script/dom/servoparser/mod.rs
@@ -56,7 +56,9 @@ use encoding_rs::{Decoder, Encoding};
use euclid::Length as EuclidLength;
use euclid::{Point2D, Rect, Transform2D, Transform3D, TypedScale, TypedSize2D, Vector2D};
use html5ever::buffer_queue::BufferQueue;
use html5ever::tendril::IncompleteUtf8;
use html5ever::tendril::fmt::UTF8;
use html5ever::tendril::stream::Utf8LossyDecoder;
use html5ever::tendril::{StrTendril, TendrilSink};
use html5ever::{LocalName, Namespace, Prefix, QualName};
use http::header::HeaderMap;
use hyper::Method;
@@ -395,7 +397,7 @@ unsafe_no_jsmanaged_fields!(
unsafe_no_jsmanaged_fields!(TimerEventId, TimerSource);
unsafe_no_jsmanaged_fields!(TimelineMarkerType);
unsafe_no_jsmanaged_fields!(WorkerId);
unsafe_no_jsmanaged_fields!(BufferQueue, QuirksMode, IncompleteUtf8);
unsafe_no_jsmanaged_fields!(BufferQueue, QuirksMode, StrTendril);
unsafe_no_jsmanaged_fields!(Runtime);
unsafe_no_jsmanaged_fields!(HeaderMap, Method);
unsafe_no_jsmanaged_fields!(WindowProxyHandler);
@@ -734,6 +736,15 @@ where
}
}

unsafe impl<Sink> JSTraceable for Utf8LossyDecoder<Sink>
where
Sink: JSTraceable + TendrilSink<UTF8>,
{
unsafe fn trace(&self, tracer: *mut JSTracer) {
self.inner_sink.trace(tracer);
}
}

/// Holds a set of JSTraceables that need to be rooted
struct RootedTraceableSet {
set: Vec<*const dyn JSTraceable>,
@@ -38,7 +38,9 @@ use crate::script_thread::ScriptThread;
use dom_struct::dom_struct;
use embedder_traits::resources::{self, Resource};
use html5ever::buffer_queue::BufferQueue;
use html5ever::tendril::{ByteTendril, IncompleteUtf8, StrTendril};
use html5ever::tendril::fmt::UTF8;
use html5ever::tendril::stream::Utf8LossyDecoder;
use html5ever::tendril::{ByteTendril, StrTendril, TendrilSink};
use html5ever::tree_builder::{ElementFlags, NextParserState, NodeOrText, QuirksMode, TreeSink};
use html5ever::{Attribute, ExpandedName, LocalName, QualName};
use hyper_serde::Serde;
@@ -78,12 +80,11 @@ pub struct ServoParser {
reflector: Reflector,
/// The document associated with this parser.
document: Dom<Document>,
/// The decoder used for the network input.
network_decoder: DomRefCell<Option<NetworkDecoder>>,
/// Input received from network.
#[ignore_malloc_size_of = "Defined in html5ever"]
network_input: DomRefCell<BufferQueue>,
/// Part of an UTF-8 code point spanning input chunks
#[ignore_malloc_size_of = "Defined in html5ever"]
incomplete_utf8: DomRefCell<Option<IncompleteUtf8>>,
/// Input received from script. Used only to support document.write().
#[ignore_malloc_size_of = "Defined in html5ever"]
script_input: DomRefCell<BufferQueue>,
@@ -401,7 +402,7 @@ impl ServoParser {
ServoParser {
reflector: Reflector::new(),
document: Dom::from_ref(document),
incomplete_utf8: DomRefCell::new(None),
network_decoder: DomRefCell::new(Some(NetworkDecoder::new())),
network_input: DomRefCell::new(BufferQueue::new()),
script_input: DomRefCell::new(BufferQueue::new()),
tokenizer: DomRefCell::new(tokenizer),
@@ -433,22 +434,15 @@ impl ServoParser {
}

fn push_bytes_input_chunk(&self, chunk: Vec<u8>) {
let mut chunk = ByteTendril::from(&*chunk);
let mut network_input = self.network_input.borrow_mut();
let mut incomplete_utf8 = self.incomplete_utf8.borrow_mut();

if let Some(mut incomplete) = incomplete_utf8.take() {
let result = incomplete.try_complete(chunk, |s| network_input.push_back(s));
match result {
Err(()) => {
*incomplete_utf8 = Some(incomplete);
return;
},
Ok(remaining) => chunk = remaining,
}
let chunk = self
.network_decoder
.borrow_mut()
.as_mut()
.unwrap()
.decode(chunk);
if !chunk.is_empty() {
self.network_input.borrow_mut().push_back(chunk);
}

*incomplete_utf8 = chunk.decode_utf8_lossy(|s| network_input.push_back(s));
}

fn push_string_input_chunk(&self, chunk: String) {
@@ -481,10 +475,9 @@ impl ServoParser {
// the parser remains unsuspended.

if self.last_chunk_received.get() {
if let Some(_) = self.incomplete_utf8.borrow_mut().take() {
self.network_input
.borrow_mut()
.push_back(StrTendril::from("\u{FFFD}"))
let chunk = self.network_decoder.borrow_mut().take().unwrap().finish();
if !chunk.is_empty() {
self.network_input.borrow_mut().push_back(chunk);
}
}
self.tokenize(|tokenizer| tokenizer.feed(&mut *self.network_input.borrow_mut()));
@@ -552,7 +545,7 @@ impl ServoParser {
assert!(self.last_chunk_received.get());
assert!(self.script_input.borrow().is_empty());
assert!(self.network_input.borrow().is_empty());
assert!(self.incomplete_utf8.borrow().is_none());
assert!(self.network_decoder.borrow().is_none());

// Step 1.
self.document
@@ -1200,3 +1193,46 @@ fn create_element_for_token(
// Step 13.
element
}

/// Decoder for the bytes received from the network.
///
/// Wraps a `Utf8LossyDecoder` whose decoded text is delivered to a
/// `NetworkSink`.
#[derive(JSTraceable, MallocSizeOf)]
struct NetworkDecoder {
/// The lossy UTF-8 decoder; its `inner_sink` is the `NetworkSink` that
/// receives the decoded text.
#[ignore_malloc_size_of = "Defined in html5ever"]
decoder: Utf8LossyDecoder<NetworkSink>,
}

impl NetworkDecoder {
fn new() -> Self {
Self {
decoder: Utf8LossyDecoder::new(Default::default()),
}
}

fn decode(&mut self, chunk: Vec<u8>) -> StrTendril {
self.decoder.process(ByteTendril::from(&*chunk));
mem::replace(&mut self.decoder.inner_sink.output, Default::default())
}

fn finish(self) -> StrTendril {
self.decoder.finish()
}
}

/// Sink that keeps the decoded text handed to it in a single tendril.
#[derive(Default, JSTraceable)]
struct NetworkSink {
/// Decoded text currently held by the sink; `Default` starts it out empty.
output: StrTendril,
}

impl TendrilSink<UTF8> for NetworkSink {
    type Output = StrTendril;

    /// Stores a piece of decoded text in `output`.
    ///
    /// The decoder feeding this sink may deliver several pieces for a single
    /// input chunk (for example valid text, then U+FFFD for an invalid
    /// sequence, then more valid text), so `output` is not guaranteed to be
    /// empty on entry. Later pieces are appended instead of asserting, which
    /// would otherwise panic on such input.
    fn process(&mut self, t: StrTendril) {
        if self.output.is_empty() {
            // Common case: adopt the tendril as-is, without copying.
            self.output = t;
        } else {
            self.output.push_tendril(&t);
        }
    }

    /// Decoding errors are ignored by this sink; the description is dropped.
    fn error(&mut self, _desc: Cow<'static, str>) {}

    /// Consumes the sink and returns the text accumulated in `output`.
    fn finish(self) -> Self::Output {
        self.output
    }
}

0 comments on commit 555226c

Please sign in to comment.