Skip to content

Commit

Permalink
Auto merge of #386 - servo:private-rcdom, r=nox
Browse files Browse the repository at this point in the history
Extract RcDom into its own crate with no support guarantees

RcDom was never intended to be anything more than a test-only implementation. This change extracts it into a separate crate whose README spells out the support and maintenance guarantees that anyone choosing to rely on it can expect (read: none).

Fixes #385.
  • Loading branch information
bors-servo committed Oct 21, 2019
2 parents cf07a98 + 9b90752 commit 040a4dd
Show file tree
Hide file tree
Showing 19 changed files with 96 additions and 39 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@
members = [
"markup5ever",
"html5ever",
"rcdom",
"xml5ever"
]
1 change: 1 addition & 0 deletions html5ever/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ serde_json = "1.0"
rustc-test = "0.3"
typed-arena = "1.3.0"
criterion = "0.3"
markup5ever_rcdom = { version = "0.1", path = "../rcdom" }

[build-dependencies]
quote = "1"
Expand Down
6 changes: 4 additions & 2 deletions html5ever/examples/html2html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,16 @@
//! where htmlparser-1.4.jar comes from http://about.validator.nu/htmlparser/

extern crate html5ever;
extern crate markup5ever_rcdom as rcdom;

use std::default::Default;
use std::io::{self, Write};

use html5ever::driver::ParseOpts;
use html5ever::rcdom::RcDom;
use html5ever::tendril::TendrilSink;
use html5ever::tree_builder::TreeBuilderOpts;
use html5ever::{parse_document, serialize};
use rcdom::{RcDom, SerializableHandle};

fn main() {
let opts = ParseOpts {
Expand All @@ -45,7 +46,8 @@ fn main() {
.write_all(b"<!DOCTYPE html>\n")
.ok()
.expect("writing DOCTYPE failed");
serialize(&mut io::stdout(), &dom.document, Default::default())
let document: SerializableHandle = dom.document.clone().into();
serialize(&mut io::stdout(), &document, Default::default())
.ok()
.expect("serialization failed");
}
3 changes: 2 additions & 1 deletion html5ever/examples/print-rcdom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,16 @@

#[macro_use]
extern crate html5ever;
extern crate markup5ever_rcdom as rcdom;

use std::default::Default;
use std::io;
use std::iter::repeat;
use std::string::String;

use html5ever::parse_document;
use html5ever::rcdom::{Handle, NodeData, RcDom};
use html5ever::tendril::TendrilSink;
use rcdom::{Handle, NodeData, RcDom};

// This is not proper HTML serialization, of course.

Expand Down
8 changes: 5 additions & 3 deletions html5ever/src/driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,18 +137,20 @@ impl<Sink: TreeSink> Parser<Sink> {

#[cfg(test)]
mod tests {
extern crate markup5ever_rcdom;
use super::*;
use crate::rcdom::RcDom;
use self::markup5ever_rcdom::{RcDom, SerializableHandle};
use crate::serialize::serialize;
use crate::tendril::TendrilSink;
use tendril::TendrilSink;

#[test]
fn from_utf8() {
let dom = parse_document(RcDom::default(), ParseOpts::default())
.from_utf8()
.one("<title>Test".as_bytes());
let mut serialized = Vec::new();
serialize(&mut serialized, &dom.document, Default::default()).unwrap();
let document: SerializableHandle = dom.document.clone().into();
serialize(&mut serialized, &document, Default::default()).unwrap();
assert_eq!(
String::from_utf8(serialized).unwrap().replace(" ", ""),
"<html><head><title>Test</title></head><body></body></html>"
Expand Down
13 changes: 7 additions & 6 deletions html5ever/src/tree_builder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1683,9 +1683,10 @@ where
#[cfg(test)]
#[allow(non_snake_case)]
mod test {
use crate::interface::{AppendNode, AppendText, NodeOrText};
use crate::interface::{ElementFlags, Tracer, TreeSink};
use crate::interface::{LimitedQuirks, NoQuirks, Quirks, QuirksMode};
extern crate markup5ever_rcdom as rcdom;
use markup5ever::interface::{AppendNode, AppendText, NodeOrText};
use markup5ever::interface::{ElementFlags, Tracer, TreeSink};
use markup5ever::interface::{LimitedQuirks, NoQuirks, Quirks, QuirksMode};

use super::types::*;

Expand All @@ -1698,6 +1699,7 @@ mod test {
use crate::tokenizer::states as tok_state;
use crate::tokenizer::{Doctype, StartTag, Tag, TokenSink};
use crate::tokenizer::{Tokenizer, TokenizerOpts};
use crate::driver::*;

use crate::util::str::is_ascii_whitespace;

Expand All @@ -1708,9 +1710,8 @@ mod test {
use std::mem::replace;

use super::{TreeBuilder, TreeBuilderOpts};
use crate::driver::*;
use crate::{Attribute, local_name, namespace_url, ns};
use crate::rcdom::{Handle, Node, NodeData, RcDom};
use markup5ever::{Attribute, local_name, namespace_url, ns};
use self::rcdom::{Handle, Node, NodeData, RcDom};

pub struct LineCountingDOM {
pub line_vec: Vec<(QualName, u64)>,
Expand Down
13 changes: 8 additions & 5 deletions html5ever/tests/serializer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,16 @@

#[macro_use]
extern crate html5ever;
extern crate markup5ever_rcdom as rcdom;

use std::default::Default;

use html5ever::driver::ParseOpts;
use html5ever::rcdom::RcDom;
use html5ever::serialize::{Serialize, SerializeOpts, Serializer, TraversalScope};
use html5ever::tendril::{SliceExt, StrTendril, TendrilSink};
use html5ever::tokenizer::{TagKind, Token, TokenSink, TokenSinkResult, Tokenizer};
use html5ever::{parse_document, parse_fragment, serialize, QualName};
use rcdom::{RcDom, SerializableHandle};

use std::io;

Expand Down Expand Up @@ -98,10 +99,10 @@ fn parse_and_serialize(input: StrTendril) -> StrTendril {
vec![],
)
.one(input);
let inner = &dom.document.children.borrow()[0];
let inner: SerializableHandle = dom.document.children.borrow()[0].clone().into();

let mut result = vec![];
serialize(&mut result, inner, Default::default()).unwrap();
serialize(&mut result, &inner, Default::default()).unwrap();
StrTendril::try_from_byte_slice(&result).unwrap()
}

Expand Down Expand Up @@ -242,7 +243,8 @@ fn doctype() {
let dom = parse_document(RcDom::default(), ParseOpts::default()).one("<!doctype html>");
dom.document.children.borrow_mut().truncate(1); // Remove <html>
let mut result = vec![];
serialize(&mut result, &dom.document, Default::default()).unwrap();
let document: SerializableHandle = dom.document.clone().into();
serialize(&mut result, &document, Default::default()).unwrap();
assert_eq!(String::from_utf8(result).unwrap(), "<!DOCTYPE html>");
}

Expand All @@ -259,6 +261,7 @@ fn deep_tree() {
let document = &dom.document;
let opts = SerializeOpts::default();
let mut ret_val = Vec::new();
serialize(&mut ret_val, document, opts)
let document: SerializableHandle = dom.document.clone().into();
serialize(&mut ret_val, &document, opts)
.expect("Writing to a string shouldn't fail (expect on OOM)");
}
3 changes: 2 additions & 1 deletion html5ever/tests/tree_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

extern crate markup5ever_rcdom as rcdom;
extern crate rustc_test as test;
#[macro_use]
extern crate html5ever;
Expand All @@ -24,10 +25,10 @@ use std::path::Path;
use std::{env, fs, io};
use test::{DynTestName, TestDesc, TestDescAndFn, TestFn};

use html5ever::rcdom::{Handle, NodeData, RcDom};
use html5ever::tendril::{StrTendril, TendrilSink};
use html5ever::{parse_document, parse_fragment, ParseOpts};
use html5ever::{LocalName, QualName};
use rcdom::{Handle, NodeData, RcDom};

fn parse_tests<It: Iterator<Item = String>>(mut lines: It) -> Vec<HashMap<String, String>> {
let mut tests = vec![];
Expand Down
2 changes: 2 additions & 0 deletions markup5ever/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,5 @@ serde = "1.0"
serde_derive = "1.0"
serde_json = "1.0"

[dev-dependencies]
markup5ever_rcdom = { version = "0.1", path = "../rcdom" }
3 changes: 2 additions & 1 deletion markup5ever/interface/tree_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,10 @@ pub struct ElementFlags {
///
/// ```
/// # #[macro_use] extern crate markup5ever;
/// # extern crate markup5ever_rcdom as rcdom;
///
/// # fn main() {
/// use markup5ever::{rcdom, QualName, Attribute};
/// use markup5ever::{QualName, Attribute};
/// use markup5ever::interface::create_element;
///
/// let mut dom = rcdom::RcDom::default();
Expand Down
1 change: 0 additions & 1 deletion markup5ever/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ include!(concat!(env!("OUT_DIR"), "/generated.rs"));
pub mod data;
#[macro_use]
pub mod interface;
pub mod rcdom;
pub mod serialize;
mod util {
pub mod buffer_queue;
Expand Down
17 changes: 17 additions & 0 deletions rcdom/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
[package]
name = "markup5ever_rcdom"
version = "0.1.0"
authors = [ "The html5ever Project Developers" ]
license = "MIT / Apache-2.0"
repository = "https://github.com/servo/html5ever"
description = "Basic, unsupported DOM structure for use by tests in html5ever/xml5ever"
readme = "README.md"
documentation = "https://docs.rs/markup5ever_rcdom"
categories = [ "parser-implementations", "web-programming" ]

[lib]
path = "lib.rs"

[dependencies]
tendril = "0.4"
markup5ever = { version = "0.10", path = "../markup5ever" }
7 changes: 7 additions & 0 deletions rcdom/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# markup5ever_rcdom

This crate is built for the express purpose of writing automated tests for the `html5ever`
and `xml5ever` crates. It is not intended to be a production-quality DOM implementation,
and has not been fuzzed or tested against arbitrary, malicious, or nontrivial inputs. No maintenance
or support for any such issues will be provided. If you use this DOM implementation in a production,
user-facing system, you do so at your own risk.
33 changes: 22 additions & 11 deletions markup5ever/rcdom.rs → rcdom/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@
//! [tree structure]: https://en.wikipedia.org/wiki/Tree_(data_structure)
//! [dom wiki]: https://en.wikipedia.org/wiki/Document_Object_Model

extern crate markup5ever;
extern crate tendril;

use std::borrow::Cow;
use std::cell::{Cell, RefCell};
use std::collections::HashSet;
Expand All @@ -47,14 +50,14 @@ use std::rc::{Rc, Weak};

use tendril::StrTendril;

use crate::interface::tree_builder;
use crate::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
use crate::serialize::TraversalScope;
use crate::serialize::TraversalScope::{ChildrenOnly, IncludeNode};
use crate::serialize::{Serialize, Serializer};
use crate::Attribute;
use crate::ExpandedName;
use crate::QualName;
use markup5ever::interface::tree_builder;
use markup5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
use markup5ever::serialize::TraversalScope;
use markup5ever::serialize::TraversalScope::{ChildrenOnly, IncludeNode};
use markup5ever::serialize::{Serialize, Serializer};
use markup5ever::Attribute;
use markup5ever::ExpandedName;
use markup5ever::QualName;

/// The different kinds of nodes in the DOM.
#[derive(Debug)]
Expand Down Expand Up @@ -433,14 +436,22 @@ enum SerializeOp {
Close(QualName)
}

impl Serialize for Handle {
/// Newtype wrapper around a [`Handle`].
///
/// `Serialize` is implemented on this wrapper; obtain one from a `Handle`
/// through the `From`/`Into` conversion below.
pub struct SerializableHandle(Handle);

impl From<Handle> for SerializableHandle {
    /// Wraps `handle` as-is, taking ownership of it.
    fn from(handle: Handle) -> Self {
        Self(handle)
    }
}

impl Serialize for SerializableHandle {
fn serialize<S>(&self, serializer: &mut S, traversal_scope: TraversalScope) -> io::Result<()>
where
S: Serializer,
{
let mut ops = match traversal_scope {
IncludeNode => vec![SerializeOp::Open(self.clone())],
ChildrenOnly(_) => self
IncludeNode => vec![SerializeOp::Open(self.0.clone())],
ChildrenOnly(_) => self.0
.children
.borrow()
.iter()
Expand Down
1 change: 1 addition & 0 deletions xml5ever/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ markup5ever = {version = "0.10", path = "../markup5ever" }
serde_json = "1.0"
rustc-test = "0.3"
criterion = "0.2"
markup5ever_rcdom = {version = "0.1", path = "../rcdom" }

[[bench]]
name = "xml5ever"
Expand Down
3 changes: 2 additions & 1 deletion xml5ever/examples/hello_xml.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@
//! xml5ever = "0.2.0"
//! tendril = "0.1.3"
//! ```
extern crate markup5ever_rcdom as rcdom;
extern crate xml5ever;

use std::default::Default;

use rcdom::{NodeData, RcDom};
use xml5ever::driver::parse_document;
use xml5ever::rcdom::{NodeData, RcDom};
use xml5ever::tendril::TendrilSink;
use xml5ever::tree_builder::TreeSink;

Expand Down
3 changes: 2 additions & 1 deletion xml5ever/examples/xml_tree_printer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,15 @@
//! xml5ever = "0.2.0"
//! tendril = "0.1.3"
//! ```
extern crate markup5ever_rcdom as rcdom;
extern crate xml5ever;

use std::default::Default;
use std::io;
use std::string::String;

use rcdom::{Handle, NodeData, RcDom};
use xml5ever::driver::parse_document;
use xml5ever::rcdom::{Handle, NodeData, RcDom};
use xml5ever::tendril::TendrilSink;

fn walk(prefix: &str, handle: &Handle) {
Expand Down
14 changes: 9 additions & 5 deletions xml5ever/src/driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,11 @@ impl<Sink: TreeSink> XmlParser<Sink> {

#[cfg(test)]
mod tests {
extern crate markup5ever_rcdom;
use super::*;
use crate::rcdom::RcDom;
use self::markup5ever_rcdom::{RcDom, SerializableHandle};
use crate::serialize::serialize;
use crate::tendril::TendrilSink;
use tendril::TendrilSink;

#[test]
fn el_ns_serialize() {
Expand Down Expand Up @@ -170,16 +171,18 @@ mod tests {

fn assert_eq_serialization(text: &'static str, dom: RcDom) {
let mut serialized = Vec::new();
serialize(&mut serialized, &dom.document, Default::default()).unwrap();
let document: SerializableHandle = dom.document.clone().into();
serialize(&mut serialized, &document, Default::default()).unwrap();

let dom_from_text = parse_document(RcDom::default(), XmlParseOpts::default())
.from_utf8()
.one(text.as_bytes());

let mut reserialized = Vec::new();
let document: SerializableHandle = dom_from_text.document.clone().into();
serialize(
&mut reserialized,
&dom_from_text.document,
&document,
Default::default(),
)
.unwrap();
Expand All @@ -192,7 +195,8 @@ mod tests {

/// Test helper: serializes `dom`'s document node into an in-memory buffer using
/// default serialization options and asserts that the buffer, decoded as UTF-8,
/// equals `text`.
///
/// Panics (via `unwrap` / `assert_eq!`) if serialization fails, the output is
/// not valid UTF-8, or the output differs from `text`.
fn assert_serialization(text: &'static str, dom: RcDom) {
let mut serialized = Vec::new();
serialize(&mut serialized, &dom.document, Default::default()).unwrap();
// `serialize` is given a `SerializableHandle`, so the document handle is
// cloned and converted into the wrapper first.
let document: SerializableHandle = dom.document.clone().into();
serialize(&mut serialized, &document, Default::default()).unwrap();
assert_eq!(String::from_utf8(serialized).unwrap(), text);
}
}
Loading

0 comments on commit 040a4dd

Please sign in to comment.