From c47122b9eb01435d63e518d765c1ec5b4838712b Mon Sep 17 00:00:00 2001 From: Ryan Choi Date: Fri, 25 Oct 2013 22:37:36 +0900 Subject: [PATCH] Supports interning on tag, id, class, attr_exists, attr_equal, attr_dash_match using new interning library written in Rust --- src/components/main/layout/box.rs | 9 +- src/components/script/dom/document.rs | 15 +- src/components/script/dom/element.rs | 74 ++- src/components/script/dom/htmldocument.rs | 17 +- src/components/script/dom/htmlimageelement.rs | 3 +- .../script/dom/htmlscriptelement.rs | 3 +- .../script/html/hubbub_html_parser.rs | 19 +- src/components/script/script_task.rs | 13 +- src/components/style/selector_matching.rs | 41 +- src/components/style/selectors.rs | 26 +- src/components/util/interning.rs | 606 ++++++++++++++++++ src/components/util/tree.rs | 8 +- src/components/util/util.rc | 3 + 13 files changed, 745 insertions(+), 92 deletions(-) create mode 100644 src/components/util/interning.rs diff --git a/src/components/main/layout/box.rs b/src/components/main/layout/box.rs index 012fd5b681fe..af44c44d6001 100644 --- a/src/components/main/layout/box.rs +++ b/src/components/main/layout/box.rs @@ -41,6 +41,7 @@ use servo_net::local_image_cache::LocalImageCache; use servo_util::range::*; use servo_util::tree::{TreeNodeRef, ElementLike}; use extra::url::Url; +use servo_util::interning::intern_string; /// Render boxes (`struct RenderBox`) are the leaves of the layout tree. They cannot position /// themselves. In general, render boxes do not have a simple correspondence with CSS boxes as in @@ -476,9 +477,9 @@ impl RenderBox { pub fn image_width(&self, image_box: @mut ImageRenderBox) -> Au { let attr_width: Option = do self.with_base |base| { do base.node.with_imm_element |elt| { - match elt.get_attr("width") { + match elt.get_attr(&intern_string("width")) { Some(width) => { - FromStr::from_str(width) + FromStr::from_str(width.to_str_slice()) } None => { None @@ -502,9 +503,9 @@ impl RenderBox { pub fn image_height(&self, image_box: @mut ImageRenderBox) -> Au { let attr_height: Option = do self.with_base |base| { do base.node.with_imm_element |elt| { - match elt.get_attr("height") { + match elt.get_attr(&intern_string("height")) { Some(height) => { - FromStr::from_str(height) + FromStr::from_str(height.to_str_slice()) } None => { None diff --git a/src/components/script/dom/document.rs b/src/components/script/dom/document.rs index d0a40235878a..d3a8c3fd799c 100644 --- a/src/components/script/dom/document.rs +++ b/src/components/script/dom/document.rs @@ -28,10 +28,10 @@ use std::hashmap::HashMap; use std::cast; use std::ptr; -use std::str::eq_slice; use std::libc; use std::ascii::StrAsciiExt; use std::unstable::raw::Box; +use servo_util::interning::intern_string; pub trait ReflectableDocument { fn init_reflector(@mut self, cx: *JSContext); @@ -210,7 +210,7 @@ impl Document { } pub fn GetElementsByTagName(&self, tag: &DOMString) -> @mut HTMLCollection { - self.createHTMLCollection(|elem| eq_slice(elem.tag_name, null_str_as_empty(tag))) + self.createHTMLCollection(|elem| elem.tag_name.eq(&intern_string(null_str_as_empty(tag)))) } pub fn GetElementsByTagNameNS(&self, _ns: &DOMString, _tag: &DOMString) -> @mut HTMLCollection { @@ -322,7 +322,7 @@ impl Document { let new_title = @HTMLTitleElement { htmlelement: HTMLElement::new(HTMLTitleElementTypeId, ~"title", abstract_self) }; - let new_title = unsafe { + let new_title = unsafe { Node::as_abstract_node(self.get_cx(), new_title) }; new_title.add_child(self.CreateTextNode(abstract_self, title)); @@ -338,8 +338,13 @@ impl Document { } pub fn GetElementsByName(&self, name: &DOMString) -> @mut HTMLCollection { + let name_interned = intern_string("name"); self.createHTMLCollection(|elem| - elem.get_attr("name").is_some() && eq_slice(elem.get_attr("name").unwrap(), null_str_as_empty(name))) + match elem.get_attr(&name_interned) { + None => false, + Some(ref name_attr) => (*name_attr).eq(&intern_string(null_str_as_empty(name))), + } + ) } pub fn createHTMLCollection(&self, callback: &fn(elem: &Element) -> bool) -> @mut HTMLCollection { @@ -395,7 +400,7 @@ fn foreach_ided_elements(root: &AbstractNode, } do node.with_imm_element |element| { - match element.get_attr("id") { + match element.get_attr(&intern_string("id")) { Some(id) => { callback(&id.to_str(), &node); } diff --git a/src/components/script/dom/element.rs b/src/components/script/dom/element.rs index ebcdd22e0c72..ff380a2b13ca 100644 --- a/src/components/script/dom/element.rs +++ b/src/components/script/dom/element.rs @@ -21,12 +21,16 @@ use js::jsapi::{JSContext, JSObject}; use std::comm; use std::hashmap::HashMap; use std::ascii::StrAsciiExt; +use servo_util::interning::{intern_string, IntString}; +use std::str::eq_slice; pub struct Element { node: Node, - tag_name: ~str, // TODO: This should be an atom, not a ~str. - attrs: HashMap<~str, ~str>, - attrs_list: ~[~str], // store an order of attributes. + tag_name: IntString, // TODO: This should be an atom, not a ~str. + id: Option, + classes: ~[IntString], + attrs: HashMap, + attrs_list: ~[IntString], // store an order of attributes. style_attribute: Option, } @@ -126,27 +130,38 @@ pub enum ElementTypeId { // impl ElementLike for Element { - fn get_local_name<'a>(&'a self) -> &'a str { - self.tag_name.as_slice() + fn get_local_name<'a>(&'a self) -> &'a IntString { + &self.tag_name } - fn get_attr<'a>(&'a self, name: &str) -> Option<&'a str> { + fn get_attr<'a>(&'a self, name: &IntString) -> Option<&'a IntString> { // FIXME: only case-insensitive in the HTML namespace (as opposed to SVG, etc.) - let name = name.to_ascii_lower(); - let value: Option<&str> = self.attrs.find_equiv(&name).map(|value| { - let value: &str = *value; + let value: Option<&IntString> = self.attrs.find_equiv(name).map(|value| { value }); - return value; } + + fn get_id<'a>(&'a self) -> Option<&'a IntString> { + match self.id { + None => None, + Some(ref id) => Some(id), + } + } + + fn get_classes<'a>(&'a self) -> &'a [IntString] { + let c: &'a [IntString] = self.classes; + c + } } impl<'self> Element { pub fn new(type_id: ElementTypeId, tag_name: ~str, document: AbstractDocument) -> Element { Element { node: Node::new(ElementNodeTypeId(type_id), document), - tag_name: tag_name, + tag_name: intern_string(tag_name), + id: None, + classes: ~[], attrs: HashMap::new(), attrs_list: ~[], style_attribute: None, @@ -157,24 +172,33 @@ impl<'self> Element { abstract_self: AbstractNode, raw_name: &DOMString, raw_value: &DOMString) { - let name = null_str_as_empty(raw_name).to_ascii_lower(); - let value = null_str_as_empty(raw_value); + static WHITESPACE: &'static [char] = &'static [' ', '\t', '\n', '\r', '\x0C']; + + let name = intern_string(null_str_as_empty(raw_name)); + let value = intern_string(null_str_as_empty(raw_value)); - // FIXME: reduce the time of `value.clone()`. + // FIXME: reduce the time of `value.clone()` self.attrs.mangle(name.clone(), value.clone(), - |new_name: &~str, new_value: ~str| { + |new_name: &IntString, new_value: IntString| { // register to the ordered list. self.attrs_list.push(new_name.clone()); new_value }, - |_, old_value: &mut ~str, new_value: ~str| { + |_, old_value: &mut IntString, new_value: IntString| { // update value. *old_value = new_value; }); - if "style" == name { - self.style_attribute = Some(style::parse_style_attribute( - null_str_as_empty_ref(raw_value))); + if eq_slice(name.to_ascii_lower(), "id") { + self.id = Some(value); + } else if eq_slice(name.to_ascii_lower(), "class") { + for class in value.to_str_slice().split_iter(WHITESPACE) { + self.classes.push(intern_string(class)); + } + } + + if eq_slice("style", name.to_ascii_lower()) { + self.style_attribute = Some(style::parse_style_attribute(value.to_str_slice())); } // TODO: update owner document's id hashmap for `document.getElementById()` @@ -205,7 +229,7 @@ impl<'self> Element { impl Element { pub fn TagName(&self) -> DOMString { - Some(self.tag_name.to_owned().to_ascii_upper()) + Some(self.tag_name.to_str_slice().to_ascii_upper()) } pub fn Id(&self) -> DOMString { @@ -216,7 +240,7 @@ impl Element { } pub fn GetAttribute(&self, name: &DOMString) -> DOMString { - self.get_attr(null_str_as_empty_ref(name)).map(|s| s.to_owned()) + self.get_attr(&intern_string(null_str_as_empty_ref(name))).map(|s| s.to_str()) } pub fn GetAttributeNS(&self, _namespace: &DOMString, _localname: &DOMString) -> DOMString { @@ -385,15 +409,15 @@ impl Element { } pub struct Attr { - name: ~str, - value: ~str, + name: IntString, + value: IntString, } impl Attr { pub fn new(name: ~str, value: ~str) -> Attr { Attr { - name: name, - value: value + name: intern_string(name), + value: intern_string(value) } } } diff --git a/src/components/script/dom/htmldocument.rs b/src/components/script/dom/htmldocument.rs index cadba01ce977..bc79533b1d7b 100644 --- a/src/components/script/dom/htmldocument.rs +++ b/src/components/script/dom/htmldocument.rs @@ -16,6 +16,7 @@ use servo_util::tree::{TreeNodeRef, ElementLike}; use std::ptr; use std::str::eq_slice; +use servo_util::interning::intern_string; pub struct HTMLDocument { parent: Document @@ -48,11 +49,11 @@ impl HTMLDocument { } pub fn Images(&self) -> @mut HTMLCollection { - self.parent.createHTMLCollection(|elem| eq_slice(elem.tag_name, "img")) + self.parent.createHTMLCollection(|elem| eq_slice(elem.tag_name.to_str_slice(), "img")) } pub fn Embeds(&self) -> @mut HTMLCollection { - self.parent.createHTMLCollection(|elem| eq_slice(elem.tag_name, "embed")) + self.parent.createHTMLCollection(|elem| eq_slice(elem.tag_name.to_str_slice(), "embed")) } pub fn Plugins(&self) -> @mut HTMLCollection { @@ -61,26 +62,26 @@ impl HTMLDocument { pub fn Links(&self) -> @mut HTMLCollection { self.parent.createHTMLCollection(|elem| - (eq_slice(elem.tag_name, "a") || eq_slice(elem.tag_name, "area")) - && elem.get_attr("href").is_some()) + (eq_slice(elem.tag_name.to_str_slice(), "a") || eq_slice(elem.tag_name.to_str_slice(), "area")) + && elem.get_attr(&intern_string("href")).is_some()) } pub fn Forms(&self) -> @mut HTMLCollection { - self.parent.createHTMLCollection(|elem| eq_slice(elem.tag_name, "form")) + self.parent.createHTMLCollection(|elem| eq_slice(elem.tag_name.to_str_slice(), "form")) } pub fn Scripts(&self) -> @mut HTMLCollection { - self.parent.createHTMLCollection(|elem| eq_slice(elem.tag_name, "script")) + self.parent.createHTMLCollection(|elem| eq_slice(elem.tag_name.to_str_slice(), "script")) } pub fn Anchors(&self) -> @mut HTMLCollection { self.parent.createHTMLCollection(|elem| - eq_slice(elem.tag_name, "a") && elem.get_attr("name").is_some()) + eq_slice(elem.tag_name.to_str_slice(), "a") && elem.get_attr(&intern_string("name")).is_some()) } pub fn Applets(&self) -> @mut HTMLCollection { // FIXME: This should be return OBJECT elements containing applets. - self.parent.createHTMLCollection(|elem| eq_slice(elem.tag_name, "applet")) + self.parent.createHTMLCollection(|elem| eq_slice(elem.tag_name.to_str_slice(), "applet")) } } diff --git a/src/components/script/dom/htmlimageelement.rs b/src/components/script/dom/htmlimageelement.rs index 49010ee0f624..c2849429275b 100644 --- a/src/components/script/dom/htmlimageelement.rs +++ b/src/components/script/dom/htmlimageelement.rs @@ -12,6 +12,7 @@ use servo_net::image_cache_task; use servo_net::image_cache_task::ImageCacheTask; use servo_util::url::make_url; use servo_util::tree::ElementLike; +use servo_util::interning::intern_string; pub struct HTMLImageElement { htmlelement: HTMLElement, @@ -23,7 +24,7 @@ impl HTMLImageElement { /// prefetching the image. This method must be called after `src` is changed. pub fn update_image(&mut self, image_cache: ImageCacheTask, url: Option) { let elem = &mut self.htmlelement.element; - let src_opt = elem.get_attr("src").map(|x| x.to_str()); + let src_opt = elem.get_attr(&intern_string("src")).map(|x| x.to_str()); match src_opt { None => {} Some(src) => { diff --git a/src/components/script/dom/htmlscriptelement.rs b/src/components/script/dom/htmlscriptelement.rs index f3de42eae26f..67b0ce384207 100644 --- a/src/components/script/dom/htmlscriptelement.rs +++ b/src/components/script/dom/htmlscriptelement.rs @@ -5,6 +5,7 @@ use dom::bindings::utils::{DOMString, ErrorResult}; use dom::htmlelement::HTMLElement; use servo_util::tree::ElementLike; +use servo_util::interning::intern_string; pub struct HTMLScriptElement { htmlelement: HTMLElement, @@ -12,7 +13,7 @@ pub struct HTMLScriptElement { impl HTMLScriptElement { pub fn Src(&self) -> DOMString { - self.htmlelement.element.get_attr("src").map(|s| s.to_str()) + self.htmlelement.element.get_attr(&intern_string("src")).map(|s| s.to_str()) } pub fn SetSrc(&mut self, _src: &DOMString) -> ErrorResult { diff --git a/src/components/script/html/hubbub_html_parser.rs b/src/components/script/html/hubbub_html_parser.rs index c6dca22ee214..ebe554922b55 100644 --- a/src/components/script/html/hubbub_html_parser.rs +++ b/src/components/script/html/hubbub_html_parser.rs @@ -31,6 +31,7 @@ use servo_util::url::make_url; use extra::url::Url; use extra::future::Future; use geom::size::Size2D; +use servo_util::interning::{intern_string}; macro_rules! handle_element( ($cx: expr, @@ -110,7 +111,7 @@ type JSResult = ~[JSFile]; enum CSSMessage { CSSTaskNewFile(StylesheetProvenance), - CSSTaskExit + CSSTaskExit } enum JSMessage { @@ -365,7 +366,7 @@ pub fn parse_html(cx: *JSContext, let (css_chan2, css_chan3, js_chan2) = (css_chan.clone(), css_chan.clone(), js_chan.clone()); let next_subpage_id = Cell::new(next_subpage_id); - + parser.set_tree_handler(~hubbub::TreeHandler { create_comment: |data: ~str| { debug!("create comment"); @@ -405,10 +406,10 @@ pub fn parse_html(cx: *JSContext, // Handle CSS style sheets from elements ElementNodeTypeId(HTMLLinkElementTypeId) => { do node.with_imm_element |element| { - match (element.get_attr("rel"), element.get_attr("href")) { + match (element.get_attr(&intern_string("rel")), element.get_attr(&intern_string("href"))) { (Some(rel), Some(href)) => { - if rel == "stylesheet" { - debug!("found CSS stylesheet: %s", href); + if eq_slice(rel.to_ascii_lower(), "stylesheet") { + debug!("found CSS stylesheet: %s", href.to_str_slice()); let url = make_url(href.to_str(), Some(url2.clone())); css_chan2.send(CSSTaskNewFile(UrlProvenance(url))); } @@ -424,11 +425,11 @@ pub fn parse_html(cx: *JSContext, let iframe_chan = iframe_chan.take(); let sandboxed = iframe_element.is_sandboxed(); let elem = &mut iframe_element.htmlelement.element; - let src_opt = elem.get_attr("src").map(|x| x.to_str()); + let src_opt = elem.get_attr(&intern_string("src")).map(|x| x.to_str()); for src in src_opt.iter() { let iframe_url = make_url(src.clone(), Some(url2.clone())); iframe_element.frame = Some(iframe_url.clone()); - + // Size future let (port, chan) = comm::oneshot(); let size_future = Future::from_port(port); @@ -525,9 +526,9 @@ pub fn parse_html(cx: *JSContext, unsafe { let scriptnode: AbstractNode = NodeWrapping::from_hubbub_node(script); do scriptnode.with_imm_element |script| { - match script.get_attr("src") { + match script.get_attr(&intern_string("src")) { Some(src) => { - debug!("found script: %s", src); + debug!("found script: %s", src.to_str_slice()); let new_url = make_url(src.to_str(), Some(url3.clone())); js_chan2.send(JSTaskNewFile(new_url)); } diff --git a/src/components/script/script_task.rs b/src/components/script/script_task.rs index e710dfb6d958..7d16bc8862fd 100644 --- a/src/components/script/script_task.rs +++ b/src/components/script/script_task.rs @@ -49,6 +49,8 @@ use servo_util::tree::{TreeNodeRef, ElementLike}; use servo_util::url::make_url; use extra::url::Url; use extra::future::Future; +use servo_util::interning; +use servo_util::interning::intern_string; /// Messages used to control the script task. pub enum ScriptMsg { @@ -438,6 +440,7 @@ impl ScriptTask { js_runtime: js_runtime, }; + interning::init(); script_task } @@ -650,7 +653,7 @@ impl ScriptTask { // needs to be smarter about exiting pipelines. None => false, } - + } /// The entry point to document loading. Defines bindings, sets up the window and document @@ -823,7 +826,7 @@ impl ScriptTask { } if node.is_element() { do node.with_imm_element |element| { - if "a" == element.tag_name { + if "a" == element.tag_name.to_str_slice() { self.load_url_from_element(page, element) } } @@ -842,14 +845,14 @@ impl ScriptTask { fn load_url_from_element(&self, page: @mut Page, element: &Element) { // if the node's element is "a," load url from href attr - let attr = element.get_attr("href"); + let attr = element.get_attr(&intern_string("href")); for href in attr.iter() { - debug!("ScriptTask: clicked on link to %s", *href); + debug!("ScriptTask: clicked on link to %s", href.to_str_slice()); let current_url = do page.url.as_ref().map |&(ref url, _)| { url.clone() }; debug!("ScriptTask: current url is %?", current_url); - let url = make_url(href.to_owned(), current_url); + let url = make_url(href.to_str(), current_url); self.constellation_chan.send(LoadUrlMsg(page.id, url, Future::from_value(page.window_size.get()))); } } diff --git a/src/components/style/selector_matching.rs b/src/components/style/selector_matching.rs index e7ef0ab1955e..25a604a4722c 100644 --- a/src/components/style/selector_matching.rs +++ b/src/components/style/selector_matching.rs @@ -11,7 +11,7 @@ use stylesheets::Stylesheet; use media_queries::{Device, Screen}; use properties::{PropertyDeclaration, PropertyDeclarationBlock}; use servo_util::tree::{TreeNodeRefAsElement, TreeNode, ElementLike}; - +use servo_util::interning::IntString; pub enum StylesheetOrigin { UserAgentOrigin, @@ -198,38 +198,41 @@ fn matches_simple_selector(selector: &SimpleSelector, element: & match *selector { // TODO: case-sensitivity depends on the document type - // TODO: intern element names LocalNameSelector(ref name) - => element.get_local_name().eq_ignore_ascii_case(name.as_slice()), + => name.eq_ignore_ascii_case(element.get_local_name()), NamespaceSelector(_) => false, // TODO, when the DOM supports namespaces on elements. // TODO: case-sensitivity depends on the document type and quirks mode - // TODO: cache and intern IDs on elements. - IDSelector(ref id) => element.get_attr("id") == Some(id.as_slice()), - // TODO: cache and intern classe names on elements. - ClassSelector(ref class) => match element.get_attr("class") { + IDSelector(ref id) => match element.get_id() { None => false, - // TODO: case-sensitivity depends on the document type and quirks mode - Some(ref class_attr) - => class_attr.split_iter(WHITESPACE).any(|c| c == class.as_slice()), + Some(ref id_attr) => id.eq_ignore_ascii_case(*id_attr), + }, + // TODO: case-sensitivity depends on the document type and quirks mode + ClassSelector(ref class) => { + for c in element.get_classes().iter() { + if class.eq_ignore_ascii_case(c) { + return true; + } + } + false }, AttrExists(ref attr) => match_attribute(attr, element, |_| true), - AttrEqual(ref attr, ref value) => match_attribute(attr, element, |v| v == value.as_slice()), + AttrEqual(ref attr, ref value) => match_attribute(attr, element, |v| v.eq(value)), AttrIncludes(ref attr, ref value) => do match_attribute(attr, element) |attr_value| { - attr_value.split_iter(WHITESPACE).any(|v| v == value.as_slice()) + attr_value.to_str_slice().split_iter(WHITESPACE).any(|v| v == value.as_slice()) }, AttrDashMatch(ref attr, ref value, ref dashing_value) => do match_attribute(attr, element) |attr_value| { - attr_value == value.as_slice() || attr_value.starts_with(dashing_value.as_slice()) + attr_value.eq(value) || attr_value.to_str_slice().starts_with(dashing_value.to_str_slice()) }, AttrPrefixMatch(ref attr, ref value) => do match_attribute(attr, element) |attr_value| { - attr_value.starts_with(value.as_slice()) + attr_value.to_str_slice().starts_with(value.as_slice()) }, AttrSubstringMatch(ref attr, ref value) => do match_attribute(attr, element) |attr_value| { - attr_value.contains(value.as_slice()) + attr_value.to_str_slice().contains(value.as_slice()) }, AttrSuffixMatch(ref attr, ref value) => do match_attribute(attr, element) |attr_value| { - attr_value.ends_with(value.as_slice()) + attr_value.to_str_slice().ends_with(value.as_slice()) }, Negation(ref negated) => { @@ -240,12 +243,12 @@ fn matches_simple_selector(selector: &SimpleSelector, element: & #[inline] -fn match_attribute(attr: &AttrSelector, element: &E, f: &fn(&str)-> bool) -> bool { +fn match_attribute(attr: &AttrSelector, element: &E, f: &fn(&IntString)-> bool) -> bool { match attr.namespace { Some(_) => false, // TODO, when the DOM supports namespaces on attributes - None => match element.get_attr(attr.name) { + None => match element.get_attr(&attr.name) { None => false, - Some(ref value) => f(value.as_slice()) + Some(ref value) => f(*value) } } } diff --git a/src/components/style/selectors.rs b/src/components/style/selectors.rs index b055baa0cdbb..2f1b8bc1a02c 100644 --- a/src/components/style/selectors.rs +++ b/src/components/style/selectors.rs @@ -6,7 +6,7 @@ use std::{vec, iter}; use std::ascii::StrAsciiExt; use cssparser::*; use namespaces::NamespaceMap; - +use servo_util::interning::{intern_string, IntString}; #[deriving(Clone)] pub struct Selector { @@ -43,16 +43,16 @@ pub enum Combinator { #[deriving(Clone)] pub enum SimpleSelector { - IDSelector(~str), - ClassSelector(~str), - LocalNameSelector(~str), + IDSelector(IntString), + ClassSelector(IntString), + LocalNameSelector(IntString), NamespaceSelector(~str), // Attribute selectors AttrExists(AttrSelector), // [foo] - AttrEqual(AttrSelector, ~str), // [foo=bar] + AttrEqual(AttrSelector, IntString), // [foo=bar] AttrIncludes(AttrSelector, ~str), // [foo~=bar] - AttrDashMatch(AttrSelector, ~str, ~str), // [foo|=bar] Second string is the first + "-" + AttrDashMatch(AttrSelector, IntString, IntString), // [foo|=bar] Second string is the first + "-" AttrPrefixMatch(AttrSelector, ~str), // [foo^=bar] AttrSubstringMatch(AttrSelector, ~str), // [foo*=bar] AttrSuffixMatch(AttrSelector, ~str), // [foo$=bar] @@ -68,7 +68,7 @@ pub enum SimpleSelector { #[deriving(Clone)] pub struct AttrSelector { - name: ~str, + name: IntString, namespace: Option<~str>, } @@ -236,7 +236,7 @@ fn parse_type_selector(iter: &mut Iter, namespaces: &NamespaceMap) None => (), } match local_name { - Some(name) => simple_selectors.push(LocalNameSelector(name)), + Some(name) => simple_selectors.push(LocalNameSelector(intern_string(name))), None => (), } Some(Some(simple_selectors)) @@ -254,13 +254,13 @@ fn parse_one_simple_selector(iter: &mut Iter, namespaces: &NamespaceMap, inside_ -> Option>> { match iter.peek() { Some(&IDHash(_)) => match iter.next() { - Some(IDHash(id)) => Some(Some(Left(IDSelector(id)))), + Some(IDHash(id)) => Some(Some(Left(IDSelector(intern_string(id))))), _ => fail!("Implementation error, this should not happen."), }, Some(&Delim('.')) => { iter.next(); match iter.next() { - Some(Ident(class)) => Some(Some(Left(ClassSelector(class)))), + Some(Ident(class)) => Some(Some(Left(ClassSelector(intern_string(class))))), _ => None, // invalid selector } } @@ -374,7 +374,7 @@ fn parse_attribute_selector(content: ~[ComponentValue], namespaces: &NamespaceMa Some(Some((_, None))) => fail!("Implementation error, this should not happen."), Some(Some((namespace, Some(local_name)))) => AttrSelector { namespace: namespace, - name: local_name, + name: intern_string(local_name), }, }; skip_whitespace(iter); @@ -388,12 +388,12 @@ fn parse_attribute_selector(content: ~[ComponentValue], namespaces: &NamespaceMa }};) let result = match iter.next() { None => AttrExists(attr), // [foo] - Some(Delim('=')) => AttrEqual(attr, get_value!()), // [foo=bar] + Some(Delim('=')) => AttrEqual(attr, intern_string(get_value!())), // [foo=bar] Some(IncludeMatch) => AttrIncludes(attr, get_value!()), // [foo~=bar] Some(DashMatch) => { let value = get_value!(); let dashing_value = value + "-"; - AttrDashMatch(attr, value, dashing_value) // [foo|=bar] + AttrDashMatch(attr, intern_string(value), intern_string(dashing_value)) // [foo|=bar] }, Some(PrefixMatch) => AttrPrefixMatch(attr, get_value!()), // [foo^=bar] Some(SubstringMatch) => AttrSubstringMatch(attr, get_value!()), // [foo*=bar] diff --git a/src/components/util/interning.rs b/src/components/util/interning.rs new file mode 100644 index 000000000000..a17caa2b48a7 --- /dev/null +++ b/src/components/util/interning.rs @@ -0,0 +1,606 @@ +#[feature(globs)]; + +extern mod extra; +use std::{vec,ptr,cast,str}; +use std::ascii::StrAsciiExt; +use std::to_bytes::Cb; +use std::cmp::Equiv; + +#[deriving(Clone)] +struct Rawptr {priv p: *BucketNode} + +struct InterningStr { + priv buckets: ~[Option<~BucketNode>], + priv lens: ~[uint], +} + +#[deriving(Clone)] +struct BucketNode { + s: ~str, + hash: u32, + next: Option<~BucketNode>, +} + +struct MutBucketNodeIterator<'self> { + priv interningStr: &'self mut InterningStr, + priv cur: Rawptr, + priv nelem: uint, +} + +#[deriving(Clone)] +pub struct IntString { + priv s: Rawptr, + priv lowercases: Rawptr, +} + +static mut interning: Option = None; + +pub fn init() { + unsafe { + match interning { + None => { + interning = Some(InterningStr::new()); + } + Some(_) => () + } + } +} + +pub fn intern_string(s: &str) -> IntString { + unsafe { + match interning { + Some(ref mut n) => { + n.intern_string(s) + } + None => fail!("Interning: init() required") + } + } +} + +pub fn to_rust_string(s: &IntString) -> ~str { + s.to_str() +} + +pub fn to_rust_string_slice<'a>(s: &'a IntString) -> &'a str { + s.to_str_slice() +} + +impl InterningStr { + fn new() -> InterningStr { + let size = 1021u; + InterningStr { + buckets: vec::from_elem(size, None), + lens: vec::from_elem(size, 0u), + } + } + + fn universal_hash(s: &str) -> u32 { + let mut hash: u32 = 0; + for c in s.byte_iter() { + hash += SEEDS_A[c] * c as u32 + SEEDS_B[c] + } + return hash % 1511; + } + + fn intern_string(&mut self, s: &str) -> IntString { + let ptr = self.intern_string_internal(s); + let lowercases_ptr = self.intern_string_internal(s.to_ascii_lower()); + IntString { + s: ptr, + lowercases: lowercases_ptr, + } + } + + fn intern_string_internal(&mut self, s: &str) -> Rawptr { + let h = InterningStr::universal_hash(s); + let i = (h as uint) % self.buckets.len(); + + match self.buckets[i] { + Some(_) => { + return self.push_back_node(s, h, i); + } + None => { + self.buckets[i] = Some(~BucketNode { + s: s.to_str(), + hash: h, + next: None, + }); + self.lens[i]=1; + + let ptr = match self.buckets[i] { + Some(ref mut node) => { + Rawptr::some(*node) + } + None => fail!("Interning: internal logic error") + }; + return ptr; + } + } + } + + #[inline] + fn mut_iter<'a>(&'a mut self, index: uint) -> MutBucketNodeIterator<'a> { + let cur = match self.buckets[index] { + Some(ref mut node) => Rawptr::some(*node), + None => Rawptr::none(), + }; + MutBucketNodeIterator { + cur: cur, + nelem: self.lens[index], + interningStr: self, + } + } + + #[inline] + fn push_back_node(&mut self, s: &str, h: u32, i: uint) -> Rawptr { + let mut itr = self.mut_iter(i); + loop { + match itr.peek() { + Some(node) => { + if (node.hash == h) && (node.s.len() == s.len()) { + if str::eq_slice(node.s, s) { + return Rawptr::some(node); + } + } + } + None => fail!("Interning: internal logic error"), + } + + if itr.has_next() { + itr.next(); + } else { + break; + } + } + itr.insert_next(s, h, i) + } +} + +impl<'self> Iterator<&'self mut BucketNode> for MutBucketNodeIterator<'self> { + #[inline] + fn next(&mut self) -> Option<&'self mut BucketNode> { + if self.nelem == 0 { + return None; + } + do self.cur.resolve().map |cur| { + self.nelem -= 1; + self.cur = match cur.next { + Some(ref mut next_node) => { + Rawptr::some(*next_node) + } + None => Rawptr::none(), + }; + cur + } + } +} + +impl<'self> MutBucketNodeIterator<'self> { + #[inline] + fn has_next(&self) -> bool { + if !self.is_last() { + return true; + } else { + return false; + } + } + + #[inline] + fn is_last(&self) -> bool { + if self.nelem == 1 { + return true; + } else { + return false; + } + } + + #[inline] + fn peek<'a>(&'a mut self) -> Option<&'a mut BucketNode> { + if self.nelem == 0 { + return None; + } else { + return self.cur.resolve().map(|node| node); + } + } + + #[inline] + fn insert_next(&mut self, s: &str, h: u32, i: uint) -> Rawptr { + match self.cur.resolve() { + Some(node) => { + node.next = Some(~BucketNode { + s: s.to_str(), + hash: h, + next: None, + }); + self.interningStr.lens[i]+=1; + + let ptr = match node.next { + Some(ref mut new_node) => { + Rawptr::some(*new_node) + } + None => fail!("Interning: internal logic error") + }; + return ptr; + } + None => fail!("Interning: internal logic error") + } + } +} + +impl Rawptr { + fn none() -> Rawptr { + Rawptr {p: ptr::null()} + } + + fn some(n: &mut BucketNode) -> Rawptr { + Rawptr {p: ptr::to_unsafe_ptr(n)} + } + + fn resolve(&mut self) -> Option<&mut BucketNode> { + if self.p.is_null() { + None + } else { + Some(unsafe {cast::transmute(self.p)}) + } + } + + fn resolve_immut(&self) -> Option<& BucketNode> { + unsafe {self.p.to_option()} + } +} + +impl Eq for IntString { + #[inline] + fn eq(&self, other: &IntString) -> bool { + self.s.p == other.s.p + } +} + +impl ToStr for IntString { + #[inline] + fn to_str(&self) -> ~str { + self.to_str_slice().to_str() + } +} + +impl IterBytes for IntString { + #[inline] + fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool { + self.to_str_slice().iter_bytes(lsb0, f) + } +} + +impl Equiv for IntString { + #[inline] + fn equiv(&self, other: &IntString) -> bool { + self.eq_ignore_ascii_case(other) + } +} + +impl<'self> IntString { + pub fn to_str_slice(&'self self) -> &'self str { + match self.s.resolve_immut() { + Some(ref node) => { + let s: &'self str = node.s; + s + } + None => fail!("Interning: internal logic error") + } + } + + #[inline] + pub fn eq_ignore_ascii_case(&self, other: &IntString) -> bool { + self.lowercases.p == other.lowercases.p + } + + #[inline] + pub fn to_ascii_lower(&self) -> &'self str { + match self.lowercases.resolve_immut() { + Some(ref node) => { + return node.s.as_slice(); + } + None => fail!("Interning: internal logic error") + } + } +} + +static SEEDS_A: &'static [u32] = &[ + 991, 261, 363, 1138, 78, 1036, 1455, 782, + 835, 1186, 1108, 391, 1503, 144, 1322, 33, + 648, 903, 429, 57, 89, 1501, 1000, 927, + 362, 1227, 1109, 1406, 40, 133, 222, 366, + 269, 18, 1450, 2, 1118, 748, 113, 98, + 517, 1065, 479, 1183, 1111, 798, 69, 113, + 1134, 969, 1159, 819, 863, 388, 616, 179, + 970, 11, 699, 188, 395, 1325, 834, 846, + 1011, 39, 434, 424, 288, 67, 307, 1285, + 1415, 1401, 1233, 1459, 635, 425, 1107, 11, + 1127, 75, 205, 522, 1003, 746, 1506, 985, + 163, 534, 559, 693, 188, 1160, 270, 1136, + 586, 890, 1276, 1065, 134, 1441, 505, 951, + 1461, 1427, 28, 759, 1013, 421, 1484, 222, + 1466, 120, 861, 823, 550, 650, 347, 1450, + 1192, 397, 1449, 871, 812, 340, 1328, 579, + 1086, 964, 395, 1267, 1195, 531, 1097, 667, + 531, 1165, 593, 481, 883, 827, 549, 646, + 671, 112, 904, 1148, 173, 627, 1217, 1211, + 142, 547, 855, 43, 249, 660, 1121, 1064, + 1227, 499, 1212, 640, 1329, 1294, 1221, 521, + 130, 710, 1287, 1007, 1411, 33, 23, 614, + 431, 530, 17, 580, 150, 267, 1447, 1016, + 946, 1234, 841, 472, 1072, 673, 281, 976, + 770, 618, 867, 858, 1397, 17, 878, 1084, + 893, 1495, 801, 388, 58, 814, 924, 745, + 197, 1390, 1454, 1125, 853, 171, 614, 1433, + 950, 1052, 758, 1034, 370, 948, 1343, 997, + 119, 492, 1350, 1049, 1305, 1068, 1021, 974, + 50, 475, 81, 1421, 1478, 1445, 728, 22, + 1134, 1455, 795, 56, 103, 1110, 1140, 1481, + 705, 1210, 925, 628, 229, 920, 691, 426, +]; + +static SEEDS_B: &'static [u32] = &[ + 1220, 248, 477, 424, 76, 791, 1058, 738, + 248, 408, 864, 878, 840, 1167, 708, 549, + 1136, 779, 515, 337, 1416, 926, 1294, 560, + 424, 382, 1276, 1062, 457, 155, 528, 389, + 180, 162, 334, 1121, 78, 151, 778, 1428, + 1260, 1130, 1236, 242, 323, 753, 1419, 1107, + 244, 413, 289, 247, 171, 558, 205, 1510, + 1365, 1023, 1108, 506, 473, 405, 271, 1141, + 1093, 1292, 875, 260, 1016, 1144, 298, 228, + 1054, 822, 1425, 694, 674, 154, 230, 538, + 925, 652, 157, 1042, 354, 572, 1498, 73, + 328, 939, 1075, 1374, 553, 1466, 1411, 915, + 1109, 51, 176, 574, 595, 1192, 1510, 855, + 1044, 1116, 1249, 941, 202, 1235, 607, 944, + 1357, 145, 948, 10, 285, 1338, 866, 711, + 575, 238, 1477, 68, 13, 949, 143, 418, + 1223, 275, 64, 142, 1206, 26, 1190, 1044, + 593, 231, 1327, 395, 1120, 1314, 1279, 478, + 962, 184, 705, 278, 134, 1277, 875, 1211, + 196, 664, 560, 1334, 1006, 1084, 96, 1103, + 394, 4, 507, 638, 199, 1005, 136, 583, + 1245, 936, 394, 719, 368, 997, 1268, 717, + 752, 822, 464, 1315, 1342, 1493, 1186, 4, + 733, 301, 539, 842, 710, 588, 539, 1216, + 635, 784, 535, 0, 124, 760, 1346, 847, + 775, 1261, 539, 251, 1260, 723, 986, 280, + 939, 85, 765, 185, 140, 744, 1030, 606, + 489, 969, 1279, 1357, 345, 655, 546, 144, + 953, 1037, 731, 536, 1375, 452, 991, 1045, + 1119, 371, 1048, 556, 984, 1399, 179, 1048, + 1055, 71, 294, 1479, 316, 1277, 1303, 1205, + 518, 1207, 44, 236, 33, 503, 849, 931, +]; + +mod seeds { + use std::rand; + use std::rand::Rng; + + fn gen_random_numbers(max: u32) { + let mut rng = rand::rng(); + if rng.gen() { + for _i in range(0, 32) { + for _j in range(0, 8) { + print!("{}, ", rng.gen::() % max); + } + print!("{}", "\n"); + } + } + } +} + +#[test] +fn interning_test() { + init(); + let s1 = intern_string("test"); + let s2 = intern_string("test"); + let s3 = intern_string("toast"); + + assert!(s1.eq(&s2)); + assert!(!s1.eq(&s3)); + assert!(str::eq_slice(s1.to_str(), s2.to_str())); + assert!(!str::eq_slice(s1.to_str(), s3.to_str())); + assert!(str::eq_slice(s1.to_str_slice(), s2.to_str_slice())); + assert!(!str::eq_slice(s1.to_str_slice(), s3.to_str_slice())); +} + +#[test] +fn smoke_test() { + let mut buckets = ~InterningStr::new(); + let s1 = buckets.intern_string("test"); + let s2 = buckets.intern_string("test"); + let s3 = buckets.intern_string("toast"); + let s4 = buckets.intern_string("TOAST"); + let s5 = buckets.intern_string("AAa"); + let s6 = buckets.intern_string("aAA"); + + assert!(s1.eq(&s2)); + assert!(!s1.eq(&s3)); + assert!(str::eq(&s1.to_str(), &s2.to_str())); + assert!(!str::eq(&s1.to_str(), &s3.to_str())); + assert!(str::eq_slice(s1.to_str_slice(), s2.to_str_slice())); + assert!(!str::eq_slice(s1.to_str_slice(), s3.to_str_slice())); + assert!(!s3.eq(&s4)); + assert!(s3.eq_ignore_ascii_case(&s4)); + assert!(!s5.eq(&s6)); + assert!(s5.eq_ignore_ascii_case(&s6)); +} + +#[cfg(test)] +mod bench { + use super::*; + use std::hashmap::HashSet; + + #[bench] + fn hashmap() { + let words = words(); + let mut hashmap = HashSet::new(); + for word in words.iter() { + hashmap.insert(word); + } + } + + #[bench] + fn interning() { + let words = words(); + let mut buckets = ~InterningStr::new(); + for word in words.iter() { + buckets.intern_string(*word); + } + } + + fn words() -> ~[~str] { + let words = ~[~"", + ~"", + ~"", + ~"div", ~"{", ~"font-size:30px;", ~"}", + ~"/*", ~"NOT_IMPLEMENTED", + ~"div:last-child", ~"{", ~"color:orange;", ~"font-size:70px;", ~"}", + ~"a:link", ~"{color:orange;", ~"font-size:70px;}", + ~"span+div", ~"{", ~"color:blue;", ~"}", + ~"span:last-child", ~"{", ~"font-style:italic;", ~"}", + ~"*/", + ~"h2", ~"span", ~"{", ~"color:red;", ~"}", + ~"div,", ~"span,", ~"p", ~"{", ~"font-family:\"Georgia\";", ~"}", + ~"div,", ~"span", ~"{", ~"font-size:40px;", ~"}", + ~"#left", ~"{", ~"text-align:left;", ~"}", + ~".center", ~"{", ~"text-align:center;", ~"}", + ~".right", ~"{", ~"text-align:right;", ~"}", + ~"div.bgorange", ~"{", ~"background-color:orange;", ~"}", + ~".bggreen{", ~"background-color:#11FF22;", ~"}", + ~".bggreen", ~"{", ~"background-color:green;}", + ~".bgblue", ~"{", ~"background-color:blue;", ~"}", + ~".bgyellow", ~"{", ~"background-color:yellow;", ~"}", + ~".red",~"{", ~"color:red;", ~"}", + ~".red.red2", ~"{", ~"font-size:40px;", ~"}", + ~".blue", ~"{", ~"color:blue;", ~"}", + ~"#gray", ~"{", ~"color:gray;", ~"}", + ~".green", ~"{", ~"color:green;", ~"}", + ~".yellow", ~"{", ~"color:yellow;", ~"}", + ~"div.white", ~"{", ~"color:white;", ~"}", + ~"span.white", ~"{", ~"color:white;", ~"font-size:20px;", ~"}", + ~"span.orange", ~"{", ~"color:orange;", ~"}", + ~"div>.orange", ~"{", ~"color:orange;", ~"font-size:30px;", ~"}", + ~".italic", ~"{", ~"font-style:italic;", ~"}", + ~"span.times", ~"{", ~"font-family:\"Times", ~"New", ~"Roman;\"", ~"}", + ~".geor", ~"{", ~"font-family:\"Georgia;\"", ~"}", + ~"#arial", ~"{", ~"font-family:\"Arial;\"", ~"}", + ~".ver", ~"{", ~"font-family:\"Verdana;\"", ~"}", + ~".under", ~"{", ~"text-decoration:underline;", ~"}", + ~".size30", ~"{", ~"font-size:30px;", ~"}", + ~".size45", ~"{", ~"font-size:45px;", ~"}", + ~".size60", ~"{", ~"font-size:60px;", ~"}", + ~".size2em", ~"{", ~"font-size:2em;", ~"}", + ~"h2", ~"div", ~".size50p", ~"{", ~"font-size:50%;", ~"}", + ~"#border_solid_5", ~"{", ~"border-style:solid;", ~"border-width:5px;", ~"}", + ~".bcolor", ~"{", ~"border-color:red;", ~"}", + ~"div.bcolor", ~"{", ~"border-style:solid;", ~"border-width:3px;", ~"font-size:20px;", ~"}", + ~"", + ~"", + ~"", + ~"", + ~"", + ~"CSS", ~"text", ~"align", ~"test", + ~"", + ~"", + ~"CSS", ~"text", ~"align", ~"test", + ~"", + ~"", + ~"CSS", ~"text", ~"align", ~"test", ~"", + ~"", + ~"
", + ~"This", + ~"", ~"is", + ~"CSS", + ~"Text", + ~"", + ~"Color", + ~"test", + ~"
", + ~"", + ~"CSS", ~"Font", ~"test", + ~"
", + ~"", + ~"underlined", + ~"text", + ~"", + ~"italic", + ~"", + ~"", + ~"Times", + ~"New", + ~"Roman", + ~"", + ~"", + ~"Verdana", + ~"", + ~"", + ~"Arial", + ~"", + ~"", + ~"Courier", ~"New", + ~"", + ~"", + ~"Georgia", + ~"", + ~"Lucida", ~"Console", + ~"", + ~"", + ~"", + ~"30px", + ~"", + ~"", + ~"45px", + ~"", + ~"", + ~"60px", + ~"", + ~"basesize-30px", + ~"", + ~"2em", + ~"", + ~"", + ~"50%", + ~"", + ~"", + ~"", + ~"
", + ~"", + ~"border", + ~"test", + ~"
", + ~"", + ~"border", + ~"green", + ~"", + ~"", + ~"border", + ~"test", + ~"abcde", + ~"

", + ~"", + ~"", + ~""]; + words + } +} diff --git a/src/components/util/tree.rs b/src/components/util/tree.rs index 73fdd14085d2..43ba6bf4a576 100644 --- a/src/components/util/tree.rs +++ b/src/components/util/tree.rs @@ -4,6 +4,8 @@ //! Helper functions for garbage collected doubly-linked trees. +use interning::IntString; + // Macros to make add_child etc. less painful to write. // Code outside this module should instead implement TreeNode // and use its default methods. @@ -322,6 +324,8 @@ pub trait TreeNode> { pub trait ElementLike { - fn get_local_name<'a>(&'a self) -> &'a str; - fn get_attr<'a>(&'a self, name: &str) -> Option<&'a str>; + fn get_local_name<'a>(&'a self) -> &'a IntString; + fn get_attr<'a>(&'a self, name: &IntString) -> Option<&'a IntString>; + fn get_id<'a>(&'a self) -> Option<&'a IntString>; + fn get_classes<'a>(&'a self) -> &'a [IntString]; } diff --git a/src/components/util/util.rc b/src/components/util/util.rc index bbcf72037406..bbce5a4ad6cf 100644 --- a/src/components/util/util.rc +++ b/src/components/util/util.rc @@ -10,6 +10,8 @@ #[feature(macro_rules)]; +#[feature(globs)]; + extern mod extra; extern mod geom; @@ -21,3 +23,4 @@ pub mod tree; pub mod url; pub mod vec; pub mod debug; +pub mod interning;