From cf4fd743ec655855daf558d73e414e9296b7d33e Mon Sep 17 00:00:00 2001
From: Chris Morgan
Date: Tue, 15 Apr 2014 15:14:24 +1000
Subject: [PATCH 1/7] Use new inner attribute syntax.

---
 bench/mod.rs  | 4 ++--
 macros/mod.rs | 6 +++---
 src/html5.rs  | 6 +++---
 test/mod.rs   | 4 ++--
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/bench/mod.rs b/bench/mod.rs
index 476fb24f..fbdcbbf5 100644
--- a/bench/mod.rs
+++ b/bench/mod.rs
@@ -2,8 +2,8 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#[crate_id="html5-external-bench"];
-#[crate_type="bin"];
+#![crate_id="html5-external-bench"]
+#![crate_type="bin"]
 
 extern crate test;
 
diff --git a/macros/mod.rs b/macros/mod.rs
index 693764be..e4bf4b9a 100644
--- a/macros/mod.rs
+++ b/macros/mod.rs
@@ -2,10 +2,10 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#[crate_id="html5-macros"];
-#[crate_type="dylib"];
+#![crate_id="html5-macros"]
+#![crate_type="dylib"]
 
-#[feature(macro_rules, macro_registrar, quote, managed_boxes)];
+#![feature(macro_rules, macro_registrar, quote, managed_boxes)]
 
 extern crate syntax;
 extern crate serialize;
diff --git a/src/html5.rs b/src/html5.rs
index 2d12ce2b..0ece407b 100644
--- a/src/html5.rs
+++ b/src/html5.rs
@@ -2,10 +2,10 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#[crate_id="github.com/kmcallister/html5"];
-#[crate_type="dylib"];
+#![crate_id="github.com/kmcallister/html5"]
+#![crate_type="dylib"]
 
-#[feature(macro_rules, phase)];
+#![feature(macro_rules, phase)]
 
 #[phase(syntax, link)]
 extern crate log;
diff --git a/test/mod.rs b/test/mod.rs
index cdd2c284..981d3769 100644
--- a/test/mod.rs
+++ b/test/mod.rs
@@ -2,8 +2,8 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#[crate_id="html5-external-test"];
-#[crate_type="bin"];
+#![crate_id="html5-external-test"]
+#![crate_type="bin"]
 
 extern crate test;
 extern crate serialize;

From d1c6e89a0f16c6e31d6fb67e61ee9c88e7ca354d Mon Sep 17 00:00:00 2001
From: Chris Morgan
Date: Tue, 15 Apr 2014 15:26:44 +1000
Subject: [PATCH 2/7] Rename crate roots to lib.rs (convention).

---
 Makefile.in               | 8 ++++----
 bench/{mod.rs => lib.rs}  | 0
 macros/{mod.rs => lib.rs} | 0
 src/{html5.rs => lib.rs}  | 0
 test/{mod.rs => lib.rs}   | 0
 5 files changed, 4 insertions(+), 4 deletions(-)
 rename bench/{mod.rs => lib.rs} (100%)
 rename macros/{mod.rs => lib.rs} (100%)
 rename src/{html5.rs => lib.rs} (100%)
 rename test/{mod.rs => lib.rs} (100%)

diff --git a/Makefile.in b/Makefile.in
index 1203eec0..8f1747f0 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -5,18 +5,18 @@ RUST_DIRS := -L . -L $(VPATH)/rust-phf/build
 RUSTC_CMD := $(RUSTC) $(RUST_DIRS) $(RUSTFLAGS)
 
-LIB_TOP_SRC := $(VPATH)/src/html5.rs
+LIB_TOP_SRC := $(VPATH)/src/lib.rs
 LIB_ALL_SRC := $(shell find $(VPATH)/src -type f -name '*.rs')
 LIB := $(shell $(RUSTC) --crate-file-name "$(LIB_TOP_SRC)")
 
-MACROS_TOP_SRC := $(VPATH)/macros/mod.rs
+MACROS_TOP_SRC := $(VPATH)/macros/lib.rs
 MACROS_ALL_SRC := $(shell find $(VPATH)/macros -type f -name '*.rs')
 MACROS := $(shell $(RUSTC) --crate-file-name "$(MACROS_TOP_SRC)")
 
-EXT_TEST_TOP_SRC := $(VPATH)/test/mod.rs
+EXT_TEST_TOP_SRC := $(VPATH)/test/lib.rs
 EXT_TEST_ALL_SRC := $(shell find $(VPATH)/test -type f -name '*.rs')
 
-EXT_BENCH_TOP_SRC := $(VPATH)/bench/mod.rs
+EXT_BENCH_TOP_SRC := $(VPATH)/bench/lib.rs
 EXT_BENCH_ALL_SRC := $(shell find $(VPATH)/bench -type f -name '*.rs')
 
 TEST_JSON_SRC := $(shell find $(VPATH)/html5lib-tests/ -type f -name '*.test')
diff --git a/bench/mod.rs b/bench/lib.rs
similarity index 100%
rename from bench/mod.rs
rename to bench/lib.rs
diff --git a/macros/mod.rs b/macros/lib.rs
similarity index 100%
rename from macros/mod.rs
rename to macros/lib.rs
diff --git a/src/html5.rs b/src/lib.rs
similarity index 100%
rename from src/html5.rs
rename to src/lib.rs
diff --git a/test/mod.rs b/test/lib.rs
similarity index 100%
rename from test/mod.rs
rename to test/lib.rs

From 33fc2c3390211ba6da449bd29eb73dc084e5b567 Mon Sep 17 00:00:00 2001
From: Chris Morgan
Date: Tue, 15 Apr 2014 15:55:11 +1000
Subject: [PATCH 3/7] Update for Result-yielding Decodable::decode().

---
 macros/named_entities.rs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/macros/named_entities.rs b/macros/named_entities.rs
index 89d52b3f..2875ab84 100644
--- a/macros/named_entities.rs
+++ b/macros/named_entities.rs
@@ -41,7 +41,10 @@ fn build_map(js: Json) -> Option<HashMap<~str, [u32, ..2]>> {
     // Add every named entity to the map.
     for (k,v) in json_map.move_iter() {
         let mut decoder = json::Decoder::new(v);
-        let CharRef { codepoints }: CharRef = Decodable::decode(&mut decoder);
+        let CharRef { codepoints }: CharRef = match Decodable::decode(&mut decoder) {
+            Ok(o) => o,
+            Err(_) => return None,
+        };
 
         assert!((codepoints.len() >= 1) && (codepoints.len() <= 2));
         let mut codepoint_pair = [0, 0];

From 6b1b88b1fbf6e9f9551402d87af3759c51b6a2db Mon Sep 17 00:00:00 2001
From: Chris Morgan
Date: Tue, 15 Apr 2014 15:55:24 +1000
Subject: [PATCH 4/7] Update most `~str` things to `StrBuf`.
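
The pattern is mechanical, and every call used here appears in the hunks
below. As a rough sketch (illustrative only, not part of the original
commit message), in the pre-1.0 dialect this series targets:

    // Owned strings become growable string buffers:
    let mut buf: StrBuf = StrBuf::from_str("foo");   // was: let mut buf = ~"foo";
    buf.push_char('!');                              // appending is unchanged
    let adopted = StrBuf::from_owned_str(~"bar");    // adopt an existing ~str

    // ...and &str methods now go through an explicit slice:
    let first = buf.as_slice().char_at(0);           // was: buf.char_at(0)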
---
 examples/tokenize-bench-example.rs |  3 ++-
 examples/tokenize-example.rs       |  5 +++--
 src/tokenizer/buffer_queue.rs      | 25 +++++++++++++------------
 src/tokenizer/char_ref/mod.rs      | 18 ++++++++++--------
 src/tokenizer/mod.rs               | 31 +++++++++++++++----------------
 src/tokenizer/tokens.rs            | 17 +++++++++--------
 src/util/str.rs                    |  6 +++---
 7 files changed, 55 insertions(+), 50 deletions(-)

diff --git a/examples/tokenize-bench-example.rs b/examples/tokenize-bench-example.rs
index 256d1973..76cff7d3 100644
--- a/examples/tokenize-bench-example.rs
+++ b/examples/tokenize-bench-example.rs
@@ -9,6 +9,7 @@ extern crate html5;
 
 use std::{io, os};
 use std::default::Default;
+use std::strbuf::StrBuf;
 
 use test::black_box;
 
@@ -30,7 +31,7 @@ fn main() {
     path.push(os::args()[1]);
     let mut file = io::File::open(&path).ok().expect("can't open file");
-    let file_input = file.read_to_str().ok().expect("can't read file");
+    let file_input = StrBuf::from_owned_str(file.read_to_str().ok().expect("can't read file"));
 
     let mut sink = Sink;
     let mut tok = Tokenizer::new(&mut sink, Default::default());
diff --git a/examples/tokenize-example.rs b/examples/tokenize-example.rs
index 02a27fe1..4601bbc0 100644
--- a/examples/tokenize-example.rs
+++ b/examples/tokenize-example.rs
@@ -7,6 +7,7 @@ extern crate html5;
 
 use std::io;
 use std::char;
 use std::default::Default;
+use std::strbuf::StrBuf;
 
 use html5::tokenizer::{TokenSink, Token, Tokenizer, TokenizerOpts, ParseError};
 use html5::tokenizer::{CharacterToken, MultiCharacterToken, TagToken, StartTag, EndTag};
@@ -38,7 +39,7 @@ impl TokenSink for TokenPrinter {
                 self.do_char(c);
             }
             MultiCharacterToken(b) => {
-                for c in b.chars() {
+                for c in b.as_slice().chars() {
                     self.do_char(c);
                 }
             }
@@ -78,7 +79,7 @@ fn main() {
             profile: true,
             .. Default::default()
         });
-        tok.feed(io::stdin().read_to_str().unwrap());
+        tok.feed(StrBuf::from_owned_str(io::stdin().read_to_str().unwrap()));
         tok.end();
     }
     sink.is_char(false);
diff --git a/src/tokenizer/buffer_queue.rs b/src/tokenizer/buffer_queue.rs
index 8e8a2735..8dd4c7a0 100644
--- a/src/tokenizer/buffer_queue.rs
+++ b/src/tokenizer/buffer_queue.rs
@@ -3,6 +3,7 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use std::str::CharRange;
+use std::strbuf::StrBuf;
 
 use collections::deque::Deque;
 use collections::dlist::DList;
 
@@ -10,7 +11,7 @@ struct Buffer {
     /// Byte position within the buffer.
     pos: uint,
     /// The buffer.
-    buf: ~str,
+    buf: StrBuf,
 }
 
 /// Either a single character or a run of "data" characters: those which
@@ -20,7 +21,7 @@
 /// normally.
 #[deriving(Eq, TotalEq, Show)]
 pub enum DataRunOrChar {
-    DataRun(~str),
+    DataRun(StrBuf),
     OneChar(char),
 }
 
@@ -57,7 +58,7 @@ impl BufferQueue {
     }
 
     /// Add a buffer to the beginning of the queue.
-    pub fn push_front(&mut self, buf: ~str) {
+    pub fn push_front(&mut self, buf: StrBuf) {
         if buf.len() == 0 {
             return;
         }
@@ -71,7 +72,7 @@
     /// Add a buffer to the end of the queue.
     /// 'pos' can be non-zero to remove that many characters
     /// from the beginning.
-    pub fn push_back(&mut self, buf: ~str, pos: uint) {
+    pub fn push_back(&mut self, buf: StrBuf, pos: uint) {
         if pos >= buf.len() {
             return;
         }
@@ -88,7 +89,7 @@
     }
 
     /// Get multiple characters, if that many are available.
-    pub fn pop_front(&mut self, n: uint) -> Option<~str> {
+    pub fn pop_front(&mut self, n: uint) -> Option<StrBuf> {
         if !self.has(n) {
             return None;
         }
@@ -99,7 +100,7 @@
     /// Look at the next available character, if any.
     pub fn peek(&mut self) -> Option<char> {
         match self.buffers.front() {
-            Some(&Buffer { pos, ref buf }) => Some(buf.char_at(pos)),
+            Some(&Buffer { pos, ref buf }) => Some(buf.as_slice().char_at(pos)),
             None => None,
         }
     }
@@ -109,17 +110,17 @@
     pub fn pop_data(&mut self) -> Option<DataRunOrChar> {
         let (result, now_empty) = match self.buffers.front_mut() {
             Some(&Buffer { ref mut pos, ref buf }) => {
-                let n = data_span(buf.slice_from(*pos));
+                let n = data_span(buf.as_slice().slice_from(*pos));
 
                 // If we only have one character then it's cheaper not to allocate.
                 if n > 1 {
                     let new_pos = *pos + n;
-                    let out = buf.slice(*pos, new_pos).to_owned();
+                    let out = StrBuf::from_str(buf.as_slice().slice(*pos, new_pos));
                     *pos = new_pos;
                     self.available -= n;
                     (Some(DataRun(out)), new_pos >= buf.len())
                 } else {
-                    let CharRange { ch, next } = buf.char_range_at(*pos);
+                    let CharRange { ch, next } = buf.as_slice().char_range_at(*pos);
                     *pos = next;
                     self.available -= 1;
                     (Some(OneChar(ch)), next >= buf.len())
@@ -136,7 +137,7 @@
     }
 
     fn account_new(&mut self, buf: &str) {
-        // FIXME: We could pass through length from the initial [u8] -> ~str
+        // FIXME: We could pass through length from the initial [u8] -> StrBuf
         // conversion, which already must re-encode or at least scan for UTF-8
         // validity.
         self.available += buf.char_len();
@@ -153,7 +154,7 @@
         let (result, now_empty) = match self.buffers.front_mut() {
             None => (None, false),
             Some(&Buffer { ref mut pos, ref buf }) => {
-                let CharRange { ch, next } = buf.char_range_at(*pos);
+                let CharRange { ch, next } = buf.as_slice().char_range_at(*pos);
                 *pos = next;
                 self.available -= 1;
                 (Some(ch), next >= buf.len())
@@ -239,7 +240,7 @@ fn can_push_truncated() {
 
 #[test]
 fn data_span_test() {
-    fn pad(s: &mut ~str, n: uint) {
+    fn pad(s: &mut StrBuf, n: uint) {
         for _ in range(0, n) {
             s.push_char('x');
         }
diff --git a/src/tokenizer/char_ref/mod.rs b/src/tokenizer/char_ref/mod.rs
index 5b1a7a95..612db975 100644
--- a/src/tokenizer/char_ref/mod.rs
+++ b/src/tokenizer/char_ref/mod.rs
@@ -6,6 +6,7 @@ use super::{Tokenizer, TokenSink};
 use util::str::{is_ascii_alnum, empty_str};
 
 use std::char::{to_digit, from_u32};
+use std::strbuf::StrBuf;
 
 mod data;
 
@@ -42,7 +43,7 @@ pub struct CharRefTokenizer {
     priv seen_digit: bool,
     priv hex_marker: Option<char>,
 
-    priv name_buf_opt: Option<~str>,
+    priv name_buf_opt: Option<StrBuf>,
     priv name_match: Option<&'static [u32, ..2]>,
     priv name_len: uint,
 }
@@ -71,7 +72,7 @@ impl CharRefTokenizer {
         self.result.expect("get_result called before done")
     }
 
-    fn name_buf<'t>(&'t mut self) -> &'t mut ~str {
+    fn name_buf<'t>(&'t mut self) -> &'t mut StrBuf {
         self.name_buf_opt.as_mut()
             .expect("name_buf missing in named character reference")
     }
@@ -182,7 +183,7 @@ impl<'sink, Sink: TokenSink> CharRefTokenizer {
     }
 
     fn unconsume_numeric(&mut self, tokenizer: &mut Tokenizer<'sink, Sink>) -> Status {
-        let mut unconsume = ~"#";
+        let mut unconsume = StrBuf::from_str("#");
         match self.hex_marker {
             Some(c) => unconsume.push_char(c),
             None => (),
@@ -202,7 +203,7 @@
             n if (n > 0x10FFFF) || self.num_too_big => ('\ufffd', true),
             0x00 | 0xD800..0xDFFF => ('\ufffd', true),
 
-            0x80..0x9F => match data::c1_replacements[self.num - 0x80] {
+            0x80..0x9F => match data::c1_replacements[(self.num - 0x80) as uint] {
                 Some(c) => (c, true),
                 None => (conv(self.num), true),
             },
@@ -287,14 +288,14 @@
             // ¬it => can't continue match
             assert!(self.name_len > 0);
-            let last_matched = self.name_buf().char_at(self.name_len-1);
+            let last_matched = self.name_buf().as_slice().char_at(self.name_len-1);
 
             // There might not be a next character after the match, if
             // we had a full match and then hit EOF.
             let next_after = if self.name_len == self.name_buf().len() {
                 None
             } else {
-                Some(self.name_buf().char_at(self.name_len))
+                Some(self.name_buf().as_slice().char_at(self.name_len))
             };
 
             // "If the character reference is being consumed as part of an
@@ -324,7 +325,8 @@
                 self.unconsume_name(tokenizer);
                 self.finish_none()
             } else {
-                tokenizer.unconsume(self.name_buf().slice_from(self.name_len).to_owned());
+                tokenizer.unconsume(StrBuf::from_str(
+                    self.name_buf().as_slice().slice_from(self.name_len)));
                 self.result = Some(CharRef {
                     chars: [from_u32(c1).unwrap(), from_u32(c2).unwrap()],
                     num_chars: if c2 == 0 { 1 } else { 2 },
@@ -368,7 +370,7 @@
             }
 
             Octothorpe => {
-                tokenizer.unconsume(~"#");
+                tokenizer.unconsume(StrBuf::from_str("#"));
                 tokenizer.emit_error(~"EOF after '#' in character reference");
                 self.finish_none();
             }
diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs
index 56e0fe0d..614bd293 100644
--- a/src/tokenizer/mod.rs
+++ b/src/tokenizer/mod.rs
@@ -18,7 +18,6 @@
 use self::buffer_queue::{BufferQueue, DataRunOrChar, DataRun, OneChar};
 use util::str::{lower_ascii, lower_ascii_letter, empty_str};
 
-use std::str;
 use std::ascii::StrAsciiExt;
 use std::mem::replace;
 use std::iter::AdditiveIterator;
@@ -37,10 +36,10 @@
 pub trait TokenSink {
     fn process_token(&mut self, token: Token);
 }
 
-fn option_push_char(opt_str: &mut Option<~str>, c: char) {
+fn option_push_char(opt_str: &mut Option<StrBuf>, c: char) {
     match *opt_str {
         Some(ref mut s) => s.push_char(c),
-        None => *opt_str = Some(str::from_char(c)),
+        None => *opt_str = Some(StrBuf::from_char(1, c)),
     }
 }
@@ -65,7 +64,7 @@
 
     /// Last start tag. Only the test runner should use a
     /// non-None value!
-    last_start_tag_name: Option<~str>,
+    last_start_tag_name: Option<StrBuf>,
 }
 
 impl Default for TokenizerOpts {
@@ -129,16 +128,16 @@ pub struct Tokenizer<'sink, Sink> {
     priv current_attr: Attribute,
 
     /// Current comment.
-    priv current_comment: ~str,
+    priv current_comment: StrBuf,
 
     /// Current doctype token.
     priv current_doctype: Doctype,
 
     /// Last start tag name, for use in checking "appropriate end tag".
-    priv last_start_tag_name: Option<~str>,
+    priv last_start_tag_name: Option<StrBuf>,
 
     /// The "temporary buffer" mentioned in the spec.
-    priv temp_buf: ~str,
+    priv temp_buf: StrBuf,
 
     /// Record of how many ns we spent in each state, if profiling is enabled.
     priv state_profile: HashMap<states::State, u64>,
 }
 
@@ -171,12 +170,12 @@ impl<'sink, Sink: TokenSink> Tokenizer<'sink, Sink> {
         }
     }
 
-    pub fn feed(&mut self, input: ~str) {
+    pub fn feed(&mut self, input: StrBuf) {
         if input.len() == 0 {
             return;
         }
 
-        let pos = if self.discard_bom && input.char_at(0) == '\ufeff' {
+        let pos = if self.discard_bom && input.as_slice().char_at(0) == '\ufeff' {
             self.discard_bom = false;
             3  // length of BOM in UTF-8
         } else {
@@ -309,7 +308,7 @@
     fn emit_char(&mut self, c: char) {
         self.sink.process_token(CharacterToken(c));
     }
 
-    fn emit_chars(&mut self, b: ~str) {
+    fn emit_chars(&mut self, b: StrBuf) {
         self.sink.process_token(MultiCharacterToken(b));
     }
@@ -408,7 +407,7 @@
             replace(&mut self.current_doctype, Doctype::new())));
     }
 
-    fn doctype_id<'a>(&'a mut self, kind: DoctypeIdKind) -> &'a mut Option<~str> {
+    fn doctype_id<'a>(&'a mut self, kind: DoctypeIdKind) -> &'a mut Option<StrBuf> {
         match kind {
             Public => &mut self.current_doctype.public_id,
             System => &mut self.current_doctype.system_id,
@@ -446,7 +445,7 @@
         assert!(c.is_some());
     }
 
-    fn unconsume(&mut self, buf: ~str) {
+    fn unconsume(&mut self, buf: StrBuf) {
         self.input_buffers.push_front(buf);
     }
@@ -1023,7 +1022,7 @@
         }
 
         for i in range(0, num_chars) {
-            let c = chars[i];
+            let c = chars[i as uint];
             match self.state {
                 states::Data | states::RawData(states::Rcdata)
                     => go!(emit c),
@@ -1144,21 +1143,21 @@
 
 #[test]
 fn push_to_None_gives_singleton() {
-    let mut s: Option<~str> = None;
+    let mut s: Option<StrBuf> = None;
     option_push_char(&mut s, 'x');
     assert_eq!(s, Some(~"x"));
 }
 
 #[test]
 fn push_to_empty_appends() {
-    let mut s: Option<~str> = Some(~"");
+    let mut s: Option<StrBuf> = Some(~"");
     option_push_char(&mut s, 'x');
     assert_eq!(s, Some(~"x"));
 }
 
 #[test]
 fn push_to_nonempty_appends() {
-    let mut s: Option<~str> = Some(~"y");
+    let mut s: Option<StrBuf> = Some(~"y");
     option_push_char(&mut s, 'x');
     assert_eq!(s, Some(~"yx"));
 }
diff --git a/src/tokenizer/tokens.rs b/src/tokenizer/tokens.rs
index 21148e9d..2304ca5a 100644
--- a/src/tokenizer/tokens.rs
+++ b/src/tokenizer/tokens.rs
@@ -2,14 +2,15 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+use std::strbuf::StrBuf;
 use util::str::empty_str;
 
 // FIXME: already exists in Servo DOM
 #[deriving(Eq, TotalEq, Clone)]
 pub struct Doctype {
-    name: Option<~str>,
-    public_id: Option<~str>,
-    system_id: Option<~str>,
+    name: Option<StrBuf>,
+    public_id: Option<StrBuf>,
+    system_id: Option<StrBuf>,
     force_quirks: bool,
 }
@@ -26,8 +27,8 @@ impl Doctype {
 
 #[deriving(Eq, TotalEq, Clone)]
 pub struct Attribute {
-    name: ~str,
-    value: ~str,
+    name: StrBuf,
+    value: StrBuf,
 }
 
 impl Attribute {
@@ -53,7 +54,7 @@ pub enum TagKind {
 #[deriving(Eq, TotalEq, Clone)]
 pub struct Tag {
     kind: TagKind,
-    name: ~str,
+    name: StrBuf,
     self_closing: bool,
     attrs: Vec<Attribute>,
 }
@@ -73,9 +74,9 @@ impl Tag {
 pub enum Token {
     DoctypeToken(Doctype),
     TagToken(Tag),
-    CommentToken(~str),
+    CommentToken(StrBuf),
     CharacterToken(char),
-    MultiCharacterToken(~str),
+    MultiCharacterToken(StrBuf),
     EOFToken,
     ParseError(~str),
 }
diff --git a/src/util/str.rs b/src/util/str.rs
index 6e83fac6..0164cff0 100644
--- a/src/util/str.rs
+++ b/src/util/str.rs
@@ -2,7 +2,7 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use std::str;
+use std::strbuf::StrBuf;
 
 /// If `c` is an ASCII letter, return the corresponding lowercase
 /// letter, otherwise None.
@@ -23,8 +23,8 @@ pub fn is_ascii_alnum(c: char) -> bool {
 }
 
 /// Allocate an empty string with a small non-zero capacity.
-pub fn empty_str() -> ~str {
-    str::with_capacity(4)
+pub fn empty_str() -> StrBuf {
+    StrBuf::with_capacity(4)
 }
 
 test_eq!(lower_letter_a_is_a, lower_ascii_letter('a'), Some('a'))

From a838fc6a4a6238c055db6322bbc16a2d965d662b Mon Sep 17 00:00:00 2001
From: Chris Morgan
Date: Tue, 15 Apr 2014 16:03:14 +1000
Subject: [PATCH 5/7] Update for struct publicity swap.

(Used to be public by default, private when marked explicitly; now it's
private by default, public when marked explicitly.)
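
As a schematic sketch (illustrative only, not part of the original
commit message), using a struct shaped like the ones touched below:

    // Old rules: fields of a pub struct were public unless marked.
    pub struct Attribute {
        name: StrBuf,         // public by default
        priv value: StrBuf,   // explicitly private
    }

    // New rules: fields are private unless marked.
    pub struct Attribute {
        pub name: StrBuf,     // must now be marked to stay public
        value: StrBuf,        // private by default
    }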
---
 src/tokenizer/buffer_queue.rs |  8 ++++----
 src/tokenizer/char_ref/mod.rs | 28 ++++++++++++++--------------
 src/tokenizer/mod.rs          | 46 +++++++++++++++++++++---------------------
 src/tokenizer/tokens.rs       | 20 ++++++++++----------
 4 files changed, 51 insertions(+), 51 deletions(-)

diff --git a/src/tokenizer/buffer_queue.rs b/src/tokenizer/buffer_queue.rs
index 8dd4c7a0..64f0147d 100644
--- a/src/tokenizer/buffer_queue.rs
+++ b/src/tokenizer/buffer_queue.rs
@@ -9,9 +9,9 @@ use collections::dlist::DList;
 
 struct Buffer {
     /// Byte position within the buffer.
-    pos: uint,
+    pub pos: uint,
     /// The buffer.
-    buf: StrBuf,
+    pub buf: StrBuf,
 }
 
 /// Either a single character or a run of "data" characters: those which
@@ -42,10 +42,10 @@ fn data_span(s: &str) -> uint {
 /// consuming characters.
 pub struct BufferQueue {
     /// Buffers to process.
-    priv buffers: DList<Buffer>,
+    buffers: DList<Buffer>,
 
     /// Number of available characters.
-    priv available: uint,
+    available: uint,
 }
 
 impl BufferQueue {
diff --git a/src/tokenizer/char_ref/mod.rs b/src/tokenizer/char_ref/mod.rs
index 612db975..753b59b4 100644
--- a/src/tokenizer/char_ref/mod.rs
+++ b/src/tokenizer/char_ref/mod.rs
@@ -12,10 +12,10 @@
 
 pub struct CharRef {
     /// The resulting character(s)
-    chars: [char, ..2],
+    pub chars: [char, ..2],
 
     /// How many slots in `chars` are valid?
-    num_chars: u8,
+    pub num_chars: u8,
 }
 
 pub enum Status {
@@ -34,18 +34,18 @@ enum State {
 }
 
 pub struct CharRefTokenizer {
-    priv state: State,
-    priv addnl_allowed: Option<char>,
-    priv result: Option<CharRef>,
-
-    priv num: u32,
-    priv num_too_big: bool,
-    priv seen_digit: bool,
-    priv hex_marker: Option<char>,
-
-    priv name_buf_opt: Option<StrBuf>,
-    priv name_match: Option<&'static [u32, ..2]>,
-    priv name_len: uint,
+    state: State,
+    addnl_allowed: Option<char>,
+    result: Option<CharRef>,
+
+    num: u32,
+    num_too_big: bool,
+    seen_digit: bool,
+    hex_marker: Option<char>,
+
+    name_buf_opt: Option<StrBuf>,
+    name_match: Option<&'static [u32, ..2]>,
+    name_len: uint,
 }
 
 impl CharRefTokenizer {
diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs
index 614bd293..aecea561 100644
--- a/src/tokenizer/mod.rs
+++ b/src/tokenizer/mod.rs
@@ -48,23 +48,23 @@ fn option_push_char(opt_str: &mut Option<StrBuf>, c: char) {
 pub struct TokenizerOpts {
     /// Report all parse errors described in the spec, at some
     /// performance penalty? Default: false
-    exact_errors: bool,
+    pub exact_errors: bool,
 
     /// Discard a U+FEFF BYTE ORDER MARK if we see one at the beginning
     /// of the stream? Default: true
-    discard_bom: bool,
+    pub discard_bom: bool,
 
     /// Keep a record of how long we spent in each state? Printed
     /// when end() is called. Default: false
-    profile: bool,
+    pub profile: bool,
 
     /// Initial state override. Only the test runner should use
     /// a non-None value!
-    initial_state: Option<states::State>,
+    pub initial_state: Option<states::State>,
 
     /// Last start tag. Only the test runner should use a
     /// non-None value!
-    last_start_tag_name: Option<StrBuf>,
+    pub last_start_tag_name: Option<StrBuf>,
 }
 
 impl Default for TokenizerOpts {
@@ -81,66 +81,66 @@ impl Default for TokenizerOpts {
 
 pub struct Tokenizer<'sink, Sink> {
     /// Options controlling the behavior of the tokenizer.
-    priv opts: TokenizerOpts,
+    opts: TokenizerOpts,
 
     /// Destination for tokens we emit.
-    priv sink: &'sink mut Sink,
+    sink: &'sink mut Sink,
 
     /// The abstract machine state as described in the spec.
-    priv state: states::State,
+    state: states::State,
 
     /// Input ready to be tokenized.
-    priv input_buffers: BufferQueue,
+    input_buffers: BufferQueue,
 
     /// If Some(n), the abstract machine needs n available
     /// characters to continue.
-    priv wait_for: Option<uint>,
+    wait_for: Option<uint>,
 
     /// Are we at the end of the file, once buffers have been processed
     /// completely? This affects whether we will wait for lookahead or not.
-    priv at_eof: bool,
+    at_eof: bool,
 
     /// Tokenizer for character references, if we're tokenizing
     /// one at the moment.
-    priv char_ref_tokenizer: Option<~CharRefTokenizer>,
+    char_ref_tokenizer: Option<~CharRefTokenizer>,
 
     /// Current input character. Just consumed, may reconsume.
-    priv current_char: char,
+    current_char: char,
 
     /// Should we reconsume the current input character?
-    priv reconsume: bool,
+    reconsume: bool,
 
     /// Did we just consume \r, translating it to \n? In that case we need
     /// to ignore the next character if it's \n.
-    priv ignore_lf: bool,
+    ignore_lf: bool,
 
     /// Discard a U+FEFF BYTE ORDER MARK if we see one? Only done at the
     /// beginning of the stream.
-    priv discard_bom: bool,
+    discard_bom: bool,
 
     // FIXME: The state machine guarantees the tag exists when
     // we need it, so we could eliminate the Option overhead.
     // Leaving it as Option for now, to find bugs.
     /// Current tag.
-    priv current_tag: Option<Tag>,
+    current_tag: Option<Tag>,
 
     /// Current attribute.
-    priv current_attr: Attribute,
+    current_attr: Attribute,
 
     /// Current comment.
-    priv current_comment: StrBuf,
+    current_comment: StrBuf,
 
     /// Current doctype token.
-    priv current_doctype: Doctype,
+    current_doctype: Doctype,
 
     /// Last start tag name, for use in checking "appropriate end tag".
-    priv last_start_tag_name: Option<StrBuf>,
+    last_start_tag_name: Option<StrBuf>,
 
     /// The "temporary buffer" mentioned in the spec.
-    priv temp_buf: StrBuf,
+    temp_buf: StrBuf,
 
     /// Record of how many ns we spent in each state, if profiling is enabled.
-    priv state_profile: HashMap<states::State, u64>,
+    state_profile: HashMap<states::State, u64>,
 }
 
 impl<'sink, Sink: TokenSink> Tokenizer<'sink, Sink> {
diff --git a/src/tokenizer/tokens.rs b/src/tokenizer/tokens.rs
index 2304ca5a..082da0b4 100644
--- a/src/tokenizer/tokens.rs
+++ b/src/tokenizer/tokens.rs
@@ -8,10 +8,10 @@ use util::str::empty_str;
 
 // FIXME: already exists in Servo DOM
 #[deriving(Eq, TotalEq, Clone)]
 pub struct Doctype {
-    name: Option<StrBuf>,
-    public_id: Option<StrBuf>,
-    system_id: Option<StrBuf>,
-    force_quirks: bool,
+    pub name: Option<StrBuf>,
+    pub public_id: Option<StrBuf>,
+    pub system_id: Option<StrBuf>,
+    pub force_quirks: bool,
 }
 
 impl Doctype {
@@ -27,8 +27,8 @@
 
 #[deriving(Eq, TotalEq, Clone)]
 pub struct Attribute {
-    name: StrBuf,
-    value: StrBuf,
+    pub name: StrBuf,
+    pub value: StrBuf,
 }
 
 impl Attribute {
@@ -53,10 +53,10 @@ pub enum TagKind {
 #[deriving(Eq, TotalEq, Clone)]
 pub struct Tag {
-    kind: TagKind,
-    name: StrBuf,
-    self_closing: bool,
-    attrs: Vec<Attribute>,
+    pub kind: TagKind,
+    pub name: StrBuf,
+    pub self_closing: bool,
+    pub attrs: Vec<Attribute>,
 }
 
 impl Tag {

From ef1bb3dcf37911b7471ea5f508441b83c1ef6a8d Mon Sep 17 00:00:00 2001
From: Chris Morgan
Date: Tue, 15 Apr 2014 16:28:12 +1000
Subject: [PATCH 6/7] Update rust-phf.

---
 rust-phf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rust-phf b/rust-phf
index 06211e13..52a1cc62 160000
--- a/rust-phf
+++ b/rust-phf
@@ -1 +1 @@
-Subproject commit 06211e1339eeb1ca357c1f2055869d559fd218da
+Subproject commit 52a1cc62318d4fd53011ab9843348e142fae6c8a

From 46dac0473c4572219fd97a0c7a49baa3cdd073cb Mon Sep 17 00:00:00 2001
From: Chris Morgan
Date: Tue, 15 Apr 2014 16:28:43 +1000
Subject: [PATCH 7/7] Switch to sfackler/rust-phf (for now).

---
 .gitmodules | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitmodules b/.gitmodules
index 5104d42a..1cab514f 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,6 +1,6 @@
 [submodule "rust-phf"]
 	path = rust-phf
-	url = https://github.com/kmcallister/rust-phf
+	url = https://github.com/sfackler/rust-phf
 [submodule "html5lib-tests"]
 	path = html5lib-tests
 	url = https://github.com/html5lib/html5lib-tests
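
A usage hint for patches 6 and 7 (not part of the series itself):
changing a submodule's URL in .gitmodules does not retarget an existing
checkout on its own. Assuming a standard clone, the new URL has to be
synced into .git/config before updating the submodule:

    git submodule sync rust-phf
    git submodule update --init rust-phf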