From cf4fd743ec655855daf558d73e414e9296b7d33e Mon Sep 17 00:00:00 2001
From: Chris Morgan
Date: Tue, 15 Apr 2014 15:14:24 +1000
Subject: [PATCH 1/7] Use new inner attribute syntax.

---
 bench/mod.rs  | 4 ++--
 macros/mod.rs | 6 +++---
 src/html5.rs  | 6 +++---
 test/mod.rs   | 4 ++--
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/bench/mod.rs b/bench/mod.rs
index 476fb24f..fbdcbbf5 100644
--- a/bench/mod.rs
+++ b/bench/mod.rs
@@ -2,8 +2,8 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#[crate_id="html5-external-bench"];
-#[crate_type="bin"];
+#![crate_id="html5-external-bench"]
+#![crate_type="bin"]
 
 extern crate test;
 
diff --git a/macros/mod.rs b/macros/mod.rs
index 693764be..e4bf4b9a 100644
--- a/macros/mod.rs
+++ b/macros/mod.rs
@@ -2,10 +2,10 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#[crate_id="html5-macros"];
-#[crate_type="dylib"];
+#![crate_id="html5-macros"]
+#![crate_type="dylib"]
 
-#[feature(macro_rules, macro_registrar, quote, managed_boxes)];
+#![feature(macro_rules, macro_registrar, quote, managed_boxes)]
 
 extern crate syntax;
 extern crate serialize;
diff --git a/src/html5.rs b/src/html5.rs
index 2d12ce2b..0ece407b 100644
--- a/src/html5.rs
+++ b/src/html5.rs
@@ -2,10 +2,10 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#[crate_id="github.com/kmcallister/html5"];
-#[crate_type="dylib"];
+#![crate_id="github.com/kmcallister/html5"]
+#![crate_type="dylib"]
 
-#[feature(macro_rules, phase)];
+#![feature(macro_rules, phase)]
 
 #[phase(syntax, link)]
 extern crate log;
diff --git a/test/mod.rs b/test/mod.rs
index cdd2c284..981d3769 100644
--- a/test/mod.rs
+++ b/test/mod.rs
@@ -2,8 +2,8 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#[crate_id="html5-external-test"];
-#[crate_type="bin"];
+#![crate_id="html5-external-test"]
+#![crate_type="bin"]
 
 extern crate test;
 extern crate serialize;

From d1c6e89a0f16c6e31d6fb67e61ee9c88e7ca354d Mon Sep 17 00:00:00 2001
From: Chris Morgan
Date: Tue, 15 Apr 2014 15:26:44 +1000
Subject: [PATCH 2/7] Rename crate roots to lib.rs (convention).

---
 Makefile.in               | 8 ++++----
 bench/{mod.rs => lib.rs}  | 0
 macros/{mod.rs => lib.rs} | 0
 src/{html5.rs => lib.rs}  | 0
 test/{mod.rs => lib.rs}   | 0
 5 files changed, 4 insertions(+), 4 deletions(-)
 rename bench/{mod.rs => lib.rs} (100%)
 rename macros/{mod.rs => lib.rs} (100%)
 rename src/{html5.rs => lib.rs} (100%)
 rename test/{mod.rs => lib.rs} (100%)

diff --git a/Makefile.in b/Makefile.in
index 1203eec0..8f1747f0 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -5,18 +5,18 @@ RUST_DIRS := -L . -L $(VPATH)/rust-phf/build
 RUSTC_CMD := $(RUSTC) $(RUST_DIRS) $(RUSTFLAGS)
 
-LIB_TOP_SRC := $(VPATH)/src/html5.rs
+LIB_TOP_SRC := $(VPATH)/src/lib.rs
 LIB_ALL_SRC := $(shell find $(VPATH)/src -type f -name '*.rs')
 LIB := $(shell $(RUSTC) --crate-file-name "$(LIB_TOP_SRC)")
 
-MACROS_TOP_SRC := $(VPATH)/macros/mod.rs
+MACROS_TOP_SRC := $(VPATH)/macros/lib.rs
 MACROS_ALL_SRC := $(shell find $(VPATH)/macros -type f -name '*.rs')
 MACROS := $(shell $(RUSTC) --crate-file-name "$(MACROS_TOP_SRC)")
 
-EXT_TEST_TOP_SRC := $(VPATH)/test/mod.rs
+EXT_TEST_TOP_SRC := $(VPATH)/test/lib.rs
 EXT_TEST_ALL_SRC := $(shell find $(VPATH)/test -type f -name '*.rs')
 
-EXT_BENCH_TOP_SRC := $(VPATH)/bench/mod.rs
+EXT_BENCH_TOP_SRC := $(VPATH)/bench/lib.rs
 EXT_BENCH_ALL_SRC := $(shell find $(VPATH)/bench -type f -name '*.rs')
 
 TEST_JSON_SRC := $(shell find $(VPATH)/html5lib-tests/ -type f -name '*.test')
diff --git a/bench/mod.rs b/bench/lib.rs
similarity index 100%
rename from bench/mod.rs
rename to bench/lib.rs
diff --git a/macros/mod.rs b/macros/lib.rs
similarity index 100%
rename from macros/mod.rs
rename to macros/lib.rs
diff --git a/src/html5.rs b/src/lib.rs
similarity index 100%
rename from src/html5.rs
rename to src/lib.rs
diff --git a/test/mod.rs b/test/lib.rs
similarity index 100%
rename from test/mod.rs
rename to test/lib.rs

From 33fc2c3390211ba6da449bd29eb73dc084e5b567 Mon Sep 17 00:00:00 2001
From: Chris Morgan
Date: Tue, 15 Apr 2014 15:55:11 +1000
Subject: [PATCH 3/7] Update for Result-yielding Decodable::decode().

---
 macros/named_entities.rs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/macros/named_entities.rs b/macros/named_entities.rs
index 89d52b3f..2875ab84 100644
--- a/macros/named_entities.rs
+++ b/macros/named_entities.rs
@@ -41,7 +41,10 @@ fn build_map(js: Json) -> Option<HashMap<~str, [u32, ..2]>> {
     // Add every named entity to the map.
     for (k,v) in json_map.move_iter() {
         let mut decoder = json::Decoder::new(v);
-        let CharRef { codepoints }: CharRef = Decodable::decode(&mut decoder);
+        let CharRef { codepoints }: CharRef = match Decodable::decode(&mut decoder) {
+            Ok(o) => o,
+            Err(_) => return None,
+        };
 
         assert!((codepoints.len() >= 1) && (codepoints.len() <= 2));
         let mut codepoint_pair = [0, 0];

From 6b1b88b1fbf6e9f9551402d87af3759c51b6a2db Mon Sep 17 00:00:00 2001
From: Chris Morgan
Date: Tue, 15 Apr 2014 15:55:24 +1000
Subject: [PATCH 4/7] Update most `~str` things to `StrBuf`.
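
The pattern is mechanical, and every call used here appears in the hunks
below. As a rough sketch (illustrative only, not part of the original
commit message), in the pre-1.0 dialect this series targets:

    // Owned strings become growable string buffers:
    let mut buf: StrBuf = StrBuf::from_str("foo");   // was: let mut buf = ~"foo";
    buf.push_char('!');                              // appending is unchanged
    let adopted = StrBuf::from_owned_str(~"bar");    // adopt an existing ~str

    // ...and &str methods now go through an explicit slice:
    let first = buf.as_slice().char_at(0);           // was: buf.char_at(0)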
---
 examples/tokenize-bench-example.rs |  3 ++-
 examples/tokenize-example.rs       |  5 +++--
 src/tokenizer/buffer_queue.rs      | 25 +++++++++++++------------
 src/tokenizer/char_ref/mod.rs      | 18 ++++++++++--------
 src/tokenizer/mod.rs               | 31 +++++++++++++++----------------
 src/tokenizer/tokens.rs            | 17 +++++++++--------
 src/util/str.rs                    |  6 +++---
 7 files changed, 55 insertions(+), 50 deletions(-)

diff --git a/examples/tokenize-bench-example.rs b/examples/tokenize-bench-example.rs
index 256d1973..76cff7d3 100644
--- a/examples/tokenize-bench-example.rs
+++ b/examples/tokenize-bench-example.rs
@@ -9,6 +9,7 @@ extern crate html5;
 
 use std::{io, os};
 use std::default::Default;
+use std::strbuf::StrBuf;
 
 use test::black_box;
 
@@ -30,7 +31,7 @@ fn main() {
     path.push(os::args()[1]);
     let mut file = io::File::open(&path).ok().expect("can't open file");
-    let file_input = file.read_to_str().ok().expect("can't read file");
+    let file_input = StrBuf::from_owned_str(file.read_to_str().ok().expect("can't read file"));
 
     let mut sink = Sink;
     let mut tok = Tokenizer::new(&mut sink, Default::default());
diff --git a/examples/tokenize-example.rs b/examples/tokenize-example.rs
index 02a27fe1..4601bbc0 100644
--- a/examples/tokenize-example.rs
+++ b/examples/tokenize-example.rs
@@ -7,6 +7,7 @@ extern crate html5;
 
 use std::io;
 use std::char;
 use std::default::Default;
+use std::strbuf::StrBuf;
 
 use html5::tokenizer::{TokenSink, Token, Tokenizer, TokenizerOpts, ParseError};
 use html5::tokenizer::{CharacterToken, MultiCharacterToken, TagToken, StartTag, EndTag};
@@ -38,7 +39,7 @@ impl TokenSink for TokenPrinter {
                 self.do_char(c);
             }
             MultiCharacterToken(b) => {
-                for c in b.chars() {
+                for c in b.as_slice().chars() {
                     self.do_char(c);
                 }
             }
@@ -78,7 +79,7 @@ fn main() {
             profile: true,
             .. Default::default()
         });
-        tok.feed(io::stdin().read_to_str().unwrap());
+        tok.feed(StrBuf::from_owned_str(io::stdin().read_to_str().unwrap()));
         tok.end();
     }
     sink.is_char(false);
diff --git a/src/tokenizer/buffer_queue.rs b/src/tokenizer/buffer_queue.rs
index 8e8a2735..8dd4c7a0 100644
--- a/src/tokenizer/buffer_queue.rs
+++ b/src/tokenizer/buffer_queue.rs
@@ -3,6 +3,7 @@
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
 use std::str::CharRange;
+use std::strbuf::StrBuf;
 
 use collections::deque::Deque;
 use collections::dlist::DList;
 
@@ -10,7 +11,7 @@ struct Buffer {
     /// Byte position within the buffer.
     pos: uint,
     /// The buffer.
-    buf: ~str,
+    buf: StrBuf,
 }
 
 /// Either a single character or a run of "data" characters: those which
@@ -20,7 +21,7 @@
 /// normally.
 #[deriving(Eq, TotalEq, Show)]
 pub enum DataRunOrChar {
-    DataRun(~str),
+    DataRun(StrBuf),
     OneChar(char),
 }
 
@@ -57,7 +58,7 @@ impl BufferQueue {
     }
 
     /// Add a buffer to the beginning of the queue.
-    pub fn push_front(&mut self, buf: ~str) {
+    pub fn push_front(&mut self, buf: StrBuf) {
         if buf.len() == 0 {
             return;
         }
@@ -71,7 +72,7 @@
     /// Add a buffer to the end of the queue.
     /// 'pos' can be non-zero to remove that many characters
     /// from the beginning.
-    pub fn push_back(&mut self, buf: ~str, pos: uint) {
+    pub fn push_back(&mut self, buf: StrBuf, pos: uint) {
         if pos >= buf.len() {
             return;
         }
@@ -88,7 +89,7 @@
     }
 
     /// Get multiple characters, if that many are available.
-    pub fn pop_front(&mut self, n: uint) -> Option<~str> {
+    pub fn pop_front(&mut self, n: uint) -> Option<StrBuf> {
         if !self.has(n) {
             return None;
         }
@@ -99,7 +100,7 @@
     /// Look at the next available character, if any.
     pub fn peek(&mut self) -> Option<char> {
         match self.buffers.front() {
-            Some(&Buffer { pos, ref buf }) => Some(buf.char_at(pos)),
+            Some(&Buffer { pos, ref buf }) => Some(buf.as_slice().char_at(pos)),
             None => None,
         }
     }
@@ -109,17 +110,17 @@
     pub fn pop_data(&mut self) -> Option<DataRunOrChar> {
         let (result, now_empty) = match self.buffers.front_mut() {
             Some(&Buffer { ref mut pos, ref buf }) => {
-                let n = data_span(buf.slice_from(*pos));
+                let n = data_span(buf.as_slice().slice_from(*pos));
 
                 // If we only have one character then it's cheaper not to allocate.
                 if n > 1 {
                     let new_pos = *pos + n;
-                    let out = buf.slice(*pos, new_pos).to_owned();
+                    let out = StrBuf::from_str(buf.as_slice().slice(*pos, new_pos));
                     *pos = new_pos;
                     self.available -= n;
                     (Some(DataRun(out)), new_pos >= buf.len())
                 } else {
-                    let CharRange { ch, next } = buf.char_range_at(*pos);
+                    let CharRange { ch, next } = buf.as_slice().char_range_at(*pos);
                     *pos = next;
                     self.available -= 1;
                     (Some(OneChar(ch)), next >= buf.len())
@@ -136,7 +137,7 @@
     }
 
     fn account_new(&mut self, buf: &str) {
-        // FIXME: We could pass through length from the initial [u8] -> ~str
+        // FIXME: We could pass through length from the initial [u8] -> StrBuf
         // conversion, which already must re-encode or at least scan for UTF-8
         // validity.
         self.available += buf.char_len();
@@ -153,7 +154,7 @@
         let (result, now_empty) = match self.buffers.front_mut() {
             None => (None, false),
             Some(&Buffer { ref mut pos, ref buf }) => {
-                let CharRange { ch, next } = buf.char_range_at(*pos);
+                let CharRange { ch, next } = buf.as_slice().char_range_at(*pos);
                 *pos = next;
                 self.available -= 1;
                 (Some(ch), next >= buf.len())
@@ -239,7 +240,7 @@ fn can_push_truncated() {
 
 #[test]
 fn data_span_test() {
-    fn pad(s: &mut ~str, n: uint) {
+    fn pad(s: &mut StrBuf, n: uint) {
         for _ in range(0, n) {
             s.push_char('x');
         }
diff --git a/src/tokenizer/char_ref/mod.rs b/src/tokenizer/char_ref/mod.rs
index 5b1a7a95..612db975 100644
--- a/src/tokenizer/char_ref/mod.rs
+++ b/src/tokenizer/char_ref/mod.rs
@@ -6,6 +6,7 @@ use super::{Tokenizer, TokenSink};
 use util::str::{is_ascii_alnum, empty_str};
 
 use std::char::{to_digit, from_u32};
+use std::strbuf::StrBuf;
 
 mod data;
 
@@ -42,7 +43,7 @@ pub struct CharRefTokenizer {
     priv seen_digit: bool,
     priv hex_marker: Option<char>,
 
-    priv name_buf_opt: Option<~str>,
+    priv name_buf_opt: Option<StrBuf>,
     priv name_match: Option<&'static [u32, ..2]>,
     priv name_len: uint,
 }
@@ -71,7 +72,7 @@ impl CharRefTokenizer {
         self.result.expect("get_result called before done")
     }
 
-    fn name_buf<'t>(&'t mut self) -> &'t mut ~str {
+    fn name_buf<'t>(&'t mut self) -> &'t mut StrBuf {
         self.name_buf_opt.as_mut()
             .expect("name_buf missing in named character reference")
     }
@@ -182,7 +183,7 @@ impl<'sink, Sink: TokenSink> CharRefTokenizer {
     }
 
     fn unconsume_numeric(&mut self, tokenizer: &mut Tokenizer<'sink, Sink>) -> Status {
-        let mut unconsume = ~"#";
+        let mut unconsume = StrBuf::from_str("#");
         match self.hex_marker {
             Some(c) => unconsume.push_char(c),
             None => (),
@@ -202,7 +203,7 @@
             n if (n > 0x10FFFF) || self.num_too_big => ('\ufffd', true),
             0x00 | 0xD800..0xDFFF => ('\ufffd', true),
 
-            0x80..0x9F => match data::c1_replacements[self.num - 0x80] {
+            0x80..0x9F => match data::c1_replacements[(self.num - 0x80) as uint] {
                 Some(c) => (c, true),
                 None => (conv(self.num), true),
             },
@@ -287,14 +288,14 @@
             // ¬it => can't continue match
             assert!(self.name_len > 0);
-            let last_matched = self.name_buf().char_at(self.name_len-1);
+            let last_matched = self.name_buf().as_slice().char_at(self.name_len-1);
 
             // There might not be a next character after the match, if
             // we had a full match and then hit EOF.
             let next_after = if self.name_len == self.name_buf().len() {
                 None
             } else {
-                Some(self.name_buf().char_at(self.name_len))
+                Some(self.name_buf().as_slice().char_at(self.name_len))
             };
 
             // "If the character reference is being consumed as part of an
@@ -324,7 +325,8 @@
                 self.unconsume_name(tokenizer);
                 self.finish_none()
             } else {
-                tokenizer.unconsume(self.name_buf().slice_from(self.name_len).to_owned());
+                tokenizer.unconsume(StrBuf::from_str(
+                    self.name_buf().as_slice().slice_from(self.name_len)));
                 self.result = Some(CharRef {
                     chars: [from_u32(c1).unwrap(), from_u32(c2).unwrap()],
                     num_chars: if c2 == 0 { 1 } else { 2 },
@@ -368,7 +370,7 @@
             }
 
             Octothorpe => {
-                tokenizer.unconsume(~"#");
+                tokenizer.unconsume(StrBuf::from_str("#"));
                 tokenizer.emit_error(~"EOF after '#' in character reference");
                 self.finish_none();
             }
diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs
index 56e0fe0d..614bd293 100644
--- a/src/tokenizer/mod.rs
+++ b/src/tokenizer/mod.rs
@@ -18,7 +18,6 @@
 use self::buffer_queue::{BufferQueue, DataRunOrChar, DataRun, OneChar};
 use util::str::{lower_ascii, lower_ascii_letter, empty_str};
 
-use std::str;
 use std::ascii::StrAsciiExt;
 use std::mem::replace;
 use std::iter::AdditiveIterator;
@@ -37,10 +36,10 @@
 pub trait TokenSink {
     fn process_token(&mut self, token: Token);
 }
 
-fn option_push_char(opt_str: &mut Option<~str>, c: char) {
+fn option_push_char(opt_str: &mut Option<StrBuf>, c: char) {
     match *opt_str {
         Some(ref mut s) => s.push_char(c),
-        None => *opt_str = Some(str::from_char(c)),
+        None => *opt_str = Some(StrBuf::from_char(1, c)),
     }
 }
@@ -65,7 +64,7 @@
 
     /// Last start tag. Only the test runner should use a
     /// non-None value!
-    last_start_tag_name: Option<~str>,
+    last_start_tag_name: Option<StrBuf>,
 }
 
 impl Default for TokenizerOpts {
@@ -129,16 +128,16 @@ pub struct Tokenizer<'sink, Sink> {
     priv current_attr: Attribute,
 
     /// Current comment.
-    priv current_comment: ~str,
+    priv current_comment: StrBuf,
 
     /// Current doctype token.
     priv current_doctype: Doctype,
 
     /// Last start tag name, for use in checking "appropriate end tag".
-    priv last_start_tag_name: Option<~str>,
+    priv last_start_tag_name: Option<StrBuf>,
 
     /// The "temporary buffer" mentioned in the spec.
-    priv temp_buf: ~str,
+    priv temp_buf: StrBuf,
 
     /// Record of how many ns we spent in each state, if profiling is enabled.
     priv state_profile: HashMap<states::State, u64>,
 }
 
@@ -171,12 +170,12 @@ impl<'sink, Sink: TokenSink> Tokenizer<'sink, Sink> {
         }
     }
 
-    pub fn feed(&mut self, input: ~str) {
+    pub fn feed(&mut self, input: StrBuf) {
         if input.len() == 0 {
             return;
         }
 
-        let pos = if self.discard_bom && input.char_at(0) == '\ufeff' {
+        let pos = if self.discard_bom && input.as_slice().char_at(0) == '\ufeff' {
             self.discard_bom = false;
             3  // length of BOM in UTF-8
         } else {
@@ -309,7 +308,7 @@
     fn emit_char(&mut self, c: char) {
         self.sink.process_token(CharacterToken(c));
     }
 
-    fn emit_chars(&mut self, b: ~str) {
+    fn emit_chars(&mut self, b: StrBuf) {
         self.sink.process_token(MultiCharacterToken(b));
     }
@@ -408,7 +407,7 @@
             replace(&mut self.current_doctype, Doctype::new())));
     }
 
-    fn doctype_id<'a>(&'a mut self, kind: DoctypeIdKind) -> &'a mut Option<~str> {
+    fn doctype_id<'a>(&'a mut self, kind: DoctypeIdKind) -> &'a mut Option<StrBuf> {
         match kind {
             Public => &mut self.current_doctype.public_id,
             System => &mut self.current_doctype.system_id,
@@ -446,7 +445,7 @@
         assert!(c.is_some());
     }
 
-    fn unconsume(&mut self, buf: ~str) {
+    fn unconsume(&mut self, buf: StrBuf) {
         self.input_buffers.push_front(buf);
     }
@@ -1023,7 +1022,7 @@
         }
 
         for i in range(0, num_chars) {
-            let c = chars[i];
+            let c = chars[i as uint];
             match self.state {
                 states::Data | states::RawData(states::Rcdata)
                     => go!(emit c),
@@ -1144,21 +1143,21 @@
 
 #[test]
 fn push_to_None_gives_singleton() {
-    let mut s: Option<~str> = None;
+    let mut s: Option<StrBuf> = None;
     option_push_char(&mut s, 'x');
     assert_eq!(s, Some(~"x"));
 }
 
 #[test]
 fn push_to_empty_appends() {
-    let mut s: Option<~str> = Some(~"");
+    let mut s: Option<StrBuf> = Some(~"");
     option_push_char(&mut s, 'x');
     assert_eq!(s, Some(~"x"));
 }
 
 #[test]
 fn push_to_nonempty_appends() {
-    let mut s: Option<~str> = Some(~"y");
+    let mut s: Option<StrBuf> = Some(~"y");
     option_push_char(&mut s, 'x');
     assert_eq!(s, Some(~"yx"));
 }
diff --git a/src/tokenizer/tokens.rs b/src/tokenizer/tokens.rs
index 21148e9d..2304ca5a 100644
--- a/src/tokenizer/tokens.rs
+++ b/src/tokenizer/tokens.rs
@@ -2,14 +2,15 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+use std::strbuf::StrBuf;
 use util::str::empty_str;
 
 // FIXME: already exists in Servo DOM
 #[deriving(Eq, TotalEq, Clone)]
 pub struct Doctype {
-    name: Option<~str>,
-    public_id: Option<~str>,
-    system_id: Option<~str>,
+    name: Option<StrBuf>,
+    public_id: Option<StrBuf>,
+    system_id: Option<StrBuf>,
     force_quirks: bool,
 }
@@ -26,8 +27,8 @@ impl Doctype {
 
 #[deriving(Eq, TotalEq, Clone)]
 pub struct Attribute {
-    name: ~str,
-    value: ~str,
+    name: StrBuf,
+    value: StrBuf,
 }
 
 impl Attribute {
@@ -53,7 +54,7 @@ pub enum TagKind {
 #[deriving(Eq, TotalEq, Clone)]
 pub struct Tag {
     kind: TagKind,
-    name: ~str,
+    name: StrBuf,
     self_closing: bool,
     attrs: Vec<Attribute>,
 }
@@ -73,9 +74,9 @@ impl Tag {
 pub enum Token {
     DoctypeToken(Doctype),
     TagToken(Tag),
-    CommentToken(~str),
+    CommentToken(StrBuf),
     CharacterToken(char),
-    MultiCharacterToken(~str),
+    MultiCharacterToken(StrBuf),
     EOFToken,
     ParseError(~str),
 }
diff --git a/src/util/str.rs b/src/util/str.rs
index 6e83fac6..0164cff0 100644
--- a/src/util/str.rs
+++ b/src/util/str.rs
@@ -2,7 +2,7 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-use std::str;
+use std::strbuf::StrBuf;
 
 /// If `c` is an ASCII letter, return the corresponding lowercase
 /// letter, otherwise None.
@@ -23,8 +23,8 @@ pub fn is_ascii_alnum(c: char) -> bool {
 }
 
 /// Allocate an empty string with a small non-zero capacity.
-pub fn empty_str() -> ~str {
-    str::with_capacity(4)
+pub fn empty_str() -> StrBuf {
+    StrBuf::with_capacity(4)
 }
 
 test_eq!(lower_letter_a_is_a, lower_ascii_letter('a'), Some('a'))

From a838fc6a4a6238c055db6322bbc16a2d965d662b Mon Sep 17 00:00:00 2001
From: Chris Morgan
Date: Tue, 15 Apr 2014 16:03:14 +1000
Subject: [PATCH 5/7] Update for struct publicity swap.

(Used to be public by default, private when marked explicitly; now it's
private by default, public when marked explicitly.)
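
As a schematic sketch (illustrative only, not part of the original
commit message), using a struct shaped like the ones touched below:

    // Old rules: fields of a pub struct were public unless marked.
    pub struct Attribute {
        name: StrBuf,         // public by default
        priv value: StrBuf,   // explicitly private
    }

    // New rules: fields are private unless marked.
    pub struct Attribute {
        pub name: StrBuf,     // must now be marked to stay public
        value: StrBuf,        // private by default
    }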
---
 src/tokenizer/buffer_queue.rs |  8 ++++----
 src/tokenizer/char_ref/mod.rs | 28 ++++++++++++++--------------
 src/tokenizer/mod.rs          | 46 +++++++++++++++++++++---------------------
 src/tokenizer/tokens.rs       | 20 ++++++++++----------
 4 files changed, 51 insertions(+), 51 deletions(-)

diff --git a/src/tokenizer/buffer_queue.rs b/src/tokenizer/buffer_queue.rs
index 8dd4c7a0..64f0147d 100644
--- a/src/tokenizer/buffer_queue.rs
+++ b/src/tokenizer/buffer_queue.rs
@@ -9,9 +9,9 @@ use collections::dlist::DList;
 
 struct Buffer {
     /// Byte position within the buffer.
-    pos: uint,
+    pub pos: uint,
     /// The buffer.
-    buf: StrBuf,
+    pub buf: StrBuf,
 }
 
 /// Either a single character or a run of "data" characters: those which
@@ -42,10 +42,10 @@ fn data_span(s: &str) -> uint {
 /// consuming characters.
 pub struct BufferQueue {
     /// Buffers to process.
-    priv buffers: DList<Buffer>,
+    buffers: DList<Buffer>,
 
     /// Number of available characters.
-    priv available: uint,
+    available: uint,
 }
 
 impl BufferQueue {
diff --git a/src/tokenizer/char_ref/mod.rs b/src/tokenizer/char_ref/mod.rs
index 612db975..753b59b4 100644
--- a/src/tokenizer/char_ref/mod.rs
+++ b/src/tokenizer/char_ref/mod.rs
@@ -12,10 +12,10 @@
 
 pub struct CharRef {
     /// The resulting character(s)
-    chars: [char, ..2],
+    pub chars: [char, ..2],
 
     /// How many slots in `chars` are valid?
-    num_chars: u8,
+    pub num_chars: u8,
 }
 
 pub enum Status {
@@ -34,18 +34,18 @@ enum State {
 }
 
 pub struct CharRefTokenizer {
-    priv state: State,
-    priv addnl_allowed: Option<char>,
-    priv result: Option<CharRef>,
-
-    priv num: u32,
-    priv num_too_big: bool,
-    priv seen_digit: bool,
-    priv hex_marker: Option<char>,
-
-    priv name_buf_opt: Option<StrBuf>,
-    priv name_match: Option<&'static [u32, ..2]>,
-    priv name_len: uint,
+    state: State,
+    addnl_allowed: Option<char>,
+    result: Option<CharRef>,
+
+    num: u32,
+    num_too_big: bool,
+    seen_digit: bool,
+    hex_marker: Option<char>,
+
+    name_buf_opt: Option<StrBuf>,
+    name_match: Option<&'static [u32, ..2]>,
+    name_len: uint,
 }
 
 impl CharRefTokenizer {
diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs
index 614bd293..aecea561 100644
--- a/src/tokenizer/mod.rs
+++ b/src/tokenizer/mod.rs
@@ -48,23 +48,23 @@ fn option_push_char(opt_str: &mut Option<StrBuf>, c: char) {
 pub struct TokenizerOpts {
     /// Report all parse errors described in the spec, at some
     /// performance penalty? Default: false
-    exact_errors: bool,
+    pub exact_errors: bool,
 
     /// Discard a U+FEFF BYTE ORDER MARK if we see one at the beginning
     /// of the stream? Default: true
-    discard_bom: bool,
+    pub discard_bom: bool,
 
     /// Keep a record of how long we spent in each state? Printed
     /// when end() is called. Default: false
-    profile: bool,
+    pub profile: bool,
 
     /// Initial state override. Only the test runner should use
     /// a non-None value!
-    initial_state: Option<states::State>,
+    pub initial_state: Option<states::State>,
 
     /// Last start tag. Only the test runner should use a
     /// non-None value!
-    last_start_tag_name: Option<StrBuf>,
+    pub last_start_tag_name: Option<StrBuf>,
 }
 
 impl Default for TokenizerOpts {
@@ -81,66 +81,66 @@ impl Default for TokenizerOpts {
 
 pub struct Tokenizer<'sink, Sink> {
     /// Options controlling the behavior of the tokenizer.
-    priv opts: TokenizerOpts,
+    opts: TokenizerOpts,
 
     /// Destination for tokens we emit.
-    priv sink: &'sink mut Sink,
+    sink: &'sink mut Sink,
 
     /// The abstract machine state as described in the spec.
-    priv state: states::State,
+    state: states::State,
 
     /// Input ready to be tokenized.
-    priv input_buffers: BufferQueue,
+    input_buffers: BufferQueue,
 
     /// If Some(n), the abstract machine needs n available
     /// characters to continue.
-    priv wait_for: Option<uint>,
+    wait_for: Option<uint>,
 
     /// Are we at the end of the file, once buffers have been processed
     /// completely? This affects whether we will wait for lookahead or not.
-    priv at_eof: bool,
+    at_eof: bool,
 
     /// Tokenizer for character references, if we're tokenizing
     /// one at the moment.
-    priv char_ref_tokenizer: Option<~CharRefTokenizer>,
+    char_ref_tokenizer: Option<~CharRefTokenizer>,
 
     /// Current input character. Just consumed, may reconsume.
-    priv current_char: char,
+    current_char: char,
 
     /// Should we reconsume the current input character?
-    priv reconsume: bool,
+    reconsume: bool,
 
     /// Did we just consume \r, translating it to \n? In that case we need
     /// to ignore the next character if it's \n.
-    priv ignore_lf: bool,
+    ignore_lf: bool,
 
     /// Discard a U+FEFF BYTE ORDER MARK if we see one? Only done at the
     /// beginning of the stream.
-    priv discard_bom: bool,
+    discard_bom: bool,
 
     // FIXME: The state machine guarantees the tag exists when
     // we need it, so we could eliminate the Option overhead.
     // Leaving it as Option for now, to find bugs.
     /// Current tag.
-    priv current_tag: Option<Tag>,
+    current_tag: Option<Tag>,
 
     /// Current attribute.
-    priv current_attr: Attribute,
+    current_attr: Attribute,
 
     /// Current comment.
-    priv current_comment: StrBuf,
+    current_comment: StrBuf,
 
     /// Current doctype token.
-    priv current_doctype: Doctype,
+    current_doctype: Doctype,
 
     /// Last start tag name, for use in checking "appropriate end tag".
-    priv last_start_tag_name: Option<StrBuf>,
+    last_start_tag_name: Option<StrBuf>,
 
     /// The "temporary buffer" mentioned in the spec.
-    priv temp_buf: StrBuf,
+    temp_buf: StrBuf,
 
     /// Record of how many ns we spent in each state, if profiling is enabled.
-    priv state_profile: HashMap<states::State, u64>,
+    state_profile: HashMap<states::State, u64>,
 }
 
 impl<'sink, Sink: TokenSink> Tokenizer<'sink, Sink> {
diff --git a/src/tokenizer/tokens.rs b/src/tokenizer/tokens.rs
index 2304ca5a..082da0b4 100644
--- a/src/tokenizer/tokens.rs
+++ b/src/tokenizer/tokens.rs
@@ -8,10 +8,10 @@ use util::str::empty_str;
 
 // FIXME: already exists in Servo DOM
 #[deriving(Eq, TotalEq, Clone)]
 pub struct Doctype {
-    name: Option<StrBuf>,
-    public_id: Option<StrBuf>,
-    system_id: Option<StrBuf>,
-    force_quirks: bool,
+    pub name: Option<StrBuf>,
+    pub public_id: Option<StrBuf>,
+    pub system_id: Option<StrBuf>,
+    pub force_quirks: bool,
 }
 
 impl Doctype {
@@ -27,8 +27,8 @@
 
 #[deriving(Eq, TotalEq, Clone)]
 pub struct Attribute {
-    name: StrBuf,
-    value: StrBuf,
+    pub name: StrBuf,
+    pub value: StrBuf,
 }
 
 impl Attribute {
@@ -53,10 +53,10 @@ pub enum TagKind {
 #[deriving(Eq, TotalEq, Clone)]
 pub struct Tag {
-    kind: TagKind,
-    name: StrBuf,
-    self_closing: bool,
-    attrs: Vec<Attribute>,
+    pub kind: TagKind,
+    pub name: StrBuf,
+    pub self_closing: bool,
+    pub attrs: Vec<Attribute>,
 }
 
 impl Tag {

From ef1bb3dcf37911b7471ea5f508441b83c1ef6a8d Mon Sep 17 00:00:00 2001
From: Chris Morgan
Date: Tue, 15 Apr 2014 16:28:12 +1000
Subject: [PATCH 6/7] Update rust-phf.

---
 rust-phf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rust-phf b/rust-phf
index 06211e13..52a1cc62 160000
--- a/rust-phf
+++ b/rust-phf
@@ -1 +1 @@
-Subproject commit 06211e1339eeb1ca357c1f2055869d559fd218da
+Subproject commit 52a1cc62318d4fd53011ab9843348e142fae6c8a

From 46dac0473c4572219fd97a0c7a49baa3cdd073cb Mon Sep 17 00:00:00 2001
From: Chris Morgan
Date: Tue, 15 Apr 2014 16:28:43 +1000
Subject: [PATCH 7/7] Switch to sfackler/rust-phf (for now).

---
 .gitmodules | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitmodules b/.gitmodules
index 5104d42a..1cab514f 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,6 +1,6 @@
 [submodule "rust-phf"]
 	path = rust-phf
-	url = https://github.com/kmcallister/rust-phf
+	url = https://github.com/sfackler/rust-phf
 [submodule "html5lib-tests"]
 	path = html5lib-tests
 	url = https://github.com/html5lib/html5lib-tests
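
A usage hint for patches 6 and 7 (not part of the series itself):
changing a submodule's URL in .gitmodules does not retarget an existing
checkout on its own. Assuming a standard clone, the new URL has to be
synced into .git/config before updating the submodule:

    git submodule sync rust-phf
    git submodule update --init rust-phf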