Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[submodule "rust-phf"]
path = rust-phf
url = https://github.com/kmcallister/rust-phf
url = https://github.com/sfackler/rust-phf
[submodule "html5lib-tests"]
path = html5lib-tests
url = https://github.com/html5lib/html5lib-tests
8 changes: 4 additions & 4 deletions Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,18 @@ RUST_DIRS := -L . -L $(VPATH)/rust-phf/build

RUSTC_CMD := $(RUSTC) $(RUST_DIRS) $(RUSTFLAGS)

LIB_TOP_SRC := $(VPATH)/src/html5.rs
LIB_TOP_SRC := $(VPATH)/src/lib.rs
LIB_ALL_SRC := $(shell find $(VPATH)/src -type f -name '*.rs')
LIB := $(shell $(RUSTC) --crate-file-name "$(LIB_TOP_SRC)")

MACROS_TOP_SRC := $(VPATH)/macros/mod.rs
MACROS_TOP_SRC := $(VPATH)/macros/lib.rs
MACROS_ALL_SRC := $(shell find $(VPATH)/macros -type f -name '*.rs')
MACROS := $(shell $(RUSTC) --crate-file-name "$(MACROS_TOP_SRC)")

EXT_TEST_TOP_SRC := $(VPATH)/test/mod.rs
EXT_TEST_TOP_SRC := $(VPATH)/test/lib.rs
EXT_TEST_ALL_SRC := $(shell find $(VPATH)/test -type f -name '*.rs')

EXT_BENCH_TOP_SRC := $(VPATH)/bench/mod.rs
EXT_BENCH_TOP_SRC := $(VPATH)/bench/lib.rs
EXT_BENCH_ALL_SRC := $(shell find $(VPATH)/bench -type f -name '*.rs')

TEST_JSON_SRC := $(shell find $(VPATH)/html5lib-tests/ -type f -name '*.test')
Expand Down
4 changes: 2 additions & 2 deletions bench/mod.rs → bench/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#[crate_id="html5-external-bench"];
#[crate_type="bin"];
#![crate_id="html5-external-bench"]
#![crate_type="bin"]

extern crate test;

Expand Down
3 changes: 2 additions & 1 deletion examples/tokenize-bench-example.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ extern crate html5;

use std::{io, os};
use std::default::Default;
use std::strbuf::StrBuf;

use test::black_box;

Expand All @@ -30,7 +31,7 @@ fn main() {
path.push(os::args()[1]);

let mut file = io::File::open(&path).ok().expect("can't open file");
let file_input = file.read_to_str().ok().expect("can't read file");
let file_input = StrBuf::from_owned_str(file.read_to_str().ok().expect("can't read file"));

let mut sink = Sink;
let mut tok = Tokenizer::new(&mut sink, Default::default());
Expand Down
5 changes: 3 additions & 2 deletions examples/tokenize-example.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ extern crate html5;
use std::io;
use std::char;
use std::default::Default;
use std::strbuf::StrBuf;

use html5::tokenizer::{TokenSink, Token, Tokenizer, TokenizerOpts, ParseError};
use html5::tokenizer::{CharacterToken, MultiCharacterToken, TagToken, StartTag, EndTag};
Expand Down Expand Up @@ -38,7 +39,7 @@ impl TokenSink for TokenPrinter {
self.do_char(c);
}
MultiCharacterToken(b) => {
for c in b.chars() {
for c in b.as_slice().chars() {
self.do_char(c);
}
}
Expand Down Expand Up @@ -78,7 +79,7 @@ fn main() {
profile: true,
.. Default::default()
});
tok.feed(io::stdin().read_to_str().unwrap());
tok.feed(StrBuf::from_owned_str(io::stdin().read_to_str().unwrap()));
tok.end();
}
sink.is_char(false);
Expand Down
6 changes: 3 additions & 3 deletions macros/mod.rs → macros/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#[crate_id="html5-macros"];
#[crate_type="dylib"];
#![crate_id="html5-macros"]
#![crate_type="dylib"]

#[feature(macro_rules, macro_registrar, quote, managed_boxes)];
#![feature(macro_rules, macro_registrar, quote, managed_boxes)]

extern crate syntax;
extern crate serialize;
Expand Down
5 changes: 4 additions & 1 deletion macros/named_entities.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,10 @@ fn build_map(js: Json) -> Option<HashMap<~str, [u32, ..2]>> {
// Add every named entity to the map.
for (k,v) in json_map.move_iter() {
let mut decoder = json::Decoder::new(v);
let CharRef { codepoints }: CharRef = Decodable::decode(&mut decoder);
let CharRef { codepoints }: CharRef = match Decodable::decode(&mut decoder) {
Ok(o) => o,
Err(_) => return None,
};

assert!((codepoints.len() >= 1) && (codepoints.len() <= 2));
let mut codepoint_pair = [0, 0];
Expand Down
2 changes: 1 addition & 1 deletion rust-phf
6 changes: 3 additions & 3 deletions src/html5.rs → src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#[crate_id="github.com/kmcallister/html5"];
#[crate_type="dylib"];
#![crate_id="github.com/kmcallister/html5"]
#![crate_type="dylib"]

#[feature(macro_rules, phase)];
#![feature(macro_rules, phase)]

#[phase(syntax, link)]
extern crate log;
Expand Down
31 changes: 16 additions & 15 deletions src/tokenizer/buffer_queue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

use std::str::CharRange;
use std::strbuf::StrBuf;
use collections::deque::Deque;
use collections::dlist::DList;

struct Buffer {
/// Byte position within the buffer.
pos: uint,
pub pos: uint,
/// The buffer.
buf: ~str,
pub buf: StrBuf,
}

/// Either a single character or a run of "data" characters: those which
Expand All @@ -20,7 +21,7 @@ struct Buffer {
/// normally.
#[deriving(Eq, TotalEq, Show)]
pub enum DataRunOrChar {
DataRun(~str),
DataRun(StrBuf),
OneChar(char),
}

Expand All @@ -41,10 +42,10 @@ fn data_span(s: &str) -> uint {
/// consuming characters.
pub struct BufferQueue {
/// Buffers to process.
priv buffers: DList<Buffer>,
buffers: DList<Buffer>,

/// Number of available characters.
priv available: uint,
available: uint,
}

impl BufferQueue {
Expand All @@ -57,7 +58,7 @@ impl BufferQueue {
}

/// Add a buffer to the beginning of the queue.
pub fn push_front(&mut self, buf: ~str) {
pub fn push_front(&mut self, buf: StrBuf) {
if buf.len() == 0 {
return;
}
Expand All @@ -71,7 +72,7 @@ impl BufferQueue {
/// Add a buffer to the end of the queue.
/// 'pos' can be non-zero to remove that many characters
/// from the beginning.
pub fn push_back(&mut self, buf: ~str, pos: uint) {
pub fn push_back(&mut self, buf: StrBuf, pos: uint) {
if pos >= buf.len() {
return;
}
Expand All @@ -88,7 +89,7 @@ impl BufferQueue {
}

/// Get multiple characters, if that many are available.
pub fn pop_front(&mut self, n: uint) -> Option<~str> {
pub fn pop_front(&mut self, n: uint) -> Option<StrBuf> {
if !self.has(n) {
return None;
}
Expand All @@ -99,7 +100,7 @@ impl BufferQueue {
/// Look at the next available character, if any.
pub fn peek(&mut self) -> Option<char> {
match self.buffers.front() {
Some(&Buffer { pos, ref buf }) => Some(buf.char_at(pos)),
Some(&Buffer { pos, ref buf }) => Some(buf.as_slice().char_at(pos)),
None => None,
}
}
Expand All @@ -109,17 +110,17 @@ impl BufferQueue {
pub fn pop_data(&mut self) -> Option<DataRunOrChar> {
let (result, now_empty) = match self.buffers.front_mut() {
Some(&Buffer { ref mut pos, ref buf }) => {
let n = data_span(buf.slice_from(*pos));
let n = data_span(buf.as_slice().slice_from(*pos));

// If we only have one character then it's cheaper not to allocate.
if n > 1 {
let new_pos = *pos + n;
let out = buf.slice(*pos, new_pos).to_owned();
let out = StrBuf::from_str(buf.as_slice().slice(*pos, new_pos));
*pos = new_pos;
self.available -= n;
(Some(DataRun(out)), new_pos >= buf.len())
} else {
let CharRange { ch, next } = buf.char_range_at(*pos);
let CharRange { ch, next } = buf.as_slice().char_range_at(*pos);
*pos = next;
self.available -= 1;
(Some(OneChar(ch)), next >= buf.len())
Expand All @@ -136,7 +137,7 @@ impl BufferQueue {
}

fn account_new(&mut self, buf: &str) {
// FIXME: We could pass through length from the initial [u8] -> ~str
// FIXME: We could pass through length from the initial [u8] -> StrBuf
// conversion, which already must re-encode or at least scan for UTF-8
// validity.
self.available += buf.char_len();
Expand All @@ -153,7 +154,7 @@ impl Iterator<char> for BufferQueue {
let (result, now_empty) = match self.buffers.front_mut() {
None => (None, false),
Some(&Buffer { ref mut pos, ref buf }) => {
let CharRange { ch, next } = buf.char_range_at(*pos);
let CharRange { ch, next } = buf.as_slice().char_range_at(*pos);
*pos = next;
self.available -= 1;
(Some(ch), next >= buf.len())
Expand Down Expand Up @@ -239,7 +240,7 @@ fn can_push_truncated() {

#[test]
fn data_span_test() {
fn pad(s: &mut ~str, n: uint) {
fn pad(s: &mut StrBuf, n: uint) {
for _ in range(0, n) {
s.push_char('x');
}
Expand Down
44 changes: 23 additions & 21 deletions src/tokenizer/char_ref/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,16 @@ use super::{Tokenizer, TokenSink};

use util::str::{is_ascii_alnum, empty_str};
use std::char::{to_digit, from_u32};
use std::strbuf::StrBuf;

mod data;

pub struct CharRef {
/// The resulting character(s)
chars: [char, ..2],
pub chars: [char, ..2],

/// How many slots in `chars` are valid?
num_chars: u8,
pub num_chars: u8,
}

pub enum Status {
Expand All @@ -33,18 +34,18 @@ enum State {
}

pub struct CharRefTokenizer {
priv state: State,
priv addnl_allowed: Option<char>,
priv result: Option<CharRef>,

priv num: u32,
priv num_too_big: bool,
priv seen_digit: bool,
priv hex_marker: Option<char>,

priv name_buf_opt: Option<~str>,
priv name_match: Option<&'static [u32, ..2]>,
priv name_len: uint,
state: State,
addnl_allowed: Option<char>,
result: Option<CharRef>,

num: u32,
num_too_big: bool,
seen_digit: bool,
hex_marker: Option<char>,

name_buf_opt: Option<StrBuf>,
name_match: Option<&'static [u32, ..2]>,
name_len: uint,
}

impl CharRefTokenizer {
Expand All @@ -71,7 +72,7 @@ impl CharRefTokenizer {
self.result.expect("get_result called before done")
}

fn name_buf<'t>(&'t mut self) -> &'t mut ~str {
fn name_buf<'t>(&'t mut self) -> &'t mut StrBuf {
self.name_buf_opt.as_mut()
.expect("name_buf missing in named character reference")
}
Expand Down Expand Up @@ -182,7 +183,7 @@ impl<'sink, Sink: TokenSink> CharRefTokenizer {
}

fn unconsume_numeric(&mut self, tokenizer: &mut Tokenizer<'sink, Sink>) -> Status {
let mut unconsume = ~"#";
let mut unconsume = StrBuf::from_str("#");
match self.hex_marker {
Some(c) => unconsume.push_char(c),
None => (),
Expand All @@ -202,7 +203,7 @@ impl<'sink, Sink: TokenSink> CharRefTokenizer {
n if (n > 0x10FFFF) || self.num_too_big => ('\ufffd', true),
0x00 | 0xD800..0xDFFF => ('\ufffd', true),

0x80..0x9F => match data::c1_replacements[self.num - 0x80] {
0x80..0x9F => match data::c1_replacements[(self.num - 0x80) as uint] {
Some(c) => (c, true),
None => (conv(self.num), true),
},
Expand Down Expand Up @@ -287,14 +288,14 @@ impl<'sink, Sink: TokenSink> CharRefTokenizer {
// &notit => can't continue match

assert!(self.name_len > 0);
let last_matched = self.name_buf().char_at(self.name_len-1);
let last_matched = self.name_buf().as_slice().char_at(self.name_len-1);

// There might not be a next character after the match, if
// we had a full match and then hit EOF.
let next_after = if self.name_len == self.name_buf().len() {
None
} else {
Some(self.name_buf().char_at(self.name_len))
Some(self.name_buf().as_slice().char_at(self.name_len))
};

// "If the character reference is being consumed as part of an
Expand Down Expand Up @@ -324,7 +325,8 @@ impl<'sink, Sink: TokenSink> CharRefTokenizer {
self.unconsume_name(tokenizer);
self.finish_none()
} else {
tokenizer.unconsume(self.name_buf().slice_from(self.name_len).to_owned());
tokenizer.unconsume(StrBuf::from_str(
self.name_buf().as_slice().slice_from(self.name_len)));
self.result = Some(CharRef {
chars: [from_u32(c1).unwrap(), from_u32(c2).unwrap()],
num_chars: if c2 == 0 { 1 } else { 2 },
Expand Down Expand Up @@ -368,7 +370,7 @@ impl<'sink, Sink: TokenSink> CharRefTokenizer {
}

Octothorpe => {
tokenizer.unconsume(~"#");
tokenizer.unconsume(StrBuf::from_str("#"));
tokenizer.emit_error(~"EOF after '#' in character reference");
self.finish_none();
}
Expand Down
Loading