Skip to content

Commit

Permalink
Fix/#23 use transmute (#24)
Browse files Browse the repository at this point in the history
* Fixes #23
  • Loading branch information
justinwilaby committed Dec 17, 2019
1 parent 28fe67d commit e724a06
Show file tree
Hide file tree
Showing 6 changed files with 68,633 additions and 959 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
/.nyc_output
/node_modules
.idea
/target
Binary file modified lib/sax-wasm.wasm
Binary file not shown.
6 changes: 3 additions & 3 deletions src/js/__test__/largeXML.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,13 @@ describe('When parsing XML, the SaxWasm', () => {
it('should process large XML files', async () => {
await new Promise(resolve => {
const readable = createReadStream(pathResolve(__dirname + '/xml.xml'), options);
let t = Date.now();
let t = process.hrtime();
readable.on('data', (chunk) => {
parser.write(chunk);
});
readable.on('end', () => {
t = Date.now() - t;
console.log(t);
let [s, n] = process.hrtime(t);
process.stdout.write(`XML parsed in ${(s * 1000) + n / 1000 / 1000} ms\n`);
resolve()
});
});
Expand Down
69,427 changes: 68,546 additions & 881 deletions src/js/__test__/xml.xml

Large diffs are not rendered by default.

39 changes: 22 additions & 17 deletions src/sax/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use std::str;

use sax::names::*;
use sax::tag::*;
use std::mem::transmute;

static BOM: &'static [u8; 3] = &[0xef, 0xbb, 0xbf];

Expand Down Expand Up @@ -232,7 +233,7 @@ impl SAXParser {
if grapheme != "<" {
self.write_text(grapheme);
} else {
if self.text.value != "" {
if !self.text.value.is_empty() {
let len = self.tags.len();
// Store these only if we're interested in CloseTag events
if len != 0 && self.events & Event::CloseTag as u32 != 0 {
Expand All @@ -250,19 +251,21 @@ impl SAXParser {

fn sgml_decl(&mut self, grapheme: &str) {
self.sgml_decl.push_str(grapheme);
if self.sgml_decl == "[CDATA[" {
if &self.sgml_decl == "[CDATA[" {
self.state = State::Cdata;
self.cdata = String::new();
if self.events & Event::OpenCDATA as u32 != 0 {
let mut v = Vec::new();
read_u32_into(self.line, &mut v);
read_u32_into(self.character - 7, &mut v);
unsafe {
v.extend_from_slice(&transmute::<u32, [u8; 4]>(self.line));
v.extend_from_slice(&transmute::<u32, [u8; 4]>(self.character - 7));
}
(self.event_handler)(Event::OpenCDATA as u32, v.as_ptr(), v.len());
}
} else if self.sgml_decl == "--" {
} else if &self.sgml_decl == "--" {
self.state = State::Comment;
self.sgml_decl = String::new();
} else if self.sgml_decl == "DOCTYPE" {
} else if &self.sgml_decl == "DOCTYPE" {
self.state = State::Doctype;
if self.doctype.len() != 0 {
self.doctype = String::new();
Expand Down Expand Up @@ -392,8 +395,10 @@ impl SAXParser {
}
if self.events & Event::CloseCDATA as u32 != 0 {
let mut v = Vec::new();
read_u32_into(self.line, &mut v);
read_u32_into(self.character, &mut v);
unsafe {
v.extend_from_slice(&transmute::<u32, [u8; 4]>(self.line));
v.extend_from_slice(&transmute::<u32, [u8; 4]>(self.character));
}
(self.event_handler)(Event::CloseCDATA as u32, v.as_ptr(), v.len());
}
return;
Expand Down Expand Up @@ -560,7 +565,7 @@ impl SAXParser {
if grapheme == ">" {
// Weird </> tag
let len = self.tags.len();
if self.close_tag_name == "" && (len == 0 || self.tags[len - 1].name != "") {
if self.close_tag_name.is_empty() && (len == 0 || !self.tags[len - 1].name.is_empty()) {
self.process_open_tag(true);
}
self.process_close_tag();
Expand Down Expand Up @@ -614,16 +619,16 @@ impl SAXParser {

fn process_close_tag(&mut self) {
self.new_text();
let mut s = self.tags.len();
let mut tags_len = self.tags.len();
{
let mut close_tag_name = mem::replace(&mut self.close_tag_name, String::new());
let mut found = false;
if close_tag_name == "" && self.tag.self_closing {
if close_tag_name.is_empty() && self.tag.self_closing {
close_tag_name = self.tag.name.clone();
}
while s != 0 {
s -= 1;
let tag = &mut self.tags[s];
while tags_len != 0 {
tags_len -= 1;
let tag = &mut self.tags[tags_len];
if tag.name == close_tag_name {
tag.close_start = self.tag.open_start;
tag.close_end = (self.line, self.character);
Expand All @@ -642,17 +647,17 @@ impl SAXParser {

let mut len = self.tags.len();
if self.events & Event::CloseTag as u32 == 0 {
let idx = len - s;
let idx = len - tags_len;
if idx > 1 {
self.tags.truncate(idx);
return;
}

self.tag = self.tags.remove(s);
self.tag = self.tags.remove(tags_len);
return;
}

while len > s {
while len > tags_len {
len -= 1;
self.tag = self.tags.remove(len);
self.tag.close_end = (self.line, self.character);
Expand Down
119 changes: 61 additions & 58 deletions src/sax/tag.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::mem::transmute;

#[derive(Clone)]
pub struct Tag {
pub name: String,
Expand Down Expand Up @@ -30,42 +32,45 @@ impl Tag {
impl Encode<Vec<u8>> for Tag {
fn encode(&self) -> Vec<u8> {
let mut v = Vec::new();
// known byte length
read_u32_into(self.open_start.0 as u32, &mut v);
read_u32_into(self.open_start.1 as u32, &mut v);

read_u32_into(self.open_end.0 as u32, &mut v);
read_u32_into(self.open_end.1 as u32, &mut v);

read_u32_into(self.close_start.0 as u32, &mut v);
read_u32_into(self.close_start.1 as u32, &mut v);

read_u32_into(self.close_end.0 as u32, &mut v);
read_u32_into(self.close_end.1 as u32, &mut v);

v.push(self.self_closing.clone() as u8);

read_u32_into(self.name.len() as u32, &mut v);
v.extend_from_slice(self.name.as_bytes());

// unknown byte length
let attr_ptr = v.len();
read_u32_into(self.attributes.len() as u32, &mut v);
for a in &self.attributes {
let mut attr = a.encode();
read_u32_into(attr.len() as u32, &mut v);
v.append(&mut attr);
unsafe {

// known byte length
v.extend_from_slice(&transmute::<u32, [u8; 4]>(self.open_start.0));
v.extend_from_slice(&transmute::<u32, [u8; 4]>(self.open_start.1));

v.extend_from_slice(&transmute::<u32, [u8; 4]>(self.open_end.0));
v.extend_from_slice(&transmute::<u32, [u8; 4]>(self.open_end.1));

v.extend_from_slice(&transmute::<u32, [u8; 4]>(self.close_start.0));
v.extend_from_slice(&transmute::<u32, [u8; 4]>(self.close_start.1));

v.extend_from_slice(&transmute::<u32, [u8; 4]>(self.close_end.0));
v.extend_from_slice(&transmute::<u32, [u8; 4]>(self.close_end.1));

v.push(self.self_closing.clone() as u8);

v.extend_from_slice(&transmute::<u32, [u8; 4]>(self.name.len() as u32));
v.extend_from_slice(self.name.as_bytes());

// unknown byte length
let attr_ptr = v.len();
v.extend_from_slice(&transmute::<u32, [u8; 4]>(self.attributes.len() as u32));
for a in &self.attributes {
let mut attr = a.encode();
v.extend_from_slice(&transmute::<u32, [u8; 4]>(attr.len() as u32));
v.append(&mut attr);
}

let text_ptr = v.len();
v.extend_from_slice(&transmute::<u32, [u8; 4]>(self.text_nodes.len() as u32));
for t in &self.text_nodes {
let mut text = t.encode();
v.extend_from_slice(&transmute::<u32, [u8; 4]>(text.len() as u32));
v.append(&mut text);
}
v.extend_from_slice(&transmute::<u32, [u8; 4]>(attr_ptr as u32));
v.extend_from_slice(&transmute::<u32, [u8; 4]>(text_ptr as u32));
}

let text_ptr = v.len();
read_u32_into(self.text_nodes.len() as u32, &mut v);
for t in &self.text_nodes {
let mut text = t.encode();
read_u32_into(text.len() as u32, &mut v);
v.append(&mut text);
}
read_u32_into(attr_ptr as u32, &mut v);
read_u32_into(text_ptr as u32, &mut v);
v
}
}
Expand All @@ -90,13 +95,15 @@ impl Text {
impl Encode<Vec<u8>> for Text {
fn encode(&self) -> Vec<u8> {
let mut v = Vec::new();
read_u32_into(self.start.0, &mut v);
read_u32_into(self.start.1, &mut v);
unsafe {
v.extend_from_slice(&transmute::<u32, [u8; 4]>(self.start.0));
v.extend_from_slice(&transmute::<u32, [u8; 4]>(self.start.1));

read_u32_into(self.end.0, &mut v);
read_u32_into(self.end.1, &mut v);
v.extend_from_slice(&transmute::<u32, [u8; 4]>(self.end.0));
v.extend_from_slice(&transmute::<u32, [u8; 4]>(self.end.1));

read_u32_into(self.value.len() as u32, &mut v);
v.extend_from_slice(&transmute::<u32, [u8; 4]>(self.value.len() as u32));
}
v.extend_from_slice(self.value.as_bytes());
v
}
Expand Down Expand Up @@ -128,22 +135,25 @@ impl Attribute {
impl Encode<Vec<u8>> for Attribute {
fn encode(&self) -> Vec<u8> {
let mut v: Vec<u8> = Vec::new();
read_u32_into(self.name_start.0, &mut v);
read_u32_into(self.name_start.1, &mut v);
unsafe {
v.extend_from_slice(&transmute::<u32, [u8; 4]>(self.name_start.0));
v.extend_from_slice(&transmute::<u32, [u8; 4]>(self.name_start.1));

read_u32_into(self.name_end.0, &mut v);
read_u32_into(self.name_end.1, &mut v);
v.extend_from_slice(&transmute::<u32, [u8; 4]>(self.name_end.0));
v.extend_from_slice(&transmute::<u32, [u8; 4]>(self.name_end.1));

read_u32_into(self.value_start.0, &mut v);
read_u32_into(self.value_start.1, &mut v);
v.extend_from_slice(&transmute::<u32, [u8; 4]>(self.value_start.0));
v.extend_from_slice(&transmute::<u32, [u8; 4]>(self.value_start.1));

read_u32_into(self.value_end.0, &mut v);
read_u32_into(self.value_end.1, &mut v);
v.extend_from_slice(&transmute::<u32, [u8; 4]>(self.value_end.0));
v.extend_from_slice(&transmute::<u32, [u8; 4]>(self.value_end.1));

read_u32_into(self.name.len() as u32, &mut v);
v.extend_from_slice(self.name.as_bytes());
v.extend_from_slice(&transmute::<u32, [u8; 4]>(self.name.len() as u32));

read_u32_into(self.value.len() as u32, &mut v);
v.extend_from_slice(self.name.as_bytes());

v.extend_from_slice(&transmute::<u32, [u8; 4]>(self.value.len() as u32));
}
v.extend_from_slice(self.value.as_bytes());
v
}
Expand All @@ -153,10 +163,3 @@ pub trait Encode<T> {
fn encode(&self) -> T;
}

/// Appends `x` to `vec` as four bytes in little-endian order (least
/// significant byte first).
///
/// Despite the `read_` prefix, nothing is read from `vec`: existing contents
/// are left untouched and the vector grows by exactly four bytes. The byte
/// order is fixed and does not depend on the endianness of the target.
pub fn read_u32_into(x: u32, vec: &mut Vec<u8>) {
    // `to_le_bytes` yields the same bytes as the former mask-and-shift
    // sequence, and one `extend_from_slice` replaces four separate pushes.
    vec.extend_from_slice(&x.to_le_bytes());
}

0 comments on commit e724a06

Please sign in to comment.