Skip to content

Commit

Permalink
Use PEG for parsing; better support for literal strings in patterns
Browse files Browse the repository at this point in the history
  • Loading branch information
squaremo committed May 15, 2012
1 parent 5f5a2ff commit 51ce1d8
Show file tree
Hide file tree
Showing 7 changed files with 117 additions and 57 deletions.
1 change: 1 addition & 0 deletions lib/compile.js
Expand Up @@ -84,6 +84,7 @@ function match_seg(segment) {
break;
case 'string':
assign_result = get_string(segment);
break;
}
var handle_result = "if (result === false) { return false; }\n";
if (segment.name) {
Expand Down
67 changes: 67 additions & 0 deletions lib/grammar.pegjs
@@ -0,0 +1,67 @@

/* Grammar for pattern strings: a comma-separated list of segments, where a
   segment is a quoted string literal, a named variable, or a numeric value,
   the latter two optionally followed by `:size` and `/specifier-list`. */

/* Entry point. Yields an array of raw segment objects. Whitespace is
   accepted before the first segment, around commas, and after the final
   segment. */
start
  = ws head:segment tail:segmentTail* ws { tail.unshift(head); return tail; }

/* A comma followed by one more segment. */
segmentTail
  = ws ',' ws seg:segment { return seg; }

/* One segment. The three alternatives produce objects distinguished by the
   key they carry: `string`, `name` or `value`. */
segment
  = str:string { return {string: str}; }
  / v:identifier size:size ? specs:specifierList ?
    { return {name: v, size: size, specifiers: specs}; }
  / v:number size:size ? specs:specifierList ?
    { return {value: v, size: size, specifiers: specs}; }

/* A double-quoted string literal; yields the unescaped contents. */
string
  = '"' '"' { return ""; }
  / '"' chars:chars '"' { return chars; }

/* From JSON example
https://github.com/dmajda/pegjs/blob/master/examples/json.pegjs */

chars
  = chars:char+ { return chars.join(""); }

/* A single character of a string literal: either an ordinary character or
   one of the backslash escapes below. */
char
  = [^"\\\0-\x1F\x7f]
  / '\\"' { return '"'; }
  / "\\\\" { return "\\"; }
  / "\\/" { return "/"; }
  / "\\b" { return "\b"; }
  / "\\f" { return "\f"; }
  / "\\n" { return "\n"; }
  / "\\r" { return "\r"; }
  / "\\t" { return "\t"; }
  / "\\u" h1:hexDigit h2:hexDigit h3:hexDigit h4:hexDigit {
      return String.fromCharCode(parseInt("0x" + h1 + h2 + h3 + h4));
    }

hexDigit
  = [0-9a-fA-F]

/* A variable name: a letter or underscore, then letters, digits or
   underscores. */
identifier
  = (head:[_a-zA-Z] tail:[_a-zA-Z0-9]*) { return head + tail.join(''); }

/* A non-negative decimal integer with no leading zeros; yields a number. */
number
  = '0' { return 0; }
  / head:[1-9] tail:[0-9]* { return parseInt(head + tail.join('')); }

/* A size suffix: either a literal number or the name of a variable. */
size
  = ':' num:number { return num; }
  / ':' id:identifier { return id; }

/* One or more specifiers introduced by '/' and separated by '-'; yields an
   array of specifier strings. */
specifierList
  = '/' head:specifier tail:specifierTail* { tail.unshift(head); return tail; }

specifierTail
  = '-' spec:specifier { return spec; }

specifier
  = 'little' / 'big' / 'signed' / 'unsigned'
  / 'integer' / 'binary' / 'float'
  / unit

/* A unit specifier, yielded in its textual form, e.g. 'unit:8'. */
unit
  = 'unit:' num:number { return 'unit:' + num; }

/* Optional whitespace; includes carriage return so CRLF input parses. */
ws = [ \t\n\r]*
6 changes: 3 additions & 3 deletions lib/interp.js
Expand Up @@ -260,11 +260,11 @@ function match(pattern, binary, boundvars) {
if (segment.name === '_') {
result = skip_bits(segment);
}
else if (segment.type === 'string') {
result = get_string(segment);
}
else {
switch (segment.type) {
case 'string':
result = get_string(segment);
break;
case 'integer':
result = get_integer(segment);
break;
Expand Down
73 changes: 23 additions & 50 deletions lib/parse.js
@@ -1,59 +1,32 @@
// Parse patterns in string form into the form we use for interpreting
// (and later, for compiling).

var ast = require('./pattern');
var peg = require('pegjs'),
ast = require('./pattern'),
path = require('path');

// Compose the given functions left to right: compose(f, g)(x) is g(f(x)).
// With no functions supplied, the returned function hands back its
// argument unchanged.
function compose() {
  var funcs = [].slice.call(arguments);
  return function(elem) {
    var result = elem;
    // Walk by numeric index rather than for...in: for...in would also visit
    // any enumerable property added to Array.prototype and try to call it.
    for (var i = 0, len = funcs.length; i < len; i++) {
      result = funcs[i](result);
    }
    return result;
  };
}

// Return a new array holding func(element) for each element of array0;
// array0 itself is left untouched. func is called with the element only.
function map(array0, func) {
  // Array.prototype.map visits index positions only, so enumerable
  // properties inherited from Array.prototype are neither passed to func
  // nor written onto the result (which a for...in loop would do).
  return array0.map(function(item) {
    return func(item);
  });
}
// Read the grammar that sits next to this module and build the parser from
// it once, when the module is loaded.
var grammarFile = path.join(path.dirname(module.filename), 'grammar.pegjs');
var grammar = require('fs').readFileSync(grammarFile).toString();
var parser = peg.buildParser(grammar);

// Parse a pattern given as a string into a list of segments.
//
// NOTE(review): as written, the `return map(...)` statement exits the
// function, so the parser.parse(...) code that follows it never runs. This
// looks like two versions of the body interleaved -- confirm which one is
// intended before relying on either.
function parse_pattern(string) {
// Split on commas, strip all whitespace from each piece, then hand each
// piece to parse_segment.
return map(
string.split(','),
compose(
function(s) { return s.replace(/\s/g, ''); },
parse_segment));
// Unreachable as written (see note above): run the generated parser, then
// replace each raw segment object, in place, with the matching ast node.
var segments = parser.parse(string);
for (var i=0, len = segments.length; i < len; i++) {
var s = segments[i];
// Loose `!= undefined` rather than a truthiness test, so that an empty
// string literal or the value 0 still selects its branch.
if (s.string != undefined) {
segments[i] = ast.string(s.string);
}
else if (s.value != undefined) {
segments[i] = ast.value(s.value, s.size, s.specifiers);
}
else if (s.name != undefined) {
segments[i] = ast.variable(s.name, s.size, s.specifiers);
}
else {
throw "Unknown segment " + s;
}
}
return segments;
}

module.exports.parse = parse_pattern;

// From
// http://stackoverflow.com/questions/18082/validate-numbers-in-javascript-isnumeric
// True when n both starts with something parseFloat can read and, taken as
// a whole, converts to a finite number; false otherwise.
function isNumber(n) {
  var parsed = parseFloat(n);
  if (isNaN(parsed)) {
    return false;
  }
  return isFinite(n);
}

// name-or-value, optional ":size", optional "/specifier-specifier..."
var PARTS = /^([_a-zA-Z0-9\.]*)(?:\:([a-zA-Z_0-9]+))?(?:\/([a-z0-9:-]*))?$/;
// A double-quoted string literal; group 1 is the text between the quotes.
var STRING = /^"(([^"]|(\"))*)"$/;

// Parse one segment of a pattern into an ast node.
//
// A quoted segment becomes ast.string(contents). Anything else is split
// into a name or value, an optional size and a list of specifiers, and
// becomes ast.value(...) when the first part is numeric, ast.variable(...)
// otherwise. A numeric size is converted to an integer; any other size is
// passed through as a string.
//
// Throws an Error naming the segment when it matches neither form.
function parse_segment(string) {
  var parts = STRING.exec(string);
  if (parts) {
    return ast.string(parts[1]);
  }
  parts = PARTS.exec(string);
  if (!parts) {
    // Without this check the next line would fail with an opaque
    // "cannot read property of null" TypeError.
    throw new Error('Unparseable segment: ' + string);
  }
  var nameOrValue = parts[1];
  var size = parts[2];
  // With no specifier part this yields [''], not an empty array.
  var specifiers = (parts[3] || '').split('-');
  if (size !== undefined && isNumber(size)) {
    size = parseInt(size);
  }
  return ((isNumber(nameOrValue)) ?
          ast.value :
          ast.variable) (nameOrValue, size, specifiers);
}
2 changes: 1 addition & 1 deletion lib/pattern.js
Expand Up @@ -97,7 +97,7 @@ function unit_in(specifiers, type) {
}

function size_of(segment, type, size, unit) {
if (size !== undefined) {
if (size !== undefined && size !== '') {
return size;
}
else {
Expand Down
4 changes: 3 additions & 1 deletion package.json
Expand Up @@ -17,6 +17,8 @@
"engines": {
"node": ">0.4"
},
"dependencies": {},
"dependencies": {
"pegjs": "~0.7"
},
"devDependencies": {}
}
21 changes: 19 additions & 2 deletions test/matching.js
Expand Up @@ -31,7 +31,7 @@ var INT_TESTS = [
[[[245, 23, 97, 102], 1717639157]]],
['n:32/signed-little',
[[[245, 23, 97, 129], -2124343307]]],

['n:4/signed-little-unit:8',
[[[245, 23, 97, 129], -2124343307]]]
];
Expand Down Expand Up @@ -72,7 +72,7 @@ FLOAT_TESTS = [
['n:64/float-little',
[[[24, 45, 68, 84, 251, 33, 9, 64], Math.PI],
[[0, 0, 0, 0, 0, 0, 0, 0], 0.0]]],

['n:4/float-unit:8',
[[[64,73,15,219], Math.PI],
[[0, 0, 0, 0], 0.0]]]
Expand Down Expand Up @@ -129,3 +129,20 @@ suite("Binary",
});
});
});

// Patterns containing literal strings. Each entry is
// [pattern, input text, expected value bound to n].
// Declared with var so the table is not an implicit global.
var STRING_TESTS = [
  ['"foobar", n:8', "foobarA", 'A'.charCodeAt(0)],
  ['n:8, "foobar", _/binary', "CfoobarGARBAGE", 'C'.charCodeAt(0)],
  ['"foo bar\\"", n:8, "another"', 'foo bar"Zanother', 'Z'.charCodeAt(0)]
];

suite("String",
      function() {
        STRING_TESTS.forEach(function(p) {
          // Parse once per entry, outside the test body.
          var pattern = parse(p[0]);
          test(p[0], function() {
            var res = match(pattern, new Buffer(p[1]));
            assert.equal(res.n, p[2]);
          });
        });
      });

0 comments on commit 51ce1d8

Please sign in to comment.