Skip to content

Commit

Permalink
Get calcAlphabet and range operations done
Browse files Browse the repository at this point in the history
  • Loading branch information
jviereck committed Sep 9, 2013
1 parent cdb7f7f commit f11e0bb
Show file tree
Hide file tree
Showing 3 changed files with 141 additions and 35 deletions.
4 changes: 4 additions & 0 deletions index.js
@@ -1,6 +1,7 @@
// Whole-script strict mode syntax
"use strict";

var JIT = require('./lib/jit');
var getStartNodeFromPattern = require('./lib/exec').getStartNodeFromPattern;
var exec = require('./lib/exec').exec;
var canonicalize = require('./lib/utils').canonicalize;
Expand Down Expand Up @@ -264,6 +265,9 @@ Object.defineProperty(RegExpJS.prototype, 'lastIndex', {

// When a global `window` object exists, publish RegExpJS on it so the
// constructor is reachable without going through `exports`.
if (typeof window !== 'undefined') {
window.RegExpJS = RegExpJS;
// Also expose this module's `require` on `window`, but only when no
// truthy `window.require` is already present, so an existing loader is
// never overwritten.
if (!window.require) {
window.require = require;
}
}

exports.RegExpJS = RegExpJS;
120 changes: 109 additions & 11 deletions lib/jit.js
@@ -1,19 +1,69 @@
var nodeToCharCode = require('./parser').nodeToCharCode;

/**
 * Half-open range of character codes: it covers `min` up to, but not
 * including, `max`.
 *
 * @param {number} min First char code inside the range.
 * @param {number} max First char code after the range.
 * @throws {Error} If the range would be empty (`max - min <= 0`).
 */
function Range(min, max) {
  if (max - min <= 0) {
    throw new Error('Range min/max not possible: ' + min + ' ' + max);
  }
  this.min = min;
  this.max = max;
}

/**
 * Renders the range in character-class notation, e.g. `[a-c]`.
 *
 * @returns {string} `[<first char>-<last char>]`.
 */
Range.prototype.toString = function() {
  var minChar = String.fromCharCode(this.min);
  // `max` is exclusive, so the last contained character is `max - 1`.
  var maxChar = String.fromCharCode(this.max - 1);

  return '[' + minChar + '-' + maxChar + ']';
};

/**
 * Tests whether two ranges intersect.
 *
 * By default the test is inclusive: two ranges that merely touch (one ends
 * exactly where the other starts) count as intersecting as well.
 *
 * @param {Range} other Range to test against.
 * @param {boolean} [ignoreEdge] When truthy, touching ranges do NOT count;
 *     the ranges must share at least one char code.
 * @returns {boolean} Whether the ranges intersect.
 */
Range.prototype.hasIntersect = function(other, ignoreEdge) {
  if (ignoreEdge) {
    return this.min < other.max && this.max > other.min;
  } else {
    return this.min <= other.max && this.max >= other.min;
  }
};

/**
 * Computes the part shared by this range and `other`.
 *
 * @param {Range} other Range to intersect with.
 * @returns {Range[]} Empty array if nothing is shared, otherwise an array
 *     holding the single shared range.
 */
Range.prototype.intersect = function(other) {
  if (!this.hasIntersect(other, true)) {
    return [];
  }

  var min = Math.max(this.min, other.min);
  var max = Math.min(this.max, other.max);

  return [new Range(min, max)];
};

/**
 * Removes everything covered by `other` from this range.
 *
 * @param {Range} other Range to subtract.
 * @returns {Range[]} Zero, one or two new ranges describing what is left of
 *     this range. Neither operand is modified.
 */
Range.prototype.subtract = function(other) {
  if (!this.hasIntersect(other, true)) {
    // No real intersection -> nothing to subtract.
    return [new Range(this.min, this.max)];
  }

  // This range is totally inside the `other` range and is therefore
  // removed completely.
  if (this.min >= other.min && this.max <= other.max) {
    return [];
  }

  // The other range is inside this range and therefore splits this range
  // up (or trims it, when both share one edge).
  if (other.min >= this.min && other.max <= this.max) {
    if (this.min === other.min) {
      return [new Range(other.max, this.max)];
    } else if (this.max === other.max) {
      return [new Range(this.min, other.min)];
    } else {
      return [new Range(this.min, other.min), new Range(other.max, this.max)];
    }
  }

  // Partial overlap: keep whichever side sticks out of `other`.
  if (this.min < other.min) {
    return [new Range(this.min, other.min)];
  } else {
    return [new Range(other.max, this.max)];
  }
};

Expand Down Expand Up @@ -41,7 +91,7 @@ RangeList.prototype.simplify = function() {
var current = this.list[0];
for (var i = 1; i < this.list.length; i++) {
var range = this.list[i];
if (current.intersect(range)) {
if (current.hasIntersect(range)) {
// The list is sorted. Therefore current.min <= range.min.
if (range.max > current.max) {
current.max = range.max;
Expand All @@ -62,8 +112,8 @@ function collectCharRanges(parseTree) {
switch (entry.type) {
case 'character':
case 'escape':
var charCode = nodeToCharCode(node);
return new Range(charCode, charCode);
var charCode = nodeToCharCode(entry);
return new Range(charCode, charCode + 1);

case 'characterClassRange':
var min = nodeToCharCode(entry.min);
Expand All @@ -73,7 +123,7 @@ function collectCharRanges(parseTree) {
// TODO: Better error message.
throw new Error('Character not in right order');
}
return new Range(min, max);
return new Range(min, max + 1);

case 'empty':
return null;
Expand Down Expand Up @@ -101,7 +151,7 @@ function collectCharRanges(parseTree) {
case 'character':
case 'escape':
var charCode = nodeToCharCode(node);
ranges.push(new RangeList(false, new Range(charCode, charCode)));
ranges.push(new RangeList(false, new Range(charCode, charCode + 1)));
break;

case 'quantifier':
Expand All @@ -112,13 +162,13 @@ function collectCharRanges(parseTree) {
var rl = new RangeList(true);
// '\n\r\u2028\u2029'
[10, 13, 8232, 8233].forEach(function(charCode) {
rl.push(new Range(charCode, charCode));
rl.push(new Range(charCode, charCode + 1));
});
ranges.push(rl);
break;

case 'characterClass':
var rl = new RangeList(true);
var rl = new RangeList(node.negative);
node.classRanges.forEach(function(classEntry) {
var res = buildClassEntryRange(classEntry);
if (res) {
Expand Down Expand Up @@ -157,6 +207,52 @@ function collectCharRanges(parseTree) {
return ranges;
}

/**
 * Combines two lists of ranges into one list in which overlapping parts
 * have been split into separate pieces.
 *
 * Every range taken from `listB` is compared against the ranges collected
 * so far: each collected range is replaced by "itself minus the new range"
 * plus "the part shared with the new range", and the new range is reduced
 * to what the collected range did not cover. Whatever is left of the new
 * range at the end is appended.
 *
 * Both inputs are copied before any work is done. When `listA` is empty,
 * `listB` itself is returned.
 *
 * @param {Array} listA Ranges collected so far.
 * @param {Array} listB Ranges to merge in; items must provide `subtract`
 *     and `intersect` methods that return arrays.
 * @returns {Array} The combined list (not sorted).
 */
function splitLists(listA, listB) {
  if (listA.length === 0) {
    return listB;
  }

  var collected = listA.slice();
  // Work queue: right-hand halves of split ranges get appended to it.
  var queue = listB.slice();

  nextQueued:
  for (var q = 0; q < queue.length; q++) {
    var incoming = queue[q];
    var idx = collected.length;

    // Walk backwards so splicing extra pieces in at `idx` does not disturb
    // the entries that still have to be visited.
    while (idx-- > 0) {
      var existing = collected[idx];
      var existingOnly = existing.subtract(incoming);
      var shared = existing.intersect(incoming);
      var incomingRest = incoming.subtract(existing);

      Array.prototype.splice.apply(
        collected,
        [idx, 1].concat(existingOnly, shared)
      );

      if (incomingRest.length === 0) {
        // The incoming range is fully covered - nothing left to add.
        continue nextQueued;
      }
      if (incomingRest.length === 2) {
        // Split in two: handle the second half later.
        queue.push(incomingRest[1]);
      }
      incoming = incomingRest[0];
    }

    collected.push(incoming);
  }

  return collected;
}

/**
 * Folds the `list` of every entry in `ranges` into a single list by
 * passing them through `splitLists` one after another, starting from the
 * first entry's list.
 *
 * @param {Array} ranges Entries that each carry a `list` property.
 * @returns {Array} `ranges` itself when it is empty, the first entry's
 *     `list` when there is only one entry, otherwise the `splitLists`
 *     result accumulated over all entries.
 */
function calcAlphabet(ranges) {
  if (ranges.length === 0) {
    return ranges;
  }

  return ranges.slice(1).reduce(function(alphabet, entry) {
    return splitLists(alphabet, entry.list);
  }, ranges[0].list);
}

function isJITAble(parseTree, ignoreCase) {
function walk(node) {
switch (node.type) {
Expand Down Expand Up @@ -217,3 +313,5 @@ function isJITAble(parseTree, ignoreCase) {
exports.Range = Range;
exports.RangeList = RangeList;
exports.isJITAble = isJITAble;
exports.collectCharRanges = collectCharRanges;
exports.calcAlphabet = calcAlphabet;
52 changes: 28 additions & 24 deletions test.js
Expand Up @@ -196,37 +196,41 @@ assert(__re.constructor === RegExp, 'Constructor is BuildInRegExp');
var Range = require('./lib/jit').Range;
var RangeList = require('./lib/jit').RangeList;

/**
 * Asserts that `Range.prototype.hasIntersect` yields `shouldIntersect` for
 * the ranges built from (a, b) and (c, d). The check is done in both call
 * directions.
 *
 * @param {number} a `min` of the first range.
 * @param {number} b `max` of the first range.
 * @param {number} c `min` of the second range.
 * @param {number} d `max` of the second range.
 * @param {boolean} shouldIntersect Expected result of `hasIntersect`.
 * @param {boolean} [ignoreEdge] Forwarded to `hasIntersect`.
 */
function assertIntersect(a, b, c, d, shouldIntersect, ignoreEdge) {
  var r = new Range(a, b);
  var p = new Range(c, d);
  assert(r.hasIntersect(p, ignoreEdge) === shouldIntersect, 'part 1');

  // Same check with the operands swapped.
  r = new Range(c, d);
  p = new Range(a, b);
  assert(r.hasIntersect(p, ignoreEdge) === shouldIntersect, 'part 2');
}

// Default (inclusive) check: ranges that merely touch count as
// intersecting. `Range` treats `max` as exclusive and rejects empty ranges,
// so every range below has max > min.
assertIntersect(0, 5, 5, 8, true);
assertIntersect(0, 4, 5, 8, false);
assertIntersect(0, 6, 5, 8, true);
assertIntersect(5, 6, 5, 8, true);
assertIntersect(6, 7, 5, 8, true);
assertIntersect(6, 9, 5, 8, true);
assertIntersect(6, 10, 5, 8, true);
assertIntersect(8, 10, 5, 8, true);
assertIntersect(9, 10, 5, 8, false);

// With `ignoreEdge`, touching ranges no longer count as intersecting.
assertIntersect(0, 3, 3, 8, false, true);
assertIntersect(0, 4, 3, 8, true, true);
assertIntersect(0, 10, 3, 8, true, true);

r = new RangeList(false);
r.push(new Range(6, 8));
r.push(new Range(0, 4));
r.push(new Range(0, 5));
r.simplify();
assert(r.length === 2);
assert(r.list[0].min === 0);
assert(r.list[1].min === 6);

r = new RangeList(false);
r.push(new Range(0, 4));
r.push(new Range(0, 5));
r.push(new Range(6, 8));
r.simplify();
assert(r.length === 2);
Expand All @@ -237,34 +241,34 @@ r.push(new Range(5, 8));
r.simplify();
assert(r.length === 1); // Got merged
assert(r.list[0].min === 0);
assert(r.list[0].max === 9);
assert(r.list[0].max === 8);

r = new RangeList(false);
r.push(new Range(0, 4));
r.push(new Range(0, 5));
r.push(new Range(5, 8));
r.push(new Range(1, 8));
r.simplify();
assert(r.length === 1); // Got merged
assert(r.list[0].min === 0);
assert(r.list[0].max === 9);
assert(r.list[0].max === 8);

r = new RangeList(false);
r.push(new Range(0, 4));
r.push(new Range(5, 8));
r.push(new Range(0, 5));
r.push(new Range(5, 9));
r.push(new Range(9, 9));
r.simplify();
assert(r.length === 1); // Got merged
assert(r.list[0].min === 0);
assert(r.list[0].max === 10);
assert(r.list[0].max === 9);

r = new RangeList(false);
r.push(new Range(9, 9));
r.push(new Range(0, 4));
r.push(new Range(9, 10));
r.push(new Range(0, 5));
r.push(new Range(5, 7));
r.simplify();
assert(r.length === 2); // Got merged
assert(r.list[0].min === 0);
assert(r.list[0].max === 8);
assert(r.list[0].max === 7);
assert(r.list[1].min === 9);
assert(r.list[1].max === 10);

Expand Down

0 comments on commit f11e0bb

Please sign in to comment.