Skip to content

Commit

Permalink
(daleharvey/pouchdb#1658) - add indexable string
Browse files Browse the repository at this point in the history
Following daleharvey/pouchdb#1658 and
pouchdb/mapreduce#12, this adds the
toIndexableString method, which allows us
to emulate CouchDB's standard collation
to a reasonable degree of fidelity (e.g. no
ICU ordering for strings).
  • Loading branch information
Tomasz Kołodziejski and Nolan Lawson authored and nolanlawson committed Mar 17, 2014
1 parent 32a5ce7 commit 720e6c0
Show file tree
Hide file tree
Showing 3 changed files with 314 additions and 15 deletions.
103 changes: 100 additions & 3 deletions lib/index.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
'use strict';

// Lowest decimal exponent numToIndexableString() has to handle; it is
// subtracted from every exponent so the encoded magnitude is never negative.
var MIN_MAGNITUDE = -324; // the exponent of Number.MIN_VALUE (5e-324)
// Width (in characters) the shifted magnitude is left-padded to with '0',
// so that magnitudes compare correctly as strings.
var MAGNITUDE_DIGITS = 3; // wide enough for every shifted exponent
// Separator inserted between the parts of an indexable string.
var SEP = '_'; // TODO: in production it should be empty

var utils = require('./utils');

exports.collate = function (a, b) {
a = exports.normalizeKey(a);
b = exports.normalizeKey(b);
Expand Down Expand Up @@ -43,6 +49,50 @@ exports.normalizeKey = function (key) {
return key;
};

// convert the given key to a string that would be appropriate
// for lexical sorting, e.g. within a database, where the
// sorting is the same given by the collate() function.
exports.toIndexableString = function (key) {
var zero = '\u0000';

key = exports.normalizeKey(key);

var result = collationIndex(key) + SEP;

if (key !== null) {
if (typeof key === 'boolean') {
result += (key ? 1 : 0);
} else if (typeof key === 'number') {
result += numToIndexableString(key) + zero;
} else if (typeof key === 'string') {
// We've to be sure that key does not contain \u0000
// Do order-preserving replacements:
// 0 -> 1, 1
// 1 -> 1, 2
// 2 -> 2, 2
key = key.replace(/\u0002/g, '\u0002\u0002');
key = key.replace(/\u0001/g, '\u0001\u0002');
key = key.replace(/\u0000/g, '\u0001\u0001');

result += key + zero;
} else if (Array.isArray(key)) {
key.forEach(function (element) {
result += exports.toIndexableString(element);
});
result += zero;
} else if (typeof key === 'object') {
var arr = [];
var keys = Object.keys(key);
keys.forEach(function (objKey) {
arr.push([objKey, key[objKey]]);
});
result += exports.toIndexableString(arr);
}
}

return result;
};

function arrayCollate(a, b) {
var len = Math.min(a.length, b.length);
for (var i = 0; i < len; i++) {
Expand Down Expand Up @@ -85,13 +135,60 @@ function objectCollate(a, b) {
// Returns the rank of a key's type in the collation order:
//   null -> 1, boolean -> 2, number -> 3, string -> 4, array -> 5, object -> 6
// Keys of any other type (e.g. undefined or a function) yield undefined.
// null/undefined/NaN/Infinity/-Infinity are all considered null
function collationIndex(x) {
  var id = ['boolean', 'number', 'string', 'object'];
  var idx = id.indexOf(typeof x);
  if (idx === -1) {
    return undefined;
  }
  // typeof null is 'object', so null has to be singled out first
  if (x === null) {
    return 1;
  }
  // arrays also report 'object'; they rank just below plain objects
  if (Array.isArray(x)) {
    return 5;
  }
  // boolean/number/string map to 2..4; plain objects skip 5 (arrays) -> 6
  return idx < 3 ? (idx + 2) : (idx + 3);
}

// Encodes a number as a string whose lexical order follows numeric order.
// Zero is encoded as '1'. Every other number becomes
//   <sign> SEP <magnitude> SEP <factor>
// where
//   sign      = '0' for negative numbers, '2' for positive ones
//   magnitude = decimal exponent (negated for negative numbers), shifted by
//               MIN_MAGNITUDE so it is >= 0 and left-padded with '0'
//   factor    = mantissa in [1, 10); for negative numbers 10 - mantissa,
//               so that larger absolute values sort first
function numToIndexableString(num) {
  if (num === 0) {
    return '1';
  }

  // exponential notation separates mantissa and exponent for us,
  // e.g. "-2.5e+0" -> ["-2.5", "0"] and "5e-324" -> ["5", "-324"]
  var parts = num.toExponential().split(/e\+?/);
  var exponent = parseInt(parts[1], 10);
  var isNegative = num < 0;

  // primary sort key: the magnitude, shifted so it is never negative
  var shifted = (isNegative ? -exponent : exponent) - MIN_MAGNITUDE;
  var magnitudePart = utils.padLeft((shifted).toString(), '0', MAGNITUDE_DIGITS);

  // secondary sort key: the mantissa, reversed for negative numbers
  var mantissa = Math.abs(parseFloat(parts[0])); // [1..10)
  if (isNegative) {
    mantissa = 10 - mantissa;
  }
  // fixed notation, minus trailing zeros (and a dangling decimal point)
  var mantissaPart = mantissa.toFixed(20).replace(/\.?0+$/, '');

  return [isNegative ? '0' : '2', magnitudePart, mantissaPart].join(SEP);
}
70 changes: 70 additions & 0 deletions lib/utils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
'use strict';

/*
 * Builds the padding needed to bring `str` up to `upToLength` characters.
 *
 * str        - the string that is going to be padded (only its length is used)
 * padWith    - the string appended repeatedly to form the padding
 * upToLength - the minimum length `str` plus padding should reach
 *
 * Returns the padding only, not the padded string. The result is empty when
 * `str` is already long enough. A `padWith` longer than one character may
 * overshoot `upToLength`, because it is always appended whole.
 */
function pad(str, padWith, upToLength) {
  var padding = '';
  var targetLength = upToLength - str.length;
  // an empty pad string can never grow the padding, so the loop below
  // would never terminate; there is nothing to pad with
  if (String(padWith) === '') {
    return padding;
  }
  while (padding.length < targetLength) {
    padding += padWith;
  }
  return padding;
}

exports.padLeft = function (str, padWith, upToLength) {
var padding = pad(str, padWith, upToLength);
return padding + str;
};

exports.padRight = function (str, padWith, upToLength) {
var padding = pad(str, padWith, upToLength);
return str + padding;
};

exports.stringLexCompare = function (a, b) {

var aLen = a.length;
var bLen = b.length;

var i;
for (i = 0; i < aLen; i++) {
if (i === bLen) {
// b is shorter substring of a
return 1;
}
var aChar = a.charAt(i);
var bChar = b.charAt(i);
if (aChar !== bChar) {
return aChar < bChar ? -1 : 1;
}
}

if (aLen < bLen) {
// a is shorter substring of b
return -1;
}

return 0;
};

/*
* returns the decimal form for the given integer, i.e. writes
* out all the digits (in base-10) instead of using scientific notation
*/
exports.intToDecimalForm = function (int) {

var isNeg = int < 0;
var result = '';

do {
var remainder = isNeg ? -Math.ceil(int % 10) : Math.floor(int % 10);

result = remainder + result;
int = isNeg ? Math.ceil(int / 10) : Math.floor(int / 10);
} while (int);


if (isNeg && result !== '0') {
result = '-' + result;
}

return result;
};
156 changes: 144 additions & 12 deletions test/test.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,25 @@ var should = require('chai').should();
var pouchCollate = require('../lib');
var collate = pouchCollate.collate;
var normalizeKey = pouchCollate.normalizeKey;
var utils = require('../lib/utils');

// Asserts that ordering `keys` by their indexable strings (compared with
// utils.stringLexCompare) gives the same sequence as ordering them with
// collate(). Note that `keys` itself is sorted in place.
var verifyLexicalKeysSort = function (keys) {
  function byIndexableString(left, right) {
    return utils.stringLexCompare(left.indexable, right.indexable);
  }

  // pair each key with its indexable string, then sort by that string
  var sortedLexically = keys.map(function (key) {
    return {key: key, indexable: pouchCollate.toIndexableString(key)};
  }).sort(byIndexableString);

  // reference order
  keys.sort(pouchCollate.collate);

  keys.forEach(function (expected, position) {
    var actual = sortedLexically[position].key;
    var message = 'expect ' + JSON.stringify(actual) +
      ' is ' + JSON.stringify(expected);

    should.equal(actual, expected, message);
  });
};


describe('collate', function () {
var a = {
Expand Down Expand Up @@ -84,29 +103,25 @@ describe('collate', function () {
collate(b.object, a.object).should.equal(1);
collate(c.object, b.object).should.equal(-1);
collate(b.object, c.object).should.equal(1);
collate(c.object, a.object).should.equal(-2);
collate(a.object, c.object).should.equal(2);
collate(c.object, a.object).should.be.below(0);
collate(a.object, c.object).should.be.above(0);
});
it('objects differing only in num of keys', function () {
collate({1: 1}, {1: 1, 2: 2}).should.equal(-1);
collate({1: 1, 2: 2}, {1: 1}).should.equal(1);
});
it('compare number to null', function () {
collate(a.number, null).should.equal(2);
collate(a.number, null).should.be.above(0);
});
it('compare number to function', function () {
collate(a.number, function () {
}).should.not.equal(collate(a.number, function () {
}));
}).should.not.equal(collate(a.number, function () {}));
collate(b.number, function () {
}).should.not.equal(collate(b.number, function () {
}));
}).should.not.equal(collate(b.number, function () {}));
collate(function () {
}, a.number).should.not.equal(collate(function () {
}, a.number));
}, a.number).should.not.equal(collate(function () {}, a.number));
collate(function () {
}, b.number).should.not.equal(collate(function () {
}, b.number));
}, b.number).should.not.equal(collate(function () {}, b.number));
});
});

Expand All @@ -133,11 +148,128 @@ describe('normalizeKey', function () {
var original = normalization[0];
var expected = normalization[1];
var normalized = normalizeKey(original);

var message = 'check normalization of ' + JSON.stringify(original) +
' to ' + JSON.stringify(expected) +
', got ' + JSON.stringify(normalized);
should.equal(normalized, expected, message);
});
});
});

describe('indexableString', function () {

// Exercises utils.intToDecimalForm() at the extremes of the number range
// and on a handful of ordinary integers.
it('verify intToDecimalForm', function () {
  // zero and values too small to have an integer part all render as '0'
  utils.intToDecimalForm(0).should.equal('0');
  utils.intToDecimalForm(Number.MIN_VALUE).should.equal('0');
  utils.intToDecimalForm(-Number.MIN_VALUE).should.equal('0');

  // the digits intToDecimalForm() is expected to produce for Number.MAX_VALUE
  var expectedMaxDigits = '1797693134862316800886484642206468426866682428440286464' +
    '42228680066046004606080400844208228060084840044686866242482868202680268' +
    '82040288406280040662242886466688240606642242682208668042640440204020242' +
    '48802248082808208888442866208026644060866608420408868240026826626668642' +
    '46642840408646468824200860804260804068888';

  utils.intToDecimalForm(Number.MAX_VALUE).should.equal(expectedMaxDigits);
  utils.intToDecimalForm(-Number.MAX_VALUE).should.equal('-' + expectedMaxDigits);

  // for ordinary integers the result must match Number#toString()
  var ordinary = [-3000, 3000, 322, 2308, -32, -1, 0, 1, 2, -2, -10, 10, -100, 100];

  for (var i = 0; i < ordinary.length; i++) {
    utils.intToDecimalForm(ordinary[i]).should.equal(ordinary[i].toString());
  }
});

// Feeds a mixed bag of keys (null, booleans, numbers, strings, arrays and
// objects) to verifyLexicalKeysSort(), which checks that sorting them by
// their indexable strings matches sorting them with collate(). The helper
// sorts both ways itself, so the order of this list is not what is asserted.
it('verify toIndexableString()', function () {
var keys = [
// null and booleans
null,
false,
true,
// numbers, from the most negative to the most positive
-Number.MAX_VALUE,
-300,
-200,
-100,
-10,
-2.5,
-2,
-1.5,
-1,
-0.5,
-0.0001,
-Number.MIN_VALUE,
0,
Number.MIN_VALUE,
0.0001,
0.1,
0.5,
1,
1.5,
2,
3,
10,
15,
100,
200,
300,
Number.MAX_VALUE,
// strings
// NOTE(review): the non-ASCII entries below are disabled, presumably
// because ICU string ordering is not emulated - confirm before enabling
'',
'1',
'10',
'100',
'2',
'20',
'[]',
//'é',
'foo',
'mo',
'moe',
//'moé',
//'moët et chandon',
'moz',
'mozilla',
'mozilla with a super long string see how far it can go',
'mozzy',
// arrays
[],
[ null ],
[ null, null ],
[ null, 'foo' ],
[ false ],
[ false, 100 ],
[ true ],
[ true, 100 ],
[ 0 ],
[ 0, null ],
[ 0, 1 ],
[ 0, '' ],
[ 0, 'foo' ],
[ '', '' ],
[ 'foo' ],
[ 'foo', 1 ],
// objects
{},
{ '0': null },
{ '0': false },
{ '0': true },
{ '0': 0 },
{ '0': 1 },
{ '0': 'bar' },
{ '0': 'foo' },
{ '0': 'foo', '1': false },
{ '0': 'foo', '1': true },
{ '0': 'foo', '1': 0 },
{ '0': 'foo', '1': '0' },
{ '0': 'foo', '1': 'bar' },
{ '0': 'quux' },
{ '1': 'foo' }
//{ '1': 'foo', '0' : 'foo' } // key order actually matters, but node sorts them
];
verifyLexicalKeysSort(keys);
});

// Edge case: a string key that itself contains '\u0000', the character
// toIndexableString() appends as a terminator. The two keys must still sort
// the same way by indexable string as they do with collate().
// (Titled separately from the general toIndexableString() test so that a
// failure report identifies which of the two broke.)
it('verify toIndexableString() with embedded null characters', function () {
  var keys = [
    ['test', 'test'],
    ['test\u0000']
  ];
  verifyLexicalKeysSort(keys);
});
});

0 comments on commit 720e6c0

Please sign in to comment.