Skip to content

Commit

Permalink
test: verify hash seed uniqueness
Browse files Browse the repository at this point in the history
This tests that the hash seed used by V8 for hashing is random.

PR-URL: nodejs-private/node-private#84
Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
Reviewed-By: Michael Dawson <michael_dawson@ca.ibm.com>
Reviewed-By: Fedor Indutny <fedor.indutny@gmail.com>
  • Loading branch information
ofrobots authored and MylesBorins committed Jul 10, 2017
1 parent 9d51bdc commit 58a8f15
Show file tree
Hide file tree
Showing 3 changed files with 201 additions and 2 deletions.
8 changes: 6 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,9 @@ test-internet: all
test-debugger: all
$(PYTHON) tools/test.py debugger

test-hash-seed: all
$(NODE) test/pummel/test-hash-seed.js

test-known-issues: all
$(PYTHON) tools/test.py known_issues --expect-fail

Expand All @@ -256,7 +259,7 @@ test-timers:
test-timers-clean:
$(MAKE) --directory=tools clean

test-v8: v8
test-v8: v8 test-hash-seed
# note: performs full test unless QUICKCHECK is specified
deps/v8/tools/run-tests.py --arch=$(V8_ARCH) \
--mode=$(BUILDTYPE_LOWER) $(V8_TEST_NO_I18N) $(QUICKCHECK_ARG) \
Expand Down Expand Up @@ -734,4 +737,5 @@ lint-ci: lint
bench-all bench bench-misc bench-array bench-buffer bench-net \
bench-http bench-fs bench-tls cctest run-ci lint-ci bench-ci \
test-v8 test-v8-intl test-v8-benchmarks test-v8-all v8 \
$(TARBALL)-headers test-ci test-ci-native test-ci-js build-ci doc-only
$(TARBALL)-headers test-ci test-ci-native test-ci-js build-ci doc-only \
test-hash-seed
175 changes: 175 additions & 0 deletions test/fixtures/guess-hash-seed.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
/* eslint-disable required-modules */
'use strict';
function min(arr) {
let res = arr[0];
for (let i = 1; i < arr.length; i++) {
const val = arr[i];
if (val < res)
res = val;
}
return res;
}
function run_repeated(n, fn) {
const res = [];
for (let i = 0; i < n; i++) res.push(fn());
return res;
}

const INT_MAX = 0x7fffffff;

// from src/js/collection.js
// key must be a signed 32-bit number!
function ComputeIntegerHash(key/*, seed*/) {
let hash = key;
hash = hash ^ 0/*seed*/;
hash = ~hash + (hash << 15); // hash = (hash << 15) - hash - 1;
hash = hash ^ (hash >>> 12);
hash = hash + (hash << 2);
hash = hash ^ (hash >>> 4);
hash = (hash * 2057) | 0; // hash = (hash + (hash << 3)) + (hash << 11);
hash = hash ^ (hash >>> 16);
return hash & 0x3fffffff;
}

const kNofHashBitFields = 2;
const kHashShift = kNofHashBitFields;
const kHashBitMask = 0xffffffff >>> kHashShift;
const kZeroHash = 27;

function string_to_array(str) {
const res = new Array(str.length);
for (let i = 0; i < str.length; i++) {
res[i] = str.charCodeAt(i);
}
return res;
}

function gen_specialized_hasher(str) {
const str_arr = string_to_array(str);
return Function('seed', `
var running_hash = seed;
${str_arr.map((c) => `
running_hash += ${c};
running_hash &= 0xffffffff;
running_hash += (running_hash << 10);
running_hash &= 0xffffffff;
running_hash ^= (running_hash >>> 6);
running_hash &= 0xffffffff;
`).join('')}
running_hash += (running_hash << 3);
running_hash &= 0xffffffff;
running_hash ^= (running_hash >>> 11);
running_hash &= 0xffffffff;
running_hash += (running_hash << 15);
running_hash &= 0xffffffff;
if ((running_hash & ${kHashBitMask}) == 0) {
return ${kZeroHash};
}
return running_hash;
`);
}

// adapted from HashToEntry
function hash_to_bucket(hash, numBuckets) {
return (hash & ((numBuckets) - 1));
}

function time_set_lookup(set, value) {
const t1 = process.hrtime();
for (let i = 0; i < 100; i++) {
// annoyingly, SetHas() is JS code and therefore potentially optimizable.
// However, SetHas() looks up the table using native code, and it seems like
// that's sufficient to prevent the optimizer from doing anything?
set.has(value);
}
const t = process.hrtime(t1);
const secs = t[0];
const nanos = t[1];
return secs * 1e9 + nanos;
}

// Set with 256 buckets; bucket 0 full, others empty
const tester_set_buckets = 256;
const tester_set = new Set();
let tester_set_treshold;
(function() {
// fill bucket 0 and find extra numbers mapping to bucket 0 and a different
// bucket `capacity == numBuckets * 2`
let needed = Math.floor(tester_set_buckets * 1.5) + 1;
let positive_test_value;
let negative_test_value;
for (let i = 0; true; i++) {
if (i > INT_MAX) throw new Error('i too high');
if (hash_to_bucket(ComputeIntegerHash(i), tester_set_buckets) !== 0) {
negative_test_value = i;
break;
}
}
for (let i = 0; needed > 0; i++) {
if (i > INT_MAX) throw new Error('i too high');
if (hash_to_bucket(ComputeIntegerHash(i), tester_set_buckets) === 0) {
needed--;
if (needed == 0) {
positive_test_value = i;
} else {
tester_set.add(i);
}
}
}

// calibrate Set access times for accessing the full bucket / an empty bucket
const pos_time =
min(run_repeated(10000, time_set_lookup.bind(null, tester_set,
positive_test_value)));
const neg_time =
min(run_repeated(10000, time_set_lookup.bind(null, tester_set,
negative_test_value)));
tester_set_treshold = (pos_time + neg_time) / 2;
// console.log(`pos_time: ${pos_time}, neg_time: ${neg_time},`,
// `threshold: ${tester_set_treshold}`);
})();

// determine hash seed
const slow_str_gen = (function*() {
let strgen_i = 0;
outer:
while (1) {
const str = '#' + (strgen_i++);
for (let i = 0; i < 1000; i++) {
if (time_set_lookup(tester_set, str) < tester_set_treshold)
continue outer;
}
yield str;
}
})();

const first_slow_str = slow_str_gen.next().value;
// console.log('first slow string:', first_slow_str);
const first_slow_str_special_hasher = gen_specialized_hasher(first_slow_str);
let seed_candidates = [];
//var t_before_first_seed_brute = performance.now();
for (let seed_candidate = 0; seed_candidate < 0x100000000; seed_candidate++) {
if (hash_to_bucket(first_slow_str_special_hasher(seed_candidate),
tester_set_buckets) == 0) {
seed_candidates.push(seed_candidate);
}
}
// console.log(`got ${seed_candidates.length} candidates`);
// after ${performance.now()-t_before_first_seed_brute}
while (seed_candidates.length > 1) {
const slow_str = slow_str_gen.next().value;
const special_hasher = gen_specialized_hasher(slow_str);
const new_seed_candidates = [];
for (const seed_candidate of seed_candidates) {
if (hash_to_bucket(special_hasher(seed_candidate), tester_set_buckets) ==
0) {
new_seed_candidates.push(seed_candidate);
}
}
seed_candidates = new_seed_candidates;
// console.log(`reduced to ${seed_candidates.length} candidates`);
}
if (seed_candidates.length != 1)
throw new Error('no candidates remaining');
const seed = seed_candidates[0];
console.log(seed);
20 changes: 20 additions & 0 deletions test/pummel/test-hash-seed.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
'use strict';

const REPETITIONS = 2;

const assert = require('assert');
const common = require('../common');
const cp = require('child_process');
const path = require('path');
const targetScript = path.resolve(common.fixturesDir, 'guess-hash-seed.js');
const seeds = [];

for (let i = 0; i < REPETITIONS; ++i) {
const seed = cp.spawnSync(process.execPath, [targetScript],
{ encoding: 'utf8' }).stdout.trim();
seeds.push(seed);
}

console.log(`Seeds: ${seeds}`);
const hasDuplicates = (new Set(seeds)).size !== seeds.length;
assert.strictEqual(hasDuplicates, false);

0 comments on commit 58a8f15

Please sign in to comment.