test: make crypto.timingSafeEqual test less flaky
The `crypto.timingSafeEqual` test still seems to be a bit flaky. This
makes a few changes to the test:

* Separates the basic usage and the benchmarking into different tests

* Moves the timing-sensitive benchmark function into a separate module,
and reparses the module on every iteration of the loop to avoid shared
state between timing measurements (see the sketch below, just before the
diffs).

PR-URL: #8456
Reviewed-By: James M Snell <jasnell@gmail.com>
not-an-aardvark authored and Trott committed Sep 12, 2016
1 parent 5282f0b commit c678ecb
Showing 3 changed files with 140 additions and 132 deletions.
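
The re-require trick described in the second bullet of the commit message amounts to: load the fixture, time one comparison, then evict the fixture from require.cache so the next iteration parses and compiles a fresh copy, with no shared compiled or optimized state. A minimal sketch of the pattern (timeOnce and FIXTURE_PATH are illustrative names, not the test's own; the real implementation is runOneBenchmark in the diff below):

'use strict';
// Illustrative relative path; the real test resolves the fixture via
// common.fixturesDir.
const FIXTURE_PATH = './crypto-timing-safe-equal-benchmark-func';

function timeOnce(...args) {
  // Load a freshly parsed copy of the benchmark module.
  const benchmarkFunc = require(FIXTURE_PATH);
  const nanoseconds = benchmarkFunc(...args);

  // Evict it so no cached (and possibly optimized) state carries over into
  // the next timing measurement.
  delete require.cache[require.resolve(FIXTURE_PATH)];
  return nanoseconds;
}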
17 changes: 17 additions & 0 deletions test/fixtures/crypto-timing-safe-equal-benchmark-func.js
@@ -0,0 +1,17 @@
'use strict';

const assert = require('assert');
module.exports = (compareFunc, firstBufFill, secondBufFill, bufSize) => {
  const firstBuffer = Buffer.alloc(bufSize, firstBufFill);
  const secondBuffer = Buffer.alloc(bufSize, secondBufFill);

  const startTime = process.hrtime();
  const result = compareFunc(firstBuffer, secondBuffer);
  const endTime = process.hrtime(startTime);

  // Ensure that the result of the function call gets used, so it doesn't
  // get discarded due to engine optimizations.
  assert.strictEqual(result, firstBufFill === secondBufFill);

  return endTime[0] * 1e9 + endTime[1];
};
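
The fixture's return value leans on the process.hrtime() API: with no argument it returns a [seconds, nanoseconds] tuple, and when passed a previous tuple it returns the elapsed time in the same form, so `endTime[0] * 1e9 + endTime[1]` collapses the measurement into a single nanosecond count. A quick standalone illustration (the 10 ms delay is arbitrary):

const start = process.hrtime();
setTimeout(() => {
  const [seconds, nanoseconds] = process.hrtime(start); // elapsed since `start`
  const elapsedNs = seconds * 1e9 + nanoseconds;        // single nanosecond value
  console.log(`slept for roughly ${(elapsedNs / 1e6).toFixed(2)} ms`);
}, 10);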
123 changes: 123 additions & 0 deletions test/sequential/test-crypto-timing-safe-equal-benchmarks.js
@@ -0,0 +1,123 @@
'use strict';
const common = require('../common');
const assert = require('assert');

if (!common.hasCrypto) {
  common.skip('missing crypto');
  return;
}

if (!common.enoughTestMem) {
  common.skip('memory-intensive test');
  return;
}

const crypto = require('crypto');

const BENCHMARK_FUNC_PATH =
  `${common.fixturesDir}/crypto-timing-safe-equal-benchmark-func`;

function runOneBenchmark(...args) {
  const benchmarkFunc = require(BENCHMARK_FUNC_PATH);
  const result = benchmarkFunc(...args);

  // Don't let the comparison function get cached. This avoids a timing
  // inconsistency due to V8 optimization where the function would take
  // less time when called with a specific set of parameters.
  delete require.cache[require.resolve(BENCHMARK_FUNC_PATH)];
  return result;
}

function getTValue(compareFunc) {
  const numTrials = 10000;
  const bufSize = 10000;
  // Perform benchmarks to verify that timingSafeEqual is actually timing-safe.

  const rawEqualBenches = Array(numTrials);
  const rawUnequalBenches = Array(numTrials);

  for (let i = 0; i < numTrials; i++) {
    if (Math.random() < 0.5) {
      // First benchmark: comparing two equal buffers
      rawEqualBenches[i] = runOneBenchmark(compareFunc, 'A', 'A', bufSize);
      // Second benchmark: comparing two unequal buffers
      rawUnequalBenches[i] = runOneBenchmark(compareFunc, 'B', 'C', bufSize);
    } else {
      // Flip the order of the benchmarks half of the time.
      rawUnequalBenches[i] = runOneBenchmark(compareFunc, 'B', 'C', bufSize);
      rawEqualBenches[i] = runOneBenchmark(compareFunc, 'A', 'A', bufSize);
    }
  }

  const equalBenches = filterOutliers(rawEqualBenches);
  const unequalBenches = filterOutliers(rawUnequalBenches);

  // Use a two-sample t-test to determine whether the timing difference between
  // the benchmarks is statistically significant.
  // https://wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test

  const equalMean = mean(equalBenches);
  const unequalMean = mean(unequalBenches);

  const equalLen = equalBenches.length;
  const unequalLen = unequalBenches.length;

  const combinedStd = combinedStandardDeviation(equalBenches, unequalBenches);
  const standardErr = combinedStd * Math.sqrt(1 / equalLen + 1 / unequalLen);

  return (equalMean - unequalMean) / standardErr;
}

// Returns the mean of an array
function mean(array) {
  return array.reduce((sum, val) => sum + val, 0) / array.length;
}

// Returns the sample standard deviation of an array
function standardDeviation(array) {
  const arrMean = mean(array);
  const total = array.reduce((sum, val) => sum + Math.pow(val - arrMean, 2), 0);
  return Math.sqrt(total / (array.length - 1));
}

// Returns the common standard deviation of two arrays
function combinedStandardDeviation(array1, array2) {
  const sum1 = Math.pow(standardDeviation(array1), 2) * (array1.length - 1);
  const sum2 = Math.pow(standardDeviation(array2), 2) * (array2.length - 1);
  return Math.sqrt((sum1 + sum2) / (array1.length + array2.length - 2));
}

// Filter large outliers from an array. A 'large outlier' is a value that is at
// least 50 times larger than the mean. This prevents the tests from failing
// due to the standard deviation increase when a function unexpectedly takes
// a very long time to execute.
function filterOutliers(array) {
  const arrMean = mean(array);
  return array.filter((value) => value / arrMean < 50);
}

// t_(0.99995, ∞)
// i.e. If a given comparison function is indeed timing-safe, the t-test result
// has a 99.99% chance of being below this threshold. Unfortunately, this means
// that this test will be a bit flaky and will fail 0.01% of the time even if
// crypto.timingSafeEqual is working properly.
// t-table ref: http://www.sjsu.edu/faculty/gerstman/StatPrimer/t-table.pdf
// Note that in reality there are roughly `2 * numTrials - 2` degrees of
// freedom, not ∞. However, assuming `numTrials` is large, this doesn't
// significantly affect the threshold.
const T_THRESHOLD = 3.892;

const t = getTValue(crypto.timingSafeEqual);
assert(
  Math.abs(t) < T_THRESHOLD,
  `timingSafeEqual should not leak information from its execution time (t=${t})`
);

// As a sanity check to make sure the statistical tests are working, run the
// same benchmarks again, this time with an unsafe comparison function. In this
// case the t-value should be above the threshold.
const unsafeCompare = (bufA, bufB) => bufA.equals(bufB);
const t2 = getTValue(unsafeCompare);
assert(
  Math.abs(t2) > T_THRESHOLD,
  `Buffer#equals should leak information from its execution time (t=${t2})`
);
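
For reference, the statistic that getTValue returns is the standard pooled two-sample t statistic. Writing \bar{x}_e, s_e^2, n_e for the mean, sample variance, and count of the (outlier-filtered) equal-buffer timings, and \bar{x}_u, s_u^2, n_u for the unequal-buffer timings, the helper functions compute

s_p = \sqrt{\frac{(n_e - 1) s_e^2 + (n_u - 1) s_u^2}{n_e + n_u - 2}},
\qquad
t = \frac{\bar{x}_e - \bar{x}_u}{s_p \sqrt{1/n_e + 1/n_u}}

Here combinedStandardDeviation is s_p, standardErr is the denominator, and under the null hypothesis that the equal and unequal timing distributions share a mean, |t| exceeds the 3.892 threshold only about 0.01% of the time.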
132 changes: 0 additions & 132 deletions test/sequential/test-crypto-timing-safe-equal.js
@@ -32,135 +32,3 @@ assert.throws(function() {
assert.throws(function() {
  crypto.timingSafeEqual(Buffer.from([1, 2]), 'not a buffer');
}, 'should throw if the second argument is not a buffer');

function getTValue(compareFunc) {
  const numTrials = 10000;
  const testBufferSize = 10000;
  // Perform benchmarks to verify that timingSafeEqual is actually timing-safe.

  const rawEqualBenches = Array(numTrials);
  const rawUnequalBenches = Array(numTrials);

  for (let i = 0; i < numTrials; i++) {

    // The `runEqualBenchmark` and `runUnequalBenchmark` functions are
    // intentionally redefined on every iteration of this loop, to avoid
    // timing inconsistency.
    function runEqualBenchmark(compareFunc, bufferA, bufferB) {
      const startTime = process.hrtime();
      const result = compareFunc(bufferA, bufferB);
      const endTime = process.hrtime(startTime);

      // Ensure that the result of the function call gets used, so it doesn't
      // get discarded due to engine optimizations.
      assert.strictEqual(result, true);
      return endTime[0] * 1e9 + endTime[1];
    }

    // This is almost the same as the runEqualBenchmark function, but it's
    // duplicated to avoid timing issues with V8 optimization/inlining.
    function runUnequalBenchmark(compareFunc, bufferA, bufferB) {
      const startTime = process.hrtime();
      const result = compareFunc(bufferA, bufferB);
      const endTime = process.hrtime(startTime);

      assert.strictEqual(result, false);
      return endTime[0] * 1e9 + endTime[1];
    }

    if (i % 2) {
      const bufferA1 = Buffer.alloc(testBufferSize, 'A');
      const bufferB = Buffer.alloc(testBufferSize, 'B');
      const bufferA2 = Buffer.alloc(testBufferSize, 'A');
      const bufferC = Buffer.alloc(testBufferSize, 'C');

      // First benchmark: comparing two equal buffers
      rawEqualBenches[i] = runEqualBenchmark(compareFunc, bufferA1, bufferA2);

      // Second benchmark: comparing two unequal buffers
      rawUnequalBenches[i] = runUnequalBenchmark(compareFunc, bufferB, bufferC);
    } else {
      // Swap the order of the benchmarks every second iteration, to avoid any
      // patterns caused by memory usage.
      const bufferB = Buffer.alloc(testBufferSize, 'B');
      const bufferA1 = Buffer.alloc(testBufferSize, 'A');
      const bufferC = Buffer.alloc(testBufferSize, 'C');
      const bufferA2 = Buffer.alloc(testBufferSize, 'A');
      rawUnequalBenches[i] = runUnequalBenchmark(compareFunc, bufferB, bufferC);
      rawEqualBenches[i] = runEqualBenchmark(compareFunc, bufferA1, bufferA2);
    }
  }

  const equalBenches = filterOutliers(rawEqualBenches);
  const unequalBenches = filterOutliers(rawUnequalBenches);

  // Use a two-sample t-test to determine whether the timing difference between
  // the benchmarks is statistically significant.
  // https://wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test

  const equalMean = mean(equalBenches);
  const unequalMean = mean(unequalBenches);

  const equalLen = equalBenches.length;
  const unequalLen = unequalBenches.length;

  const combinedStd = combinedStandardDeviation(equalBenches, unequalBenches);
  const standardErr = combinedStd * Math.sqrt(1 / equalLen + 1 / unequalLen);

  return (equalMean - unequalMean) / standardErr;
}

// Returns the mean of an array
function mean(array) {
  return array.reduce((sum, val) => sum + val, 0) / array.length;
}

// Returns the sample standard deviation of an array
function standardDeviation(array) {
  const arrMean = mean(array);
  const total = array.reduce((sum, val) => sum + Math.pow(val - arrMean, 2), 0);
  return Math.sqrt(total / (array.length - 1));
}

// Returns the common standard deviation of two arrays
function combinedStandardDeviation(array1, array2) {
  const sum1 = Math.pow(standardDeviation(array1), 2) * (array1.length - 1);
  const sum2 = Math.pow(standardDeviation(array2), 2) * (array2.length - 1);
  return Math.sqrt((sum1 + sum2) / (array1.length + array2.length - 2));
}

// Filter large outliers from an array. A 'large outlier' is a value that is at
// least 50 times larger than the mean. This prevents the tests from failing
// due to the standard deviation increase when a function unexpectedly takes
// a very long time to execute.
function filterOutliers(array) {
  const arrMean = mean(array);
  return array.filter((value) => value / arrMean < 50);
}

// t_(0.99995, ∞)
// i.e. If a given comparison function is indeed timing-safe, the t-test result
// has a 99.99% chance to be below this threshold. Unfortunately, this means
// that this test will be a bit flakey and will fail 0.01% of the time even if
// crypto.timingSafeEqual is working properly.
// t-table ref: http://www.sjsu.edu/faculty/gerstman/StatPrimer/t-table.pdf
// Note that in reality there are roughly `2 * numTrials - 2` degrees of
// freedom, not ∞. However, assuming `numTrials` is large, this doesn't
// significantly affect the threshold.
const T_THRESHOLD = 3.892;

const t = getTValue(crypto.timingSafeEqual);
assert(
  Math.abs(t) < T_THRESHOLD,
  `timingSafeEqual should not leak information from its execution time (t=${t})`
);

// As a sanity check to make sure the statistical tests are working, run the
// same benchmarks again, this time with an unsafe comparison function. In this
// case the t-value should be above the threshold.
const unsafeCompare = (bufA, bufB) => bufA.equals(bufB);
const t2 = getTValue(unsafeCompare);
assert(
  Math.abs(t2) > T_THRESHOLD,
  `Buffer#equals should leak information from its execution time (t=${t2})`
);
