Skip to content

Commit d0e61d4

Browse files
not-an-aardvarkFishrock123
authored andcommitted
test: make crypto.timingSafeEqual test less flaky
The `crypto.timingSafeEqual` test still seems to be a bit flaky. This makes a few changes to the test: * Separates the basic usage and the benchmarking into different tests * Moves the timing-sensitive benchmark function into a separate module, and reparses the module on every iteration of the loop to avoid shared state between timing measurements. PR-URL: #8456 Reviewed-By: James M Snell <jasnell@gmail.com>
1 parent 154d689 commit d0e61d4

File tree

3 files changed

+140
-132
lines changed

3 files changed

+140
-132
lines changed
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
'use strict';
2+
3+
const assert = require('assert');
4+
module.exports = (compareFunc, firstBufFill, secondBufFill, bufSize) => {
5+
const firstBuffer = Buffer.alloc(bufSize, firstBufFill);
6+
const secondBuffer = Buffer.alloc(bufSize, secondBufFill);
7+
8+
const startTime = process.hrtime();
9+
const result = compareFunc(firstBuffer, secondBuffer);
10+
const endTime = process.hrtime(startTime);
11+
12+
// Ensure that the result of the function call gets used, so it doesn't
13+
// get discarded due to engine optimizations.
14+
assert.strictEqual(result, firstBufFill === secondBufFill);
15+
16+
return endTime[0] * 1e9 + endTime[1];
17+
};
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
'use strict';
2+
const common = require('../common');
3+
const assert = require('assert');
4+
5+
if (!common.hasCrypto) {
6+
common.skip('missing crypto');
7+
return;
8+
}
9+
10+
if (!common.enoughTestMem) {
11+
common.skip('memory-intensive test');
12+
return;
13+
}
14+
15+
const crypto = require('crypto');
16+
17+
const BENCHMARK_FUNC_PATH =
18+
`${common.fixturesDir}/crypto-timing-safe-equal-benchmark-func`;
19+
function runOneBenchmark(...args) {
20+
const benchmarkFunc = require(BENCHMARK_FUNC_PATH);
21+
const result = benchmarkFunc(...args);
22+
23+
// Don't let the comparison function get cached. This avoid a timing
24+
// inconsistency due to V8 optimization where the function would take
25+
// less time when called with a specific set of parameters.
26+
delete require.cache[require.resolve(BENCHMARK_FUNC_PATH)];
27+
return result;
28+
}
29+
30+
function getTValue(compareFunc) {
31+
const numTrials = 10000;
32+
const bufSize = 10000;
33+
// Perform benchmarks to verify that timingSafeEqual is actually timing-safe.
34+
35+
const rawEqualBenches = Array(numTrials);
36+
const rawUnequalBenches = Array(numTrials);
37+
38+
for (let i = 0; i < numTrials; i++) {
39+
if (Math.random() < 0.5) {
40+
// First benchmark: comparing two equal buffers
41+
rawEqualBenches[i] = runOneBenchmark(compareFunc, 'A', 'A', bufSize);
42+
// Second benchmark: comparing two unequal buffers
43+
rawUnequalBenches[i] = runOneBenchmark(compareFunc, 'B', 'C', bufSize);
44+
} else {
45+
// Flip the order of the benchmarks half of the time.
46+
rawUnequalBenches[i] = runOneBenchmark(compareFunc, 'B', 'C', bufSize);
47+
rawEqualBenches[i] = runOneBenchmark(compareFunc, 'A', 'A', bufSize);
48+
}
49+
}
50+
51+
const equalBenches = filterOutliers(rawEqualBenches);
52+
const unequalBenches = filterOutliers(rawUnequalBenches);
53+
54+
// Use a two-sample t-test to determine whether the timing difference between
55+
// the benchmarks is statistically significant.
56+
// https://wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test
57+
58+
const equalMean = mean(equalBenches);
59+
const unequalMean = mean(unequalBenches);
60+
61+
const equalLen = equalBenches.length;
62+
const unequalLen = unequalBenches.length;
63+
64+
const combinedStd = combinedStandardDeviation(equalBenches, unequalBenches);
65+
const standardErr = combinedStd * Math.sqrt(1 / equalLen + 1 / unequalLen);
66+
67+
return (equalMean - unequalMean) / standardErr;
68+
}
69+
70+
// Returns the mean of an array
71+
function mean(array) {
72+
return array.reduce((sum, val) => sum + val, 0) / array.length;
73+
}
74+
75+
// Returns the sample standard deviation of an array
76+
function standardDeviation(array) {
77+
const arrMean = mean(array);
78+
const total = array.reduce((sum, val) => sum + Math.pow(val - arrMean, 2), 0);
79+
return Math.sqrt(total / (array.length - 1));
80+
}
81+
82+
// Returns the common standard deviation of two arrays
83+
function combinedStandardDeviation(array1, array2) {
84+
const sum1 = Math.pow(standardDeviation(array1), 2) * (array1.length - 1);
85+
const sum2 = Math.pow(standardDeviation(array2), 2) * (array2.length - 1);
86+
return Math.sqrt((sum1 + sum2) / (array1.length + array2.length - 2));
87+
}
88+
89+
// Filter large outliers from an array. A 'large outlier' is a value that is at
90+
// least 50 times larger than the mean. This prevents the tests from failing
91+
// due to the standard deviation increase when a function unexpectedly takes
92+
// a very long time to execute.
93+
function filterOutliers(array) {
94+
const arrMean = mean(array);
95+
return array.filter((value) => value / arrMean < 50);
96+
}
97+
98+
// t_(0.99995, ∞)
99+
// i.e. If a given comparison function is indeed timing-safe, the t-test result
100+
// has a 99.99% chance to be below this threshold. Unfortunately, this means
101+
// that this test will be a bit flakey and will fail 0.01% of the time even if
102+
// crypto.timingSafeEqual is working properly.
103+
// t-table ref: http://www.sjsu.edu/faculty/gerstman/StatPrimer/t-table.pdf
104+
// Note that in reality there are roughly `2 * numTrials - 2` degrees of
105+
// freedom, not ∞. However, assuming `numTrials` is large, this doesn't
106+
// significantly affect the threshold.
107+
const T_THRESHOLD = 3.892;
108+
109+
const t = getTValue(crypto.timingSafeEqual);
110+
assert(
111+
Math.abs(t) < T_THRESHOLD,
112+
`timingSafeEqual should not leak information from its execution time (t=${t})`
113+
);
114+
115+
// As a sanity check to make sure the statistical tests are working, run the
116+
// same benchmarks again, this time with an unsafe comparison function. In this
117+
// case the t-value should be above the threshold.
118+
const unsafeCompare = (bufA, bufB) => bufA.equals(bufB);
119+
const t2 = getTValue(unsafeCompare);
120+
assert(
121+
Math.abs(t2) > T_THRESHOLD,
122+
`Buffer#equals should leak information from its execution time (t=${t2})`
123+
);

test/sequential/test-crypto-timing-safe-equal.js

Lines changed: 0 additions & 132 deletions
Original file line numberDiff line numberDiff line change
@@ -32,135 +32,3 @@ assert.throws(function() {
3232
assert.throws(function() {
3333
crypto.timingSafeEqual(Buffer.from([1, 2]), 'not a buffer');
3434
}, 'should throw if the second argument is not a buffer');
35-
36-
function getTValue(compareFunc) {
37-
const numTrials = 10000;
38-
const testBufferSize = 10000;
39-
// Perform benchmarks to verify that timingSafeEqual is actually timing-safe.
40-
41-
const rawEqualBenches = Array(numTrials);
42-
const rawUnequalBenches = Array(numTrials);
43-
44-
for (let i = 0; i < numTrials; i++) {
45-
46-
// The `runEqualBenchmark` and `runUnequalBenchmark` functions are
47-
// intentionally redefined on every iteration of this loop, to avoid
48-
// timing inconsistency.
49-
function runEqualBenchmark(compareFunc, bufferA, bufferB) {
50-
const startTime = process.hrtime();
51-
const result = compareFunc(bufferA, bufferB);
52-
const endTime = process.hrtime(startTime);
53-
54-
// Ensure that the result of the function call gets used, so it doesn't
55-
// get discarded due to engine optimizations.
56-
assert.strictEqual(result, true);
57-
return endTime[0] * 1e9 + endTime[1];
58-
}
59-
60-
// This is almost the same as the runEqualBenchmark function, but it's
61-
// duplicated to avoid timing issues with V8 optimization/inlining.
62-
function runUnequalBenchmark(compareFunc, bufferA, bufferB) {
63-
const startTime = process.hrtime();
64-
const result = compareFunc(bufferA, bufferB);
65-
const endTime = process.hrtime(startTime);
66-
67-
assert.strictEqual(result, false);
68-
return endTime[0] * 1e9 + endTime[1];
69-
}
70-
71-
if (i % 2) {
72-
const bufferA1 = Buffer.alloc(testBufferSize, 'A');
73-
const bufferB = Buffer.alloc(testBufferSize, 'B');
74-
const bufferA2 = Buffer.alloc(testBufferSize, 'A');
75-
const bufferC = Buffer.alloc(testBufferSize, 'C');
76-
77-
// First benchmark: comparing two equal buffers
78-
rawEqualBenches[i] = runEqualBenchmark(compareFunc, bufferA1, bufferA2);
79-
80-
// Second benchmark: comparing two unequal buffers
81-
rawUnequalBenches[i] = runUnequalBenchmark(compareFunc, bufferB, bufferC);
82-
} else {
83-
// Swap the order of the benchmarks every second iteration, to avoid any
84-
// patterns caused by memory usage.
85-
const bufferB = Buffer.alloc(testBufferSize, 'B');
86-
const bufferA1 = Buffer.alloc(testBufferSize, 'A');
87-
const bufferC = Buffer.alloc(testBufferSize, 'C');
88-
const bufferA2 = Buffer.alloc(testBufferSize, 'A');
89-
rawUnequalBenches[i] = runUnequalBenchmark(compareFunc, bufferB, bufferC);
90-
rawEqualBenches[i] = runEqualBenchmark(compareFunc, bufferA1, bufferA2);
91-
}
92-
}
93-
94-
const equalBenches = filterOutliers(rawEqualBenches);
95-
const unequalBenches = filterOutliers(rawUnequalBenches);
96-
97-
// Use a two-sample t-test to determine whether the timing difference between
98-
// the benchmarks is statistically significant.
99-
// https://wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test
100-
101-
const equalMean = mean(equalBenches);
102-
const unequalMean = mean(unequalBenches);
103-
104-
const equalLen = equalBenches.length;
105-
const unequalLen = unequalBenches.length;
106-
107-
const combinedStd = combinedStandardDeviation(equalBenches, unequalBenches);
108-
const standardErr = combinedStd * Math.sqrt(1 / equalLen + 1 / unequalLen);
109-
110-
return (equalMean - unequalMean) / standardErr;
111-
}
112-
113-
// Returns the mean of an array
114-
function mean(array) {
115-
return array.reduce((sum, val) => sum + val, 0) / array.length;
116-
}
117-
118-
// Returns the sample standard deviation of an array
119-
function standardDeviation(array) {
120-
const arrMean = mean(array);
121-
const total = array.reduce((sum, val) => sum + Math.pow(val - arrMean, 2), 0);
122-
return Math.sqrt(total / (array.length - 1));
123-
}
124-
125-
// Returns the common standard deviation of two arrays
126-
function combinedStandardDeviation(array1, array2) {
127-
const sum1 = Math.pow(standardDeviation(array1), 2) * (array1.length - 1);
128-
const sum2 = Math.pow(standardDeviation(array2), 2) * (array2.length - 1);
129-
return Math.sqrt((sum1 + sum2) / (array1.length + array2.length - 2));
130-
}
131-
132-
// Filter large outliers from an array. A 'large outlier' is a value that is at
133-
// least 50 times larger than the mean. This prevents the tests from failing
134-
// due to the standard deviation increase when a function unexpectedly takes
135-
// a very long time to execute.
136-
function filterOutliers(array) {
137-
const arrMean = mean(array);
138-
return array.filter((value) => value / arrMean < 50);
139-
}
140-
141-
// t_(0.99995, ∞)
142-
// i.e. If a given comparison function is indeed timing-safe, the t-test result
143-
// has a 99.99% chance to be below this threshold. Unfortunately, this means
144-
// that this test will be a bit flakey and will fail 0.01% of the time even if
145-
// crypto.timingSafeEqual is working properly.
146-
// t-table ref: http://www.sjsu.edu/faculty/gerstman/StatPrimer/t-table.pdf
147-
// Note that in reality there are roughly `2 * numTrials - 2` degrees of
148-
// freedom, not ∞. However, assuming `numTrials` is large, this doesn't
149-
// significantly affect the threshold.
150-
const T_THRESHOLD = 3.892;
151-
152-
const t = getTValue(crypto.timingSafeEqual);
153-
assert(
154-
Math.abs(t) < T_THRESHOLD,
155-
`timingSafeEqual should not leak information from its execution time (t=${t})`
156-
);
157-
158-
// As a sanity check to make sure the statistical tests are working, run the
159-
// same benchmarks again, this time with an unsafe comparison function. In this
160-
// case the t-value should be above the threshold.
161-
const unsafeCompare = (bufA, bufB) => bufA.equals(bufB);
162-
const t2 = getTValue(unsafeCompare);
163-
assert(
164-
Math.abs(t2) > T_THRESHOLD,
165-
`Buffer#equals should leak information from its execution time (t=${t2})`
166-
);

0 commit comments

Comments
 (0)