Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

benchmark: pre-generate data set for URL benchmarks and use WPT test data #24302

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 92 additions & 0 deletions benchmark/common.js
Original file line number Diff line number Diff line change
Expand Up @@ -252,3 +252,95 @@ exports.binding = function(bindingName) {
return process.binding(bindingName);
}
};

// Sample URL strings for the URL benchmarks, one per kind of input.
// The key order is kept stable because Object.keys(urls) is used to build
// the list of benchmark data types.
const urls = {
  // A long URL with port, deep path, fragment and many query-like pairs.
  long: [
    'http://nodejs.org:89/docs/latest/api/foo/bar/qua/13949281/0f28b/',
    '/5d49/b3020/url.html#test?payload1=true&payload2=false&test=1',
    '&benchmark=3&foo=38.38.011.293&bar=1234834910480&test=19299&3992&',
    'key=f5c65e1e98fe07e648249ad41e1cfdb0'
  ].join(''),
  // A short, plain https URL.
  short: 'https://nodejs.org/en/blog/',
  // Internationalized (non-ASCII) host name.
  idn: 'http://你好你好.在线',
  // Userinfo (username and password) plus a query string.
  auth: 'https://user:pass@example.com/path?search=1',
  // file: scheme with an empty host.
  file: 'file:///foo/bar/test/node.js',
  // WebSocket scheme with an explicit port.
  ws: 'ws://localhost:9229/f46db715-70df-43ad-a359-7f9949f39868',
  // javascript: scheme.
  javascript: 'javascript:alert("node is awesome");',
  // Percent-encoded host name.
  percent: 'https://%E4%BD%A0/foo',
  // Path containing "." and ".." segments.
  dot: 'https://example.org/./a/../b/./c'
};
exports.urls = urls;

// Sample query strings for the querystring / URLSearchParams benchmarks.
// Each key names the parsing scenario that its value exercises.
const searchParams = {
  // Plain pairs, nothing to decode.
  noencode: 'foo=bar&baz=quux&xyzzy=thud',
  // Runs of consecutive separators between pairs.
  multicharsep: 'foo=bar&&&&&&&&&&baz=quux&&&&&&&&&&xyzzy=thud',
  // "%" sequences that are not valid percent-encodings.
  encodefake: 'foo=%©ar&baz=%A©uux&xyzzy=%©ud',
  // Many percent-encoded characters in keys and values.
  encodemany: '%66%6F%6F=bar&%62%61%7A=quux&xyzzy=%74h%75d',
  // A single percent-encoded character at the very end.
  encodelast: 'foo=bar&baz=quux&xyzzy=thu%64',
  // One key repeated a few times.
  multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz',
  // One key repeated many times.
  multivaluemany: [
    'foo=bar&foo=baz&foo=quux&quuy=quuz&foo=abc&foo=def&',
    'foo=ghi&foo=jkl&foo=mno&foo=pqr&foo=stu&foo=vwxyz'
  ].join(''),
  // Twenty-six value-less keys, "a" through "z".
  manypairs: 'abcdefghijklmnopqrstuvwxyz'.split('').join('&'),
  // Separators only.
  manyblankpairs: '&&&&&&&&&&&&&&&&&&&&&&&&',
  // "+" used in place of spaces.
  altspaces: 'foo+bar=baz+quux&xyzzy+thud=quuy+quuz&abc=def+ghi'
};
exports.searchParams = searchParams;

function getUrlData(withBase) {
const data = require('../test/fixtures/wpt/url/resources/urltestdata.json');
const result = [];
for (const item of data) {
if (item.failure || !item.input) continue;
if (withBase) {
result.push([item.input, item.base]);
} else if (item.base !== 'about:blank') {
result.push(item.base);
}
}
return result;
}

exports.urlDataTypes = Object.keys(urls).concat(['wpt']);

/**
 * Generate an array of data for URL benchmarks to use.
 * The size of the resulting data set is the original data size * 2 ** `e`.
 * The 'wpt' type contains about 400 data points when `withBase` is true,
 * and 200 data points when `withBase` is false.
 * Other types contain 200 data points with or without base.
 *
 * Both arguments are validated before any data is generated, so an invalid
 * call fails without loading the WPT fixture.
 *
 * @param {string} type Type of the data, 'wpt' or an own key of `urls`
 * @param {number} e The repetition of the data, as exponent of 2
 * @param {boolean} withBase Whether to include a base URL
 * @param {boolean} asUrl Whether to return the results as URL objects
 * @return {string[] | string[][] | URL[]}
 * @throws {Error} If `type` is neither 'wpt' nor an own key of `urls`,
 *   or if `e` is not a number (NaN included).
 */
function bakeUrlData(type, e = 0, withBase = false, asUrl = false) {
  const isWpt = type === 'wpt';
  // Only own keys count: a plain `urls[type]` lookup would also accept
  // inherited names such as 'constructor' or 'toString'.
  if (!isWpt && !Object.prototype.hasOwnProperty.call(urls, type)) {
    throw new Error(`Unknown url data type ${type}`);
  }

  // NaN is what `+e` produces for a malformed value; reject it instead of
  // silently treating it as "no repetition".
  if (typeof e !== 'number' || Number.isNaN(e)) {
    throw new Error(`e must be a number, received ${e}`);
  }

  let result;
  if (isWpt) {
    result = getUrlData(withBase);
  } else {
    const input = urls[type];
    const item = withBase ? [input, 'about:blank'] : input;
    // Roughly the size of WPT URL test data
    result = new Array(200).fill(item);
  }

  // Each pass doubles the data set, giving length * 2 ** e overall.
  for (let i = 0; i < e; ++i) {
    result = result.concat(result);
  }

  if (asUrl) {
    if (withBase) {
      result = result.map(([input, base]) => new URL(input, base));
    } else {
      result = result.map((input) => new URL(input));
    }
  }
  return result;
}
exports.bakeUrlData = bakeUrlData;
30 changes: 0 additions & 30 deletions benchmark/fixtures/url-inputs.js

This file was deleted.

50 changes: 40 additions & 10 deletions benchmark/http/create-clientrequest.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,49 @@

const common = require('../common.js');
const ClientRequest = require('http').ClientRequest;

const types = Object.keys(common.urls)
.filter((i) => common.urls[i]
.startsWith('http://'));
const bench = common.createBenchmark(main, {
len: [1, 8, 16, 32, 64, 128],
n: [1e6]
Copy link
Contributor

@mscdex mscdex Nov 11, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is n being removed in general in this PR? If we're not going to be looping over the same input for each input, then we should at least loop n times over the entire set of inputs.

Copy link
Member Author

@joyeecheung joyeecheung Nov 11, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mscdex That should already be covered by the combination of e and --runs (of compare.js): the size of the input depends on e, so to get a size of 1e6 we can just set e to roughly Math.log2(1e6 / 200). (All the data are pre-generated, so when e is 1 and type is long it will iterate over the same kind of string 400 times. Only the type wpt gives you a non-homogeneous array.) But I don't think we actually need that many iterations for a single benchmark run: the n was set when Crankshaft existed, and currently I don't observe a significant difference from changing the value of n once it's large enough (by default the data size is roughly 800/400 with/without base). Instead we can just use --runs to get more samples, but the default of 30 is already a pretty good sample size (at least it's the magic number taught in statistics textbooks). So with the default settings, for each input you run through the same code roughly 30 * 400 times with the same binary.

// Use 'url' to avoid name clash with other http benchmark
url: types.concat(['wpt']),
arg: ['URL', 'string', 'options'],
e: [1]
});

function main({ len, n }) {
const path = '/'.repeat(len);
const opts = { path: path, createConnection: function() {} };
function noop() {}

bench.start();
for (var i = 0; i < n; i++) {
new ClientRequest(opts);
function main({ url: type, arg, e }) {
e = +e;
const data = common.bakeUrlData(type, e, false, false)
.filter((i) => i.startsWith('http://'));
const len = data.length;
var result;
var i;
if (arg === 'options') {
const options = data.map((i) => ({
path: new URL(i).path, createConnection: noop
}));
bench.start();
for (i = 0; i < len; i++) {
result = new ClientRequest(options[i]);
}
bench.end(len);
} else if (arg === 'URL') {
const options = data.map((i) => new URL(i));
bench.start();
for (i = 0; i < len; i++) {
result = new ClientRequest(options[i], { createConnection: noop });
}
bench.end(len);
} else if (arg === 'string') {
bench.start();
for (i = 0; i < len; i++) {
result = new ClientRequest(data[i], { createConnection: noop });
}
bench.end(len);
} else {
throw new Error(`Unknown arg type ${arg}`);
}
bench.end(n);
require('assert').ok(result);
}
2 changes: 1 addition & 1 deletion benchmark/querystring/querystring-parse.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
'use strict';
const common = require('../common.js');
const querystring = require('querystring');
const inputs = require('../fixtures/url-inputs.js').searchParams;
const inputs = common.searchParams;

const bench = common.createBenchmark(main, {
type: Object.keys(inputs),
Expand Down
44 changes: 21 additions & 23 deletions benchmark/url/legacy-vs-whatwg-url-get-prop.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,15 @@ const common = require('../common.js');
const url = require('url');
const URL = url.URL;
const assert = require('assert');
const inputs = require('../fixtures/url-inputs.js').urls;

const bench = common.createBenchmark(main, {
type: Object.keys(inputs),
type: common.urlDataTypes,
method: ['legacy', 'whatwg'],
n: [1e5]
e: [1]
});

// At the time of writing, when using a passed property name to index
// the object, Crankshaft would generate a LoadKeyedGeneric even when it
// remains a constant in the function, so here we must use the literal
// instead to get a LoadNamedField.
function useLegacy(n, input) {
const obj = url.parse(input);
function useLegacy(data) {
const obj = url.parse(data[0]);
const noDead = {
protocol: obj.protocol,
auth: obj.auth,
Expand All @@ -27,10 +22,12 @@ function useLegacy(n, input) {
search: obj.search,
hash: obj.hash
};
const len = data.length;
// It's necessary to assign the values to an object
// to avoid loop invariant code motion.
bench.start();
for (var i = 0; i < n; i += 1) {
for (var i = 0; i < len; i++) {
const obj = data[i];
noDead.protocol = obj.protocol;
noDead.auth = obj.auth;
noDead.host = obj.host;
Expand All @@ -40,12 +37,12 @@ function useLegacy(n, input) {
noDead.search = obj.search;
noDead.hash = obj.hash;
}
bench.end(n);
bench.end(len);
return noDead;
}

function useWHATWG(n, input) {
const obj = new URL(input);
function useWHATWG(data) {
const obj = new URL(data[0]);
const noDead = {
protocol: obj.protocol,
auth: `${obj.username}:${obj.password}`,
Expand All @@ -56,8 +53,10 @@ function useWHATWG(n, input) {
search: obj.search,
hash: obj.hash
};
const len = data.length;
bench.start();
for (var i = 0; i < n; i += 1) {
for (var i = 0; i < len; i++) {
const obj = data[i];
noDead.protocol = obj.protocol;
noDead.auth = `${obj.username}:${obj.password}`;
noDead.host = obj.host;
Expand All @@ -67,23 +66,22 @@ function useWHATWG(n, input) {
noDead.search = obj.search;
noDead.hash = obj.hash;
}
bench.end(n);
bench.end(len);
return noDead;
}

function main({ type, n, method }) {
const input = inputs[type];
if (!input) {
throw new Error(`Unknown input type "${type}"`);
}

function main({ type, method, e }) {
e = +e;
var data;
var noDead; // Avoid dead code elimination.
switch (method) {
case 'legacy':
noDead = useLegacy(n, input);
data = common.bakeUrlData(type, e, false, false);
noDead = useLegacy(data.map((i) => url.parse(i)));
break;
case 'whatwg':
noDead = useWHATWG(n, input);
data = common.bakeUrlData(type, e, false, true);
noDead = useWHATWG(data);
break;
default:
throw new Error(`Unknown method "${method}"`);
Expand Down
58 changes: 36 additions & 22 deletions benchmark/url/legacy-vs-whatwg-url-parse.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,47 +3,61 @@ const common = require('../common.js');
const url = require('url');
const URL = url.URL;
const assert = require('assert');
const inputs = require('../fixtures/url-inputs.js').urls;

const bench = common.createBenchmark(main, {
type: Object.keys(inputs),
method: ['legacy', 'whatwg'],
n: [1e5]
withBase: ['true', 'false'],
type: common.urlDataTypes,
e: [1],
method: ['legacy', 'whatwg']
});

function useLegacy(n, input) {
var noDead = url.parse(input);
function useLegacy(data) {
const len = data.length;
var result = url.parse(data[0]); // avoid dead code elimination
bench.start();
for (var i = 0; i < n; i += 1) {
noDead = url.parse(input);
for (var i = 0; i < len; ++i) {
result = url.parse(data[i]);
}
bench.end(n);
return noDead;
bench.end(len);
return result;
}

function useWHATWG(n, input) {
var noDead = new URL(input);
function useWHATWGWithBase(data) {
const len = data.length;
var result = new URL(data[0][0], data[0][1]); // avoid dead code elimination
bench.start();
for (var i = 0; i < n; i += 1) {
noDead = new URL(input);
for (var i = 0; i < len; ++i) {
const item = data[i];
result = new URL(item[0], item[1]);
}
bench.end(n);
return noDead;
bench.end(len);
return result;
}

function main({ type, n, method }) {
const input = inputs[type];
if (!input) {
throw new Error(`Unknown input type "${type}"`);
/**
 * Time the WHATWG `URL` constructor when it is called with a single
 * argument (no base URL).
 *
 * @param {Array} data Inputs passed one by one to `new URL()`; presumably
 *   absolute URL strings - confirm against the caller.
 * @return {URL} The last URL object constructed.
 */
function useWHATWGWithoutBase(data) {
const len = data.length;
// The first entry is parsed once here, outside the timed section.
var result = new URL(data[0]); // avoid dead code elimination
bench.start();
// Timed section: one `new URL()` call per entry of `data`. The result is
// kept in `result` and returned so the calls are not dead code.
for (var i = 0; i < len; ++i) {
result = new URL(data[i]);
}
// The number of constructor calls made in the loop is handed to bench.end().
bench.end(len);
return result;
}

function main({ e, method, type, withBase }) {
e = +e;
withBase = withBase === 'true';
var noDead; // Avoid dead code elimination.
var data;
switch (method) {
case 'legacy':
noDead = useLegacy(n, input);
data = common.bakeUrlData(type, e, false, false);
noDead = useLegacy(data);
break;
case 'whatwg':
noDead = useWHATWG(n, input);
data = common.bakeUrlData(type, e, withBase, false);
noDead = withBase ? useWHATWGWithBase(data) : useWHATWGWithoutBase(data);
break;
default:
throw new Error(`Unknown method ${method}`);
Expand Down
2 changes: 1 addition & 1 deletion benchmark/url/legacy-vs-whatwg-url-searchparams-parse.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
const common = require('../common.js');
const { URLSearchParams } = require('url');
const querystring = require('querystring');
const searchParams = require('../fixtures/url-inputs.js').searchParams;
const searchParams = common.searchParams;

const bench = common.createBenchmark(main, {
searchParam: Object.keys(searchParams),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
const common = require('../common.js');
const { URLSearchParams } = require('url');
const querystring = require('querystring');
const searchParams = require('../fixtures/url-inputs.js').searchParams;
const searchParams = common.searchParams;

const bench = common.createBenchmark(main, {
searchParam: Object.keys(searchParams),
Expand Down
Loading