Skip to content

Commit

Permalink
url: treat special characters in hostnames more strictly in url.parse()
Browse files Browse the repository at this point in the history
Throw if ^, |, and some other special characters are in the hostname,
similar to WHATWG URL.

Use punycode/toAscii when % appears in a hostname, like WHATWG URL.
  • Loading branch information
Trott committed Oct 25, 2022
1 parent fa7e08c commit 105b71a
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 32 deletions.
9 changes: 2 additions & 7 deletions lib/internal/idna.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,4 @@
'use strict';

if (internalBinding('config').hasIntl) {
const { toASCII, toUnicode } = internalBinding('icu');
module.exports = { toASCII, toUnicode };
} else {
const { domainToASCII, domainToUnicode } = require('internal/url');
module.exports = { toASCII: domainToASCII, toUnicode: domainToUnicode };
}
const { domainToASCII, domainToUnicode } = require('internal/url');
module.exports = { toASCII: domainToASCII, toUnicode: domainToUnicode };
44 changes: 26 additions & 18 deletions lib/url.js
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,8 @@ const slashedProtocol = new SafeSet([
const {
CHAR_SPACE,
CHAR_TAB,
CHAR_CARRIAGE_RETURN,
CHAR_LINE_FEED,
CHAR_CARRIAGE_RETURN,
CHAR_NO_BREAK_SPACE,
CHAR_ZERO_WIDTH_NOBREAK_SPACE,
CHAR_HASH,
Expand All @@ -130,7 +130,6 @@ const {
CHAR_QUESTION_MARK,
CHAR_DOUBLE_QUOTE,
CHAR_SINGLE_QUOTE,
CHAR_PERCENT,
CHAR_SEMICOLON,
CHAR_BACKWARD_SLASH,
CHAR_CIRCUMFLEX_ACCENT,
Expand Down Expand Up @@ -314,6 +313,7 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) {
let hostEnd = -1;
let atSign = -1;
let nonHost = -1;
let invalid = -1;
for (let i = 0; i < rest.length; ++i) {
switch (rest.charCodeAt(i)) {
case CHAR_TAB:
Expand All @@ -324,35 +324,37 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) {
i -= 1;
break;
case CHAR_SPACE:
case CHAR_DOUBLE_QUOTE:
case CHAR_PERCENT:
case CHAR_SINGLE_QUOTE:
case CHAR_SEMICOLON:
case CHAR_LEFT_ANGLE_BRACKET:
case CHAR_RIGHT_ANGLE_BRACKET:
case CHAR_BACKWARD_SLASH:
case CHAR_CIRCUMFLEX_ACCENT:
case CHAR_VERTICAL_LINE:
case CHAR_DOUBLE_QUOTE:
case CHAR_SINGLE_QUOTE:
case CHAR_GRAVE_ACCENT:
case CHAR_LEFT_CURLY_BRACKET:
case CHAR_VERTICAL_LINE:
case CHAR_RIGHT_CURLY_BRACKET:
// Characters that are never ever allowed in a hostname from RFC 2396
if (nonHost === -1)
nonHost = i;
// Characters that are never ever allowed in a hostname from RFC 2396
if (invalid === -1) {
invalid = i;
}
break;
case CHAR_SEMICOLON:
case CHAR_BACKWARD_SLASH:
case CHAR_HASH:
case CHAR_FORWARD_SLASH:
case CHAR_QUESTION_MARK:
// Find the first instance of any host-ending characters
if (nonHost === -1)
if (nonHost === -1) {
nonHost = i;
}
hostEnd = i;
break;
case CHAR_AT:
// At this point, either we have an explicit point where the
// auth portion cannot go past, or the last @ char is the decider.
atSign = i;
nonHost = -1;
invalid = -1;
break;
}
if (hostEnd !== -1)
Expand All @@ -364,9 +366,15 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) {
start = atSign + 1;
}
if (nonHost === -1) {
if (invalid !== -1) {
throw new ERR_INVALID_URL(url);
}
this.host = rest.slice(start);
rest = '';
} else {
if (invalid > -1 && invalid < nonHost) {
throw new ERR_INVALID_URL(url);
}
this.host = rest.slice(start, nonHost);
rest = rest.slice(nonHost);
}
Expand Down Expand Up @@ -509,13 +517,13 @@ Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) {
function getHostname(self, rest, hostname, url) {
for (let i = 0; i < hostname.length; ++i) {
const code = hostname.charCodeAt(i);
const isValid = (code !== CHAR_FORWARD_SLASH &&
code !== CHAR_BACKWARD_SLASH &&
code !== CHAR_HASH &&
code !== CHAR_QUESTION_MARK &&
code !== CHAR_COLON);
const isHostEnd = (code === CHAR_FORWARD_SLASH ||
code === CHAR_BACKWARD_SLASH ||
code === CHAR_HASH ||
code === CHAR_QUESTION_MARK ||
code === CHAR_COLON);

if (!isValid) {
if (isHostEnd) {
// If leftover starts with :, then it represents an invalid port.
if (hostname.charCodeAt(i) === 58) {
throw new ERR_INVALID_URL(url);
Expand Down
6 changes: 0 additions & 6 deletions test/parallel/test-url-format.js
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,6 @@ const formatTests = {
hash: '#frag=?bar/#frag',
pathname: '/'
},
'http://google.com" onload="alert(42)/': {
href: 'http://google.com/%22%20onload=%22alert(42)/',
protocol: 'http:',
host: 'google.com',
pathname: '/%22%20onload=%22alert(42)/'
},
'http://a.com/a/b/c?s#h': {
href: 'http://a.com/a/b/c?s#h',
protocol: 'http',
Expand Down
2 changes: 1 addition & 1 deletion test/parallel/test-url-parse-format.js
Original file line number Diff line number Diff line change
Expand Up @@ -1007,7 +1007,7 @@ for (const u in parseTests) {
assert.deepStrictEqual(
actual,
expected,
`parsing ${u} and expected ${inspect(expected)} but got ${inspect(actual)}`
`parsing ${inspect(u)}, expected ${inspect(expected)}, got ${inspect(actual)}`
);
assert.deepStrictEqual(
spaced,
Expand Down
15 changes: 15 additions & 0 deletions test/parallel/test-url-parse-invalid-input.js
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,18 @@ if (common.hasIntl) {
`parsing ${badURL}`);
});
}

{
const badChars = [ ' ', '>', '<', '^', '|' ];
for (const badChar of badChars) {
const badURL = `http://fail${badChar}fail.com/`;
assert.throws(() => { url.parse(badURL); },
(e) => e.code === 'ERR_INVALID_URL',
`parsing "${badURL}"`);
}
}

assert.throws(() => { url.parse('http://google.com" onload="alert(42)/'); },
(e) => e.code === 'ERR_INVALID_URL',
'parsing \'http://google.com" onload="alert(42)/\''
);

0 comments on commit 105b71a

Please sign in to comment.