Skip to content
This repository has been archived by the owner on Apr 22, 2023. It is now read-only.

Commit

Permalink
Closes GH-711: Parse URLs more safely
Browse files Browse the repository at this point in the history
This commit does three things:

1. Delimiters and "unwise" characters are never included in the
   hostname or path.
2. url.format will sanitize string URLs that are passed to it.
3. The parsed url's 'href' member will be the sanitized url, which may
   not match the argument to url.parse.
  • Loading branch information
isaacs authored and ry committed Feb 28, 2011
1 parent 3599c71 commit d664bf3
Show file tree
Hide file tree
Showing 2 changed files with 150 additions and 72 deletions.
74 changes: 65 additions & 9 deletions lib/url.js
Expand Up @@ -7,9 +7,30 @@ exports.format = urlFormat;
// compiled once on the first module load.
var protocolPattern = /^([a-z0-9]+:)/,
portPattern = /:[0-9]+$/,
nonHostChars = ['/', '?', ';', '#'],
delims = ['<', '>', '"', '\'', '`', /\s/],
unwise = ['{', '}', '|', '\\', '^', '~', '[', ']', '`'].concat(delims),
nonHostChars = ['/', '?', ';', '#'].concat(unwise),
hostnameMaxLen = 255,
hostnamePartPattern = /^[a-z0-9][a-z0-9A-Z-]{0,62}$/,
unsafeProtocol = {
'javascript': true,
'javascript:': true
},
hostlessProtocol = {
'javascript': true,
'javascript:': true,
'file': true,
'file:': true
},
pathedProtocol = {
'http': true,
'https': true,
'ftp': true,
'gopher': true,
'file': true,
'http:': true,
'ftp:': true,
'gopher:': true,
'file:': true
},
slashedProtocol = {
Expand All @@ -29,7 +50,7 @@ var protocolPattern = /^([a-z0-9]+:)/,
function urlParse(url, parseQueryString, slashesDenoteHost) {
if (url && typeof(url) === 'object' && url.href) return url;

var out = { href: url },
var out = {},
rest = url;

var proto = protocolPattern.exec(rest);
Expand All @@ -50,6 +71,7 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
out.slashes = true;
}
}

if (!hostlessProtocol[proto] &&
(slashes || (proto && !slashedProtocol[proto]))) {
// there's a hostname.
Expand Down Expand Up @@ -79,9 +101,36 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
// we've indicated that there is a hostname,
// so even if it's empty, it has to be present.
out.hostname = out.hostname || '';

// validate a little.
if (out.hostname.length > hostnameMaxLen) {
out.hostname = '';
} else {
var hostparts = out.hostname.split(/\./);
for (var i = 0, l = hostparts.length; i < l; i++) {
var part = hostparts[i];
if (!part.match(hostnamePartPattern)) {
out.hostname = '';
break;
}
}
}
}

// now rest is set to the post-host stuff.
// chop off any delim chars.
if (!unsafeProtocol[proto]) {
var chop = rest.length;
for (var i = 0, l = delims.length; i < l; i++) {
var c = rest.indexOf(delims[i]);
if (c !== -1) {
chop = Math.min(c, chop);
}
}
rest = rest.substr(0, chop);
}


// chop off from the tail first.
var hash = rest.indexOf('#');
if (hash !== -1) {
Expand All @@ -99,9 +148,17 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
rest = rest.slice(0, qm);
} else if (parseQueryString) {
// no query string, but parseQueryString still requested
out.search = '';
out.query = {};
}
if (rest) out.pathname = rest;
if (slashedProtocol[proto] &&
out.hostname && !out.pathname) {
out.pathname = '/';
}

// finally, reconstruct the href based on what has been validated.
out.href = urlFormat(out);

return out;
}
Expand All @@ -123,13 +180,12 @@ function urlFormat(obj) {
) :
false,
pathname = obj.pathname || '',
search = obj.search || (
obj.query && ('?' + (
typeof(obj.query) === 'object' ?
querystring.stringify(obj.query) :
String(obj.query)
))
) || '',
query = obj.query &&
((typeof obj.query === 'object' &&
Object.keys(obj.query).length) ?
querystring.stringify(obj.query) :
'') || '',
search = obj.search || (query && ('?' + query)) || '',
hash = obj.hash || '';

if (protocol && protocol.substr(-1) !== ':') protocol += ':';
Expand Down
148 changes: 85 additions & 63 deletions test/simple/test-url.js
Expand Up @@ -28,7 +28,8 @@ var parseTests = {
'pathname': '/vt/lyrs=m@114&hl=en&src=api&x=2&y=2&z=3&s='
},
'http://mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=' : {
'href': 'http://mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=',
'href': 'http://mt0.google.com/vt/lyrs=m@114???&hl=en&src=api' +
'&x=2&y=2&z=3&s=',
'protocol': 'http:',
'host': 'mt0.google.com',
'hostname': 'mt0.google.com',
Expand All @@ -37,7 +38,8 @@ var parseTests = {
'pathname': '/vt/lyrs=m@114'
},
'http://user:pass@mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=' : {
'href': 'http://user:pass@mt0.google.com/vt/lyrs=m@114???&hl=en&src=api&x=2&y=2&z=3&s=',
'href': 'http://user:pass@mt0.google.com/vt/lyrs=m@114???' +
'&hl=en&src=api&x=2&y=2&z=3&s=',
'protocol': 'http:',
'host': 'user:pass@mt0.google.com',
'auth': 'user:pass',
Expand Down Expand Up @@ -84,49 +86,6 @@ var parseTests = {
'query': 'baz=quux',
'pathname': '/foo/bar'
},
'http://example.com?foo=bar#frag' : {
'href': 'http://example.com?foo=bar#frag',
'protocol': 'http:',
'host': 'example.com',
'hostname': 'example.com',
'hash': '#frag',
'search': '?foo=bar',
'query': 'foo=bar'
},
'http://example.com?foo=@bar#frag' : {
'href': 'http://example.com?foo=@bar#frag',
'protocol': 'http:',
'host': 'example.com',
'hostname': 'example.com',
'hash': '#frag',
'search': '?foo=@bar',
'query': 'foo=@bar'
},
'http://example.com?foo=/bar/#frag' : {
'href': 'http://example.com?foo=/bar/#frag',
'protocol': 'http:',
'host': 'example.com',
'hostname': 'example.com',
'hash': '#frag',
'search': '?foo=/bar/',
'query': 'foo=/bar/'
},
'http://example.com?foo=?bar/#frag' : {
'href': 'http://example.com?foo=?bar/#frag',
'protocol': 'http:',
'host': 'example.com',
'hostname': 'example.com',
'hash': '#frag',
'search': '?foo=?bar/',
'query': 'foo=?bar/'
},
'http://example.com#frag=?bar/#frag' : {
'href': 'http://example.com#frag=?bar/#frag',
'protocol': 'http:',
'host': 'example.com',
'hostname': 'example.com',
'hash': '#frag=?bar/#frag'
},
'/foo/bar?baz=quux#frag' : {
'href': '/foo/bar?baz=quux#frag',
'hash': '#frag',
Expand Down Expand Up @@ -154,9 +113,7 @@ var parseTests = {
'javascript:alert(\'hello\');' : {
'href': 'javascript:alert(\'hello\');',
'protocol': 'javascript:',
'host': 'alert(\'hello\')',
'hostname': 'alert(\'hello\')',
'pathname' : ';'
'pathname': 'alert(\'hello\');'
},
'xmpp:isaacschlueter@jabber.org' : {
'href': 'xmpp:isaacschlueter@jabber.org',
Expand Down Expand Up @@ -194,21 +151,13 @@ var parseTestsWithQueryString = {
'pathname': '/foo/bar'
},
'http://example.com' : {
'href': 'http://example.com',
'protocol': 'http:',
'slashes': true,
'host': 'example.com',
'hostname': 'example.com',
'query': {}
},
'http://example.com?' : {
'href': 'http://example.com?',
'href': 'http://example.com/',
'protocol': 'http:',
'slashes': true,
'host': 'example.com',
'hostname': 'example.com',
'search': '?',
'query': {}
'query': {},
'pathname': '/'
}
};
for (var u in parseTestsWithQueryString) {
Expand All @@ -225,25 +174,98 @@ for (var u in parseTestsWithQueryString) {
// some extra formatting tests, just to verify
// that it'll format slightly wonky content to a valid url.
var formatTests = {
'http://example.com?' : {
'href': 'http://example.com/?',
'protocol': 'http:',
'slashes': true,
'host': 'example.com',
'hostname': 'example.com',
'search': '?',
'query': {},
'pathname': '/'
},
'http://example.com?foo=bar#frag' : {
'href': 'http://example.com/?foo=bar#frag',
'protocol': 'http:',
'host': 'example.com',
'hostname': 'example.com',
'hash': '#frag',
'search': '?foo=bar',
'query': 'foo=bar',
'pathname': '/'
},
'http://example.com?foo=@bar#frag' : {
'href': 'http://example.com/?foo=@bar#frag',
'protocol': 'http:',
'host': 'example.com',
'hostname': 'example.com',
'hash': '#frag',
'search': '?foo=@bar',
'query': 'foo=@bar',
'pathname': '/'
},
'http://example.com?foo=/bar/#frag' : {
'href': 'http://example.com/?foo=/bar/#frag',
'protocol': 'http:',
'host': 'example.com',
'hostname': 'example.com',
'hash': '#frag',
'search': '?foo=/bar/',
'query': 'foo=/bar/',
'pathname': '/'
},
'http://example.com?foo=?bar/#frag' : {
'href': 'http://example.com/?foo=?bar/#frag',
'protocol': 'http:',
'host': 'example.com',
'hostname': 'example.com',
'hash': '#frag',
'search': '?foo=?bar/',
'query': 'foo=?bar/',
'pathname': '/'
},
'http://example.com#frag=?bar/#frag' : {
'href': 'http://example.com/#frag=?bar/#frag',
'protocol': 'http:',
'host': 'example.com',
'hostname': 'example.com',
'hash': '#frag=?bar/#frag',
'pathname': '/'
},
'http://google.com" onload="alert(42)/' : {
'href': 'http://google.com/',
'protocol': 'http:',
'host': 'google.com',
'pathname': '/'
},
'http://a.com/a/b/c?s#h' : {
'href': 'http://a.com/a/b/c?s#h',
'protocol': 'http',
'host': 'a.com',
'pathname': 'a/b/c',
'hash': 'h',
'search': 's'
},
'xmpp:isaacschlueter@jabber.org' : {
'href': 'xmpp://isaacschlueter@jabber.org',
'href': 'xmpp:isaacschlueter@jabber.org',
'protocol': 'xmpp:',
'host': 'isaacschlueter@jabber.org',
'auth': 'isaacschlueter',
'hostname': 'jabber.org'
}
};
for (var u in formatTests) {
var actual = url.format(formatTests[u]);
assert.equal(actual, u,
'wonky format(' + u + ') == ' + u + '\nactual:' + actual);
var expect = formatTests[u].href;
delete formatTests[u].href;
var actual = url.format(u);
var actualObj = url.format(formatTests[u]);
assert.equal(actual, expect,
'wonky format(' + u + ') == ' + expect +
'\nactual:' + actual);
assert.equal(actualObj, expect,
'wonky format(' + JSON.stringify(formatTests[u]) +
') == ' + expect +
'\nactual: ' + actualObj);
}

/*
Expand Down

0 comments on commit d664bf3

Please sign in to comment.