Skip to content

Commit

Permalink
http: disallow two-byte characters in URL path
Browse files Browse the repository at this point in the history
This commit changes node's handling of two-byte characters in
the path component of an http URL. Previously, node would just
strip the higher byte when generating the request. So this code:

```
http.request({host: "example.com", port: "80", "/N"})
```

would request `http://example.com/.`
(`.` is the character for the byte `0x2e`).

This is not useful and can in some cases lead to filter evasion.
With this change, the code generates `ERR_UNESCAPED_CHARACTERS`,
just like space and control characters already did.

PR-URL: #16237
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Anna Henningsen <anna@addaleax.net>
Reviewed-By: Anatoli Papirovski <apapirovski@mac.com>
Reviewed-By: Ruben Bridgewater <ruben@bridgewater.de>
Reviewed-By: Timothy Gu <timothygu99@gmail.com>
  • Loading branch information
bennofs authored and BridgeAR committed Dec 12, 2017
1 parent ac25cee commit b961d9f
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 34 deletions.
36 changes: 2 additions & 34 deletions lib/_http_client.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,33 +41,7 @@ const { outHeadersKey } = require('internal/http');
const { nextTick } = require('internal/process/next_tick');
const errors = require('internal/errors');

// The actual list of disallowed characters in regexp form is more like:
// /[^A-Za-z0-9\-._~!$&'()*+,;=/:@]/
// with an additional rule for ignoring percentage-escaped characters, but
// that's a) hard to capture in a regular expression that performs well, and
// b) possibly too restrictive for real-world usage. So instead we restrict the
// filter to just control characters and spaces.
//
// This function is used in the case of small paths, where manual character code
// checks can greatly outperform the equivalent regexp (tested in V8 5.4).
function isInvalidPath(s) {
var i = 0;
if (s.charCodeAt(0) <= 32) return true;
if (++i >= s.length) return false;
if (s.charCodeAt(1) <= 32) return true;
if (++i >= s.length) return false;
if (s.charCodeAt(2) <= 32) return true;
if (++i >= s.length) return false;
if (s.charCodeAt(3) <= 32) return true;
if (++i >= s.length) return false;
if (s.charCodeAt(4) <= 32) return true;
if (++i >= s.length) return false;
if (s.charCodeAt(5) <= 32) return true;
++i;
for (; i < s.length; ++i)
if (s.charCodeAt(i) <= 32) return true;
return false;
}
const INVALID_PATH_REGEX = /[^\u0021-\u00ff]/;

function validateHost(host, name) {
if (host != null && typeof host !== 'string') {
Expand Down Expand Up @@ -117,13 +91,7 @@ function ClientRequest(options, cb) {
var path;
if (options.path) {
path = String(options.path);
var invalidPath;
if (path.length <= 39) { // Determined experimentally in V8 5.4
invalidPath = isInvalidPath(path);
} else {
invalidPath = /[\u0000-\u0020]/.test(path);
}
if (invalidPath)
if (INVALID_PATH_REGEX.test(path))
throw new errors.TypeError('ERR_UNESCAPED_CHARACTERS', 'Request path');
}

Expand Down
12 changes: 12 additions & 0 deletions test/parallel/test-http-client-invalid-path.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
'use strict';
const common = require('../common');
const http = require('http');

common.expectsError(() => {
http.request({
path: '/thisisinvalid\uffe2'
}).end();
}, {
code: 'ERR_UNESCAPED_CHARACTERS',
type: TypeError
});

4 comments on commit b961d9f

@JoeUX
Copy link

@JoeUX JoeUX commented on b961d9f Apr 26, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What are the implications of this change for Internationalized Domain Names (IDNs)?

@Trott
Copy link
Member

@Trott Trott commented on b961d9f Apr 26, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What are the implications of this change for Internationalized Domain Names (IDNs)?

/ping @nodejs/http @nodejs/i18n

@jasnell
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IDNs must be converted to ascii form to be valid within a URL.

@TimothyGu
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In case of a URL string being passed in, url.parse() (and new URL()) will automatically punycode-encode the host.

Please sign in to comment.