From b961d9fd83c963657c2305ed13ff447573eac852 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Benno=20F=C3=BCnfst=C3=BCck?= Date: Mon, 16 Oct 2017 15:36:32 +0200 Subject: [PATCH] http: disallow two-byte characters in URL path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit changes node's handling of two-byte characters in the path component of an http URL. Previously, node would just strip the higher byte when generating the request. So this code: ``` http.request({host: "example.com", port: "80", "/N"}) ``` would request `http://example.com/.` (`.` is the character for the byte `0x2e`). This is not useful and can in some cases lead to filter evasion. With this change, the code generates `ERR_UNESCAPED_CHARACTERS`, just like space and control characters already did. PR-URL: https://github.com/nodejs/node/pull/16237 Reviewed-By: James M Snell Reviewed-By: Anna Henningsen Reviewed-By: Anatoli Papirovski Reviewed-By: Ruben Bridgewater Reviewed-By: Timothy Gu --- lib/_http_client.js | 36 ++----------------- .../parallel/test-http-client-invalid-path.js | 12 +++++++ 2 files changed, 14 insertions(+), 34 deletions(-) create mode 100644 test/parallel/test-http-client-invalid-path.js diff --git a/lib/_http_client.js b/lib/_http_client.js index 5b568628006d68..bdda708493adba 100644 --- a/lib/_http_client.js +++ b/lib/_http_client.js @@ -41,33 +41,7 @@ const { outHeadersKey } = require('internal/http'); const { nextTick } = require('internal/process/next_tick'); const errors = require('internal/errors'); -// The actual list of disallowed characters in regexp form is more like: -// /[^A-Za-z0-9\-._~!$&'()*+,;=/:@]/ -// with an additional rule for ignoring percentage-escaped characters, but -// that's a) hard to capture in a regular expression that performs well, and -// b) possibly too restrictive for real-world usage. So instead we restrict the -// filter to just control characters and spaces. -// -// This function is used in the case of small paths, where manual character code -// checks can greatly outperform the equivalent regexp (tested in V8 5.4). -function isInvalidPath(s) { - var i = 0; - if (s.charCodeAt(0) <= 32) return true; - if (++i >= s.length) return false; - if (s.charCodeAt(1) <= 32) return true; - if (++i >= s.length) return false; - if (s.charCodeAt(2) <= 32) return true; - if (++i >= s.length) return false; - if (s.charCodeAt(3) <= 32) return true; - if (++i >= s.length) return false; - if (s.charCodeAt(4) <= 32) return true; - if (++i >= s.length) return false; - if (s.charCodeAt(5) <= 32) return true; - ++i; - for (; i < s.length; ++i) - if (s.charCodeAt(i) <= 32) return true; - return false; -} +const INVALID_PATH_REGEX = /[^\u0021-\u00ff]/; function validateHost(host, name) { if (host != null && typeof host !== 'string') { @@ -117,13 +91,7 @@ function ClientRequest(options, cb) { var path; if (options.path) { path = String(options.path); - var invalidPath; - if (path.length <= 39) { // Determined experimentally in V8 5.4 - invalidPath = isInvalidPath(path); - } else { - invalidPath = /[\u0000-\u0020]/.test(path); - } - if (invalidPath) + if (INVALID_PATH_REGEX.test(path)) throw new errors.TypeError('ERR_UNESCAPED_CHARACTERS', 'Request path'); } diff --git a/test/parallel/test-http-client-invalid-path.js b/test/parallel/test-http-client-invalid-path.js new file mode 100644 index 00000000000000..c042d61eda0999 --- /dev/null +++ b/test/parallel/test-http-client-invalid-path.js @@ -0,0 +1,12 @@ +'use strict'; +const common = require('../common'); +const http = require('http'); + +common.expectsError(() => { + http.request({ + path: '/thisisinvalid\uffe2' + }).end(); +}, { + code: 'ERR_UNESCAPED_CHARACTERS', + type: TypeError +});