Skip to content

Commit

Permalink
url: improve spec compliance of WHATWG URL
Browse files Browse the repository at this point in the history
This patch contains the following changes:

url: make IPv4 parser more spec compliant

* Return int64_t from ParseNumber to prevent overflow for valid big numbers
* Don't throw when there are more than 4 parts (it cannot be an IP
address)
* Correctly interpret the address and don't always throw when there are
numbers > 255

Ref: https://url.spec.whatwg.org/#concept-ipv4-parser
Fixes: #10306

url: percent encode fragment to follow spec change

Ref: whatwg/url#150
Ref: whatwg/url@373dbed

url: fix URL#search setter

The check for empty string must be done before removing the leading '?'.

Ref: https://url.spec.whatwg.org/#dom-url-search

url: set port to null if an empty string is given

This is to follow a spec change.

Ref: whatwg/url#113

url: fix parsing of paths with Windows drive letter

test: update WHATWG URL test fixtures

PR-URL: #10317
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Benjamin Gruenbaum <benjamingr@gmail.com>
  • Loading branch information
targos authored and evanlucas committed Jan 4, 2017
1 parent 2619236 commit 78e8674
Show file tree
Hide file tree
Showing 4 changed files with 429 additions and 51 deletions.
5 changes: 2 additions & 3 deletions lib/internal/url.js
Original file line number Diff line number Diff line change
Expand Up @@ -444,8 +444,7 @@ Object.defineProperties(URL.prototype, {
return;
port = String(port);
if (port === '') {
// Currently, if port number is empty, left unchanged.
// TODO(jasnell): This might be changing in the spec
ctx.port = undefined;
return;
}
binding.parse(port, binding.kPort, null, ctx,
Expand Down Expand Up @@ -478,13 +477,13 @@ Object.defineProperties(URL.prototype, {
set(search) {
const ctx = this[context];
search = String(search);
if (search[0] === '?') search = search.slice(1);
if (!search) {
ctx.query = null;
ctx.flags &= ~binding.URL_FLAGS_HAS_QUERY;
this[searchParams][searchParams] = {};
return;
}
if (search[0] === '?') search = search.slice(1);
ctx.query = '';
binding.parse(search, binding.kQuery, null, ctx,
onParseSearchComplete.bind(this));
Expand Down
64 changes: 37 additions & 27 deletions src/node_url.cc
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ namespace url {
return type;
}

static inline int ParseNumber(const char* start, const char* end) {
static inline int64_t ParseNumber(const char* start, const char* end) {
unsigned R = 10;
if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') {
start += 2;
Expand Down Expand Up @@ -293,7 +293,7 @@ namespace url {
}
p++;
}
return strtol(start, NULL, R);
return strtoll(start, NULL, R);
}

static url_host_type ParseIPv4Host(url_host* host,
Expand All @@ -305,28 +305,25 @@ namespace url {
const char* end = pointer + length;
int parts = 0;
uint32_t val = 0;
unsigned numbers[4];
uint64_t numbers[4];
int tooBigNumbers = 0;
if (length == 0)
goto end;

while (pointer <= end) {
const char ch = pointer < end ? pointer[0] : kEOL;
const int remaining = end - pointer - 1;
if (ch == '.' || ch == kEOL) {
if (++parts > 4 || pointer - mark == 0)
break;
int n = ParseNumber(mark, pointer);
if (n < 0) {
type = HOST_TYPE_DOMAIN;
if (++parts > 4)
goto end;
}
if (pointer - mark == 10) {
numbers[parts - 1] = n;
if (pointer - mark == 0)
break;
}
if (n > 255) {
type = HOST_TYPE_FAILED;
int64_t n = ParseNumber(mark, pointer);
if (n < 0)
goto end;

if (n > 255) {
tooBigNumbers++;
}
numbers[parts - 1] = n;
mark = pointer + 1;
Expand All @@ -335,14 +332,23 @@ namespace url {
}
pointer++;
}
CHECK_GT(parts, 0);

// If any but the last item in numbers is greater than 255, return failure.
// If the last item in numbers is greater than or equal to
// 256^(5 - the number of items in numbers), return failure.
if (tooBigNumbers > 1 ||
(tooBigNumbers == 1 && numbers[parts - 1] <= 255) ||
numbers[parts - 1] >= pow(256, static_cast<double>(5 - parts))) {
type = HOST_TYPE_FAILED;
goto end;
}

type = HOST_TYPE_IPV4;
if (parts > 0) {
val = numbers[parts - 1];
for (int n = 0; n < parts - 1; n++) {
double b = 3-n;
val += numbers[n] * pow(256, b);
}
val = numbers[parts - 1];
for (int n = 0; n < parts - 1; n++) {
double b = 3 - n;
val += numbers[n] * pow(256, b);
}

host->value.ipv4 = val;
Expand Down Expand Up @@ -618,6 +624,13 @@ namespace url {
}
}

static inline void ShortenUrlPath(struct url_data* url) {
if (url->path.empty()) return;
if (url->path.size() == 1 && url->scheme == "file:" &&
NORMALIZED_WINDOWS_DRIVE_LETTER(url->path[0])) return;
url->path.pop_back();
}

static void Parse(Environment* env,
Local<Value> recv,
const char* input,
Expand Down Expand Up @@ -895,8 +908,7 @@ namespace url {
if (DOES_HAVE_PATH(base)) {
SET_HAVE_PATH()
url.path = base.path;
if (!url.path.empty())
url.path.pop_back();
ShortenUrlPath(&url);
}
url.port = base.port;
state = kPath;
Expand Down Expand Up @@ -1112,8 +1124,7 @@ namespace url {
SET_HAVE_PATH()
url.path = base.path;
}
if (!url.path.empty())
url.path.pop_back();
ShortenUrlPath(&url);
}
state = kPath;
continue;
Expand Down Expand Up @@ -1172,8 +1183,7 @@ namespace url {
special_back_slash ||
(!state_override && (ch == '?' || ch == '#'))) {
if (IsDoubleDotSegment(buffer)) {
if (!url.path.empty())
url.path.pop_back();
ShortenUrlPath(&url);
if (ch != '/' && !special_back_slash) {
SET_HAVE_PATH()
url.path.push_back("");
Expand Down Expand Up @@ -1247,7 +1257,7 @@ namespace url {
case 0:
break;
default:
buffer += ch;
AppendOrEscape(&buffer, ch, SimpleEncodeSet);
}
break;
default:
Expand Down
34 changes: 20 additions & 14 deletions test/fixtures/url-setter-tests.json
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@
}
},
{
"comment": "Port number is unchanges if empty in the new value. Note: this may change, see https://github.com/whatwg/url/pull/113",
"comment": "Port number is unchanged if not specified",
"href": "http://example.net:8080",
"new_value": "example.com:",
"expected": {
Expand All @@ -358,7 +358,6 @@
}
},
{

"comment": "The empty host is not valid for special schemes",
"href": "http://example.net",
"new_value": "",
Expand Down Expand Up @@ -763,14 +762,14 @@
}
},
{
"comment": "Port number is unchanged if empty in the new value. Note: this may change, see https://github.com/whatwg/url/pull/113",
"comment": "Port number is removed if empty is the new value",
"href": "http://example.net:8080",
"new_value": "",
"expected": {
"href": "http://example.net:8080/",
"host": "example.net:8080",
"href": "http://example.net/",
"host": "example.net",
"hostname": "example.net",
"port": "8080"
"port": ""
}
},
{
Expand Down Expand Up @@ -975,6 +974,15 @@
"href": "http://example.net/..%c3%89t%C3%A9",
"pathname": "/..%c3%89t%C3%A9"
}
},
{
"comment": "? needs to be encoded",
"href": "http://example.net",
"new_value": "?",
"expected": {
"href": "http://example.net/%3F",
"pathname": "/%3F"
}
}
],
"search": [
Expand Down Expand Up @@ -1011,7 +1019,6 @@
}
},
{
"skip": "we do not pass this, but we do match chromes behavior",
"href": "https://example.net?lang=en-US#nav",
"new_value": "?",
"expected": {
Expand Down Expand Up @@ -1096,7 +1103,6 @@
}
},
{
"skip": "we do not pass this, but we do match chromes behavior",
"href": "https://example.net?lang=en-US#nav",
"new_value": "#",
"expected": {
Expand All @@ -1113,22 +1119,22 @@
}
},
{
"comment": "No percent-encoding at all (!); nuls, tabs, and newlines are removed",
"comment": "Simple percent-encoding; nuls, tabs, and newlines are removed",
"href": "a:/",
"new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé",
"expected": {
"href": "a:/#\u0001\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé",
"hash": "#\u0001\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé"
"href": "a:/#%01%1F !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9",
"hash": "#%01%1F !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9"
}
},
{
"comment": "Bytes already percent-encoded are left as-is",
"href": "http://example.net",
"new_value": "%c3%89té",
"expected": {
"href": "http://example.net/#%c3%89té",
"hash": "#%c3%89té"
"href": "http://example.net/#%c3%89t%C3%A9",
"hash": "#%c3%89t%C3%A9"
}
}
]
}
}
Loading

0 comments on commit 78e8674

Please sign in to comment.