Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

url: standard-conformant C0 control and whitespace handling #12846

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 39 additions & 20 deletions src/node_url.cc
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,9 @@ enum url_error_cb_args {
// https://infra.spec.whatwg.org/#ascii-tab-or-newline
CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\n' || ch == '\r'))

// https://infra.spec.whatwg.org/#c0-control-or-space
CHAR_TEST(8, IsC0ControlOrSpace, (ch >= '\0' && ch <= ' '))

// https://infra.spec.whatwg.org/#ascii-digit
CHAR_TEST(8, IsASCIIDigit, (ch >= '0' && ch <= '9'))

Expand Down Expand Up @@ -1134,15 +1137,45 @@ static inline void ShortenUrlPath(struct url_data* url) {
}

void URL::Parse(const char* input,
const size_t len,
size_t len,
enum url_parse_state state_override,
struct url_data* url,
bool has_url,
const struct url_data* base,
bool has_base) {
const char* p = input;
const char* end = input + len;

if (!has_url) {
for (const char* ptr = p; ptr < end; ptr++) {
if (IsC0ControlOrSpace(*ptr))
p++;
else
break;
}
for (const char* ptr = end - 1; ptr >= p; ptr--) {
if (IsC0ControlOrSpace(*ptr))
end--;
else
break;
}
len = end - p;
}

std::string whitespace_stripped;
whitespace_stripped.reserve(len);
for (const char* ptr = p; ptr < end; ptr++)
if (!IsASCIITabOrNewline(*ptr))
whitespace_stripped += *ptr;

input = whitespace_stripped.c_str();
len = whitespace_stripped.size();
p = input;
end = input + len;

bool atflag = false;
bool sbflag = false;
bool uflag = false;
int wskip = 0;

std::string buffer;
url->scheme.reserve(len);
Expand All @@ -1159,9 +1192,6 @@ void URL::Parse(const char* input,
enum url_parse_state state = has_state_override ? state_override :
kSchemeStart;

const char* p = input;
const char* end = input + len;

if (state < kSchemeStart || state > kFragment) {
url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
return;
Expand All @@ -1171,18 +1201,6 @@ void URL::Parse(const char* input,
const char ch = p < end ? p[0] : kEOL;
const size_t remaining = end == p ? 0 : (end - p - 1);

if (IsASCIITabOrNewline(ch)) {
if (state == kAuthority) {
// It's necessary to keep track of how much whitespace
// is being ignored when in kAuthority state because of
// how the buffer is managed. TODO: See if there's a better
// way
wskip++;
}
p++;
continue;
}

bool special = (url->flags & URL_FLAGS_SPECIAL);
bool cannot_be_base;
const bool special_back_slash = (special && ch == '\\');
Expand Down Expand Up @@ -1500,7 +1518,7 @@ void URL::Parse(const char* input,
url->flags |= URL_FLAGS_FAILED;
return;
}
p -= buffer.size() + 1 + wskip;
p -= buffer.size() + 1;
buffer.clear();
state = kHost;
} else {
Expand Down Expand Up @@ -1892,16 +1910,17 @@ static void Parse(Environment* env,
HandleScope handle_scope(isolate);
Context::Scope context_scope(context);

const bool has_context = context_obj->IsObject();
const bool has_base = base_obj->IsObject();

struct url_data base;
struct url_data url;
if (context_obj->IsObject())
if (has_context)
HarvestContext(env, &url, context_obj.As<Object>());
if (has_base)
HarvestBase(env, &base, base_obj.As<Object>());

URL::Parse(input, len, state_override, &url, &base, has_base);
URL::Parse(input, len, state_override, &url, has_context, &base, has_base);
if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) ||
((state_override != kUnknownState) &&
(url.flags & URL_FLAGS_TERMINATED)))
Expand Down
17 changes: 11 additions & 6 deletions src/node_url.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,30 +81,35 @@ struct url_data {
class URL {
public:
static void Parse(const char* input,
const size_t len,
size_t len,
enum url_parse_state state_override,
struct url_data* url,
bool has_url,
const struct url_data* base,
bool has_base);

URL(const char* input, const size_t len) {
Parse(input, len, kUnknownState, &context_, nullptr, false);
Parse(input, len, kUnknownState, &context_, false, nullptr, false);
}

URL(const char* input, const size_t len, const URL* base) {
if (base != nullptr)
Parse(input, len, kUnknownState, &context_, &(base->context_), true);
Parse(input, len, kUnknownState,
&context_, false,
&(base->context_), true);
else
Parse(input, len, kUnknownState, &context_, nullptr, false);
Parse(input, len, kUnknownState, &context_, false, nullptr, false);
}

URL(const char* input, const size_t len,
const char* base, const size_t baselen) {
if (base != nullptr && baselen > 0) {
URL _base(base, baselen);
Parse(input, len, kUnknownState, &context_, &(_base.context_), true);
Parse(input, len, kUnknownState,
&context_, false,
&(_base.context_), true);
} else {
Parse(input, len, kUnknownState, &context_, nullptr, false);
Parse(input, len, kUnknownState, &context_, false, nullptr, false);
}
}

Expand Down
32 changes: 31 additions & 1 deletion test/fixtures/url-tests.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
'use strict';

/* WPT Refs:
https://github.com/w3c/web-platform-tests/blob/28541bb/url/urltestdata.json
https://github.com/w3c/web-platform-tests/blob/0f26c418a5/url/urltestdata.json
License: http://www.w3.org/Consortium/Legal/2008/04-testsuite-copyright.html
*/
module.exports =
Expand Down Expand Up @@ -3566,6 +3566,22 @@ module.exports =
"search": "",
"hash": ""
},
"Leading and trailing C0 control or space",
{
"input": "\u0000\u001b\u0004\u0012 http://example.com/\u001f \u000d ",
"base": "about:blank",
"href": "http://example.com/",
"origin": "http://example.com",
"protocol": "http:",
"username": "",
"password": "",
"host": "example.com",
"hostname": "example.com",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
"Ideographic full stop (full-width period for Chinese, etc.) should be treated as a dot. U+3002 is mapped to U+002E (dot)",
{
"input": "http://www.foo。bar.com",
Expand Down Expand Up @@ -5487,6 +5503,20 @@ module.exports =
"search": "",
"hash": ""
},
{
"input": "C|\n/",
"base": "file://host/dir/file",
"href": "file:///C:/",
"protocol": "file:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "/C:/",
"search": "",
"hash": ""
},
{
"input": "C|\\",
"base": "file://host/dir/file",
Expand Down