Skip to content

Commit

Permalink
src: remove explicit UTF-8 validity check in url
Browse files Browse the repository at this point in the history
This step was never part of the URL Standard's host parser algorithm,
and is rendered unnecessary after IDNA errors are no longer ignored.

PR-URL: #11859
Refs: c2a302c "src: do not ignore IDNA conversion error"
Refs: https://url.spec.whatwg.org/#concept-host-parser
Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
Reviewed-By: Daijiro Wachi <daijiro.wachi@gmail.com>
Reviewed-By: Anna Henningsen <anna@addaleax.net>
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
  • Loading branch information
TimothyGu committed Mar 16, 2017
1 parent 4cdb0e8 commit d099f8e
Showing 1 changed file with 0 additions and 30 deletions.
30 changes: 0 additions & 30 deletions src/node_url.cc
Expand Up @@ -15,11 +15,6 @@
#include <stdio.h>
#include <cmath>

#if defined(NODE_HAVE_I18N_SUPPORT)
#include <unicode/utf8.h>
#include <unicode/utf.h>
#endif

#define UNICODE_REPLACEMENT_CHARACTER 0xFFFD

namespace node {
Expand Down Expand Up @@ -113,21 +108,6 @@ namespace url {
output->assign(*buf, buf.length());
return true;
}

// Unfortunately there's not really a better way to do this.
// Iterate through each encoded codepoint and verify that
// it is a valid unicode codepoint.
static bool IsValidUTF8(std::string* input) {
const char* p = input->c_str();
int32_t len = input->length();
for (int32_t i = 0; i < len;) {
UChar32 c;
U8_NEXT_UNSAFE(p, i, c);
if (!U_IS_UNICODE_CHAR(c))
return false;
}
return true;
}
#else
// Intentional no-ops if ICU is not present.
static bool ToUnicode(std::string* input, std::string* output) {
Expand All @@ -139,10 +119,6 @@ namespace url {
*output = *input;
return true;
}

// Fallback used when ICU is not compiled in: no validation is performed and
// every input is reported as valid.
static bool IsValidUTF8(std::string* input) {
  static_cast<void>(input);  // Deliberately unused in this build.
  return true;
}
#endif

// If a UTF-16 character is a low/trailing surrogate.
Expand Down Expand Up @@ -395,12 +371,6 @@ namespace url {
if (PercentDecode(input, length, &decoded) < 0)
goto end;

// If there are any invalid UTF8 byte sequences, we have to fail.
// Unfortunately this means iterating through the string and checking
// each decoded codepoint.
if (!IsValidUTF8(&decoded))
goto end;

// Then we have to punycode toASCII
if (!ToASCII(&decoded, &decoded))
goto end;
Expand Down

0 comments on commit d099f8e

Please sign in to comment.