From d099f8e317a6c55c2c0b877565b47d40c5a3ba9c Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Tue, 14 Mar 2017 23:41:57 -0700 Subject: [PATCH] src: remove explicit UTF-8 validity check in url This step was never part of the URL Standard's host parser algorithm, and is rendered unnecessary after IDNA errors are no longer ignored. PR-URL: https://github.com/nodejs/node/pull/11859 Refs: c2a302c50b3787666339371 "src: do not ignore IDNA conversion error" Refs: https://url.spec.whatwg.org/#concept-host-parser Reviewed-By: Ben Noordhuis Reviewed-By: Daijiro Wachi Reviewed-By: Anna Henningsen Reviewed-By: James M Snell Reviewed-By: Colin Ihrig --- src/node_url.cc | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/src/node_url.cc b/src/node_url.cc index b2f1322ade3cc0..1aae557115851f 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -15,11 +15,6 @@ #include #include -#if defined(NODE_HAVE_I18N_SUPPORT) -#include -#include -#endif - #define UNICODE_REPLACEMENT_CHARACTER 0xFFFD namespace node { @@ -113,21 +108,6 @@ namespace url { output->assign(*buf, buf.length()); return true; } - - // Unfortunately there's not really a better way to do this. - // Iterate through each encoded codepoint and verify that - // it is a valid unicode codepoint. - static bool IsValidUTF8(std::string* input) { - const char* p = input->c_str(); - int32_t len = input->length(); - for (int32_t i = 0; i < len;) { - UChar32 c; - U8_NEXT_UNSAFE(p, i, c); - if (!U_IS_UNICODE_CHAR(c)) - return false; - } - return true; - } #else // Intentional non-ops if ICU is not present. static bool ToUnicode(std::string* input, std::string* output) { @@ -139,10 +119,6 @@ namespace url { *output = *input; return true; } - - static bool IsValidUTF8(std::string* input) { - return true; - } #endif // If a UTF-16 character is a low/trailing surrogate. @@ -395,12 +371,6 @@ namespace url { if (PercentDecode(input, length, &decoded) < 0) goto end; - // If there are any invalid UTF8 byte sequences, we have to fail. - // Unfortunately this means iterating through the string and checking - // each decoded codepoint. - if (!IsValidUTF8(&decoded)) - goto end; - // Then we have to punycode toASCII if (!ToASCII(&decoded, &decoded)) goto end;