From d099f8e317a6c55c2c0b877565b47d40c5a3ba9c Mon Sep 17 00:00:00 2001
From: Timothy Gu <timothygu99@gmail.com>
Date: Tue, 14 Mar 2017 23:41:57 -0700
Subject: [PATCH] src: remove explicit UTF-8 validity check in url

This step was never part of the URL Standard's host parser algorithm,
and it is unnecessary now that IDNA conversion errors are no longer
ignored.

PR-URL: https://github.com/nodejs/node/pull/11859
Refs: c2a302c50b3787666339371 "src: do not ignore IDNA conversion error"
Refs: https://url.spec.whatwg.org/#concept-host-parser
Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
Reviewed-By: Daijiro Wachi <daijiro.wachi@gmail.com>
Reviewed-By: Anna Henningsen <anna@addaleax.net>
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
---
 src/node_url.cc | 30 ------------------------------
 1 file changed, 30 deletions(-)
diff --git a/src/node_url.cc b/src/node_url.cc
index b2f1322ade3cc0..1aae557115851f 100644
--- a/src/node_url.cc
+++ b/src/node_url.cc
@@ -15,11 +15,6 @@
 #include <stdio.h>
 #include <cmath>
 
-#if defined(NODE_HAVE_I18N_SUPPORT)
-#include <unicode/utf8.h>
-#include <unicode/utf.h>
-#endif
-
 #define UNICODE_REPLACEMENT_CHARACTER 0xFFFD
 
 namespace node {
@@ -113,21 +108,6 @@ namespace url {
     output->assign(*buf, buf.length());
     return true;
   }
-
-  // Unfortunately there's not really a better way to do this.
-  // Iterate through each encoded codepoint and verify that
-  // it is a valid unicode codepoint.
-  static bool IsValidUTF8(std::string* input) {
-    const char* p = input->c_str();
-    int32_t len = input->length();
-    for (int32_t i = 0; i < len;) {
-      UChar32 c;
-      U8_NEXT_UNSAFE(p, i, c);
-      if (!U_IS_UNICODE_CHAR(c))
-        return false;
-    }
-    return true;
-  }
 #else
   // Intentional non-ops if ICU is not present.
   static bool ToUnicode(std::string* input, std::string* output) {
@@ -139,10 +119,6 @@ namespace url {
     *output = *input;
     return true;
   }
-
-  static bool IsValidUTF8(std::string* input) {
-    return true;
-  }
 #endif
 
   // If a UTF-16 character is a low/trailing surrogate.
@@ -395,12 +371,6 @@ namespace url {
     if (PercentDecode(input, length, &decoded) < 0)
       goto end;
 
-    // If there are any invalid UTF8 byte sequences, we have to fail.
-    // Unfortunately this means iterating through the string and checking
-    // each decoded codepoint.
-    if (!IsValidUTF8(&decoded))
-      goto end;
-
     // Then we have to punycode toASCII
     if (!ToASCII(&decoded, &decoded))
       goto end;