Skip to content
Browse files

src: remove explicit UTF-8 validity check in url

This step was never part of the URL Standard's host parser algorithm,
and it became unnecessary once IDNA conversion errors were no longer ignored.

PR-URL: #11859
Refs: c2a302c "src: do not ignore IDNA conversion error"
Reviewed-By: Ben Noordhuis <>
Reviewed-By: Daijiro Wachi <>
Reviewed-By: Anna Henningsen <>
Reviewed-By: James M Snell <>
Reviewed-By: Colin Ihrig <>
  • Loading branch information...
TimothyGu committed Mar 15, 2017
1 parent 4cdb0e8 commit d099f8e317a6c55c2c0b877565b47d40c5a3ba9c
Showing with 0 additions and 30 deletions.
  1. +0 −30 src/
@@ -15,11 +15,6 @@
#include <stdio.h>
#include <cmath>

#if defined(NODE_HAVE_I18N_SUPPORT)
#include <unicode/utf8.h>
#include <unicode/utf.h>


namespace node {
@@ -113,21 +108,6 @@ namespace url {
output->assign(*buf, buf.length());
return true;

// Unfortunately there's not really a better way to do this.
// Iterate through each encoded code point and verify that
// it is a valid Unicode code point.
static bool IsValidUTF8(std::string* input) {
const char* p = input->c_str();
int32_t len = input->length();
for (int32_t i = 0; i < len;) {
UChar32 c;
U8_NEXT_UNSAFE(p, i, c);
return false;
return true;
// Intentional non-ops if ICU is not present.
static bool ToUnicode(std::string* input, std::string* output) {
@@ -139,10 +119,6 @@ namespace url {
*output = *input;
return true;

static bool IsValidUTF8(std::string* input) {
return true;

// If a UTF-16 character is a low/trailing surrogate.
@@ -395,12 +371,6 @@ namespace url {
if (PercentDecode(input, length, &decoded) < 0)
goto end;

// If there are any invalid UTF-8 byte sequences, we have to fail.
// Unfortunately this means iterating through the string and checking
// each decoded code point.
if (!IsValidUTF8(&decoded))
goto end;

// Then we have to punycode toASCII
if (!ToASCII(&decoded, &decoded))
goto end;

0 comments on commit d099f8e

Please sign in to comment.
You can’t perform that action at this time.