Permalink
Browse files

net: use icu's punycode implementation

ICU has a built-in punycode implementation. Use it instead of the
JavaScript implementation because it is much faster.

PR-URL: #7355
Reviewed-By: Trevor Norris <trev.norris@gmail.com>
Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
  • Loading branch information...
jasnell committed Jun 21, 2016
1 parent 3d6a01e commit 7de59ef9258ede7684ff0882a7964db8f0226533
Showing with 291 additions and 12 deletions.
  1. +75 −0 benchmark/net/punycode.js
  2. +10 −2 lib/url.js
  3. +132 −0 src/node_i18n.cc
  4. +72 −0 test/parallel/test-icu-punycode.js
  5. +1 −8 tools/icu/icu-generic.gyp
  6. +1 −2 tools/icu/icu_small.json
View
@@ -0,0 +1,75 @@
'use strict';
const common = require('../common.js');
const icu = process.binding('icu');
const punycode = require('punycode');
// Internationalized hostnames fed to both implementations, one per run.
const hostnames = [
  'افغانستا.icom.museum',
  'الجزائر.icom.museum',
  'österreich.icom.museum',
  'বাংলাদেশ.icom.museum',
  'беларусь.icom.museum',
  'belgië.icom.museum',
  'българия.icom.museum',
  'تشادر.icom.museum',
  '中国.icom.museum',
  'القمر.icom.museum',
  'κυπρος.icom.museum',
  'českárepublika.icom.museum',
  'مصر.icom.museum',
  'ελλάδα.icom.museum',
  'magyarország.icom.museum',
  'ísland.icom.museum',
  'भारत.icom.museum',
  'ايران.icom.museum',
  'éire.icom.museum',
  'איקו״ם.ישראל.museum',
  '日本.icom.museum',
  'الأردن.icom.museum'
];

// Benchmark configuration handed to common.createBenchmark together with
// `main`: which implementation to time (`method`), the iteration count (`n`)
// and the hostname to convert (`val`).
const bench = common.createBenchmark(main, {
  method: ['punycode', 'icu'],
  n: [1024],
  val: hostnames
});
/**
 * Runs one ASCII -> Unicode round trip of `val` through the JavaScript
 * `punycode` module. The result is discarded; only the work matters.
 *
 * @param {string} val - Hostname to convert.
 */
function usingPunycode(val) {
  const encoded = punycode.toASCII(val);
  punycode.toUnicode(encoded);
}
/**
 * Runs one ASCII -> Unicode round trip of `val` through the `icu` binding.
 * The result is discarded; only the work matters.
 *
 * @param {string} val - Hostname to convert.
 */
function usingICU(val) {
  const encoded = icu.toASCII(val);
  icu.toUnicode(encoded);
}
/**
 * Times `n` calls of usingPunycode(val).
 *
 * @param {number} n - Number of timed iterations; also passed to bench.end().
 * @param {string} val - Hostname handed to every call.
 */
function runPunycode(n, val) {
  // Hand the function to the benchmark helper before the timed section
  // (presumably so optimization cost is not part of the measurement).
  common.v8ForceOptimization(usingPunycode, val);

  bench.start();
  let completed = 0;
  while (completed < n) {
    usingPunycode(val);
    completed += 1;
  }
  bench.end(n);
}
/**
 * Times `n` calls of usingICU(val).
 *
 * @param {number} n - Number of timed iterations; also passed to bench.end().
 * @param {string} val - Hostname handed to every call.
 */
function runICU(n, val) {
  // Hand the function to the benchmark helper before the timed section
  // (presumably so optimization cost is not part of the measurement).
  common.v8ForceOptimization(usingICU, val);

  bench.start();
  let completed = 0;
  while (completed < n) {
    usingICU(val);
    completed += 1;
  }
  bench.end(n);
}
/**
 * Benchmark entry point: picks the runner matching `conf.method`.
 *
 * @param {{n: (number|string), val: string, method: string}} conf
 *   One configuration: iteration count, hostname and implementation name.
 * @throws {Error} When `conf.method` is neither 'punycode' nor 'icu'.
 */
function main(conf) {
  const n = +conf.n;  // coerce the count to a number
  const val = conf.val;
  const method = conf.method;

  if (method === 'punycode') {
    runPunycode(n, val);
  } else if (method === 'icu') {
    runICU(n, val);
  } else {
    throw new Error('Unexpected method');
  }
}
View
@@ -1,6 +1,14 @@
'use strict';
const punycode = require('punycode');
function importPunycode() {
try {
return process.binding('icu');
} catch (e) {
return require('punycode');
}
}
// Only the hostname encoder is needed here; take it from whichever
// implementation importPunycode() returned.
const { toASCII } = importPunycode();
// Public module API.
exports.parse = urlParse;
exports.resolve = urlResolve;
@@ -309,7 +317,7 @@ Url.prototype.parse = function(url, parseQueryString, slashesDenoteHost) {
// It only converts parts of the domain name that
// have non-ASCII characters, i.e. it doesn't matter if
// you call it with a domain that already is ASCII-only.
this.hostname = punycode.toASCII(this.hostname);
this.hostname = toASCII(this.hostname);
}
var p = this.port ? ':' + this.port : '';
View
@@ -23,8 +23,16 @@
#if defined(NODE_HAVE_I18N_SUPPORT)
#include "node.h"
#include "env.h"
#include "env-inl.h"
#include "util.h"
#include "util-inl.h"
#include "v8.h"
#include <unicode/putil.h>
#include <unicode/udata.h>
#include <unicode/uidna.h>
#ifdef NODE_HAVE_SMALL_ICU
/* if this is defined, we have a 'secondary' entry point.
@@ -43,6 +51,13 @@ extern "C" const char U_DATA_API SMALL_ICUDATA_ENTRY_POINT[];
namespace node {
using v8::Context;
using v8::FunctionCallbackInfo;
using v8::Local;
using v8::Object;
using v8::String;
using v8::Value;
bool flag_icu_data_dir = false;
namespace i18n {
@@ -64,7 +79,124 @@ bool InitializeICUDirectory(const char* icu_data_path) {
}
}
// Converts the UTF-8 domain name `input` (of `length` bytes) to its Unicode
// form using ICU's UTS #46 processor with the non-transitional ToUnicode
// option, writing the UTF-8 result into `buf`.
// Returns the length reported by ICU, or -1 if the processor could not be
// opened or the conversion ended with a failing UErrorCode.
static int32_t ToUnicode(MaybeStackBuffer<char>* buf,
                         const char* input,
                         size_t length) {
  UErrorCode err = U_ZERO_ERROR;
  const uint32_t flags = UIDNA_DEFAULT | UIDNA_NONTRANSITIONAL_TO_UNICODE;
  UIDNA* idna = uidna_openUTS46(flags, &err);
  if (U_FAILURE(err))
    return -1;

  UIDNAInfo details = UIDNA_INFO_INITIALIZER;
  int32_t written = uidna_nameToUnicodeUTF8(idna,
                                            input, length,
                                            **buf, buf->length(),
                                            &details,
                                            &err);

  // The first attempt may not fit; grow the buffer to the length ICU
  // reported and run the conversion once more.
  if (err == U_BUFFER_OVERFLOW_ERROR) {
    err = U_ZERO_ERROR;
    buf->AllocateSufficientStorage(written);
    written = uidna_nameToUnicodeUTF8(idna,
                                      input, length,
                                      **buf, buf->length(),
                                      &details,
                                      &err);
  }

  uidna_close(idna);
  return U_FAILURE(err) ? -1 : written;
}
// Converts the UTF-8 domain name `input` (of `length` bytes) to its ASCII
// form using ICU's UTS #46 processor with the non-transitional ToASCII
// option, writing the result into `buf`.
// Returns the length reported by ICU, or -1 if the processor could not be
// opened or the conversion ended with a failing UErrorCode.
static int32_t ToASCII(MaybeStackBuffer<char>* buf,
                       const char* input,
                       size_t length) {
  UErrorCode err = U_ZERO_ERROR;
  const uint32_t flags = UIDNA_DEFAULT | UIDNA_NONTRANSITIONAL_TO_ASCII;
  UIDNA* idna = uidna_openUTS46(flags, &err);
  if (U_FAILURE(err))
    return -1;

  UIDNAInfo details = UIDNA_INFO_INITIALIZER;
  int32_t written = uidna_nameToASCII_UTF8(idna,
                                           input, length,
                                           **buf, buf->length(),
                                           &details,
                                           &err);

  // The first attempt may not fit; grow the buffer to the length ICU
  // reported and run the conversion once more.
  if (err == U_BUFFER_OVERFLOW_ERROR) {
    err = U_ZERO_ERROR;
    buf->AllocateSufficientStorage(written);
    written = uidna_nameToASCII_UTF8(idna,
                                     input, length,
                                     **buf, buf->length(),
                                     &details,
                                     &err);
  }

  uidna_close(idna);
  return U_FAILURE(err) ? -1 : written;
}
static void ToUnicode(const FunctionCallbackInfo<Value>& args) {
Environment* env = Environment::GetCurrent(args);
CHECK_GE(args.Length(), 1);
CHECK(args[0]->IsString());
Utf8Value val(env->isolate(), args[0]);
MaybeStackBuffer<char> buf;
int32_t len = ToUnicode(&buf, *val, val.length());
if (len < 0) {
return env->ThrowError("Cannot convert name to Unicode");
}
args.GetReturnValue().Set(
String::NewFromUtf8(env->isolate(),
*buf,
v8::NewStringType::kNormal,
len).ToLocalChecked());
}
static void ToASCII(const FunctionCallbackInfo<Value>& args) {
Environment* env = Environment::GetCurrent(args);
CHECK_GE(args.Length(), 1);
CHECK(args[0]->IsString());
Utf8Value val(env->isolate(), args[0]);
MaybeStackBuffer<char> buf;
int32_t len = ToASCII(&buf, *val, val.length());
if (len < 0) {
return env->ThrowError("Cannot convert name to ASCII");
}
args.GetReturnValue().Set(
String::NewFromUtf8(env->isolate(),
*buf,
v8::NewStringType::kNormal,
len).ToLocalChecked());
}
// Initializer for the `icu` builtin module: attaches the two IDNA conversion
// callbacks to `target` under their JavaScript-visible names, `toUnicode`
// and `toASCII`. The `unused` and `priv` parameters are not read here.
void Init(Local<Object> target,
Local<Value> unused,
Local<Context> context,
void* priv) {
Environment* env = Environment::GetCurrent(context);
// The FunctionCallbackInfo overloads are the ones registered here.
env->SetMethod(target, "toUnicode", ToUnicode);
env->SetMethod(target, "toASCII", ToASCII);
}
} // namespace i18n
} // namespace node
NODE_MODULE_CONTEXT_AWARE_BUILTIN(icu, node::i18n::Init)
#endif // NODE_HAVE_I18N_SUPPORT
@@ -0,0 +1,72 @@
'use strict';
const common = require('../common');
const icu = getPunycode();
const assert = require('assert');
/**
 * Tries to load the `icu` binding.
 *
 * @returns {Object|undefined} The binding, or undefined when
 *   process.binding('icu') throws.
 */
function getPunycode() {
  let binding;
  try {
    binding = process.binding('icu');
  } catch (err) {
    // Binding could not be loaded; the caller treats undefined as
    // "ICU not present".
    binding = undefined;
  }
  return binding;
}
// getPunycode() yields undefined when the binding cannot be loaded. In that
// case report the test as skipped and stop before any assertion runs.
if (!icu) {
common.skip('icu punycode tests because ICU is not present.');
return;
}
// Internationalized domain names used as round-trip inputs below.
// Credit for list: http://www.i18nguy.com/markup/idna-examples.html
const tests = [
'افغانستا.icom.museum',
'الجزائر.icom.museum',
'österreich.icom.museum',
'বাংলাদেশ.icom.museum',
'беларусь.icom.museum',
'belgië.icom.museum',
'българия.icom.museum',
'تشادر.icom.museum',
'中国.icom.museum',
'القمر.icom.museum',
'κυπρος.icom.museum',
'českárepublika.icom.museum',
'مصر.icom.museum',
'ελλάδα.icom.museum',
'magyarország.icom.museum',
'ísland.icom.museum',
'भारत.icom.museum',
'ايران.icom.museum',
'éire.icom.museum',
'איקו״ם.ישראל.museum',
'日本.icom.museum',
'الأردن.icom.museum',
'қазақстан.icom.museum',
'한국.icom.museum',
'кыргызстан.icom.museum',
'ລາວ.icom.museum',
'لبنان.icom.museum',
'македонија.icom.museum',
'موريتانيا.icom.museum',
'méxico.icom.museum',
'монголулс.icom.museum',
'المغرب.icom.museum',
'नेपाल.icom.museum',
'عمان.icom.museum',
'قطر.icom.museum',
'românia.icom.museum',
'россия.иком.museum',
'србијаицрнагора.иком.museum',
'இலங்கை.icom.museum',
'españa.icom.museum',
'ไทย.icom.museum',
'تونس.icom.museum',
'türkiye.icom.museum',
'украина.icom.museum',
'việtnam.icom.museum'
];
// Round trip: encoding each name to ASCII and decoding it back to Unicode
// must reproduce the original string exactly.
for (const hostname of tests) {
  const ascii = icu.toASCII(hostname);
  assert.strictEqual(hostname, icu.toUnicode(ascii));
}
@@ -37,8 +37,7 @@
'defines': [
# ICU cannot swap the initial data without this.
# http://bugs.icu-project.org/trac/ticket/11046
'UCONFIG_NO_LEGACY_CONVERSION=1',
'UCONFIG_NO_IDNA=1',
'UCONFIG_NO_LEGACY_CONVERSION=1'
],
}],
],
@@ -428,9 +427,6 @@
#'<(icu_path)/source/common/ubidi_props_data.h',
# and the callers
'<(icu_path)/source/common/ushape.cpp',
'<(icu_path)/source/common/usprep.cpp',
'<(icu_path)/source/common/uts46.cpp',
'<(icu_path)/source/common/uidna.cpp',
]}],
[ 'icu_ver_major == 57', { 'sources!': [
# work around http://bugs.icu-project.org/trac/ticket/12451
@@ -447,9 +443,6 @@
#'<(icu_path)/source/common/ubidi_props_data.h',
# and the callers
'<(icu_path)/source/common/ushape.cpp',
'<(icu_path)/source/common/usprep.cpp',
'<(icu_path)/source/common/uts46.cpp',
'<(icu_path)/source/common/uidna.cpp',
]}],
[ 'OS == "solaris"', { 'defines': [
'_XOPEN_SOURCE_EXTENDED=0',
View
@@ -24,7 +24,7 @@
"region": "none",
"zone": "locales",
"converters": "none",
"stringprep": "none",
"stringprep": "locales",
"translit": "none",
"brkfiles": "none",
"brkdict": "none",
@@ -34,7 +34,6 @@
"remove": [
"cnvalias.icu",
"postalCodeData.res",
"uts46.nrm",
"genderList.res",
"brkitr/root.res",
"unames.icu"

0 comments on commit 7de59ef

Please sign in to comment.