From a39dd376538e9a34008b123d04cbdb8a67017ba5 Mon Sep 17 00:00:00 2001 From: Yagiz Nizipli Date: Fri, 24 Feb 2023 17:35:05 -0500 Subject: [PATCH] url: simplify and improve url formatting PR-URL: https://github.com/nodejs/node/pull/46736 Backport-PR-URL: https://github.com/nodejs/node/pull/47435 Reviewed-By: Tiancheng "Timothy" Gu Reviewed-By: James M Snell --- lib/internal/url.js | 98 +++---------------- lib/url.js | 46 +++++++-- src/node_url.cc | 73 +++++++++++--- .../test-whatwg-url-custom-inspect.js | 4 +- 4 files changed, 111 insertions(+), 110 deletions(-) diff --git a/lib/internal/url.js b/lib/internal/url.js index 23626595ee78e2..7b2e0b4ffa6cc7 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -79,7 +79,6 @@ const path = require('path'); const { validateFunction, - validateObject, } = require('internal/validators'); const querystring = require('querystring'); @@ -152,8 +151,6 @@ class URLContext { password = ''; port = ''; hash = ''; - hasHost = false; - hasOpaquePath = false; } function isURLSearchParams(self) { @@ -290,7 +287,9 @@ class URLSearchParams { name = toUSVString(name); value = toUSVString(value); ArrayPrototypePush(this[searchParams], name, value); - update(this[context], this); + if (this[context]) { + this[context].search = this.toString(); + } } delete(name) { @@ -311,7 +310,9 @@ class URLSearchParams { i += 2; } } - update(this[context], this); + if (this[context]) { + this[context].search = this.toString(); + } } get(name) { @@ -406,7 +407,9 @@ class URLSearchParams { ArrayPrototypePush(list, name, value); } - update(this[context], this); + if (this[context]) { + this[context].search = this.toString(); + } } sort() { @@ -450,7 +453,9 @@ class URLSearchParams { } } - update(this[context], this); + if (this[context]) { + this[context].search = this.toString(); + } } // https://heycam.github.io/webidl/#es-iterators @@ -536,46 +541,6 @@ function isURLThis(self) { return self != null && ObjectPrototypeHasOwnProperty(self, context); } -function constructHref(ctx, options) { - if (options) - validateObject(options, 'options'); - - options = { - fragment: true, - unicode: false, - search: true, - auth: true, - ...options, - }; - - // https://url.spec.whatwg.org/#url-serializing - let ret = ctx.protocol; - if (ctx.hasHost) { - ret += '//'; - const hasUsername = ctx.username !== ''; - const hasPassword = ctx.password !== ''; - if (options.auth && (hasUsername || hasPassword)) { - if (hasUsername) - ret += ctx.username; - if (hasPassword) - ret += `:${ctx.password}`; - ret += '@'; - } - ret += options.unicode ? - domainToUnicode(ctx.hostname) : ctx.hostname; - if (ctx.port !== '') - ret += `:${ctx.port}`; - } else if (!ctx.hasOpaquePath && ctx.pathname.lastIndexOf('/') !== 0 && ctx.pathname.startsWith('//')) { - ret += '/.'; - } - ret += ctx.pathname; - if (options.search) - ret += ctx.search; - if (options.fragment) - ret += ctx.hash; - return ret; -} - class URL { constructor(input, base = undefined) { // toUSVString is not needed. @@ -628,14 +593,8 @@ class URL { return `${constructor.name} ${inspect(obj, opts)}`; } - [kFormat](options) { - // TODO(@anonrig): Replace kFormat with actually calling setters. - return constructHref(this[context], options); - } - #onParseComplete = (href, origin, protocol, hostname, pathname, - search, username, password, port, hash, hasHost, - hasOpaquePath) => { + search, username, password, port, hash) => { const ctx = this[context]; ctx.href = href; ctx.origin = origin; @@ -647,9 +606,6 @@ class URL { ctx.password = password; ctx.port = port; ctx.hash = hash; - // TODO(@anonrig): Remove hasHost and hasOpaquePath when kFormat is removed. - ctx.hasHost = hasHost; - ctx.hasOpaquePath = hasOpaquePath; if (!this[searchParams]) { // Invoked from URL constructor this[searchParams] = new URLSearchParams(); this[searchParams][context] = this; @@ -862,33 +818,6 @@ ObjectDefineProperties(URL, { revokeObjectURL: kEnumerableProperty, }); -function update(url, params) { - if (!url) - return; - - const ctx = url[context]; - const serializedParams = params.toString(); - if (serializedParams.length > 0) { - ctx.search = '?' + serializedParams; - } else { - ctx.search = ''; - - // Potentially strip trailing spaces from an opaque path - if (ctx.hasOpaquePath && ctx.hash.length === 0) { - let length = ctx.pathname.length; - while (length > 0 && ctx.pathname.charCodeAt(length - 1) === 32) { - length--; - } - - // No need to copy the whole string if there is no space at the end - if (length !== ctx.pathname.length) { - ctx.pathname = ctx.pathname.slice(0, length); - } - } - } - ctx.href = constructHref(ctx); -} - function initSearchParams(url, init) { if (!init) { url[searchParams] = []; @@ -1387,7 +1316,6 @@ module.exports = { domainToASCII, domainToUnicode, urlToHttpOptions, - formatSymbol: kFormat, searchParamsSymbol: searchParams, encodeStr, }; diff --git a/lib/url.js b/lib/url.js index 4d7374a8e3f358..019a6dced9db55 100644 --- a/lib/url.js +++ b/lib/url.js @@ -22,6 +22,7 @@ 'use strict'; const { + Boolean, Int8Array, ObjectCreate, ObjectKeys, @@ -38,7 +39,10 @@ const { ERR_INVALID_ARG_TYPE, ERR_INVALID_URL, } = require('internal/errors').codes; -const { validateString } = require('internal/validators'); +const { + validateString, + validateObject, +} = require('internal/validators'); // This ensures setURLConstructor() is called before the native // URL::ToObject() method is used. @@ -51,11 +55,14 @@ const { domainToASCII, domainToUnicode, fileURLToPath, - formatSymbol, pathToFileURL, urlToHttpOptions, } = require('internal/url'); +const { + formatUrl, +} = internalBinding('url'); + // Original url.parse() API function Url() { @@ -579,13 +586,36 @@ function urlFormat(urlObject, options) { } else if (typeof urlObject !== 'object' || urlObject === null) { throw new ERR_INVALID_ARG_TYPE('urlObject', ['Object', 'string'], urlObject); - } else if (!(urlObject instanceof Url)) { - const format = urlObject[formatSymbol]; - return format ? - format.call(urlObject, options) : - Url.prototype.format.call(urlObject); + } else if (urlObject instanceof URL) { + let fragment = true; + let unicode = false; + let search = true; + let auth = true; + + if (options) { + validateObject(options, 'options'); + + if (options.fragment != null) { + fragment = Boolean(options.fragment); + } + + if (options.unicode != null) { + unicode = Boolean(options.unicode); + } + + if (options.search != null) { + search = Boolean(options.search); + } + + if (options.auth != null) { + auth = Boolean(options.auth); + } + } + + return formatUrl(urlObject.href, fragment, unicode, search, auth); } - return urlObject.format(); + + return Url.prototype.format.call(urlObject); } // These characters do not need escaping: diff --git a/src/node_url.cc b/src/node_url.cc index 4a7bbd71a44dbc..81f3ecbc0698c9 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -11,7 +11,6 @@ namespace node { -using v8::Boolean; using v8::Context; using v8::Function; using v8::FunctionCallbackInfo; @@ -47,7 +46,7 @@ enum url_update_action { kHref = 9, }; -void SetArgs(Environment* env, Local argv[12], const ada::result& url) { +void SetArgs(Environment* env, Local argv[10], const ada::result& url) { Isolate* isolate = env->isolate(); argv[0] = Utf8String(isolate, url->get_href()); argv[1] = Utf8String(isolate, url->get_origin()); @@ -59,8 +58,6 @@ void SetArgs(Environment* env, Local argv[12], const ada::result& url) { argv[7] = Utf8String(isolate, url->get_password()); argv[8] = Utf8String(isolate, url->get_port()); argv[9] = Utf8String(isolate, url->get_hash()); - argv[10] = Boolean::New(isolate, url->host.has_value()); - argv[11] = Boolean::New(isolate, url->has_opaque_path); } void Parse(const FunctionCallbackInfo& args) { @@ -86,8 +83,7 @@ void Parse(const FunctionCallbackInfo& args) { } base_pointer = &base.value(); } - ada::result out = - ada::parse(std::string_view(input.out(), input.length()), base_pointer); + ada::result out = ada::parse(input.ToStringView(), base_pointer); if (!out) { return args.GetReturnValue().Set(false); @@ -105,8 +101,6 @@ void Parse(const FunctionCallbackInfo& args) { undef, undef, undef, - undef, - undef, }; SetArgs(env, argv, out); USE(success_callback_->Call( @@ -192,10 +186,8 @@ void UpdateUrl(const FunctionCallbackInfo& args) { Utf8Value new_value(isolate, args[2].As()); Local success_callback_ = args[3].As(); - std::string_view new_value_view = - std::string_view(new_value.out(), new_value.length()); - std::string_view input_view = std::string_view(input.out(), input.length()); - ada::result out = ada::parse(input_view); + std::string_view new_value_view = new_value.ToStringView(); + ada::result out = ada::parse(input.ToStringView()); CHECK(out); bool result{true}; @@ -255,8 +247,6 @@ void UpdateUrl(const FunctionCallbackInfo& args) { undef, undef, undef, - undef, - undef, }; SetArgs(env, argv, out); USE(success_callback_->Call( @@ -264,12 +254,66 @@ void UpdateUrl(const FunctionCallbackInfo& args) { args.GetReturnValue().Set(result); } +void FormatUrl(const FunctionCallbackInfo& args) { + CHECK_GT(args.Length(), 4); + CHECK(args[0]->IsString()); // url href + + Environment* env = Environment::GetCurrent(args); + Isolate* isolate = env->isolate(); + + Utf8Value href(isolate, args[0].As()); + const bool fragment = args[1]->IsTrue(); + const bool unicode = args[2]->IsTrue(); + const bool search = args[3]->IsTrue(); + const bool auth = args[4]->IsTrue(); + + ada::result out = ada::parse(href.ToStringView()); + CHECK(out); + + if (!fragment) { + out->fragment = std::nullopt; + } + + if (unicode) { +#if defined(NODE_HAVE_I18N_SUPPORT) + std::string hostname = out->get_hostname(); + MaybeStackBuffer buf; + int32_t len = i18n::ToUnicode(&buf, hostname.data(), hostname.length()); + + if (len < 0) { + out->host = ""; + } else { + out->host = buf.ToString(); + } +#else + out->host = ""; +#endif + } + + if (!search) { + out->query = std::nullopt; + } + + if (!auth) { + out->username = ""; + out->password = ""; + } + + std::string result = out->get_href(); + args.GetReturnValue().Set(String::NewFromUtf8(env->isolate(), + result.data(), + NewStringType::kNormal, + result.length()) + .ToLocalChecked()); +} + void Initialize(Local target, Local unused, Local context, void* priv) { SetMethod(context, target, "parse", Parse); SetMethod(context, target, "updateUrl", UpdateUrl); + SetMethod(context, target, "formatUrl", FormatUrl); SetMethodNoSideEffect(context, target, "domainToASCII", DomainToASCII); SetMethodNoSideEffect(context, target, "domainToUnicode", DomainToUnicode); @@ -279,6 +323,7 @@ void Initialize(Local target, void RegisterExternalReferences(ExternalReferenceRegistry* registry) { registry->Register(Parse); registry->Register(UpdateUrl); + registry->Register(FormatUrl); registry->Register(DomainToASCII); registry->Register(DomainToUnicode); diff --git a/test/parallel/test-whatwg-url-custom-inspect.js b/test/parallel/test-whatwg-url-custom-inspect.js index ad7a48f72d5958..a7d30a6ab936c3 100644 --- a/test/parallel/test-whatwg-url-custom-inspect.js +++ b/test/parallel/test-whatwg-url-custom-inspect.js @@ -55,9 +55,7 @@ assert.strictEqual( username: 'username', password: 'password', port: '8080', - hash: '#hash', - hasHost: true, - hasOpaquePath: false + hash: '#hash' } }`);