This repository has been archived by the owner. It is now read-only.
Permalink
Browse files

Pull in the uri.js from Narwhal and create tests, stripping out the cruft from a previous code-surgery.
  • Loading branch information...
isaacs authored and ry committed Dec 11, 2009
1 parent f3b0cef commit 2f9722cca0a72122aa03763c085f6b5aa7f0ada2
Showing with 427 additions and 0 deletions.
  1. +236 −0 lib/uri.js
  2. +191 −0 test/mjsunit/test-uri.js
View
@@ -0,0 +1,236 @@
+/**
+ * uri.js
+ * A URI parser, compliant with assorted RFCs, providing parsing and resolution utilities.
+ **/
+
// Public API of the module: each export is bound to the function
// declaration of the matching name in this file.
exports.parse = uri_parse;
exports.format = uri_format;
exports.resolve = uri_resolve;
exports.resolveObject = uri_resolveObject;
+
+
/**
 * expressionKeys
 * Names of the members of a parsed URI object. The order matters: entry 0
 * names the whole match and entry N names capture group N of
 * `strictExpression`, so the two must be kept in step.
 */
var expressionKeys = [
  "url",
  "protocol",
  "authorityRoot",
  "authority",
  "userInfo",
  "user",
  "password",
  "domain",
  "port",
  "path",
  "root",
  "directory",
  "file",
  "query",
  "anchor"
];

/**
 * strictExpression
 * The regular expression that splits a URI string into its parts. It is
 * anchored at the start only and every part is optional, so it matches
 * (possibly an empty prefix of) any input. Groups that do not take part
 * in a match are `undefined` in the result of `exec`.
 */
var strictExpression = new RegExp(    /* url */
  "^" +
  "(?:" +
    "([^:/?#]+):" +                   /* protocol */
  ")?" +
  "(?:" +
    "(//)" +                          /* authorityRoot */
    "(" +                             /* authority */
      "(?:" +
        "(" +                         /* userInfo */
          "([^:@/]*)" +               /* user */
          ":?" +
          "([^@/]*)" +                /* password */
        ")?" +
        "@" +
      ")?" +
      "([^:/?#]*)" +                  /* domain */
      "(?::(\\d*))?" +                /* port */
    ")" +
  ")?" +
  "(" +                               /* path */
    "(/?)" +                          /* root */
    "((?:[^?#/]*/)*)" +               /* directory */
    "([^?#]*)" +                      /* file */
  ")" +
  "(?:\\?([^#]*))?" +                 /* query */
  "(?:#(.*))?"                        /* anchor */
);
+
/**
 * parse
 * Splits a URI string into its components by evaluating
 * `strictExpression` against it, and returns an `Object` that maps every
 * name in `expressionKeys` to the matching piece of the URI ("" when that
 * piece is absent).
 *
 * In addition to the raw captures the result carries:
 *   root        - "/" when the path is rooted or an authority root ("//")
 *                 is present, otherwise "".
 *   directories - `directory` split on "/", with "." segments removed and
 *                 each ".." cancelling the segment before it; a ".." with
 *                 nothing left to cancel is kept.
 *   domains     - `domain` split on ".".
 *
 * @param {String} url the URI to parse.
 * @returns {Object} the parsed URI object.
 */
function uri_parse (url) {
  var items = {},
      parts = strictExpression.exec(url),
      directories = [],
      segments,
      segment,
      i;

  // Groups that did not take part in the match are undefined; store "".
  for (i = 0; i < parts.length; i++) {
    items[expressionKeys[i]] = parts[i] || "";
  }

  items.root = (items.root || items.authorityRoot) ? '/' : '';

  // `directory` ends with "/" whenever it is non-empty, so the split
  // leaves a trailing "" that is not a real segment.
  segments = items.directory.split("/");
  if (segments[segments.length - 1] === "") {
    segments.pop();
  }

  /* normalize */
  for (i = 0; i < segments.length; i++) {
    segment = segments[i];
    if (segment === '.') {
      continue;
    }
    if (segment !== '..') {
      directories.push(segment);
    } else if (directories.length && directories[directories.length - 1] !== '..') {
      // ".." cancels the previous real segment...
      directories.pop();
    } else {
      // ...but piles up when there is nothing left to cancel.
      directories.push('..');
    }
  }
  items.directories = directories;

  items.domains = items.domain.split(".");

  return items;
}
+
+
/**
 * format
 * Accepts a parsed URI object and returns the corresponding string.
 *
 * Fine-grained members win over coarse ones: `domains` over `domain`,
 * `user`/`password` over `userInfo`, those over `authority`,
 * `directories` over `directory`, and `directory`/`file` over `path`.
 * The result is also stored back on the object as `object.url`; if the
 * assembled string is empty, an existing `object.url` is used instead.
 *
 * @param {Object|String} object a parsed URI object; a string is
 *        returned unchanged.
 * @returns {String} the formatted URI.
 * @throws {Error} when `object` is undefined or null.
 */
function uri_format (object) {
  var domain,
      userInfo,
      authority,
      directory,
      directoryPrefix,
      path,
      authorityRoot,
      url;

  // null is rejected together with undefined so that callers get the
  // module's own error instead of a TypeError from a property access.
  if (object === undefined || object === null)
    throw new Error("UrlError: URL undefined for urls#format");
  if (object instanceof String || typeof object === 'string')
    return object;

  domain = object.domains ? object.domains.join(".") : object.domain;

  if (object.user || object.password) {
    userInfo = (object.user || "") +
               (object.password ? ":" + object.password : "");
  } else {
    userInfo = object.userInfo;
  }

  if (userInfo || domain || object.port) {
    authority = (userInfo ? userInfo + "@" : "") +
                (domain || "") +
                (object.port ? ":" + object.port : "");
  } else {
    authority = object.authority || "";
  }

  if (object.directories) {
    // Joined segments never carry the final separator, so always add it
    // (this also preserves a deliberately empty last segment).
    directory = object.directories.join("/");
    directoryPrefix = directory ? directory + "/" : "";
  } else {
    // A `directory` string as captured by the parser already ends in "/";
    // only add the separator when it is missing, to avoid "a/b//file".
    directory = object.directory;
    if (!directory) {
      directoryPrefix = "";
    } else if (/\/$/.test(directory)) {
      directoryPrefix = directory;
    } else {
      directoryPrefix = directory + "/";
    }
  }

  if (directory || object.file) {
    path = directoryPrefix + (object.file || "");
  } else {
    path = object.path;
  }

  authorityRoot = (object.authorityRoot || authority) ? "//" : "";

  url = (object.protocol ? object.protocol + ":" : "") +
        authorityRoot +
        authority +
        // an authority followed by a path needs a separating "/"
        ((object.root || (authority && path)) ? "/" : "") +
        (path ? path : "") +
        (object.query ? "?" + object.query : "") +
        (object.anchor ? "#" + object.anchor : "");

  object.url = url || object.url || "";
  return object.url;
}
+
/**
 * resolveObject
 * Returns an object representing a URL resolved from a relative location
 * and a source location.
 *
 * Special cases:
 *   - a falsy `source` returns `relative` exactly as it was passed in
 *     (it is not parsed);
 *   - a `relative` that parses to an empty URL returns the parsed `source`.
 *
 * Otherwise the returned object is a parsed URI object whose derived
 * members (`url`, `authority`, `domain`, `userInfo`, `path`, `directory`)
 * may be stale or missing; `uri_format` rebuilds the string from the
 * fine-grained members.
 *
 * @param {String} source the base location.
 * @param {String} relative the location to resolve against `source`.
 * @returns {Object|String} the resolved URI object (see special cases).
 */
function uri_resolveObject (source, relative) {
  var base,
      ref,
      baseProtocol,
      segments,
      segment,
      i;

  if (!source)
    return relative;

  base = uri_parse(source);
  ref = uri_parse(relative);

  if (ref.url === "")
    return base;

  // Drop the derived members so that formatting uses the fine-grained
  // ones (domains, directories, user, ...) that are edited below.
  delete base.url;
  delete base.authority;
  delete base.domain;
  delete base.userInfo;
  delete base.path;
  delete base.directory;

  // `base.authority` was just deleted, so any authority on the reference
  // counts as a different one.
  if ((ref.protocol && ref.protocol !== base.protocol) || ref.authority) {
    // The reference names its own scheme and/or host: it replaces the
    // base. A network-path reference ("//host/path") has no scheme of
    // its own and inherits the base's (RFC 3986, section 5.2.2).
    baseProtocol = base.protocol;
    base = ref;
    if (!base.protocol)
      base.protocol = baseProtocol;
  } else if (ref.root) {
    base.directories = ref.directories;
  } else {
    segments = ref.directories;
    for (i = 0; i < segments.length; i++) {
      segment = segments[i];
      if (segment === ".") {
        continue;
      }
      if (segment !== "..") {
        base.directories.push(segment);
      } else if (base.directories.length) {
        base.directories.pop();
      } else {
        base.directories.push('..');
      }
    }

    // A final "." or ".." is captured as the file, not as a directory.
    if (ref.file === ".") {
      ref.file = "";
    } else if (ref.file === "..") {
      base.directories.pop();
      ref.file = "";
    }
  }

  if (ref.root)
    base.root = ref.root;
  if (ref.protocol)
    base.protocol = ref.protocol;
  // A reference with no path but a fragment keeps the base's file.
  if (ref.path || !ref.anchor)
    base.file = ref.file;
  // A fragment-only reference ("#frag") points into the same document,
  // so the base's query is kept as well.
  if (ref.path || ref.query || !ref.anchor)
    base.query = ref.query;
  base.anchor = ref.anchor;

  return base;
}
+
+
/**
 * resolve
 * Returns the URL string obtained by resolving a relative URL against a
 * source URL: the object produced by `uri_resolveObject` is passed
 * through `uri_format`.
 *
 * @param {String} source the base location.
 * @param {String} relative the location to resolve against `source`.
 * @returns {String} the resolved URL.
 */
function uri_resolve (source, relative) {
  var resolved = uri_resolveObject(source, relative);
  return uri_format(resolved);
}
+
Oops, something went wrong.

2 comments on commit 2f9722c

@rsms

This comment has been minimized.

Show comment Hide comment
@rsms

rsms Dec 24, 2009

Technically this module should be named "url" since it deals with urls, not uris in general. A common mistake. I also suggest calling the fragment part of the URL "fragment" (per rfc) rather than the "anchor" (which normally refers to hyperlinks within a HTML document).

rsms replied Dec 24, 2009

Technically this module should be named "url" since it deals with urls, not uris in general. A common mistake. I also suggest calling the fragment part of the URL "fragment" (per rfc) rather than the "anchor" (which normally refers to hyperlinks within a HTML document).

@isaacs

This comment has been minimized.

Show comment Hide comment
@isaacs

isaacs Dec 27, 2009

I think maybe we should consider changing "anchor" to "hash", since that's the language used by window.location, and JavaScripters are likely to be somewhat familiar with that object. (By that same logic, we'd have hash, host, hostname, href, pathname, port, protocol, and search; some of those are kinda confusing, and I much prefer "query" to "search".) Seems like a bikeshed.

However, "URI" is certainly what this module is concerned with, in the general sense of "a compact string of characters for identifying an abstract or physical resource", consistent with many of the examples and semantics described in RFC 2396, by Berners-Lee, et. al. http://www.ietf.org/rfc/rfc2396.txt For instance, it will parse, validate, and provide a meaningful toolkit for dealing with "mailto:i@izs.me" or "xmpp:isaacschlueter@jabber.org/Adium-12345" and so on. Those are not "resource locators", but they are definitely "resource identifiers".

That being said, it doesn't seem to correctly parse these. Without the "//", it doesn't seem to realize that anything following the "protocol:" is typically a user/domain information. The reason for this is that it would make the "resolve" functionality more complicated. (Without the authorityRoot, a href is resolved relative to the existing authority, but when treated as an absolute link, the authorityRoot is implied, which is weird, but that's the internet for ya.)

If this issue bugs you, please bring it up on the nodejs google group list. Your request will have lots more weight if you write a failing test case and send that along with your bug report. For even more points, send a patch that makes it pass the test :) Whatever bugs may exist, the purpose of the module is URIs in general, with some sugar for URLs, since they are the sort of URI that is dealt with most often.

I think maybe we should consider changing "anchor" to "hash", since that's the language used by window.location, and JavaScripters are likely to be somewhat familiar with that object. (By that same logic, we'd have hash, host, hostname, href, pathname, port, protocol, and search; some of those are kinda confusing, and I much prefer "query" to "search".) Seems like a bikeshed.

However, "URI" is certainly what this module is concerned with, in the general sense of "a compact string of characters for identifying an abstract or physical resource", consistent with many of the examples and semantics described in RFC 2396, by Berners-Lee, et. al. http://www.ietf.org/rfc/rfc2396.txt For instance, it will parse, validate, and provide a meaningful toolkit for dealing with "mailto:i@izs.me" or "xmpp:isaacschlueter@jabber.org/Adium-12345" and so on. Those are not "resource locators", but they are definitely "resource identifiers".

That being said, it doesn't seem to correctly parse these. Without the "//", it doesn't seem to realize that anything following the "protocol:" is typically a user/domain information. The reason for this is that it would make the "resolve" functionality more complicated. (Without the authorityRoot, a href is resolved relative to the existing authority, but when treated as an absolute link, the authorityRoot is implied, which is weird, but that's the internet for ya.)

If this issue bugs you, please bring it up on the nodejs google group list. Your request will have lots more weight if you write a failing test case and send that along with your bug report. For even more points, send a patch that makes it pass the test :) Whatever bugs may exist, the purpose of the module is URIs in general, with some sugar for URLs, since they are the sort of URI that is dealt with most often.

Please sign in to comment.