Skip to content

Commit

Permalink
Address PR comments and increase test coverage.
Browse files Browse the repository at this point in the history
  • Loading branch information
remusao committed Sep 1, 2017
1 parent c0a7904 commit aaf0853
Show file tree
Hide file tree
Showing 12 changed files with 377 additions and 135 deletions.
24 changes: 13 additions & 11 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@ var allRules = Trie.fromJson(require('./rules.json'));

var cleanHostValue = require('./lib/clean-host.js');
var getDomain = require('./lib/domain.js');
var getPublicSuffix = require('./lib/public-suffix.js');
var getSubdomain = require('./lib/subdomain.js');
var isValid = require('./lib/is-valid.js');
var getPublicSuffix = require('./lib/public-suffix.js');
var tldExists = require('./lib/tld-exists.js');


/**
* Creates a new instance of tldjs
* @param {Object.<rules,validHosts>} options Settings object carrying the `rules` and the `validHosts` list to use
Expand All @@ -22,23 +23,24 @@ function factory(options) {

return {
cleanHostValue: cleanHostValue,
getDomain: function (host, isHostClean) {
return getDomain(rules, validHosts, host, isHostClean);
getDomain: function (hostname) {
return getDomain(rules, validHosts, hostname);
},
getSubdomain: function (host, isHostClean) {
return getSubdomain(rules, validHosts, host, isHostClean);
getSubdomain: function (hostname) {
return getSubdomain(rules, validHosts, hostname);
},
isValid: function (host) {
return isValid(validHosts, host);
isValid: function (hostname) {
return isValid(validHosts, hostname);
},
getPublicSuffix: function (host, isHostClean) {
return getPublicSuffix(rules, host, isHostClean);
getPublicSuffix: function (hostname) {
return getPublicSuffix(rules, hostname);
},
tldExists: function (tld, isHostClean) {
return tldExists(rules, tld, isHostClean);
tldExists: function (tld) {
return tldExists(rules, tld);
},
fromUserSettings: factory
};
}


module.exports = factory({ validHosts: [], rules: allRules });
16 changes: 8 additions & 8 deletions lib/clean-host.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,21 @@ var hasPrefixRE = /^(([a-z][a-z0-9+.-]*)?:)?\/\//;
var invalidHostnameChars = /[^A-Za-z0-9.-]/;
var trailingDotsRE = /[.]+$/g;

//@see https://github.com/oncletom/tld.js/issues/95
// @see https://github.com/oncletom/tld.js/issues/95
/**
 * Remove whatever `trailingDotsRE` matches from `value`.
 *
 * `value` is coerced to a string first, so non-string input is accepted.
 *
 * @param {*} value
 * @return {string}
 */
function rtrim(value) {
  var text = String(value);

  return text.replace(trailingDotsRE, '');
}

module.exports = function cleanHostValue(value, isHostClean) {
if (isHostClean) {
return value;
}

module.exports = function cleanHostValue(value) {
value = String(value).trim().toLowerCase();

var parts = URL.parse(hasPrefixRE.test(value) ? value : '//' + value, null, true);

if (parts.hostname && !invalidHostnameChars.test(parts.hostname)) { return rtrim(parts.hostname); }
if (!invalidHostnameChars.test(value)) { return rtrim(value); }
if (parts.hostname && !invalidHostnameChars.test(parts.hostname)) {
return rtrim(parts.hostname);
} else if (!invalidHostnameChars.test(value)) {
return rtrim(value);
}

return '';
};
123 changes: 97 additions & 26 deletions lib/domain.js
Original file line number Diff line number Diff line change
@@ -1,51 +1,122 @@
"use strict";

var isValid = require('./is-valid.js');
var cleanHostValue = require('./clean-host.js');
var getPublicSuffix = require('./public-suffix.js');


/**
 * Polyfill for `String.prototype.endsWith`.
 *
 * The search is anchored at the only offset where `pattern` could be a
 * suffix of `str`. Comparing `lastIndexOf` against
 * `str.length - pattern.length` is not sufficient: when `pattern` is absent
 * and exactly one character longer than `str`, both sides evaluate to -1 and
 * the comparison wrongly succeeds.
 *
 * @param {string} str
 * @param {string} pattern
 * @return {boolean} true if `str` ends with `pattern`; an empty `pattern`
 * always matches.
 */
function endsWith(str, pattern) {
  var start = str.length - pattern.length;

  // A pattern longer than the string can never be one of its suffixes.
  if (start < 0) {
    return false;
  }

  // Starting the search at `start` leaves room for a match there only.
  return str.indexOf(pattern, start) === start;
}


/**
 * Tell whether `vhost` terminates `hostname` on a label boundary.
 *
 * `vhost` must be a suffix of `hostname`, and on top of that either both
 * strings have the same length, or the character sitting right before
 * `vhost` inside `hostname` is a '.', so that a partial label never matches.
 *
 * * hostname = 'not.evil.com' and vhost = 'vil.com'      => false
 * * hostname = 'not.evil.com' and vhost = 'evil.com'     => true
 * * hostname = 'not.evil.com' and vhost = 'not.evil.com' => true
 *
 * @param {string} hostname
 * @param {string} vhost
 * @return {boolean}
 */
function shareSameDomainSuffix(hostname, vhost) {
  // Guard: without a common suffix there is nothing else to check.
  if (!endsWith(hostname, vhost)) {
    return false;
  }

  // Offset in `hostname` at which `vhost` begins.
  var boundary = hostname.length - vhost.length;

  if (boundary === 0) {
    return true;
  }

  return hostname[boundary - 1] === '.';
}


/**
 * Build the general domain of `hostname`: its public suffix plus the single
 * label that precedes it.
 *
 * examples:
 *  1. ('not.evil.co.uk', 'co.uk') => 'evil.co.uk'
 *  2. ('example.co.uk', 'co.uk')  => 'example.co.uk' (no sub-domain)
 *
 * @param {string} hostname
 * @param {string} publicSuffix
 * @return {string}
 */
function extractDomainWithSuffix(hostname, publicSuffix) {
  // Position of the last character of the label preceding the public suffix:
  // skip the suffix itself (`publicSuffix.length`), the '.' in front of it
  // (1), and account for zero-based indexing (1).
  var searchFrom = hostname.length - publicSuffix.length - 2;

  // Walk backwards from there to the '.' opening that label, if there is one.
  var dotIndex = hostname.lastIndexOf('.', searchFrom);

  // Without such a '.', `hostname` is already the general domain; otherwise
  // keep everything located after it.
  return dotIndex === -1 ? hostname : hostname.slice(dotIndex + 1);
}


/**
* Detects the domain based on rules and a hostname string
*
* @api
* @param {string} hostname
* @return {string|null}
*/
module.exports = function getDomain(allRules, validHosts, host, isHostClean) {
var _validHosts = validHosts || [];
var cleanHost = cleanHostValue(host, isHostClean);
module.exports = function getDomain(rules, validHosts, hostname) {
hostname = cleanHostValue(hostname);

if (isValid(_validHosts, cleanHost) === false) {
if (isValid(validHosts, hostname) === false) {
return null;
}

// Check if `host` ends with '.' followed by one host specified in validHosts.
for (var i = 0; i < validHosts.length; i++) {
// Check if `hostname` ends with a member of `validHosts`.
for (var i = 0; i < validHosts.length; i += 1) {
var vhost = validHosts[i];
if (cleanHost.indexOf(vhost) === (cleanHost.length - vhost.length) && (
cleanHost.length === vhost.length ||
cleanHost[cleanHost.length - vhost.length - 1] === '.')) {
if (shareSameDomainSuffix(hostname, vhost)) {
return vhost;
}
}

var suffix = getPublicSuffix(allRules, cleanHost, true);
if (suffix === null) {
// TODO - shouldn't it be null?
// Otherwise 'should return the known valid host' fails
// return cleanHost;
return null;
}
// To extract the general domain, we start by identifying the public suffix
// (if any), then consider the domain to be the public suffix with one added
// level of depth. (e.g.: if hostname is `not.evil.co.uk` and public suffix:
// `co.uk`, then we take one more level: `evil`, giving the final result:
// `evil.co.uk`).
var suffix = getPublicSuffix(rules, hostname);

if (suffix.length === cleanHost.length) {
// If `hostname` is a valid public suffix, then there is no domain to return.
// Since we already know that `getPublicSuffix` returns a suffix of `hostname`
// there is no need to perform a string comparison and we only compare the
// size.
if (suffix.length === hostname.length) {
return null;
}

// google.fr (length 9)
// suffix = fr (length 2)
// 5 = 9 - 2 - 1 (ignore the dot) - 1 (zero-based indexing)
var lastDotBeforeSuffixIndex = cleanHost.lastIndexOf('.', cleanHost.length - suffix.length - 2);
if (lastDotBeforeSuffixIndex === -1) {
return cleanHost;
}

return cleanHost.substring(lastDotBeforeSuffixIndex + 1);
return extractDomainWithSuffix(hostname, suffix);
};
4 changes: 3 additions & 1 deletion lib/from-host.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"use strict";

/**
* Utility to extract the TLD from a host string
*
Expand All @@ -10,5 +12,5 @@ module.exports = function extractTldFromHost(host) {
return null;
}

return host.substring(lastDotIndex + 1);
return host.substr(lastDotIndex + 1);
};
4 changes: 3 additions & 1 deletion lib/is-valid.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"use strict";

/**
* Checking if a host string is valid
* It's usually a preliminary check before trying to use getDomain or anything else
Expand All @@ -8,6 +10,6 @@
* @param host {String}
* @return {Boolean}
*/
module.exports = function isValid (validHosts, host) {
module.exports = function isValid(validHosts, host) {
return typeof host === 'string' && (validHosts.indexOf(host) !== -1 || (host.indexOf('.') !== -1 && host[0] !== '.'));
};
21 changes: 13 additions & 8 deletions lib/parsers/publicsuffix-org.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ var PublicSuffixOrgParser = {};
/**
* Filters a commented or empty line
*
* @param row {String}
* @return {String|null}
* @param {string} row
* @return {string|null}
*/
function keepOnlyRules(row) {
var trimmed = row.trim();
if (!trimmed || trimmed.indexOf('//') === 0) {
if (trimmed.length === 0 || trimmed.indexOf('//') === 0) {
return null;
}

Expand All @@ -27,7 +27,8 @@ function keepOnlyRules(row) {
/**
* Returns a rule based on string analysis
*
* @param rule {PublicSuffixRule}
* @param {string} row
* @return {object} a public suffix rule
*/
function domainBuilder(row) {
var rule = {
Expand All @@ -36,16 +37,20 @@ function domainBuilder(row) {
parts: null,
};

var spaceIndex = row.indexOf(' ');
// Only read line up to the first white-space
var spaceIndex = row.indexOf(' ');
if (spaceIndex !== -1) {
row = row.substr(0, spaceIndex);
}

row = punycode.toASCII(row);

//setting initial rule
// Keep track of initial rule
rule.source = row;

// exceptions
// Exception
if (row[0] === '!') {
row = row.substring(1);
row = row.substr(1);
rule.exception = true;
}

Expand Down
24 changes: 14 additions & 10 deletions lib/public-suffix.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"use strict";

var cleanHostValue = require('./clean-host.js');
var extractTldFromHost = require('./from-host.js');

Expand All @@ -6,21 +8,23 @@ var extractTldFromHost = require('./from-host.js');
*
* @api
* @since 1.5
* @param {string} host
* @return {String}
* @param {string} hostname
* @return {string}
*/
module.exports = function getPublicSuffix(rules, host, isHostClean) {
var cleanHost = cleanHostValue(host, isHostClean);
module.exports = function getPublicSuffix(rules, hostname) {
// Extract hostname
hostname = cleanHostValue(hostname);

// Host is a valid TLD
if (rules.hasTld(cleanHost)) {
return cleanHost;
// First check if `hostname` is already a valid top-level Domain.
if (rules.hasTld(hostname)) {
return hostname;
}

var candidate = rules.suffixLookup(cleanHost);
var candidate = rules.suffixLookup(hostname);
if (candidate === null) {
// Prevailing rule is '*'
return extractTldFromHost(cleanHost);
// Prevailing rule is '*' so we consider the top-level domain to be the
// public suffix of `hostname` (e.g.: 'example.org' => 'org').
return extractTldFromHost(hostname);
}

return candidate;
Expand Down
19 changes: 12 additions & 7 deletions lib/subdomain.js
Original file line number Diff line number Diff line change
@@ -1,21 +1,26 @@
"use strict";

var cleanHostValue = require('./clean-host.js');
var getDomain = require('./domain.js');


/**
* Returns the subdomain of a host string
* Returns the subdomain of a hostname string
*
* @api
* @param {string} host
* @return {string|null} a subdomain string if any, blank string if subdomain is empty, otherwise null
* @param {string} hostname
* @return {string|null} a subdomain string if any, blank string if subdomain
* is empty, otherwise null.
*/
module.exports = function getSubdomain(allRules, validHosts, host, isHostClean) {
var cleanHost = cleanHostValue(host, isHostClean);
var domain = getDomain(allRules, validHosts, cleanHost, true);
module.exports = function getSubdomain(rules, validHosts, hostname) {
hostname = cleanHostValue(hostname);

var domain = getDomain(rules, validHosts, hostname);

// No domain found? Just abort, abort!
if (domain === null) {
return null;
}

return cleanHost.substring(0, cleanHost.length - domain.length - 1);
return hostname.substr(0, hostname.length - domain.length - 1);
};
Loading

0 comments on commit aaf0853

Please sign in to comment.