Skip to content

Commit

Permalink
Add ip validation.
Browse files Browse the repository at this point in the history
  • Loading branch information
remusao committed Sep 11, 2017
1 parent 9a7a3f2 commit 5464bef
Show file tree
Hide file tree
Showing 7 changed files with 324 additions and 51 deletions.
70 changes: 48 additions & 22 deletions bin/benchmark.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ var tld = require('../index.js');
var Benchmark = require('benchmark');


var DOMAINS = [
var HOSTNAMES = [
// No public suffix
'example.foo.edu.au', // null
'example.foo.edu.sh', // null
Expand All @@ -30,7 +30,10 @@ var DOMAINS = [
'example.www.ck', // !www.ck
'foo.bar.baz.city.yokohama.jp', // !city.yokohama.jp
'example.city.kobe.jp', // !city.kobe.jp
];


var URLS = [
// IDN labels
'example.北海道.jp', // 北海道.jp
'example.和歌山.jp', // 和歌山.jp
Expand All @@ -44,54 +47,62 @@ var DOMAINS = [
'FOO.bar.BAZ.ortsinfo.AT', // null

// Full URLs
// '2001:0DB8:0100:F101:0210:A4FF:FEE3:9566',
// 'http://user:pass@www.examplegoogle.com:21/blah#baz',
// 'http://iris.test.ing/ḍ̇/?ḍ̇#ḍ̇',
// 'http://0000000000000300.0xffffffffFFFFFFFF.3022415481470977',
'2001:0DB8:0100:F101:0210:A4FF:FEE3:9566',
'http://user:pass@www.examplegoogle.com:21/blah#baz',
'http://iris.test.ing/ḍ̇/?ḍ̇#ḍ̇',
'http://0000000000000300.0xffffffffFFFFFFFF.3022415481470977',
'http://192.168.0.1/',
'http://%30%78%63%30%2e%30%32%35%30.01%2e',
'http://user:pass@[::1]/segment/index.html?query#frag',
'https://[::1]',
];


// TODO - Compare to other libraries
function main() {
function bench(values) {
console.log(
'While interpreting the results, keep in mind that each "op" reported' +
' by the benchmark is processing ' + DOMAINS.length + ' domains'
' by the benchmark is processing ' + values.length + ' domains'
);

new Benchmark.Suite()
.add('tldjs#isIp', () => {
for (var i = 0; i < values.length; i += 1) {
tld.isIp(values[i]);
}
})
.add('tldjs#isValid', () => {
for (var i = 0; i < DOMAINS.length; i += 1) {
tld.isValid(DOMAINS[i]);
for (var i = 0; i < values.length; i += 1) {
tld.isValid(values[i]);
}
})
.add('tldjs#extractHostname', () => {
for (var i = 0; i < DOMAINS.length; i += 1) {
tld.extractHostname(DOMAINS[i]);
for (var i = 0; i < values.length; i += 1) {
tld.extractHostname(values[i]);
}
})
.add('tldjs#tldExists', () => {
for (var i = 0; i < DOMAINS.length; i += 1) {
tld.tldExists(DOMAINS[i]);
for (var i = 0; i < values.length; i += 1) {
tld.tldExists(values[i]);
}
})
.add('tldjs#getPublicSuffix', () => {
for (var i = 0; i < DOMAINS.length; i += 1) {
tld.getPublicSuffix(DOMAINS[i]);
for (var i = 0; i < values.length; i += 1) {
tld.getPublicSuffix(values[i]);
}
})
.add('tldjs#getDomain', () => {
for (var i = 0; i < DOMAINS.length; i += 1) {
tld.getDomain(DOMAINS[i]);
for (var i = 0; i < values.length; i += 1) {
tld.getDomain(values[i]);
}
})
.add('tldjs#getSubdomain', () => {
for (var i = 0; i < DOMAINS.length; i += 1) {
tld.getSubdomain(DOMAINS[i]);
for (var i = 0; i < values.length; i += 1) {
tld.getSubdomain(values[i]);
}
})
.add('tldjs#parse', () => {
for (var i = 0; i < DOMAINS.length; i += 1) {
tld.parse(DOMAINS[i]);
for (var i = 0; i < values.length; i += 1) {
tld.parse(values[i]);
}
})
.on('cycle', function (event) {
Expand All @@ -101,4 +112,19 @@ function main() {
}


// TODO - Compare to other libraries
function main() {
  // Print a framed section title, one console line per entry.
  var printBanner = function (lines) {
    lines.forEach(function (line) {
      console.log(line);
    });
  };

  // First pass: inputs that are already plain hostnames.
  printBanner([
    '>>> -------------------- <<<',
    '>>> Only valid hostnames <<<',
    '>>> -------------------- <<<',
  ]);
  bench(HOSTNAMES);

  // Blank separator line between the two benchmark sections.
  console.log();

  // Second pass: arbitrary URLs / IPs / IDN inputs.
  printBanner([
    '>>> ----------- <<<',
    '>>> Random URLs <<<',
    '>>> ----------- <<<',
  ]);
  bench(URLS);
}


main();
19 changes: 18 additions & 1 deletion index.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
'use strict';


// Load rules
var Trie = require('./lib/suffix-trie.js');
var allRules = Trie.fromJson(require('./rules.json'));
Expand All @@ -10,6 +11,7 @@ var getDomain = require('./lib/domain.js');
var getPublicSuffix = require('./lib/public-suffix.js');
var getSubdomain = require('./lib/subdomain.js');
var isValid = require('./lib/is-valid.js');
var isIp = require('./lib/is-ip.js');
var tldExists = require('./lib/tld-exists.js');


Expand Down Expand Up @@ -50,12 +52,26 @@ function factory(options) {
var result = {
hostname: _extractHostname(url),
isValid: null,
tldExists: null,
isIp: null,
tldExists: false,
publicSuffix: null,
domain: null,
subdomain: null,
};

if (result.hostname === null) {
result.isIp = false;
result.isValid = false;
return result;
}

// Check if `hostname` is a valid ip address
result.isIp = isIp(result.hostname);
if (result.isIp) {
result.isValid = true;
return result;
}

// Check if `hostname` is valid
result.isValid = isValid(result.hostname);
if (result.isValid === false) return result;
Expand Down Expand Up @@ -83,6 +99,7 @@ function factory(options) {

return {
extractHostname: _extractHostname,
isIp: isIp,
isValid: isValid,
parse: parse,
tldExists: function (url) {
Expand Down
53 changes: 48 additions & 5 deletions lib/clean-host.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ var isValid = require('./is-valid.js');

// scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
var hasPrefixRE = /^(([a-z][a-z0-9+.-]*)?:)?\/\//;
var invalidHostnameChars = /[^A-Za-z0-9.-]/;


// @see https://github.com/oncletom/tld.js/issues/95
function rtrim(value) {
Expand All @@ -28,21 +28,64 @@ function rtrim(value) {
return value;
}


/**
 * Cheap test telling whether `value` starts or ends with a character whose
 * code is 32 (space) or lower, i.e. whether calling `trim()` is worth it.
 *
 * @param {string} value
 * @return {boolean} `false` for an empty string, otherwise `true` when the
 *   first or the last character has a char code <= 32.
 */
function checkTrimmingNeeded(value) {
  var size = value.length;

  // Nothing to inspect (and nothing to trim) in an empty value.
  if (!(size > 0)) {
    return false;
  }

  var startsWithBlank = value.charCodeAt(0) <= 32;
  if (startsWithBlank) {
    return true;
  }

  return value.charCodeAt(size - 1) <= 32;
}


/**
 * Tell whether `value` contains at least one ASCII upper-case letter, i.e.
 * whether calling `toLowerCase()` could change it for the [A-Z] range.
 *
 * @param {string} value
 * @return {boolean} `true` as soon as a character in [A-Z] is found.
 */
function checkLowerCaseNeeded(value) {
  var position = 0;

  while (position < value.length) {
    var charCode = value.charCodeAt(position);
    // 65 => 'A', 90 => 'Z'
    var isAsciiUpperCase = charCode >= 65 && charCode <= 90;
    if (isAsciiUpperCase) {
      return true;
    }
    position += 1;
  }

  return false;
}


module.exports = function extractHostname(value) {
// First check if `value` is already a valid hostname.
if (isValid(value)) {
return rtrim(value);
}

var url = ('' + value).toLowerCase().trim();
var url = value;

if (typeof url !== 'string') {
url = '' + url;
}

var needsTrimming = checkTrimmingNeeded(url);
if (needsTrimming) {
url = url.trim();
}

if (isValid(url)) {
var needsLowerCase = checkLowerCaseNeeded(url);
if (needsLowerCase) {
url = url.toLowerCase();
}

// Try again after `url` has been transformed to lowercase and trimmed.
if ((needsLowerCase || needsTrimming) && isValid(url)) {
return rtrim(url);
}

// Proceed with heavier url parsing to extract the hostname.
var parts = URL.parse(hasPrefixRE.test(url) ? url : '//' + url, null, true);
if (!hasPrefixRE.test(url)) {
url = '//' + url;
}

var parts = URL.parse(url, null, true);

if (parts.hostname && !invalidHostnameChars.test(parts.hostname)) {
if (parts.hostname) {
return rtrim(parts.hostname);
}

Expand Down
76 changes: 76 additions & 0 deletions lib/is-ip.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
'use strict';


var isIp = require('is-ip');


/**
 * Fast pre-filter for ipv4 candidates: accept `hostname` only if every one of
 * its characters is a digit or a dot ([0-9.]). A positive answer is not a
 * validation; it only means the more expensive 'is-ip' check is worth running.
 * An empty string passes vacuously.
 *
 * @param {string} hostname
 * @return {boolean}
 */
function isProbablyIpv4(hostname) {
  var length = hostname.length;
  var index = 0;

  while (index < length) {
    var charCode = hostname.charCodeAt(index);
    var isDot = charCode === 46; // '.'
    var isDigit = charCode >= 48 && charCode <= 57; // '0'..'9'

    if (!isDot && !isDigit) {
      return false;
    }

    index += 1;
  }

  return true;
}


/**
 * Check if `hostname` could be a valid ipv6 address (contains only characters
 * in the range: [0-9a-fA-F:]). If this is the case, we will perform a more
 * exhaustive check using 'is-ip' library (which is more expensive).
 *
 * Hexadecimal digits are accepted in both cases: the textual form of an ipv6
 * address is case-insensitive, so an upper-case address must not be discarded
 * by this pre-filter before the exhaustive check gets a chance to run.
 *
 * This is only a character-set filter: an empty string passes vacuously, and
 * strings such as ':::' pass as well.
 *
 * @param {string} hostname
 * @return {boolean}
 */
function isProbablyIpv6(hostname) {
  for (var i = 0; i < hostname.length; i += 1) {
    var code = hostname.charCodeAt(i);
    if (!(
      code === 58 || // ':'
      (code >= 48 && code <= 57) || // 0-9
      (code >= 97 && code <= 102) || // a-f
      (code >= 65 && code <= 70) // A-F
    )) {
      return false;
    }
  }

  return true;
}


/**
* Check if `hostname` is a valid ip addr (either ipv6 or ipv4).
*
* @param {string} hostname
* @return {boolean}
*/
module.exports = function (hostname) {
if (typeof hostname !== 'string') {
return false;
}

if (hostname.length === 0) {
return false;
}

// We always perform a very fast verification to check if the `hostname` has a
// chance of being an ip. If so, we perform a more expensive, exhaustive check
// based on regexps.
return (
(isProbablyIpv6(hostname) && isIp.v6(hostname)) ||
(isProbablyIpv4(hostname) && isIp.v4(hostname))
);
};
Loading

0 comments on commit 5464bef

Please sign in to comment.