Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Url parse #55

Merged
merged 4 commits into from
Jun 19, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions lib/parsers/publicsuffix-org.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

var PublicSuffixOrgParser = {};

var punycode = require('punycode');

/**
* Parse a one-domain-per-line file
*
Expand All @@ -22,7 +24,8 @@ PublicSuffixOrgParser.parse = function (body){
*/
PublicSuffixOrgParser.domainBuilder = function (row){
var rule = {};
row = row.trim();

row = punycode.toASCII(row.trim());

//setting initial rule
rule.source = row;
Expand Down Expand Up @@ -64,4 +67,4 @@ PublicSuffixOrgParser.filterRow = function (row) {
return (/^\/\//).test(row) ? null : row;
};

module.exports = PublicSuffixOrgParser;
module.exports = PublicSuffixOrgParser;
18 changes: 7 additions & 11 deletions lib/tld.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"use strict";

var Rule = require('./rule.js');
var urlParts = /(^https?:?\/\/|^\/\/)?([^:]+(:[^@]+)?@)?([^:@\/]+)(:|\/|$)/; // 1 = protocol, 2/3 = auth, 4 = domain
var URL = require('url');

/**
* tld library
Expand Down Expand Up @@ -308,20 +308,16 @@ tld.prototype.isValid = function isValid (host) {
* @param {string} value
* @return {String}
*/
tld.cleanHostValue = function cleanHostValue(value){
value = trim(value).toLowerCase();

var parts = urlParts.exec(value);
// scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
var hasPrefixRE = /^(([a-z][a-z0-9+.-]*)?:)?\/\//;

if (parts[3] && typeof console !== 'undefined' && console.log) {
var userPasswordSeparatorPosition = parts[3].lastIndexOf(':');
tld.cleanHostValue = function cleanHostValue(value){
value = trim(value).toLowerCase();

if (parts[3][userPasswordSeparatorPosition + 1] !== '/') {
console.error('user:password syntax is deprecated in RFC3986. You should consider an alternate way of doing it.');
}
}
var parts = URL.parse(hasPrefixRE.test(value) ? value : '//' + value, null, true);

return parts[4] || value || '';
return parts.hostname || value || '';
};

/**
Expand Down
2 changes: 1 addition & 1 deletion rules.json

Large diffs are not rendered by default.

40 changes: 34 additions & 6 deletions test/tld.js
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,21 @@ describe('tld.js', function () {
expect(tld.getDomain('fr.t.co')).to.equal('t.co');
});

//@see https://github.com/oncletom/tld.js/issues/33
it('should not break on specific RegExp characters', function () {
expect(tld.getDomain('www.weir)domain.com')).to.equal('weir)domain.com');
it('should not break on specific RegExp characters', function (){
expect(function (){
//@see https://github.com/oncletom/tld.js/issues/33
tld.getDomain('www.weir)domain.com');
}).not.to.throwError();
expect(function (){
//@see https://github.com/oncletom/tld.js/issues/53
tld.getDomain("http://('4drsteve.com', [], ['54.213.246.177'])/xmlrpc.php");
}).not.to.throwError();
});

//@see https://github.com/oncletom/tld.js/issues/53
it('should correctly extract domain from paths including "@" in the path', function (){
var domain = tld.getDomain('http://cdn.jsdelivr.net/g/jquery@1.8.2,jquery.waypoints@2.0.2,qtip2@2.2.1,typeahead.js@0.9.3,sisyphus@0.1,jquery.slick@1.3.15,fastclick@1.0.3');
expect(domain).to.equal('jsdelivr.net');
});

it('should provide consistent results', function(){
Expand Down Expand Up @@ -189,6 +201,10 @@ describe('tld.js', function () {
it('should return www.nytimes.com even with an URL as a parameter', function(){
expect(tldLib.cleanHostValue('http://www.nytimes.com/glogin?URI=http://www.notnytimes.com/2010/03/26/us/politics/26court.html&OQ=_rQ3D1Q26&OP=45263736Q2FKgi!KQ7Dr!K@@@Ko!fQ24KJg(Q3FQ5Cgg!Q60KQ60W.WKWQ22KQ60IKyQ3FKigQ24Q26!Q26(Q3FKQ60I(gyQ5C!Q2Ao!fQ24')).to.equal('www.nytimes.com');
});

it('should return punycode for international hostnames', function() {
expect(tldLib.cleanHostValue('台灣')).to.equal('xn--kpry57d');
});
});

describe('getSubdomain method', function(){
Expand Down Expand Up @@ -227,11 +243,23 @@ describe('tld.js', function () {
expect(tld.getSubdomain('emergency.blogspot.co.uk')).to.equal('emergency');
});

//@see https://github.com/oncletom/tld.js/issues/33
it('should not break on specific RegExp characters', function () {
expect(tld.getSubdomain('www.weir)domain.com')).to.equal('www');
it('should not break on specific RegExp characters', function (){
expect(function (){
//@see https://github.com/oncletom/tld.js/issues/33
tld.getSubdomain('www.weir)domain.com');
}).not.to.throwError();
expect(function (){
//@see https://github.com/oncletom/tld.js/issues/53
tld.getSubdomain("http://('4drsteve.com', [], ['54.213.246.177'])/xmlrpc.php");
}).not.to.throwError();
});

//@see https://github.com/oncletom/tld.js/issues/53
it('should correctly extract domain from paths including "@" in the path', function (){
var domain = tld.getSubdomain('http://cdn.jsdelivr.net/g/jquery@1.8.2,jquery.waypoints@2.0.2,qtip2@2.2.1,typeahead.js@0.9.3,sisyphus@0.1,jquery.slick@1.3.15,fastclick@1.0.3');
expect(domain).to.equal('cdn');
});

//@see https://github.com/oncletom/tld.js/issues/35
it('should provide consistent results', function(){
expect(tld.getSubdomain('www.bl.uk')).to.equal('www');
Expand Down