Permalink
Browse files

changed tld to public_suffix to appease some pedantic douche nozzle that gave me shit
  • Loading branch information...
pauldix committed Jan 8, 2010
1 parent 2b7991f commit 648407b7f400f682a6475876d15f926a02eecb7a
View
@@ -4,11 +4,11 @@ h1. Domainatrix
h2. Summary
A cruel mistress that uses the public suffix domain list to dominate URLs by canonicalizing, finding TLDs, and breaking them into their domain parts.
A cruel mistress that uses the public suffix domain list to dominate URLs by canonicalizing, finding public suffixes, and breaking them into their domain parts.
h2. Description
This simple library can parse a URL into its canonical form. It uses the list of domains from "http://publicsuffix.org":http://publicsuffix.org to break the domain into its tld, domain, and subdomain.
This simple library can parse a URL into its canonical form. It uses the list of domains from "http://publicsuffix.org":http://publicsuffix.org to break the domain into its public suffix, domain, and subdomain.
h2. Installation
@@ -24,12 +24,12 @@ require 'domainatrix'
url = Domainatrix.parse("http://www.pauldix.net")
url.url # => "http://www.pauldix.net" (the original url)
url.tld # => "net"
url.public_suffix # => "net"
url.domain # => "pauldix"
url.canonical # => "net.pauldix"
url = Domainatrix.parse("http://foo.bar.pauldix.co.uk/asdf.html?q=arg")
url.tld # => "co.uk"
url.public_suffix # => "co.uk"
url.domain # => "pauldix"
url.subdomain # => "foo.bar"
url.path # => "/asdf.html?q=arg"
View
@@ -23,7 +23,7 @@ Gem::Specification.new do |s|
s.homepage = %q{http://github.com/pauldix/domainatrix}
s.require_paths = ["lib"]
s.rubygems_version = %q{1.3.5}
s.summary = %q{A cruel mistress that uses the public suffix domain list to dominate URLs by canonicalizing, finding TLDs, and breaking them into their domain parts.}
s.summary = %q{A cruel mistress that uses the public suffix domain list to dominate URLs by canonicalizing, finding the public suffix, and breaking them into their domain parts.}
if s.respond_to? :specification_version then
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
@@ -1,9 +1,9 @@
module Domainatrix
class DomainParser
attr_reader :tlds
attr_reader :public_suffixes
# Creates the parser with an empty lookup tree and then calls
# read_dat_file(file_name) to fill it.
#
# NOTE(review): this listing appears to be a rendered diff without +/- markers;
# the `@tlds` assignment is the pre-rename line and the `@public_suffixes`
# assignment directly below it is its replacement.
def initialize(file_name)
@tlds = {}
@public_suffixes = {}
read_dat_file(file_name)
end
@@ -13,7 +13,7 @@ def read_dat_file(file_name)
unless (line =~ /\/\//) || line.empty?
parts = line.split(".").reverse
sub_hash = @tlds
sub_hash = @public_suffixes
parts.each do |part|
sub_hash = (sub_hash[part] ||= {})
end
@@ -33,31 +33,31 @@ def parse(url)
# Splits a host name into its public suffix, domain and subdomain by walking
# the nested suffix hash one label at a time, starting from the rightmost label.
#
# host - a dotted host name String (it is split on ".").
#
# Returns a Hash with a suffix entry, :domain and :subdomain. The suffix and
# subdomain are re-joined with "." in their original left-to-right order.
#
# NOTE(review): this listing appears to be a rendered diff without +/- markers.
# Every `tld` line is the pre-rename line and the `public_suffix` line that
# follows it is its replacement; read as one method, both sets of lines run and
# the value of the last hash literal (:public_suffix) is what is returned.
#
# NOTE(review): `sub_hash[part]` is nil when the rightmost label of the host is
# not a key of the tree, and the `has_key?` call below then raises
# NoMethodError. Later iterations are only reached after `has_key?(parts[i+1])`
# succeeded, so the nil case is limited to the first label.
def parse_domains_from_host(host)
# Reverse the labels so index 0 is the rightmost one ("www.a.co.uk" -> uk, co, a, www).
parts = host.split(".").reverse
tld = []
public_suffix = []
domain = ""
subdomains = []
sub_hash = @tlds
sub_hash = @public_suffixes
parts.each_index do |i|
part = parts[i]
# Descend one level of the tree for the current label.
sub_parts = sub_hash[part]
sub_hash = sub_parts
if sub_parts.has_key? "*"
# Wildcard entry: the next label is taken as part of the suffix whatever it is,
# so the domain is two labels further on.
tld << part
tld << parts[i+1]
public_suffix << part
public_suffix << parts[i+1]
domain = parts[i+2]
subdomains = parts.slice(i+3, parts.size)
break
elsif sub_parts.empty? || !sub_parts.has_key?(parts[i+1])
# The tree has no deeper entry matching the next label: the suffix ends at
# this label and the next label is the domain.
tld << part
public_suffix << part
domain = parts[i+1]
subdomains = parts.slice(i+2, parts.size)
break
else
# The next label is also listed under this one; keep extending the suffix.
tld << part
public_suffix << part
end
end
# Labels were collected right-to-left, so reverse them back before joining.
{:tld => tld.reverse.join("."), :domain => domain, :subdomain => subdomains.reverse.join(".")}
{:public_suffix => public_suffix.reverse.join("."), :domain => domain, :subdomain => subdomains.reverse.join(".")}
end
end
end
View
@@ -1,18 +1,18 @@
module Domainatrix
class Url
attr_reader :tld, :domain, :subdomain, :path, :url
attr_reader :public_suffix, :domain, :subdomain, :path, :url
# Copies the URL parts out of an attributes hash into instance variables.
#
# attrs - Hash read with the symbol keys :url, :public_suffix (formerly :tld),
#         :domain, :subdomain and :path. Keys that are absent leave the
#         corresponding instance variable nil.
#
# NOTE(review): this listing appears to be a rendered diff without +/- markers;
# the `@tld` line is the pre-rename line and the `@public_suffix` line below it
# is its replacement.
def initialize(attrs = {})
@url = attrs[:url]
@tld = attrs[:tld]
@public_suffix = attrs[:public_suffix]
@domain = attrs[:domain]
@subdomain = attrs[:subdomain]
@path = attrs[:path]
end
def canonical(options = {})
tld_parts = @tld.split(".")
url = "#{tld_parts.reverse.join(".")}.#{@domain}"
public_suffix_parts = @public_suffix.split(".")
url = "#{public_suffix_parts.reverse.join(".")}.#{@domain}"
if @subdomain && !@subdomain.empty?
subdomain_parts = @subdomain.split(".")
url << ".#{subdomain_parts.reverse.join(".")}"
@@ -7,20 +7,20 @@
# Checks the shape of the tree built from the dat file: a Hash keyed by the
# rightmost label ("com", "uk"), with nested hashes for deeper labels and a
# "*" key where a wildcard entry exists (here under "jp" -> "aichi").
# NOTE(review): @domain_parser is set up outside this excerpt. Each `.tlds`
# line is the pre-rename form of the `.public_suffixes` line that follows it.
describe "reading the dat file" do
it "creates a tree of the domain names" do
@domain_parser.tlds.should be_a Hash
@domain_parser.public_suffixes.should be_a Hash
end
it "creates the first level of the tree" do
@domain_parser.tlds.should have_key("com")
@domain_parser.public_suffixes.should have_key("com")
end
it "creates the first level of the tree even when the first doesn't appear on a line by itself" do
@domain_parser.tlds.should have_key("uk")
@domain_parser.public_suffixes.should have_key("uk")
end
it "creates lower levels of the tree" do
@domain_parser.tlds["jp"].should have_key("ac")
@domain_parser.tlds["jp"]["aichi"].should have_key("*")
@domain_parser.public_suffixes["jp"].should have_key("ac")
@domain_parser.public_suffixes["jp"]["aichi"].should have_key("*")
end
end
@@ -40,10 +40,10 @@
end
# Covers one-label ("net"), two-label ("co.uk", "com.kg") and three-label
# ("com.aichi.jp") suffixes in the parse result.
# NOTE(review): the example description still says "tld" although the
# post-rename assertions read the :public_suffix key; the [:tld] lines are the
# pre-rename forms of the [:public_suffix] lines below them.
it "parses the tld" do
@domain_parser.parse("http://pauldix.net")[:tld].should == "net"
@domain_parser.parse("http://pauldix.co.uk")[:tld].should == "co.uk"
@domain_parser.parse("http://pauldix.com.kg")[:tld].should == "com.kg"
@domain_parser.parse("http://pauldix.com.aichi.jp")[:tld].should == "com.aichi.jp"
@domain_parser.parse("http://pauldix.net")[:public_suffix].should == "net"
@domain_parser.parse("http://pauldix.co.uk")[:public_suffix].should == "co.uk"
@domain_parser.parse("http://pauldix.com.kg")[:public_suffix].should == "com.kg"
@domain_parser.parse("http://pauldix.com.aichi.jp")[:public_suffix].should == "com.aichi.jp"
end
it "should have the domain" do
@@ -5,8 +5,8 @@
Domainatrix::Url.new(:url => "http://pauldix.net").url.should == "http://pauldix.net"
end
it "has the tld" do
Domainatrix::Url.new(:tld => "net").tld.should == "net"
it "has the public_suffix" do
Domainatrix::Url.new(:public_suffix => "net").public_suffix.should == "net"
end
it "has the domain" do
@@ -22,20 +22,20 @@
end
# The canonical form is the suffix labels in reverse order, then the domain,
# then the subdomain labels in reverse order, all joined with "."; a missing
# or empty subdomain contributes nothing.
# NOTE(review): the :tld lines are the pre-rename forms of the :public_suffix
# lines in the second half of this example.
it "canonicalizes the url" do
Domainatrix::Url.new(:domain => "pauldix", :tld => "net").canonical.should == "net.pauldix"
Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :tld => "net").canonical.should == "net.pauldix.foo"
Domainatrix::Url.new(:subdomain => "foo.bar", :domain => "pauldix", :tld => "net").canonical.should == "net.pauldix.bar.foo"
Domainatrix::Url.new(:domain => "pauldix", :tld => "co.uk").canonical.should == "uk.co.pauldix"
Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :tld => "co.uk").canonical.should == "uk.co.pauldix.foo"
Domainatrix::Url.new(:subdomain => "foo.bar", :domain => "pauldix", :tld => "co.uk").canonical.should == "uk.co.pauldix.bar.foo"
Domainatrix::Url.new(:subdomain => "", :domain => "pauldix", :tld => "co.uk").canonical.should == "uk.co.pauldix"
Domainatrix::Url.new(:domain => "pauldix", :public_suffix => "net").canonical.should == "net.pauldix"
Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :public_suffix => "net").canonical.should == "net.pauldix.foo"
Domainatrix::Url.new(:subdomain => "foo.bar", :domain => "pauldix", :public_suffix => "net").canonical.should == "net.pauldix.bar.foo"
Domainatrix::Url.new(:domain => "pauldix", :public_suffix => "co.uk").canonical.should == "uk.co.pauldix"
Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :public_suffix => "co.uk").canonical.should == "uk.co.pauldix.foo"
Domainatrix::Url.new(:subdomain => "foo.bar", :domain => "pauldix", :public_suffix => "co.uk").canonical.should == "uk.co.pauldix.bar.foo"
Domainatrix::Url.new(:subdomain => "", :domain => "pauldix", :public_suffix => "co.uk").canonical.should == "uk.co.pauldix"
end
# With no options given, canonical appends the path directly after the
# reversed host labels.
# NOTE(review): the :tld line is the pre-rename form of the :public_suffix line.
it "canonicalizes the url with the path" do
Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :tld => "net", :path => "/hello").canonical.should == "net.pauldix.foo/hello"
Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :public_suffix => "net", :path => "/hello").canonical.should == "net.pauldix.foo/hello"
end
# Passing :include_path => false yields only the reversed host labels.
# NOTE(review): no :path is supplied here, so this example would presumably
# pass whether or not the option is honoured - confirm against canonical.
# The :tld line is the pre-rename form of the :public_suffix line.
it "canonicalizes the url without the path" do
Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :tld => "net").canonical(:include_path => false).should == "net.pauldix.foo"
Domainatrix::Url.new(:subdomain => "foo", :domain => "pauldix", :public_suffix => "net").canonical(:include_path => false).should == "net.pauldix.foo"
end
end

6 comments on commit 648407b

@jacqui

This comment has been minimized.

Show comment
Hide comment
@jacqui

jacqui Jan 8, 2010

that is possibly one of the best commit messages ever, period.

another period. period. :)

jacqui replied Jan 8, 2010

that is possibly one of the best commit messages ever, period.

another period. period. :)

@pauldix

This comment has been minimized.

Show comment
Hide comment
@pauldix

pauldix Jan 8, 2010

Owner

hahaha thanks.

Owner

pauldix replied Jan 8, 2010

hahaha thanks.

@stve

This comment has been minimized.

Show comment
Hide comment
@stve

stve Jan 8, 2010

pedantic douche nozzle should be the name of your next project

stve replied Jan 8, 2010

pedantic douche nozzle should be the name of your next project

@joshbuddy

This comment has been minimized.

Show comment
Hide comment
@joshbuddy

joshbuddy Jan 8, 2010

I want every commit message everywhere from now on to include "pedantic douche nozzle" actually. Most amazing commit message ever.

joshbuddy replied Jan 8, 2010

I want every commit message everywhere from now on to include "pedantic douche nozzle" actually. Most amazing commit message ever.

@lookfirst

This comment has been minimized.

Show comment
Hide comment
@lookfirst

lookfirst Feb 1, 2010

as the 'pedantic douche nozzle' who gave you shit, i applaud you for a great comment.

funny how i just ran across this now.

lookfirst replied Feb 1, 2010

as the 'pedantic douche nozzle' who gave you shit, i applaud you for a great comment.

funny how i just ran across this now.

@pauldix

This comment has been minimized.

Show comment
Hide comment
@pauldix

pauldix Feb 2, 2010

Owner

hahah thanks for the inspiration. :)

Owner

pauldix replied Feb 2, 2010

hahah thanks for the inspiration. :)

Please sign in to comment.