Skip to content

Commit

Permalink
wired up all the stuff to break down a host into its tld, domain, and…
Browse files Browse the repository at this point in the history
… subdomain parts
  • Loading branch information
pauldix committed Dec 10, 2009
1 parent b4cd3fb commit 19f5474
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 4 deletions.
39 changes: 39 additions & 0 deletions lib/domainatrix/domain_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,44 @@ def read_dat_file(file_name)
end
end
end

# Breaks a URL string into its host parts plus its path.
#
# The host is handed to parse_domains_from_host and the resulting hash is
# returned with an extra :path entry. When the URL carries a query string,
# :path is the URI path followed by "?" and the query; otherwise it is the
# URI path alone.
#
# url - the URL String to parse (passed straight to URI.parse).
#
# Returns the Hash from parse_domains_from_host merged with :path.
def parse(url)
  parsed = URI.parse(url)
  query = parsed.query
  full_path = query ? "#{parsed.path}?#{query}" : parsed.path
  host_parts = parse_domains_from_host(parsed.host)
  host_parts.merge(:path => full_path)
end

# Splits a host name into its TLD (public suffix), domain and subdomain
# parts by walking the @tlds tree from the rightmost label inwards.
#
# @tlds is read as a nested Hash keyed by label (e.g. @tlds["uk"]["co"]);
# a "*" key at some level means "any label below this one is also part of
# the TLD".
#
# host - the host String, e.g. "bar.foo.pauldix.co.uk". nil is treated like
#        an empty string.
#
# Returns a Hash with String values under :tld, :domain and :subdomain.
# Parts that are not present in the host come back as "".
def parse_domains_from_host(host)
  # to_s lets a nil host (URI.parse yields one for scheme-less input) take
  # the same all-empty path that "" already takes.
  parts = host.to_s.split(".").reverse
  tld = []
  domain = nil
  subdomains = nil
  sub_hash = @tlds

  parts.each_with_index do |part, i|
    # A label that is not in the tree has no known children. Falling back
    # to {} makes an unknown top-level label count as a one-label TLD,
    # which is the Public Suffix List default rule, instead of raising on
    # nil.
    sub_hash = sub_hash[part] || {}

    if sub_hash.has_key?("*")
      # Wildcard: the next label (if the host has one) belongs to the TLD.
      tld.concat([part, parts[i + 1]].compact)
      domain = parts[i + 2]
      subdomains = parts[(i + 3)..-1]
      break
    end

    tld << part
    # Keep descending while the next label is a known child of this one.
    next if sub_hash.has_key?(parts[i + 1])

    domain = parts[i + 1]
    subdomains = parts[(i + 2)..-1]
    break
  end

  # An out-of-range index or range yields nil (host shorter than the
  # matched rule); normalise those to empty strings.
  {
    :tld => tld.reverse.join("."),
    :domain => domain.to_s,
    :subdomain => (subdomains || []).reverse.join(".")
  }
end
end
end
46 changes: 42 additions & 4 deletions spec/domainatrix/domain_parser_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,58 @@
end

describe "reading the dat file" do
it "create a trie of the domain names" do
it "creates a tree of the domain names" do
@domain_parser.tlds.should be_a Hash
end

it "should have the first level of the tree" do
it "creates the first level of the tree" do
@domain_parser.tlds.should have_key("com")
end

it "should have the first level of the tree even when the first doesn't appear on a line by itself" do
it "creates the first level of the tree even when the first doesn't appear on a line by itself" do
@domain_parser.tlds.should have_key("uk")
end

it "should have lower levels of the tree" do
it "creates lower levels of the tree" do
@domain_parser.tlds["jp"].should have_key("ac")
@domain_parser.tlds["jp"]["aichi"].should have_key("*")
end
end

# Examples for @domain_parser.parse: each one feeds it a URL and checks one
# key of the returned hash (:path, :tld, :domain, :subdomain).
describe "parsing" do
  it "returns a hash of parts" do
    result = @domain_parser.parse("http://pauldix.net")
    result.should be_a Hash
  end

  it "should strip the http://" do
    @domain_parser.parse("http://pauldix.net").each_value do |value|
      (value =~ /http\:\/\//).should_not be
    end
  end

  it "parses out the path" do
    # Each pair is [url, expected :path].
    [
      ["http://pauldix.net/foo.html?asdf=foo", "/foo.html?asdf=foo"],
      ["http://pauldix.net?asdf=foo", "?asdf=foo"],
      ["http://pauldix.net", ""]
    ].each do |url, expected_path|
      @domain_parser.parse(url)[:path].should == expected_path
    end
  end

  it "parses the tld" do
    # Each pair is [url, expected :tld].
    [
      ["http://pauldix.net", "net"],
      ["http://pauldix.co.uk", "co.uk"],
      ["http://pauldix.com.kg", "com.kg"],
      ["http://pauldix.com.aichi.jp", "com.aichi.jp"]
    ].each do |url, expected_tld|
      @domain_parser.parse(url)[:tld].should == expected_tld
    end
  end

  it "should have the domain" do
    # Every one of these hosts has "pauldix" as its domain part.
    [
      "http://pauldix.net",
      "http://foo.pauldix.net",
      "http://pauldix.co.uk",
      "http://foo.pauldix.co.uk",
      "http://pauldix.com.kg",
      "http://pauldix.com.aichi.jp"
    ].each do |url|
      @domain_parser.parse(url)[:domain].should == "pauldix"
    end
  end

  it "should have subdomains" do
    # Each pair is [url, expected :subdomain].
    [
      ["http://foo.pauldix.net", "foo"],
      ["http://bar.foo.pauldix.co.uk", "bar.foo"]
    ].each do |url, expected_subdomain|
      @domain_parser.parse(url)[:subdomain].should == expected_subdomain
    end
  end
end
end

0 comments on commit 19f5474

Please sign in to comment.