Browse files

Merge pull request #21 from hashrocket/master

Add support for a wider variety of URLs
  • Loading branch information...
2 parents 7b72bd6 + 12d186d commit 0d19c1caf36ffe01db7d66874810ae6b0dd822c5 @pauldix committed Feb 20, 2012
Showing with 167 additions and 17 deletions.
  1. +47 −0 .rvmrc
  2. +6 −0 Gemfile
  3. +50 −0 Gemfile.lock
  4. +2 −1 domainatrix.gemspec
  5. +17 −8 lib/domainatrix/domain_parser.rb
  6. +8 −8 lib/domainatrix/url.rb
  7. +37 −0 spec/domainatrix_spec.rb
View
47 .rvmrc
@@ -0,0 +1,47 @@
+#!/usr/bin/env bash
+
+# This is an RVM Project .rvmrc file, used to automatically load the ruby
+# development environment upon cd'ing into the directory
+
+# First we specify our desired <ruby>[@<gemset>]; the @gemset name is optional.
+environment_id="ruby-1.9.2-p180@domainatrix"
+
+#
+# Uncomment the following line if you want options to be set only for the given project.
+#
+# PROJECT_JRUBY_OPTS=( --1.9 )
+
+#
+# First we attempt to load the desired environment directly from the environment
+# file. This is very fast and efficient compared to running through the entire
+# CLI and selector. If you want feedback on which environment was used then
+# insert the word 'use' after --create as this triggers verbose mode.
+#
+if [[ -d "${rvm_path:-$HOME/.rvm}/environments" \
+ && -s "${rvm_path:-$HOME/.rvm}/environments/$environment_id" ]]
+then
+ \. "${rvm_path:-$HOME/.rvm}/environments/$environment_id"
+
+ if [[ -s "${rvm_path:-$HOME/.rvm}/hooks/after_use" ]]
+ then
+ . "${rvm_path:-$HOME/.rvm}/hooks/after_use"
+ fi
+else
+ # If the environment file has not yet been created, use the RVM CLI to select.
+ if ! rvm --create "$environment_id"
+ then
+ echo "Failed to create RVM environment '${environment_id}'."
+ exit 1
+ fi
+fi
+
+#
+# If you use an RVM gemset file to install a list of gems (*.gems), you can have
+# it be automatically loaded. Uncomment the following and adjust the filename if
+# necessary.
+#
+# filename=".gems"
+# if [[ -s "$filename" ]] ; then
+# rvm gemset import "$filename" | grep -v already | grep -v listed | grep -v complete | sed '/^$/d'
+# fi
+
View
6 Gemfile
@@ -0,0 +1,6 @@
+source :rubygems
+
+gemspec
+
+gem "ruby-debug", :platform => :mri_18
+gem "ruby-debug19", :platform => :mri_19, :require => 'ruby-debug'
View
50 Gemfile.lock
@@ -0,0 +1,50 @@
+PATH
+ remote: .
+ specs:
+ domainatrix (0.0.9)
+ addressable
+
+GEM
+ remote: http://rubygems.org/
+ specs:
+ addressable (2.2.6)
+ archive-tar-minitar (0.5.2)
+ columnize (0.3.4)
+ diff-lcs (1.1.3)
+ linecache (0.46)
+ rbx-require-relative (> 0.0.4)
+ linecache19 (0.5.12)
+ ruby_core_source (>= 0.1.4)
+ rbx-require-relative (0.0.5)
+ rspec (2.7.0)
+ rspec-core (~> 2.7.0)
+ rspec-expectations (~> 2.7.0)
+ rspec-mocks (~> 2.7.0)
+ rspec-core (2.7.1)
+ rspec-expectations (2.7.0)
+ diff-lcs (~> 1.1.2)
+ rspec-mocks (2.7.0)
+ ruby-debug (0.10.4)
+ columnize (>= 0.1)
+ ruby-debug-base (~> 0.10.4.0)
+ ruby-debug-base (0.10.4)
+ linecache (>= 0.3)
+ ruby-debug-base19 (0.11.25)
+ columnize (>= 0.3.1)
+ linecache19 (>= 0.5.11)
+ ruby_core_source (>= 0.1.4)
+ ruby-debug19 (0.11.6)
+ columnize (>= 0.3.1)
+ linecache19 (>= 0.5.11)
+ ruby-debug-base19 (>= 0.11.19)
+ ruby_core_source (0.1.5)
+ archive-tar-minitar (>= 0.5.2)
+
+PLATFORMS
+ ruby
+
+DEPENDENCIES
+ domainatrix!
+ rspec
+ ruby-debug
+ ruby-debug19
View
3 domainatrix.gemspec
@@ -25,6 +25,7 @@ Gem::Specification.new do |s|
s.rubygems_version = %q{1.3.5}
s.summary = %q{A cruel mistress that uses the public suffix domain list to dominate URLs by canonicalizing, finding the public suffix, and breaking them into their domain parts.}
s.add_dependency("addressable")
+ s.add_development_dependency("rspec")
if s.respond_to? :specification_version then
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
@@ -35,4 +36,4 @@ Gem::Specification.new do |s|
end
else
end
-end
+end
View
25 lib/domainatrix/domain_parser.rb
@@ -31,13 +31,22 @@ def read_dat_file(file_name)
end
def parse(url)
+ return {} unless url && url.strip != ''
+ url = "http://#{url}" unless url[/:\/\//]
uri = URI.parse(url)
if uri.query
path = "#{uri.path}?#{uri.query}"
else
path = uri.path
end
- parse_domains_from_host(uri.host).merge({
+
+ if uri.host == 'localhost'
+ uri_hash = { :public_suffix => '', :domain => 'localhost', :subdomain => '' }
+ else
+ uri_hash = parse_domains_from_host(uri.host || uri.basename)
+ end
+
+ uri_hash.merge({
:scheme => uri.scheme,
:host => uri.host,
:path => path,
@@ -46,32 +55,32 @@ def parse(url)
end
def parse_domains_from_host(host)
+ return {} unless host
parts = host.split(".").reverse
public_suffix = []
domain = ""
subdomains = []
sub_hash = @public_suffixes
- parts.each_index do |i|
- part = parts[i]
- sub_parts = sub_hash[part]
- sub_hash = sub_parts
+ parts.each_with_index do |part, i|
+ sub_hash = sub_parts = sub_hash[part] || {}
if sub_parts.has_key? "*"
public_suffix << part
public_suffix << parts[i+1]
domain = parts[i+2]
- subdomains = parts.slice(i+3, parts.size)
+ subdomains = parts.slice(i+3, parts.size) || []
break
elsif sub_parts.empty? || !sub_parts.has_key?(parts[i+1])
public_suffix << part
domain = parts[i+1]
- subdomains = parts.slice(i+2, parts.size)
+ subdomains = parts.slice(i+2, parts.size) || []
break
else
public_suffix << part
end
end
+
{:public_suffix => public_suffix.reverse.join("."), :domain => domain, :subdomain => subdomains.reverse.join(".")}
end
end
-end
+end
View
16 lib/domainatrix/url.rb
@@ -3,13 +3,13 @@ class Url
attr_reader :public_suffix, :domain, :subdomain, :path, :url, :scheme, :host
def initialize(attrs = {})
- @scheme = attrs[:scheme]
- @host = attrs[:host]
- @url = attrs[:url]
- @public_suffix = attrs[:public_suffix]
- @domain = attrs[:domain]
- @subdomain = attrs[:subdomain]
- @path = attrs[:path]
+ @scheme = attrs[:scheme] || ''
+ @host = attrs[:host] || ''
+ @url = attrs[:url] || ''
+ @public_suffix = attrs[:public_suffix] || ''
+ @domain = attrs[:domain] || ''
+ @subdomain = attrs[:subdomain] || ''
+ @path = attrs[:path] || ''
end
def canonical(options = {})
@@ -25,7 +25,7 @@ def canonical(options = {})
end
def domain_with_public_suffix
- "#{@domain}.#{@public_suffix}"
+ [@domain, @public_suffix].compact.reject{|s|s==''}.join('.')
end
alias domain_with_tld domain_with_public_suffix
View
37 spec/domainatrix_spec.rb
@@ -13,4 +13,41 @@
Domainatrix.parse("http://foo.bar.pauldix.net").canonical.should == "net.pauldix.bar.foo"
Domainatrix.parse("http://pauldix.co.uk").canonical.should == "uk.co.pauldix"
end
+
+ context 'localhost with a port' do
+ subject { Domainatrix.parse('localhost:3000') }
+ its(:scheme) { should == 'http' }
+ its(:host) { should == 'localhost' }
+ its(:url) { should == 'http://localhost:3000' }
+ its(:public_suffix) { should == '' }
+ its(:domain) { should == 'localhost' }
+ its(:subdomain) { should == '' }
+ its(:path) { should == '' }
+ its(:domain_with_tld) { should == 'localhost' }
+ end
+
+ context 'without a scheme' do
+ subject { Domainatrix.parse('www.pauldix.net') }
+ its(:scheme) { should == 'http' }
+ its(:host) { should == 'www.pauldix.net' }
+ its(:url) { should == 'http://www.pauldix.net' }
+ its(:public_suffix) { should == 'net' }
+ its(:domain) { should == 'pauldix' }
+ its(:subdomain) { should == 'www' }
+ its(:path) { should == '' }
+ its(:domain_with_tld) { should == 'pauldix.net' }
+ end
+
+ context 'with a blank url' do
+ subject { Domainatrix.parse(nil) }
+ its(:scheme) { should == '' }
+ its(:host) { should == '' }
+ its(:url) { should == '' }
+ its(:public_suffix) { should == '' }
+ its(:domain) { should == '' }
+ its(:subdomain) { should == '' }
+ its(:path) { should == '' }
+ its(:domain_with_tld) { should == '' }
+ end
+
end

0 comments on commit 0d19c1c

Please sign in to comment.