Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Merge pull request #7 from dparis/public_suffix

Removed the dependency on domainatrix and replaced it with the public_suffix gem
  • Loading branch information...
commit f0b524b93bf4a7a8f6eca216c277dc3c39a0a860 2 parents f8b52f7 + 46d51e0
@igrigorik igrigorik authored
View
3  Rakefile
@@ -2,5 +2,4 @@ require 'bundler'
Bundler::GemHelper.install_tasks
require 'rspec/core/rake_task'
-
-Rspec::Core::RakeTask.new(:spec)
+RSpec::Core::RakeTask.new(:spec)
View
33 lib/postrank-uri.rb
@@ -1,22 +1,16 @@
# -*- encoding: utf-8 -*-
require 'addressable/uri'
-require 'domainatrix'
require 'digest/md5'
require 'nokogiri'
+require 'public_suffix'
require 'yaml'
module Addressable
class URI
def domain
- begin
- dp = Domainatrix.parse(self)
- rescue
- return nil
- end
-
- dom = dp.public_suffix
- dom = dp.domain.downcase + "." + dom unless dp.domain.empty?
+ host = self.host
+ (host && PublicSuffix.valid?(host)) ? PublicSuffix.parse(host).domain : nil
end
def normalized_query
@@ -103,11 +97,10 @@ def extract(text)
return [] if !text
urls = []
text.to_s.scan(URIREGEX[:valid_url]) do |all, before, url, protocol, domain, path, query|
- begin
+ # Only extract the URL if the domain is valid
+ if PublicSuffix.valid?(domain)
url = clean(url)
- Domainatrix.parse(url)
urls.push url.to_s
- rescue NoMethodError
end
end
@@ -223,10 +216,18 @@ def parse(uri, opts = {})
end
def valid?(uri)
- Domainatrix.parse(uri)
- true
- rescue
- false
+ # URI is only valid if it is not nil, parses cleanly as a URI,
+ # and the domain has a recognized, valid TLD component
+ return false if uri.nil?
+
+ is_valid = false
+ cleaned_uri = clean(uri, :raw => true)
+
+ if host = cleaned_uri.host
+ is_valid = PublicSuffix.valid?(host)
+ end
+
+ is_valid
end
end
end
View
2  lib/postrank-uri/version.rb
@@ -1,5 +1,5 @@
module PostRank
module URI
- VERSION = "1.0.16"
+ VERSION = "1.0.17"
end
end
View
8 postrank-uri.gemspec
@@ -14,11 +14,11 @@ Gem::Specification.new do |s|
s.rubyforge_project = "postrank-uri"
- s.add_dependency "addressable", ">= 2.3.0"
- s.add_dependency "domainatrix"
- s.add_dependency "nokogiri"
+ s.add_dependency "addressable", "~> 2.3.0"
+ s.add_dependency "public_suffix", "~> 1.1.3"
+ s.add_dependency "nokogiri", "~> 1.5.5"
+
s.add_development_dependency "rspec"
- #s.add_development_dependency "idn" # test with idn
s.files = `git ls-files`.split("\n")
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
View
4 spec/postrank-uri_spec.rb
@@ -339,6 +339,10 @@ def e(text)
end
context 'valid?' do
+ it 'marks incomplete URI string as invalid' do
+ PostRank::URI.valid?('/path/page.html').should be_false
+ end
+
it 'marks www.test.c as invalid' do
PostRank::URI.valid?('http://www.test.c').should be_false
end
Please sign in to comment.
Something went wrong with that request. Please try again.