Permalink
Browse files

initial import and new case for handling Forbidden responses.

  • Loading branch information...
1 parent 9e5dd25 commit d252dfc2473385143e0272cf5a2d8bb8da3755eb Walter McGinnis committed Jun 4, 2010
Showing with 181 additions and 18 deletions.
  1. +2 −1 LICENSE → MIT-LICENSE
  2. +40 −0 README
  3. +0 −17 README.rdoc
  4. +138 −0 lib/http_url_validation_improved.rb
  5. +1 −0 rails/init.rb
View
@@ -1,4 +1,5 @@
-Copyright (c) 2009 Walter McGinnis
+Copyright (c) 2006 Cloves Carneiro Jr (and others, such as
+Katipo Communications, Ltd. and Erik Gregg)
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
View
40 README
@@ -0,0 +1,40 @@
+HTTP URL Validation Plugin Improved
+by Erik Gregg, Walter McGinnis, Kieran Pilkington
+
+This work is based on Erik's work, but mainly refined for the Kete application (http://kete.net.nz).
+
+Inspired by HTTP URL Validation Plugin by C. Carneiro Jr.
+========================
+
+HTTP URL Validation Improved is a Rails gem that allows you to validate a URL
+entered in a form. It validates if the URL exists by hitting it with a HEAD
+request.
+
+The improved version includes retries for common patterns when the head request is refused before giving a failure notice.
+
+It also includes the SITE_URL constant, if your application defines one, in the User-Agent header sent with each request.
+
+There's also the option to check that the URL returns content of
+a specified type. Here’s how you can use it in your model:
+
+Check for content type:
+ validates_http_url :url, :content_type => "text/html"
+
+Do not check for content type, just make sure the site is accessible:
+ validates_http_url :website
+
+Make sure there is a DNS entry for a domain
+ validates_http_domain :domain
+# Domain must be in 'www.site.com' or 'site.com' form. No http://, no path.
+
+This example will make sure the value entered for the URL field points to a
+publicly accessible HTML page, and the photo field points to an image:
+ validates_http_url :image_url, :content_type => "image"
+# :content_type checks for a matching substring, so any image will validate
+
+========================
+
+Bug reports and feedback are always welcome.
+
+Please report them via http://github.com/kete/http_url_validation_improved
+
View
@@ -1,17 +0,0 @@
-= http_url_validation_improved
-
-Description goes here.
-
-== Note on Patches/Pull Requests
-
-* Fork the project.
-* Make your feature addition or bug fix.
-* Add tests for it. This is important so I don't break it in a
- future version unintentionally.
-* Commit, do not mess with rakefile, version, or history.
- (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
-* Send me a pull request. Bonus points for topic branches.
-
-== Copyright
-
-Copyright (c) 2010 Walter McGinnis. See LICENSE for details.
@@ -0,0 +1,138 @@
+require 'net/http'
+require 'uri'
+require 'socket'
+
module ActiveRecord
  module Validations
    module ClassMethods

      # Validates that each named attribute holds a well-formed http(s) URL
      # that answers with an acceptable HTTP status.
      #
      # The URL is first probed with a HEAD request. When the server answers
      # 405, 500 or 403 the check is repeated as a GET, and for 403 once more
      # as a GET without the custom User-Agent header, before an error is
      # recorded.
      #
      # Options (trailing Hash):
      #   :content_type              - String or Regexp that must be found in the
      #                                response's Content-Type header
      #   :message_not_accessible    - error text when the URL cannot be fetched
      #   :message_wrong_content     - error text when the Content-Type differs
      #   :message_moved_permanently - error text for a persistent 301
      #   :message_url_format        - error text for a malformed URL
      #
      # Blank values are skipped; combine with validates_presence_of when the
      # attribute is mandatory.
      def validates_http_url(*attr_names)
        configuration = {
          :message_not_accessible => "is not accessible when we tried the link",
          :message_wrong_content => "is not of the appropriate content type",
          :message_moved_permanently => "has moved permanently",
          :message_url_format => "is not formatted correctly. (Missing 'http://'?)"
        }
        configuration.update(attr_names.pop) if attr_names.last.is_a?(Hash)

        # Bound here so the block does not depend on what `self` is when the
        # framework eventually invokes it.
        check = method(:http_url_validation_failure)

        validates_each(attr_names, configuration) do |record, attr_name, value|
          # Ignore blank URLs, these can be validated with validates_presence_of
          next if value.nil? || value.empty?

          failure = check.call(value, configuration)
          record.errors.add(attr_name, failure) if failure
        end
      end

      # Validates that each named attribute holds a domain name that resolves,
      # either as given or with a "www." prefix. The value must be a bare host
      # name (no scheme, no path).
      #
      # Blank values are skipped; combine with validates_presence_of when the
      # attribute is mandatory.
      def validates_http_domain(*attr_names)
        validates_each(attr_names) do |record, attr_name, value|
          next if value.nil? || value.empty?

          # One successful lookup is enough.
          resolvable = [value, "www." + value].any? do |host|
            begin
              Socket.getaddrinfo(host, nil)
              true
            rescue SocketError
              false
            end
          end
          record.errors.add(attr_name, "cannot be resolved.") unless resolvable
        end
      end

      private

      # Runs the format and accessibility checks for a single URL.
      # Returns nil when the URL passes, otherwise the error message to record.
      def http_url_validation_failure(value, configuration)
        # \A and \z anchor the whole string; ^ and $ would accept a valid-looking
        # line embedded in a multi-line value.
        url_format = /\A(http|https):\/\/[a-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}(:[0-9]{1,5})?(\/.*)?\z/ix

        # Comment out as you need to
        allowed_codes = [
          Net::HTTPMovedPermanently,
          Net::HTTPOK,
          Net::HTTPCreated,
          Net::HTTPAccepted,
          Net::HTTPNonAuthoritativeInformation,
          Net::HTTPPartialContent,
          Net::HTTPFound,
          Net::HTTPTemporaryRedirect,
          Net::HTTPSeeOther
        ]

        moved_retry = false   # already retried with a trailing "/"
        use_get = false       # fall back from HEAD to GET
        send_headers = true   # send the custom User-Agent header

        loop do
          return configuration[:message_url_format] unless value =~ url_format

          begin
            response = http_url_validation_response(URI.parse(value), use_get, send_headers)
          rescue URI::InvalidURIError
            return configuration[:message_url_format]
          rescue StandardError, Timeout::Error
            # DNS failure, refused connection, TLS failure, timeout: the URL
            # is well formed, we simply could not reach it.
            return configuration[:message_not_accessible]
          end

          if allowed_codes.include?(response.class)
            wanted = configuration[:content_type]
            # A missing Content-Type header counts as a mismatch, not a crash.
            return nil if wanted.nil? || response['content-type'].to_s.index(wanted)
            return configuration[:message_wrong_content]
          end

          case response
          when Net::HTTPMovedPermanently
            # Only reached when 301 has been removed from allowed_codes.
            return configuration[:message_moved_permanently] if moved_retry
            moved_retry = true
            value += "/" # In case webserver is just adding a /
          when Net::HTTPMethodNotAllowed, Net::HTTPInternalServerError
            if use_get
              if response.is_a?(Net::HTTPInternalServerError)
                return configuration[:message_not_accessible]+". The site link in question has had a problem. Please raise the issue with them and let them know that requests to the link break when coming from the automatic link checking mechanism on this site."
              end
              return configuration[:message_not_accessible]+" (GET method not allowed)"
            end
            # Retry with a GET
            use_get = true
          when Net::HTTPForbidden
            # handle requests where particular variants are forbidden
            if !use_get
              # try a full GET request first (rather than just HEAD)
              use_get = true
            elsif send_headers
              # try again without headers (some sites refuse custom headers)
              send_headers = false
            else
              return configuration[:message_not_accessible] + ". The website says the URL is Forbidden."
            end
          else
            # Just plain non-accessible
            return configuration[:message_not_accessible]+". This is what the website in question returned to us: "+response.class.to_s
          end
        end
      end

      # Performs one HEAD (or GET when use_get is true) request against +url+,
      # a parsed URI, and returns the Net::HTTPResponse. The User-Agent header
      # is omitted only for a GET with send_headers set to false.
      def http_url_validation_response(url, use_get, send_headers)
        # url.port honours an explicit port and otherwise falls back to the
        # scheme default (80 / 443).
        http = Net::HTTP.new(url.host, url.port)
        if url.scheme == 'https'
          http.use_ssl = true
          # NOTE(review): certificate verification is disabled, so any
          # certificate is accepted. Kept as-is because this is only a
          # reachability probe; confirm that is acceptable for your use.
          http.verify_mode = OpenSSL::SSL::VERIFY_NONE
        end
        headers = Object.const_defined?('SITE_URL') ? { "User-Agent" => "#{SITE_URL} link checking mechanism via Ruby Net/HTTP" } : { "User-Agent" => "Ruby Net/HTTp used for link checking mechanism" }

        # request_uri keeps the query string and yields "/" for an empty path.
        target = url.request_uri
        return http.request_head(target, headers) unless use_get

        if send_headers
          http.request_get(target, headers) {|r|}
        else
          http.request_get(target) {|r|}
        end
      end
    end
  end
end
View
@@ -0,0 +1 @@
+require 'http_url_validation_improved'

0 comments on commit d252dfc

Please sign in to comment.