
Follow changes in webrobots 0.0.6.

1 parent d79dbb6 commit fc83848842ebc16a42904e3d9c27e1efe1c6702d @knu committed Jan 8, 2011
Showing with 30 additions and 5 deletions.
  1. +1 −1 Rakefile
  2. +29 −4 lib/mechanize.rb
Rakefile
@@ -12,7 +12,7 @@ Hoe.spec 'mechanize' do
self.extra_rdoc_files += Dir['*.rdoc']
self.extra_deps << ['nokogiri', '>= 1.2.1']
self.extra_deps << ['net-http-persistent', '~> 1.1']
- self.extra_deps << ['webrobots', '>= 0.0.5']
+ self.extra_deps << ['webrobots', '>= 0.0.6']
end
desc "Update SSL Certificate"
lib/mechanize.rb
@@ -488,22 +488,47 @@ def transact
# site's robots.txt.
def robots_allowed?(url)
webrobots.allowed?(url)
- rescue WebRobots::ParseError => e
- log.info("error in parsing robots.txt for #{url}: #{e.message}") if log
- return true
end
# Equivalent to !robots_allowed?(url).
def robots_disallowed?(url)
!webrobots.allowed?(url)
end
+ # Returns an error object if there is an error in fetching or
+ # parsing robots.txt of the site +url+.
+ def robots_error(url)
+ webrobots.error(url)
+ end
+
+ # Raises the error if there is an error in fetching or parsing
+ # robots.txt of the site +url+.
+ def robots_error!(url)
+ webrobots.error!(url)
+ end
+
+ # Removes robots.txt cache for the site +url+.
+ def robots_reset(url)
+ webrobots.reset(url)
+ end
+
alias :page :current_page
private
+ def webrobots_http_get(uri)
+ get_file(uri)
+ rescue Net::HTTPExceptions => e
+ case e.response
+ when Net::HTTPNotFound
+ ''
+ else
+ raise e
+ end
+ end
+
def webrobots
- @webrobots ||= WebRobots.new(@user_agent, :http_get => method(:get_file))
+ @webrobots ||= WebRobots.new(@user_agent, :http_get => method(:webrobots_http_get))
end
def resolve(url, referer = current_page())

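For context, here is a minimal usage sketch (not part of the commit) of the error-handling API this change exposes on Mechanize. The target URL is hypothetical, and the exact allow/deny result for a broken robots.txt is whatever webrobots 0.0.6 decides; the sketch only shows where robots_error, robots_error! and robots_reset fit in now that robots_allowed? no longer logs parse errors and treats them as allowed.

require 'mechanize'

agent = Mechanize.new
url   = 'http://example.com/some/page'   # hypothetical URL

# Ask whether the site's robots.txt allows fetching this URL; this also
# fetches and caches robots.txt for the site.
puts agent.robots_allowed?(url)

# Fetch/parse problems are now inspected explicitly instead of being
# logged and swallowed inside robots_allowed?:
if (err = agent.robots_error(url))        # nil when robots.txt was fine
  warn "robots.txt problem for #{url}: #{err.message}"
  agent.robots_reset(url)                 # drop the cached entry so it is re-fetched
end

agent.robots_error!(url)                  # same check, but raises the stored error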