Follow changes in webrobots 0.0.6.

commit fc83848842ebc16a42904e3d9c27e1efe1c6702d (1 parent: d79dbb6)
authored by @knu
Showing with 30 additions and 5 deletions.
  1. +1 −1  Rakefile
  2. +29 −4 lib/mechanize.rb
Rakefile
@@ -12,7 +12,7 @@ Hoe.spec 'mechanize' do
   self.extra_rdoc_files += Dir['*.rdoc']
   self.extra_deps << ['nokogiri', '>= 1.2.1']
   self.extra_deps << ['net-http-persistent', '~> 1.1']
-  self.extra_deps << ['webrobots', '>= 0.0.5']
+  self.extra_deps << ['webrobots', '>= 0.0.6']
 end
 
 desc "Update SSL Certificate"
lib/mechanize.rb
@@ -488,9 +488,6 @@ def transact
   # site's robots.txt.
   def robots_allowed?(url)
     webrobots.allowed?(url)
-  rescue WebRobots::ParseError => e
-    log.info("error in parsing robots.txt for #{url}: #{e.message}") if log
-    return true
   end
 
   # Equivalent to !robots_allowed?(url).
@@ -498,12 +495,40 @@ def robots_disallowed?(url)
     !webrobots.allowed?(url)
   end
 
+  # Returns an error object if there is an error in fetching or
+  # parsing robots.txt of the site +url+.
+  def robots_error(url)
+    webrobots.error(url)
+  end
+
+  # Raises the error if there is an error in fetching or parsing
+  # robots.txt of the site +url+.
+  def robots_error!(url)
+    webrobots.error!(url)
+  end
+
+  # Removes robots.txt cache for the site +url+.
+  def robots_reset(url)
+    webrobots.reset(url)
+  end
+
   alias :page :current_page
 
   private
 
+  def webrobots_http_get(uri)
+    get_file(uri)
+  rescue Net::HTTPExceptions => e
+    case e.response
+    when Net::HTTPNotFound
+      ''
+    else
+      raise e
+    end
+  end
+
   def webrobots
-    @webrobots ||= WebRobots.new(@user_agent, :http_get => method(:get_file))
+    @webrobots ||= WebRobots.new(@user_agent, :http_get => method(:webrobots_http_get))
   end
 
   def resolve(url, referer = current_page())
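
For context, a minimal usage sketch of the public methods touched by this commit. It assumes a plain Mechanize agent; the URL, variable names, and the error handling around the calls are illustrative and not part of the change itself.

require 'mechanize'

agent = Mechanize.new
url = 'http://example.com/'  # hypothetical site, for illustration only

# Ask whether the site's robots.txt permits fetching this URL.
if agent.robots_allowed?(url)
  page = agent.get(url)
end

# As of webrobots 0.0.6, problems fetching or parsing robots.txt are no
# longer rescued inside robots_allowed?; they can be inspected (or raised)
# through the new delegators instead.
if (err = agent.robots_error(url))
  warn "robots.txt problem for #{url}: #{err.message}"
end

# agent.robots_error!(url)  # raise the recorded error, if any
# agent.robots_reset(url)   # drop the cached robots.txt for the site

Note the design choice in webrobots_http_get: a 404 for robots.txt is handed to WebRobots as an empty string (effectively an empty, allow-all robots.txt), while any other HTTP error is re-raised.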