
Commit fc83848
Follow changes in webrobots 0.0.6.
knu committed Jan 8, 2011
1 parent d79dbb6 commit fc83848
Showing 2 changed files with 30 additions and 5 deletions.
Rakefile: 2 changes (1 addition, 1 deletion)
@@ -12,7 +12,7 @@ Hoe.spec 'mechanize' do
   self.extra_rdoc_files += Dir['*.rdoc']
   self.extra_deps << ['nokogiri', '>= 1.2.1']
   self.extra_deps << ['net-http-persistent', '~> 1.1']
-  self.extra_deps << ['webrobots', '>= 0.0.5']
+  self.extra_deps << ['webrobots', '>= 0.0.6']
 end
 
 desc "Update SSL Certificate"
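
(Aside, not part of the commit: the Rakefile mixes two constraint styles, and
they behave differently. A quick check with rubygems' own API:)

require 'rubygems'

# '>= 0.0.6' sets a floor only; '~> 1.1' also caps the release series (< 2.0).
Gem::Requirement.new('>= 0.0.6').satisfied_by?(Gem::Version.new('0.1.0')) # => true
Gem::Requirement.new('~> 1.1').satisfied_by?(Gem::Version.new('1.9'))     # => true
Gem::Requirement.new('~> 1.1').satisfied_by?(Gem::Version.new('2.0'))     # => false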
lib/mechanize.rb: 33 changes (29 additions, 4 deletions)
@@ -488,22 +488,47 @@ def transact
   # site's robots.txt.
   def robots_allowed?(url)
     webrobots.allowed?(url)
+  rescue WebRobots::ParseError => e
+    log.info("error in parsing robots.txt for #{url}: #{e.message}") if log
+    return true
   end
 
   # Equivalent to !robots_allowed?(url).
   def robots_disallowed?(url)
     !webrobots.allowed?(url)
   end
 
+  # Returns an error object if there is an error in fetching or
+  # parsing robots.txt of the site +url+.
+  def robots_error(url)
+    webrobots.error(url)
+  end
+
+  # Raises the error if there is an error in fetching or parsing
+  # robots.txt of the site +url+.
+  def robots_error!(url)
+    webrobots.error!(url)
+  end
+
+  # Removes robots.txt cache for the site +url+.
+  def robots_reset(url)
+    webrobots.reset(url)
+  end
+
   alias :page :current_page
 
   private
 
+  def webrobots_http_get(uri)
+    get_file(uri)
+  rescue Net::HTTPExceptions => e
+    case e.response
+    when Net::HTTPNotFound
+      ''
+    else
+      raise e
+    end
+  end
+
   def webrobots
-    @webrobots ||= WebRobots.new(@user_agent, :http_get => method(:get_file))
+    @webrobots ||= WebRobots.new(@user_agent, :http_get => method(:webrobots_http_get))
   end
 
   def resolve(url, referer = current_page())
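
For orientation (not part of the commit), a minimal sketch of how the new
public robots API might be used; the URL is a placeholder and the behavior
follows the method comments above:

require 'mechanize'

agent = Mechanize.new
url   = 'http://example.com/some/page'  # placeholder URL

# robots_allowed? consults the site's robots.txt; a WebRobots::ParseError
# is now logged and treated as "allowed" instead of propagating.
page = agent.get(url) if agent.robots_allowed?(url)

# Inspect, or raise, any error recorded while fetching/parsing robots.txt.
if (error = agent.robots_error(url))
  warn "robots.txt problem: #{error.message}"
end
# agent.robots_error!(url)  # same check, but raises instead of returning

# Drop the cached robots.txt so the next check re-fetches it.
agent.robots_reset(url)

Note that webrobots_http_get treats a 404 for robots.txt as an empty (i.e.
allow-everything) file rather than an error, while other HTTP exceptions
still propagate to webrobots.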
