Skip to content
This repository
Browse code

Follow changes in webrobots 0.0.6.

  • Loading branch information...
commit fc83848842ebc16a42904e3d9c27e1efe1c6702d 1 parent d79dbb6
Akinori MUSHA knu authored

Showing 2 changed files with 30 additions and 5 deletions.

  1. +1 −1  Rakefile
  2. +29 −4 lib/mechanize.rb
2  Rakefile
@@ -12,7 +12,7 @@ Hoe.spec 'mechanize' do
12 12 self.extra_rdoc_files += Dir['*.rdoc']
13 13 self.extra_deps << ['nokogiri', '>= 1.2.1']
14 14 self.extra_deps << ['net-http-persistent', '~> 1.1']
15   - self.extra_deps << ['webrobots', '>= 0.0.5']
  15 + self.extra_deps << ['webrobots', '>= 0.0.6']
16 16 end
17 17
18 18 desc "Update SSL Certificate"
33 lib/mechanize.rb
@@ -488,9 +488,6 @@ def transact
488 488 # site's robots.txt.
489 489 def robots_allowed?(url)
490 490 webrobots.allowed?(url)
491   - rescue WebRobots::ParseError => e
492   - log.info("error in parsing robots.txt for #{url}: #{e.message}") if log
493   - return true
494 491 end
495 492
496 493 # Equivalent to !robots_allowed?(url).
@@ -498,12 +495,40 @@ def robots_disallowed?(url)
498 495 !webrobots.allowed?(url)
499 496 end
500 497
  498 + # Returns an error object if there is an error in fetching or
  499 + # parsing robots.txt of the site +url+.
  500 + def robots_error(url)
  501 + webrobots.error(url)
  502 + end
  503 +
  504 + # Raises the error if there is an error in fetching or parsing
  505 + # robots.txt of the site +url+.
  506 + def robots_error!(url)
  507 + webrobots.error!(url)
  508 + end
  509 +
  510 + # Removes robots.txt cache for the site +url+.
  511 + def robots_reset(url)
  512 + webrobots.reset(url)
  513 + end
  514 +
501 515 alias :page :current_page
502 516
503 517 private
504 518
  519 + def webrobots_http_get(uri)
  520 + get_file(uri)
  521 + rescue Net::HTTPExceptions => e
  522 + case e.response
  523 + when Net::HTTPNotFound
  524 + ''
  525 + else
  526 + raise e
  527 + end
  528 + end
  529 +
505 530 def webrobots
506   - @webrobots ||= WebRobots.new(@user_agent, :http_get => method(:get_file))
  531 + @webrobots ||= WebRobots.new(@user_agent, :http_get => method(:webrobots_http_get))
507 532 end
508 533
509 534 def resolve(url, referer = current_page())

0 comments on commit fc83848

Please sign in to comment.
Something went wrong with that request. Please try again.