Skip to content

Commit

Permalink
removing hpricot, updating history
Browse files Browse the repository at this point in the history
  • Loading branch information
tenderlove committed Dec 22, 2008
1 parent 3a3ef23 commit 5cb3f3b
Show file tree
Hide file tree
Showing 9 changed files with 61 additions and 113 deletions.
8 changes: 3 additions & 5 deletions GUIDE.txt
Expand Up @@ -115,10 +115,8 @@ tell it what file name you want to upload:
form.file_uploads.first.file_name = "somefile.jpg"

== Scraping Data
Mechanize uses hpricot[http://code.whytheluckystiff.net/hpricot/] to parse
Mechanize uses nokogiri[http://nokogiri.rubyforge.org/] to parse
html. What does this mean for you? You can treat a mechanize page like
an hpricot object. After you have used Mechanize to navigate to the page
that you need to scrape, then scrape it using hpricot methods:
a nokogiri object. After you have used Mechanize to navigate to the page
that you need to scrape, then scrape it using nokogiri methods:
agent.get('http://someurl.com/').search(".//p[@class='posted']")
For more information on this powerful scraper, take a look at
HpricotBasics[http://code.whytheluckystiff.net/hpricot/wiki/HpricotBasics]
10 changes: 10 additions & 0 deletions History.txt
@@ -1,5 +1,15 @@
= Mechanize CHANGELOG

=== 0.9.0

* Deprecations
* WWW::Mechanize::List is gone!
* Mechanize uses Nokogiri as its HTML parser but you may switch to
Hpricot by using WWW::Mechanize.html_parser = Hpricot

* Bug Fixes:
* Nil check on page when base tag is used #23021

=== 0.8.5

* Deprecations
Expand Down
2 changes: 1 addition & 1 deletion Rakefile
Expand Up @@ -9,7 +9,7 @@ HOE = Hoe.new('mechanize', WWW::Mechanize::VERSION) do |p|
p.developer('Aaron Patterson','aaronp@rubyforge.org')
p.developer('Mike Dalessio','mike.dalessio@gmail.com')
p.summary = "Mechanize provides automated web-browsing"
p.extra_deps = [['hpricot', '>= 0.5.0']]
p.extra_deps = [['nokogiri', '>= 1.0.7']]
end

desc "Update SSL Certificate"
Expand Down
5 changes: 2 additions & 3 deletions lib/www/mechanize.rb
Expand Up @@ -6,7 +6,7 @@
require 'stringio'
require 'digest/md5'
require 'fileutils'
require 'hpricot'
require 'nokogiri'
require 'forwardable'

require 'www/mechanize/util'
Expand All @@ -18,7 +18,6 @@
require 'www/mechanize/cookie'
require 'www/mechanize/cookie_jar'
require 'www/mechanize/history'
require 'www/mechanize/list'
require 'www/mechanize/form'
require 'www/mechanize/pluggable_parsers'
require 'www/mechanize/file_response'
Expand Down Expand Up @@ -87,7 +86,7 @@ class Mechanize

alias :follow_redirect? :redirect_ok

@html_parser = Hpricot
@html_parser = Nokogiri::HTML
class << self; attr_accessor :html_parser, :log end

def initialize
Expand Down
10 changes: 5 additions & 5 deletions lib/www/mechanize/form.rb
Expand Up @@ -232,11 +232,11 @@ def #{singular}_with criteria = {}

private
def parse
@fields = WWW::Mechanize::List.new
@buttons = WWW::Mechanize::List.new
@file_uploads = WWW::Mechanize::List.new
@radiobuttons = WWW::Mechanize::List.new
@checkboxes = WWW::Mechanize::List.new
@fields = []
@buttons = []
@file_uploads = []
@radiobuttons = []
@checkboxes = []

# Find all input tags
form_node.search('input').each do |node|
Expand Down
2 changes: 1 addition & 1 deletion lib/www/mechanize/form/multi_select_list.rb
Expand Up @@ -14,7 +14,7 @@ class MultiSelectList < Field

def initialize(name, node)
value = []
@options = WWW::Mechanize::List.new
@options = []

# parse
node.search('option').each do |n|
Expand Down
52 changes: 0 additions & 52 deletions lib/www/mechanize/list.rb

This file was deleted.

53 changes: 22 additions & 31 deletions lib/www/mechanize/page.rb
Expand Up @@ -86,56 +86,47 @@ def #{type}_with(criteria)
end

def links
@links ||= WWW::Mechanize::List.new(
%w{ a area }.map do |tag|
search(tag).map do |node|
Link.new(node, @mech, self)
end
end.flatten
)
@links ||= %w{ a area }.map do |tag|
search(tag).map do |node|
Link.new(node, @mech, self)
end
end.flatten
end

def forms
@forms ||= WWW::Mechanize::List.new(
search('form').map do |html_form|
form = Form.new(html_form, @mech, self)
form.action ||= @uri.to_s
form
end
)
@forms ||= search('form').map do |html_form|
form = Form.new(html_form, @mech, self)
form.action ||= @uri.to_s
form
end
end

def meta
@meta ||= WWW::Mechanize::List.new(
search('meta').map do |node|
next unless node['http-equiv'] && node['content']
(equiv, content) = node['http-equiv'], node['content']
if equiv && equiv.downcase == 'refresh'
if content && content =~ /^\d+\s*;\s*url\s*=\s*'?([^\s']+)/i
node['href'] = $1
Meta.new(node, @mech, self)
end
@meta ||= search('meta').map do |node|
next unless node['http-equiv'] && node['content']
(equiv, content) = node['http-equiv'], node['content']
if equiv && equiv.downcase == 'refresh'
if content && content =~ /^\d+\s*;\s*url\s*=\s*'?([^\s']+)/i
node['href'] = $1
Meta.new(node, @mech, self)
end
end.compact
)
end
end.compact
end

def bases
@bases ||= WWW::Mechanize::List.new(
@bases ||=
search('base').map { |node| Base.new(node, @mech, self) }
)
end

def frames
@frames ||= WWW::Mechanize::List.new(
@frames ||=
search('frame').map { |node| Frame.new(node, @mech, self) }
)
end

def iframes
@iframes ||= WWW::Mechanize::List.new(
@iframes ||=
search('iframe').map { |node| Frame.new(node, @mech, self) }
)
end
end
end
Expand Down

0 comments on commit 5cb3f3b

Please sign in to comment.