Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

nokogiri may be dropped in as an html replacement

  • Loading branch information...
commit c166682640dfc546e4a9cbaae98a47e962165519 1 parent 570db7e
@tenderlove tenderlove authored
View
2  GUIDE.txt
@@ -119,6 +119,6 @@ Mechanize uses hpricot[http://code.whytheluckystiff.net/hpricot/] to parse
html. What does this mean for you? You can treat a mechanize page like
an hpricot object. After you have used Mechanize to navigate to the page
that you need to scrape, then scrape it using hpricot methods:
- agent.get('http://someurl.com/').search("//p[@class='posted']")
+ agent.get('http://someurl.com/').search(".//p[@class='posted']")
For more information on this powerful scraper, take a look at
HpricotBasics[http://code.whytheluckystiff.net/hpricot/wiki/HpricotBasics]
View
2  History.txt
@@ -8,6 +8,8 @@
* Fixing strange uri escaping problems [#22604]
* Making content-type determination more robust. (thanks Han Holl!)
* Dealing with links that are query string only. [#22402]
+ * Nokogiri may be dropped in as a replacement.
+ WWW::Mechanize.html_parser = Nokogiri::HTML
=== 0.8.5
View
8 lib/www/mechanize/form.rb
@@ -239,7 +239,7 @@ def parse
@checkboxes = WWW::Mechanize::List.new
# Find all input tags
- form_node.search('//input').each do |node|
+ form_node.search('input').each do |node|
type = (node['type'] || 'text').downcase
name = node['name']
next if name.nil? && !(type == 'submit' || type =='button')
@@ -262,13 +262,13 @@ def parse
end
# Find all textarea tags
- form_node.search('//textarea').each do |node|
+ form_node.search('textarea').each do |node|
next if node['name'].nil?
@fields << Field.new(node['name'], node.inner_text)
end
# Find all select tags
- form_node.search('//select').each do |node|
+ form_node.search('select').each do |node|
next if node['name'].nil?
if node.has_attribute? 'multiple'
@fields << MultiSelectList.new(node['name'], node)
@@ -279,7 +279,7 @@ def parse
# Find all submit button tags
# FIXME: what can I do with the reset buttons?
- form_node.search('//button').each do |node|
+ form_node.search('button').each do |node|
type = (node['type'] || 'submit').downcase
next if type == 'reset'
@buttons << Button.new(node['name'], node['value'])
View
2  lib/www/mechanize/form/multi_select_list.rb
@@ -17,7 +17,7 @@ def initialize(name, node)
@options = WWW::Mechanize::List.new
# parse
- node.search('//option').each do |n|
+ node.search('option').each do |n|
option = Option.new(n, self)
@options << option
end
View
25 lib/www/mechanize/page.rb
@@ -33,13 +33,20 @@ def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
end
def title
- @title ||= if parser && search('//title').inner_text.length > 0
- search('//title').inner_text
+ @title ||= if parser && search('title').inner_text.length > 0
+ search('title').inner_text
end
end
def parser
- @parser ||= body && response ? Mechanize.html_parser.parse(body) : nil
+ return @parser if @parser
+
+ if body && response
+ html_body = body.length > 0 ? body : '<html></html>'
+ @parser = Mechanize.html_parser.parse(html_body)
+ end
+
+ @parser
end
alias :root :parser
@@ -80,7 +87,7 @@ def #{type}_with(criteria)
def links
@links ||= WWW::Mechanize::List.new(
- %w{ //a //area }.map do |tag|
+ %w{ a area }.map do |tag|
search(tag).map do |node|
Link.new(node, @mech, self)
end
@@ -90,7 +97,7 @@ def links
def forms
@forms ||= WWW::Mechanize::List.new(
- search('//form').map do |html_form|
+ search('form').map do |html_form|
form = Form.new(html_form, @mech, self)
form.action ||= @uri.to_s
form
@@ -100,7 +107,7 @@ def forms
def meta
@meta ||= WWW::Mechanize::List.new(
- search('//meta').map do |node|
+ search('meta').map do |node|
next unless node['http-equiv'] && node['content']
(equiv, content) = node['http-equiv'], node['content']
if equiv && equiv.downcase == 'refresh'
@@ -115,19 +122,19 @@ def meta
def bases
@bases ||= WWW::Mechanize::List.new(
- search('//base').map { |node| Base.new(node, @mech, self) }
+ search('base').map { |node| Base.new(node, @mech, self) }
)
end
def frames
@frames ||= WWW::Mechanize::List.new(
- search('//frame').map { |node| Frame.new(node, @mech, self) }
+ search('frame').map { |node| Frame.new(node, @mech, self) }
)
end
def iframes
@iframes ||= WWW::Mechanize::List.new(
- search('//iframe').map { |node| Frame.new(node, @mech, self) }
+ search('iframe').map { |node| Frame.new(node, @mech, self) }
)
end
end
View
4 lib/www/mechanize/page/link.rb
@@ -27,9 +27,9 @@ def initialize(node, mech, page)
@attributes = node
# If there is no text, try to find an image and use its alt text
- if (@text.nil? || @text.length == 0) && node.search('//img').length > 0
+ if (@text.nil? || @text.length == 0) && node.search('img').length > 0
@text = ''
- node.search('//img').each do |e|
+ node.search('img').each do |e|
@text << ( e['alt'] || '')
end
end
View
2  test/test_encoded_links.rb
@@ -14,7 +14,7 @@ def test_click_link
end
def test_hpricot_link
- page = @agent.click(@page.search('//a').first)
+ page = @agent.click(@page.search('a').first)
assert_equal("http://localhost/form_post?a=b&b=c", page.uri.to_s)
end
end
Please sign in to comment.
Something went wrong with that request. Please try again.