forked from sparklemotion/mechanize
/
page.rb
139 lines (120 loc) · 3.79 KB
/
page.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
require 'www/mechanize/page/link'
require 'www/mechanize/page/meta'
require 'www/mechanize/page/base'
require 'www/mechanize/page/frame'
require 'www/mechanize/headers'
module WWW
class Mechanize
# = Synopsis
# This class encapsulates an HTML page. If Mechanize finds a content
# type of 'text/html', this class will be instantiated and returned.
#
# == Example
# require 'rubygems'
# require 'mechanize'
#
# agent = WWW::Mechanize.new
# agent.get('http://google.com/').class #=> WWW::Mechanize::Page
#
class Page < WWW::Mechanize::File
extend Forwardable
attr_accessor :mech
attr_reader :encoding
# Builds a Page from a fetched response.
#
# +uri+, +response+, +body+ and +code+ are passed unchanged to the
# superclass initializer. +mech+ is stored in @mech unless @mech already
# holds a value. The charset reported by Util.detect_charset(body) is kept
# in @encoding and exposed through #encoding.
#
# Raises Mechanize::ContentTypeError (built from the offending header
# value) unless the response's 'content-type' header starts with
# text/html or application/xhtml+xml.
def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
  super(uri, response, body, code)
  @encoding = Util.detect_charset(body)
  @mech ||= mech

  media_type = response['content-type']
  # The alternation is grouped so the start-of-string anchor covers BOTH
  # media types; ungrouped, "application/xhtml+xml" was accepted anywhere
  # inside the header. Media types are case-insensitive, hence /i.
  unless media_type =~ /\A(?:text\/html|application\/xhtml\+xml)/i
    raise Mechanize::ContentTypeError.new(media_type)
  end

  # Lazily built caches; each is filled by the reader of the same name.
  @parser = @links = @forms = @meta = @bases = @frames = @iframes = nil
end
# Returns the text of the page's <title> element.
#
# Returns nil when no parser is available or when the title text is empty.
# A non-empty result is memoized in @title; the document is searched only
# once per lookup.
def title
  @title ||= begin
    text = parser && search('title').inner_text
    text if text && text.length > 0
  end
end
# Returns the parsed document for this page, building it on first use and
# caching it in @parser. Also available as #root.
#
# Nothing is parsed (and the current, falsy @parser is returned) unless both
# +body+ and +response+ are present. An empty body is parsed as
# '<html></html>'.
#
# When the configured Mechanize.html_parser is Nokogiri::HTML the detected
# @encoding is passed along as the third argument; any other parser only
# receives the markup.
def parser
  return @parser if @parser
  return @parser unless body && response

  html_body = body.length > 0 ? body : '<html></html>'
  # Compare the configured parser, not @parser: @parser is always unset at
  # this point, so testing it made the encoding-aware branch unreachable.
  @parser =
    if Mechanize.html_parser == Nokogiri::HTML
      Mechanize.html_parser.parse(html_body, nil, @encoding)
    else
      Mechanize.html_parser.parse(html_body)
    end
end
alias :root :parser
# Returns the value of the response's 'content-type' header.
def content_type
  header_name = 'content-type'
  response[header_name]
end
# Document queries are forwarded to the parsed document returned by #parser:
# #search, #/ and #at behave exactly like the parser's methods of the same
# name.
def_delegators :parser, :search, :/, :at
# Generates the finder methods for each element collection on the page
# (forms, links, bases, frames and iframes):
#
# * <type>s_with(criteria) - every element matching +criteria+
# * <type>_with(criteria)  - the first matching element, or nil
# * <type>                 - alias of <type>_with
#
# +criteria+ is a Hash of attribute name => expected value. Each expected
# value is compared with === against the result of sending the attribute
# name to the element. A bare String is shorthand for :name => string.
# When a block is given the result is yielded to it before being returned.
#
# Example:
#   page.form_with(:action => '/post/login.php') do |f|
#     ...
#   end
[:form, :link, :base, :frame, :iframe].each do |kind|
  eval(<<-finders)
    def #{kind}s_with(criteria)
      criteria = {:name => criteria} if criteria.is_a?(String)
      matches = #{kind}s.select do |candidate|
        criteria.all? do |attribute, expected|
          expected === candidate.send(attribute)
        end
      end
      yield matches if block_given?
      matches
    end

    def #{kind}_with(criteria)
      match = #{kind}s_with(criteria).first
      yield match if block_given?
      match
    end

    alias :#{kind} :#{kind}_with
  finders
end
# Returns a Link for every <a> element followed by every <area> element on
# the page. The list is built once and cached in @links.
def links
  return @links if @links

  @links = ['a', 'area'].inject([]) do |collected, tag|
    collected.concat(search(tag).map { |element| Link.new(element, @mech, self) })
  end
end
# Returns a Form for every <form> element on the page. A form that has no
# action is given this page's URI (as a string) as its action. The list is
# built once and cached in @forms.
def forms
  return @forms if @forms

  built = []
  search('form').each do |element|
    form = Form.new(element, @mech, self)
    form.action = @uri.to_s unless form.action
    built << form
  end
  @forms = built
end
# Returns a Meta for every <meta http-equiv="refresh"> element whose
# content has the form "<digits>; url=<target>". The list is built once and
# cached in @meta.
#
# The target may be bare or wrapped in single or double quotes; the quotes
# are not part of the extracted URL. The extracted URL is written to the
# node's 'href' attribute before the Meta is built. Elements lacking
# either attribute, with a different http-equiv, or without a URL are
# skipped.
def meta
  @meta ||= search('meta').map do |node|
    equiv, content = node['http-equiv'], node['content']
    next unless equiv && content
    next unless equiv.downcase == 'refresh'
    # \A (rather than the per-line ^) pins the match to the start of the
    # attribute value; leading whitespace is tolerated. Both quote styles
    # are stripped so url="..." no longer leaks the quotes into the href.
    next unless content =~ /\A\s*\d+\s*;\s*url\s*=\s*["']?([^\s"']+)/i

    node['href'] = $1
    Meta.new(node, @mech, self)
  end.compact
end
# Returns a Base for every <base> element on the page. The list is built
# once and cached in @bases.
def bases
  return @bases if @bases

  @bases = search('base').map do |element|
    Base.new(element, @mech, self)
  end
end
# Returns a Frame for every <frame> element on the page. The list is built
# once and cached in @frames.
def frames
  return @frames if @frames

  frame_nodes = search('frame')
  @frames = frame_nodes.map { |element| Frame.new(element, @mech, self) }
end
# Returns a Frame for every <iframe> element on the page. The list is built
# once and cached in @iframes.
def iframes
  return @iframes if @iframes

  @iframes = search('iframe').map do |element|
    Frame.new(element, @mech, self)
  end
end
end
end
end