Navigation Menu

Skip to content

Commit

Permalink
Merge branch 'master' of git@github.com:tenderlove/mechanize
Browse files Browse the repository at this point in the history
  • Loading branch information
tenderlove committed Jan 9, 2009
2 parents 07291e4 + 89bea42 commit fb5aec2
Show file tree
Hide file tree
Showing 6 changed files with 84 additions and 30 deletions.
1 change: 1 addition & 0 deletions lib/www/mechanize.rb
Expand Up @@ -9,6 +9,7 @@
require 'nokogiri'
require 'forwardable'
require 'iconv'
require 'nkf'

require 'www/mechanize/util'
require 'www/mechanize/content_type_error'
Expand Down
4 changes: 2 additions & 2 deletions lib/www/mechanize/chain/response_header_handler.rb
Expand Up @@ -26,8 +26,8 @@ def handle(ctx, params)
cache_obj[:keep_alive_options][k.intern] = v
end
end

if page.is_a?(Page) && page.body =~ /Set-Cookie/
body = Util.to_native_charset(page.body)
if page.is_a?(Page) && body =~ /Set-Cookie/
page.search('//meta[@http-equiv="Set-Cookie"]').each do |meta|
Cookie::parse(uri, meta['content']) { |c|
Mechanize.log.debug("saved cookie: #{c}") if Mechanize.log
Expand Down
41 changes: 33 additions & 8 deletions lib/www/mechanize/form.rb
Expand Up @@ -136,24 +136,48 @@ def click_button(button = buttons.first)
submit(button)
end

# Helper for build_query.
# Takes one field and returns its query_value pairs with both the name and
# the value passed through from_native_charset, i.e. converted into the
# charset expected by the receiving page.
def proc_query(field)
  field.query_value.map do |pair|
    key, value = pair
    [from_native_charset(key), from_native_charset(value)]
  end
end
private :proc_query

# Hands +str+ to Util.from_native_charset, using +enc+ when given and the
# page's encoding otherwise. When the form has no page, +str+ is returned
# unchanged (even if +enc+ was supplied).
def from_native_charset(str, enc=nil)
  return str unless page

  Util.from_native_charset(str, enc || page.encoding)
end
private :from_native_charset

# This method builds an array of arrays that represent the query
# parameters to be used with this form. The return value can then
# be used to create a query string for this form.
def build_query(buttons = [])
query = []

fields().each do |f|
query.push(*f.query_value)
qval = proc_query(f)
query.push(*qval)
end

checkboxes().each do |f|
query.push(*f.query_value) if f.checked
if f.checked
qval = proc_query(f)
query.push(*qval)
end
end

radio_groups = {}
radiobuttons().each do |f|
radio_groups[f.name] ||= []
radio_groups[f.name] << f
fname = from_native_charset(f.name)
radio_groups[fname] ||= []
radio_groups[fname] << f
end

# take one radio button from each group
Expand All @@ -162,16 +186,17 @@ def build_query(buttons = [])

if checked.size == 1
f = checked.first
query.push(*f.query_value)
qval = proc_query(f)
query.push(*qval)
elsif checked.size > 1
raise "multiple radiobuttons are checked in the same group!"
end
end

@clicked_buttons.each { |b|
query.push(*b.query_value)
qval = proc_query(b)
query.push(*qval)
}

query
end

Expand All @@ -196,7 +221,7 @@ def request_data
params.collect { |p| "--#{boundary}\r\n#{p}" }.join('') +
"--#{boundary}--\r\n"
else
WWW::Mechanize::Util.build_query_string(query_params, page.encoding)
WWW::Mechanize::Util.build_query_string(query_params)
end
end

Expand Down
19 changes: 4 additions & 15 deletions lib/www/mechanize/page.rb
Expand Up @@ -3,15 +3,9 @@
require 'www/mechanize/page/base'
require 'www/mechanize/page/frame'
require 'www/mechanize/headers'
require 'nkf'

module WWW
class Mechanize
CODE_DIC = {
:JIS => "ISO-2022-JP",
:EUC => "EUC-JP",
:SJIS => "SHIFT_JIS",
:UTF8 => "UTF-8", :UTF16 => "UTF-16", :UTF32 => "UTF-32"}
# = Synopsis
# This class encapsulates an HTML page. If Mechanize finds a content
# type of 'text/html', this class will be instantiated and returned.
Expand All @@ -31,6 +25,7 @@ class Page < WWW::Mechanize::File

def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
super(uri, response, body, code)
@encoding = Util.detect_charset(body)
@mech ||= mech

raise Mechanize::ContentTypeError.new(response['content-type']) unless
Expand All @@ -50,17 +45,11 @@ def parser

if body && response
html_body = body.length > 0 ? body : '<html></html>'
tmp = NKF.guess(html_body)
if RUBY_VERSION >= "1.9.0"
enc = tmp.to_s.upcase
if @parser == Nokogiri::HTML
@parser = Mechanize.html_parser.parse(html_body, nil, @encoding)
else
enc = NKF.constants.find{|c|
NKF.const_get(c) == tmp
}
enc = CODE_DIC[enc.intern]
@parser = Mechanize.html_parser.parse(html_body)
end
@encoding = enc || "UTF-8"
@parser = Mechanize.html_parser.parse(html_body, nil, @encoding)
end

@parser
Expand Down
48 changes: 44 additions & 4 deletions lib/www/mechanize/util.rb
@@ -1,20 +1,46 @@
module WWW
class Mechanize
class Util
# Maps NKF constant names (as symbols) to charset name strings.
# detect_charset looks names up here on Ruby versions older than 1.9.
CODE_DIC = {
:JIS => "ISO-2022-JP",
:EUC => "EUC-JP",
:SJIS => "SHIFT_JIS",
:UTF8 => "UTF-8", :UTF16 => "UTF-16", :UTF32 => "UTF-32"}

class << self
# Builds a query string from +parameters+, which is iterated as
# key/value pairs. Pairs whose key is nil or false are dropped. Each
# remaining key and value is escaped with URI.escape and joined with "=",
# and the resulting pairs are joined with "&".
#
# When +enc+ is given, key and value are first passed through
# Iconv.iconv(enc, "UTF-8", ...).
def build_query_string(parameters, enc=nil)
parameters.map { |k,v|
if k
# NOTE(review): to_native_charset in this file calls .join("") on the
# result of Iconv.iconv, which suggests it returns an array; here the
# result is only passed to to_s before escaping -- confirm this yields
# the intended text.
if enc
k = Iconv.iconv(enc, "UTF-8", k.to_s)
v = Iconv.iconv(enc, "UTF-8", v.to_s)
end
# WEBrick::HTTP.escape* has some problems about m17n on ruby-1.9.*.
[URI.escape(k.to_s), URI.escape(v.to_s)].join("=")
=begin
[WEBrick::HTTPUtils.escape_form(k.to_s),
WEBrick::HTTPUtils.escape_form(v.to_s)].join("=")
=end

end
}.compact.join('&')
end

# Converts +s+ to UTF-8 with Iconv, treating it as encoded in +code+
# (or in whatever detect_charset reports when +code+ is omitted).
# The conversion only happens when Mechanize.html_parser is
# Nokogiri::HTML; with any other parser +s+ is returned as given.
# Returns nil when +s+ is nil/false and the Nokogiri parser is in use.
def to_native_charset(s, code=nil)
  return s unless Mechanize.html_parser == Nokogiri::HTML
  return unless s

  source_charset = code || detect_charset(s)
  Iconv.iconv("UTF-8", source_charset, s).join("")
end

# Converts +s+ from UTF-8 into the charset +code+ with Iconv.
# The conversion only happens when Mechanize.html_parser is
# Nokogiri::HTML; with any other parser +s+ is returned as given.
# Returns nil when +s+ is nil/false and the Nokogiri parser is in use.
def from_native_charset(s, code)
  using_nokogiri = Mechanize.html_parser == Nokogiri::HTML
  return s unless using_nokogiri
  return unless s

  converted = Iconv.iconv(code, "UTF-8", s)
  converted.join("")
end

def html_unescape(s)
return s unless s
s.gsub(/&(\w+|#[0-9]+);/) { |match|
Expand All @@ -28,6 +54,20 @@ def html_unescape(s)
number ? ([number].pack('U') rescue match) : match
}
end

# Guesses the charset of +src+ with NKF.guess and returns its name as a
# String. A nil +src+ is guessed as if it were "<html></html>".
#
# On Ruby 1.9 and later the guess is stringified and upcased. On older
# versions the NKF constant whose value equals the guess is looked up and
# translated through CODE_DIC. "ASCII" is returned when no name results.
def detect_charset(src)
  guessed = NKF.guess(src || "<html></html>")
  enc =
    if RUBY_VERSION >= "1.9.0"
      guessed.to_s.upcase
    else
      const_name = NKF.constants.find { |c| NKF.const_get(c) == guessed }
      CODE_DIC[const_name.intern]
    end
  enc || "ASCII"
end

end
end
end
Expand Down
1 change: 0 additions & 1 deletion test/test_forms.rb
Expand Up @@ -447,7 +447,6 @@ def test_get_with_param_in_action
}.checked = true
get_form.checkboxes.find { |f| f.name == "likes ham" }.checked = true
page = @agent.submit(get_form, get_form.buttons.first)

# Check that the submitted fields exist
assert_equal(3, page.links.size, "Not enough links")
assert_not_nil(
Expand Down

0 comments on commit fb5aec2

Please sign in to comment.