Skip to content

Commit

Permalink
Use net-http-persistent when speaking HTTP
Browse files Browse the repository at this point in the history
  • Loading branch information
drbrain committed May 22, 2010
1 parent 01fe820 commit 4d074f4
Show file tree
Hide file tree
Showing 19 changed files with 110 additions and 209 deletions.
1 change: 1 addition & 0 deletions Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ Hoe.spec 'mechanize' do
self.history_file = 'CHANGELOG.rdoc'
self.extra_rdoc_files += Dir['*.rdoc']
self.extra_deps << ['nokogiri', '>= 1.2.1']
self.extra_deps << ['net-http-persistent', '~> 1.1']
end

desc "Update SSL Certificate"
Expand Down
83 changes: 39 additions & 44 deletions lib/mechanize.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
require 'net/http'
require 'net/https'
require 'openssl'
require 'net/http/persistent'
require 'uri'
require 'webrick/httputils'
require 'zlib'
Expand Down Expand Up @@ -97,6 +97,8 @@ class Mechanize
# The HTML parser to be used when parsing documents
attr_accessor :html_parser

attr_reader :http # :nodoc:

attr_reader :history
attr_reader :pluggable_parser

Expand Down Expand Up @@ -145,19 +147,12 @@ def initialize
@auth_hash = {} # Keep track of urls for sending auth
@request_headers= {} # A hash of request headers to be used

# Proxy settings
@proxy_addr = nil
@proxy_pass = nil
@proxy_port = nil
@proxy_user = nil

@conditional_requests = true

@follow_meta_refresh = false
@redirection_limit = 20

# Connection Cache & Keep alive
@connection_cache = {}
@keep_alive_time = 300
@keep_alive = true

Expand All @@ -174,6 +169,7 @@ def initialize
@pre_connect_hook = Chain::PreConnectHook.new
@post_connect_hook = Chain::PostConnectHook.new

set_http
@html_parser = self.class.html_parser

yield self if block_given?
Expand All @@ -195,7 +191,14 @@ def post_connect_hooks
# Sets the proxy address, port, user, and password.
# +addr+ should be a host, with no "http://" (the scheme is prepended below).
# +user+ and +pass+ are optional and are only applied when given.
# Always returns nil.
def set_proxy(addr, port, user = nil, pass = nil)
# NOTE(review): this assignment looks like the pre-change side of the diff
# hunk, superseded by the URI-based lines below -- confirm against the
# committed file.
@proxy_addr, @proxy_port, @proxy_user, @proxy_pass = addr, port, user, pass
# Describe the proxy as a single URI object so it can be handed to set_http.
proxy = URI.parse "http://#{addr}"
proxy.port = port
proxy.user = user if user
proxy.password = pass if pass

# Rebuild the HTTP connection object with the proxy applied.
set_http proxy

nil
end

# Set the user agent for the Mechanize object.
Expand Down Expand Up @@ -459,6 +462,20 @@ def resolve(url, referer = current_page())
hash[:uri].to_s
end

# Creates a fresh Net::HTTP::Persistent connection named 'mechanize', stores
# it in @http, and copies the agent's keep-alive and SSL settings onto it.
#
# +proxy+ is passed straight through to Net::HTTP::Persistent.new; nil means
# no proxy argument is supplied.
#
# The client certificate and private key are only loaded when both @cert and
# @key are set; each is read from the file path it names, and @pass is handed
# to the key loader as the passphrase.
def set_http(proxy = nil)
  connection = Net::HTTP::Persistent.new('mechanize', proxy)
  @http = connection

  connection.keep_alive = @keep_alive_time
  connection.ca_file = @ca_file
  connection.verify_callback = @verify_callback

  if @cert && @key
    cert_pem = ::File.read(@cert)
    connection.certificate = OpenSSL::X509::Certificate.new(cert_pem)
    key_pem = ::File.read(@key)
    connection.private_key = OpenSSL::PKey::RSA.new(key_pem, @pass)
  end
end

def post_form(url, form, headers = {})
cur_page = form.page || current_page ||
Page.new( nil, {'content-type'=>'text/html'})
Expand Down Expand Up @@ -499,27 +516,18 @@ def fetch_page(params)
Chain::URIResolver.new(@scheme_handlers),
Chain::ParameterResolver.new,
Chain::RequestResolver.new,
Chain::ConnectionResolver.new(
@connection_cache,
@keep_alive,
@proxy_addr,
@proxy_port,
@proxy_user,
@proxy_pass
),
Chain::SSLResolver.new(@ca_file, @verify_callback, @cert, @key, @pass),
Chain::ConnectionResolver.new,
Chain::AuthHeaders.new(@auth_hash, @user, @password, @digest),
Chain::HeaderResolver.new(
@keep_alive,
@keep_alive_time,
@cookie_jar,
@user_agent,
@gzip_enabled,
@request_headers
),
Chain::CustomHeaders.new,
@pre_connect_hook,
])
], @http)

before_connect.handle(options)

uri = options[:uri]
Expand All @@ -534,11 +542,9 @@ def fetch_page(params)
request['If-Modified-Since'] = page.response['Last-Modified']
end if(@conditional_requests)

http_obj.mu_lock
# Specify timeouts if given
http_obj.open_timeout = @open_timeout if @open_timeout
http_obj.read_timeout = @read_timeout if @read_timeout
http_obj.start unless http_obj.started?

# Log specified headers for the request
log.info("#{ request.class }: #{ request.path }") if log
Expand All @@ -547,32 +553,20 @@ def fetch_page(params)
end if log

# Send the request
attempts = 0
begin
response = http_obj.request(request, *request_data) { |r|
connection_chain = Chain.new([
Chain::ResponseReader.new(r),
Chain::BodyDecodingHandler.new,
])
connection_chain.handle(options)
}
rescue EOFError, Errno::ECONNRESET, Errno::EPIPE => x
log.error("Rescuing EOF error") if log
http_obj.finish
raise x if attempts >= 2
request.body = nil
http_obj.start
attempts += 1
retry
end
response = http_obj.request(uri, request) { |r|
connection_chain = Chain.new([
Chain::ResponseReader.new(r),
Chain::BodyDecodingHandler.new,
])
connection_chain.handle(options)
}

after_connect = Chain.new([
@post_connect_hook,
Chain::ResponseBodyParser.new(@pluggable_parser, @watch_for_set),
Chain::ResponseHeaderHandler.new(@cookie_jar, @connection_cache),
Chain::ResponseHeaderHandler.new(@cookie_jar),
])
after_connect.handle(options)
http_obj.mu_unlock

res_klass = options[:res_klass]
response_body = options[:response_body]
Expand All @@ -595,6 +589,7 @@ def fetch_page(params)
end
sleep delay.to_f
end

if redirect_uri
@history.push(page, page.uri)
return fetch_page(
Expand Down
5 changes: 4 additions & 1 deletion lib/mechanize/chain.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@

class Mechanize
class Chain
def initialize(list)
attr_accessor :http

def initialize(list, http = nil)
@http = http
@list = list
@list.each { |l| l.chain = self }
end
Expand Down
61 changes: 6 additions & 55 deletions lib/mechanize/chain/connection_resolver.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,73 +3,24 @@ class Chain
class ConnectionResolver
include Mechanize::Handler

def initialize( connection_cache,
keep_alive,
proxy_addr,
proxy_port,
proxy_user,
proxy_pass )

@connection_cache = connection_cache
@keep_alive = keep_alive
@proxy_addr = proxy_addr
@proxy_port = proxy_port
@proxy_user = proxy_user
@proxy_pass = proxy_pass
end

def handle(ctx, params)
uri = params[:uri]
http_obj = nil

case uri.scheme.downcase
when 'http', 'https'
cache_obj = (@connection_cache["#{uri.host}:#{uri.port}"] ||= {
:connection => nil,
:keep_alive_options => {},
})
http_obj = cache_obj[:connection]
if http_obj.nil? || ! http_obj.started?
http_obj = cache_obj[:connection] =
Net::HTTP.new( uri.host,
uri.port,
@proxy_addr,
@proxy_port,
@proxy_user,
@proxy_pass
)
cache_obj[:keep_alive_options] = {}
end

# If we're keeping connections alive and the last request time is too
# long ago, stop the connection. Or, if the max requests left is 1,
# reset the connection.
if @keep_alive && http_obj.started?
opts = cache_obj[:keep_alive_options]
if((opts[:timeout] &&
Time.now.to_i - cache_obj[:last_request_time] > opts[:timeout].to_i) ||
opts[:max] && opts[:max].to_i == 1)

Mechanize.log.debug('Finishing stale connection') if Mechanize.log
http_obj.finish

end
end

cache_obj[:last_request_time] = Time.now.to_i
when 'file'
when 'http', 'https' then
http_obj = ctx.http
when 'file' then
http_obj = Object.new
class << http_obj
def started?; true; end
def request(request, *args, &block)
response = FileResponse.new(request.uri.path)
yield response
def request(uri, request)
yield FileResponse.new(uri.path)
end
end
end

http_obj.extend(Mutex_m)
params[:connection] = http_obj

super
end
end
Expand Down
11 changes: 1 addition & 10 deletions lib/mechanize/chain/header_resolver.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,7 @@ class Mechanize
class Chain
class HeaderResolver
include Mechanize::Handler
def initialize(keep_alive, keep_alive_time, cookie_jar, user_agent,
gzip_enabled, headers)
@keep_alive = keep_alive
@keep_alive_time = keep_alive_time
def initialize(cookie_jar, user_agent, gzip_enabled, headers)
@cookie_jar = cookie_jar
@user_agent = user_agent
@gzip_enabled = gzip_enabled
Expand All @@ -17,12 +14,6 @@ def handle(ctx, params)
referer = params[:referer]
request = params[:request]

if @keep_alive
request['Connection'] = 'keep-alive'
request['Keep-Alive'] = @keep_alive_time.to_s
else
request['Connection'] = 'close'
end
if @gzip_enabled
request['Accept-Encoding'] = 'gzip,identity'
else
Expand Down
4 changes: 2 additions & 2 deletions lib/mechanize/chain/parameter_resolver.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@ def handle(ctx, params)
uri = params[:uri]
case params[:verb]
when :head, :get, :delete, :trace
if parameters.length > 0
if parameters and parameters.length > 0
uri.query ||= ''
uri.query << '&' if uri.query.length > 0
uri.query << Util.build_query_string(parameters)
end
params[:params] = []
params[:params] = nil
end
super
end
Expand Down
1 change: 1 addition & 0 deletions lib/mechanize/chain/request_resolver.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ def handle(ctx, params)
if %w{ http https }.include?(uri.scheme.downcase)
klass = Net::HTTP.const_get(params[:verb].to_s.capitalize)
params[:request] ||= klass.new(uri.request_uri)
params[:request].body = params[:params].first if params[:params]
end

if %w{ file }.include?(uri.scheme.downcase)
Expand Down
10 changes: 4 additions & 6 deletions lib/mechanize/chain/response_body_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,15 @@ def handle(ctx, params)
end

# Find our pluggable parser
params[:page] = @pluggable_parser.parser(content_type).new(
uri,
response,
response_body,
response.code
) { |parser|
parser = @pluggable_parser.parser(content_type)
params[:page] = parser.new(uri, response, response_body,
response.code) { |parser|
parser.mech = params[:agent] if parser.respond_to? :mech=
if parser.respond_to?(:watch_for_set=) && @watch_for_set
parser.watch_for_set = @watch_for_set
end
}

super
end
end
Expand Down
16 changes: 1 addition & 15 deletions lib/mechanize/chain/response_header_handler.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,14 @@ class Chain
class ResponseHeaderHandler
include Mechanize::Handler

def initialize(cookie_jar, connection_cache)
def initialize(cookie_jar)
@cookie_jar = cookie_jar
@connection_cache = connection_cache
end

def handle(ctx, params)
response = params[:response]
uri = params[:uri]
page = params[:page]
cache_obj = (@connection_cache["#{uri.host}:#{uri.port}"] ||= {
:connection => nil,
:keep_alive_options => {},
})

# If the server sends back keep alive options, save them
if keep_alive_info = response['keep-alive']
keep_alive_info.split(/,\s*/).each do |option|
k, v = option.split(/\=/)
cache_obj[:keep_alive_options] ||= {}
cache_obj[:keep_alive_options][k.intern] = v
end
end

if page.is_a?(Page) && page.body =~ /Set-Cookie/n
page.search('//head/meta[@http-equiv="Set-Cookie"]').each do |meta|
Expand Down
2 changes: 0 additions & 2 deletions test/chain/test_header_resolver.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@ class TestHeaderResolver < Test::Unit::TestCase
def setup
@chain = Mechanize::Chain.new([
Mechanize::Chain::HeaderResolver.new(
true,
300,
Mechanize::CookieJar.new,
'foobar',
true,
Expand Down
2 changes: 1 addition & 1 deletion test/chain/test_parameter_resolver.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def test_handle_get
v.handle(hash)
}
assert_equal('q=hello', hash[:uri].query)
assert_equal([], hash[:params])
assert_nil(hash[:params])
end

def test_handle_post
Expand Down
Loading

0 comments on commit 4d074f4

Please sign in to comment.