Skip to content

Commit

Permalink
refactor Page.fetch and Anemone::HTTP
Browse files Browse the repository at this point in the history
  • Loading branch information
mislav committed Oct 1, 2009
1 parent d7eceaf commit b25cfb6
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 45 deletions.
47 changes: 22 additions & 25 deletions lib/anemone/http.rb
Expand Up @@ -2,42 +2,39 @@

module Anemone
class HTTP < Net::HTTP
# Maximum number of redirects to follow on each get_response
REDIRECTION_LIMIT = 5

#
# Retrieve an HTTP response for *url*, following redirects.
# Returns the response object, response code, and final URI location.
# Returns the response object, initial response code, and final URI location.
#
# NOTE(review): this listing appears to be a commit diff rendered without
# +/- markers, so statements from the version before the commit and the
# version after it are interleaved below (two `def self.get` lines, and both
# the `loc` and the `target` redirect handling). Read it as two overlapping
# versions rather than as one runnable method.
#
# url     - passed to get_response; must respond to +merge+, which is used
#           to resolve relative redirect targets.
# referer - optional; when truthy, its +to_s+ is sent as the 'Referer' header.
#
# The version ending in `[response, code, url]` returns a three-element
# Array: the last response received, the integer status code of the FIRST
# response (code is assigned before the loop and never updated), and the URL
# of the last request that was made.
#
def self.get(url, referer = nil)
response = get_response(url, referer)
code = Integer(response.code)
loc = url
def self.get(url, referer = nil)
# The header hash is built once here and the same hash is passed to every
# get_response call below, including each redirect hop.
headers = {}
headers['User-Agent'] = Anemone.options.user_agent
headers['Referer'] = referer.to_s if referer

response = get_response(url, headers)
code = response.code.to_i
limit = Anemone.options.redirect_limit

limit = REDIRECTION_LIMIT
# Keep following while the response is a Net::HTTPRedirection and hops
# remain. When the limit reaches zero the loop simply stops, so the value
# returned is the last redirect response itself; no error is raised.
while response.is_a?(Net::HTTPRedirection) and limit > 0
loc = URI(response['location'])
loc = url.merge(loc) if loc.relative?
response = get_response(loc, referer)
limit -= 1
# NOTE(review): response['location'] is used without a nil check;
# presumably URI() raises if a redirect arrives without a Location
# header -- confirm and decide whether that should end the loop instead.
target = URI(response['location'])
target = url.merge(target) if target.relative?
response = get_response(target, headers)

# Remember the URL just requested: the next relative Location is resolved
# against it, and it is what gets returned as the final URL.
url = target
limit -= 1
end

return response, code, loc
[response, code, url]
end

#
# Get an HTTPResponse for *url*, sending the appropriate User-Agent string
# Get an HTTPResponse for *url*
#
# NOTE(review): this listing appears to be a commit diff rendered without
# +/- markers; lines from the version taking +referer+ and from the version
# taking +headers+ are interleaved below.
#
# url     - must respond to +host+, +port+, +path+ and +query+.
# headers - Hash handed unchanged to http.get as the request headers
#           (defaults to an empty Hash).
#
# In the +headers+ version the value of http.get is the last expression of
# the block given to Net::HTTP.start.
#
def self.get_response(url, referer = nil)
full_path = url.query.nil? ? url.path : "#{url.path}?#{url.query}"
user_agent = Anemone.options.user_agent rescue nil

opts = {}
opts['User-Agent'] = user_agent if user_agent
opts['Referer'] = referer.to_s if referer

def self.get_response(url, headers = {})
# Only host and port are passed; the URL scheme is not consulted anywhere
# in this method. NOTE(review): presumably an https URL would therefore be
# requested without TLS -- confirm.
Net::HTTP.start(url.host, url.port) do |http|
return http.get(full_path, opts)
path = url.path
# NOTE(review): `<<` appends in place to the string returned by url.path.
# If that is the URI's own path string (URI::Generic appears to expose it
# through a plain reader -- confirm), this call changes +url+ itself, so a
# caller that later merges against or compares the same URI object would
# see "path?query" as its path. Building a new string, or using
# url.request_uri, would avoid touching the argument.
# NOTE(review): a URL with an empty path is passed to http.get as-is;
# verify whether "/" should be substituted.
path << '?' << url.query if url.query
http.get(path, headers)
end
end
end
Expand Down
30 changes: 13 additions & 17 deletions lib/anemone/page.rb
Expand Up @@ -31,26 +31,22 @@ class Page
#
# Create a new Page from the response of an HTTP request to *url*
#
# NOTE(review): this listing appears to be a commit diff rendered without
# +/- markers; the lines using +from_page+ and the lines using +parent_page+
# look like the versions before and after the commit, interleaved.
#
# url         - a URI, or any value accepted by Kernel#URI.
# parent_page - optional page this fetch originates from; it supplies the
#               referer (its +url+) and the depth (its +depth+ plus one).
#
# Builds a page from the response body, the status code returned by
# Anemone::HTTP.get, the response headers as a Hash, and +aka+ (the final URL
# when it differs from the requested one, otherwise nil).
def self.fetch(url, from_page = nil)
begin
url = URI(url) unless url.is_a?(URI)
def self.fetch(url, parent_page = nil)
url = URI(url) unless URI === url

if from_page
referer = from_page.url
depth = from_page.depth + 1
end

response, code, location = Anemone::HTTP.get(url, referer)
# Without a parent page neither local is assigned, so both referer and
# depth are nil when they are passed on below.
if parent_page
referer = parent_page.url
depth = parent_page.depth + 1
end

aka = nil
if !url.eql?(location)
aka = location
end
response, code, final_url = Anemone::HTTP.get(url, referer)
aka = final_url == url ? nil : final_url

# NOTE(review): the begin/rescue version falls back to Page.new(url) when
# anything raises a StandardError. The version ending in the bare `new(...)`
# call has no rescue, so an error raised by Anemone::HTTP.get propagates to
# the caller -- confirm that callers expect this.
return Page.new(url, response.body, code, response.to_hash, aka, referer, depth)
rescue
return Page.new(url)
end
new(url, response.body, code, response.to_hash, aka, referer, depth)
end

# Instance-level convenience: fetch *url* through the class-level fetch,
# passing this page as the parent, which is where the class method takes the
# referer and depth from.
def fetch(url)
self.class.fetch(url, self)
end

#
Expand Down
4 changes: 4 additions & 0 deletions spec/anemone_spec.rb
@@ -1,6 +1,10 @@
require 'spec_helper'

describe Anemone do

before(:all) do
Anemone::FakePage.new
end

it "should have a version" do
Anemone.const_defined?('VERSION').should == true
Expand Down
3 changes: 0 additions & 3 deletions spec/spec_helper.rb
@@ -1,6 +1,3 @@
require 'anemone'
SPEC_DOMAIN = 'http://www.example.com/'
require 'fakeweb_helper'

# default root page
# Anemone::FakePage.new

0 comments on commit b25cfb6

Please sign in to comment.