Skip to content

Commit

Permalink
introduce URI::Generic#path_with_query
Browse files Browse the repository at this point in the history
  • Loading branch information
mislav committed Oct 1, 2009
1 parent d5bec8b commit 69d1813
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 8 deletions.
2 changes: 1 addition & 1 deletion lib/anemone/cli/url_list.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@

anemone.on_every_page do |page|
if options.relative
puts page.url.path
puts page.url.path_with_query
else
puts page.url
end
Expand Down
16 changes: 13 additions & 3 deletions lib/anemone/core.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
require 'net/http'
require 'uri'
require 'thread'
require 'anemone/tentacle'
require 'anemone/page_hash'
Expand All @@ -13,15 +13,19 @@ class Core
# and optional *block*
#
def initialize(urls, &block)
@urls = [urls].flatten.map{ |url| URI(url) if url.is_a?(String) }
@urls.each{ |url| url.path = '/' if url.path.empty? }
@urls = Array(urls).map do |url|
url = URI(url) if String === url
url.path = '/' if url.path.empty?
url
end

@tentacles = []
@pages = PageHash.new
@on_every_page_blocks = []
@on_pages_like_blocks = Hash.new { |hash,key| hash[key] = [] }
@skip_link_patterns = []
@after_crawl_blocks = []
@focus_crawl_block = nil

if Anemone.options.obey_robots_txt
@robots = Robots.new(Anemone.options.user_agent)
Expand Down Expand Up @@ -218,3 +222,9 @@ def skip_link?(link)

end
end

URI::Generic.class_eval do
  # Returns the path of this URI followed by "?<query>" when a query
  # component is present, e.g. "/foo?bar=baz" for "http://host/foo?bar=baz".
  #
  # The result is always a newly allocated String, so callers may append to
  # it without altering the URI's own path.
  #
  # Opaque URIs (for example "mailto:" or "urn:" links) have a nil path;
  # the path is treated as empty in that case instead of raising
  # NoMethodError.
  def path_with_query
    # Interpolation and #dup both allocate a new String; nil.to_s is "".
    query ? "#{path}?#{query}" : path.to_s.dup
  end
end
4 changes: 1 addition & 3 deletions lib/anemone/http.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,7 @@ def self.get(url, referer = nil)
#
def self.get_response(url, headers = {})
Net::HTTP.start(url.host, url.port) do |http|
path = url.path
path << '?' << url.query if url.query
http.get(path, headers)
http.get(url.path_with_query, headers)
end
end
end
Expand Down
2 changes: 1 addition & 1 deletion lib/anemone/page_hash.rb
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def uniq
page_added = page.aliases.inject(false) { |r, a| r ||= results.has_key? a}
if !page.redirect? and !page_added
results[url] = page.clone
results[url].aliases = []
results[url].aliases.clear
end
end

Expand Down
7 changes: 7 additions & 0 deletions spec/page_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -88,5 +88,12 @@ module Anemone
]
end

it "should include query string when fetching" do
  # Build the URI of a fake page whose name carries a query string,
  # fetch it, and check that the query survives on the fetched page's URL.
  target = URI(FakePage.new('foo?bar=baz').url)
  fetched = Page.fetch(target)

  fetched.url.should == target
  fetched.url.path_with_query.should == '/foo?bar=baz'
end

end
end

0 comments on commit 69d1813

Please sign in to comment.