Skip to content

Commit

Permalink
implemented queue partitioned on host
Browse files Browse the repository at this point in the history
  • Loading branch information
sqs committed Aug 13, 2008
1 parent 1bf667f commit 6cd00b8
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 17 deletions.
1 change: 1 addition & 0 deletions lib/rider.rb
Expand Up @@ -5,6 +5,7 @@
require 'mechanize'

require 'rider/queue'
require 'rider/part_queue'
require 'rider/crawler'

$KCODE = 'u'
Expand Down
64 changes: 64 additions & 0 deletions lib/rider/part_queue.rb
@@ -0,0 +1,64 @@
module Rider
  # In-memory URL queue that partitions its contents by host and hands URLs
  # back round-robin across hosts, so consecutive pops hit different hosts
  # whenever more than one host has URLs pending.
  #
  # URLs belonging to the same host are returned in the order they were pushed.
  class HostPartitionedQueue
    attr_reader :name

    # @param name [String] label for this queue; used in log messages
    def initialize(name)
      @name = name
      clear
    end

    # Adds +url+ to the end of the bucket for its host. A host seen for the
    # first time is appended to the end of the rotation.
    #
    # @param url [String] URL to enqueue; parsed with URI.parse to find its host
    #   (strings without a host component all share the +nil+ bucket)
    # @return [true]
    def push(url)
      host = get_host(url)
      # @hosts and @urls_by_host always hold the same set of hosts, so the
      # hash lookup replaces a linear scan of @hosts.
      unless @urls_by_host.key?(host)
        @hosts << host
        @urls_by_host[host] = []
      end
      @urls_by_host[host] << url
      true
    end

    # Removes and returns the next URL, taking one URL from the current host
    # and then advancing the rotation to the following host.
    #
    # @return [String, nil] the next URL, or nil when the queue is empty
    def pop
      if empty?
        Rider.log.debug("Q #{name} POP nil")
        return nil
      end

      host = @hosts[@current_host_index]
      pending = @urls_by_host[host]
      url = pending.shift

      if pending.empty?
        @hosts.delete_at(@current_host_index)
        @urls_by_host.delete(host)
        # Deleting from @hosts slid the next host into the current slot, so the
        # index already points at it. The exception is when the deleted host
        # occupied the last slot: the index is now past the end and the
        # rotation must restart at the first host (index 0).
        @current_host_index = 0 if @current_host_index >= @hosts.length
      else
        increment_current_host_index
      end

      url
    end

    # Drops every queued URL and resets the rotation to its initial state.
    def clear
      @urls_by_host = {}
      @hosts = []
      @current_host_index = 0
    end

    # @return [Boolean] true when no URLs are queued
    def empty?
      @hosts.empty?
    end

    private

    # Extracts the host component used as the partition key.
    def get_host(url)
      URI.parse(url).host
    end

    # Moves the rotation to the next host, wrapping back to the first host
    # after the last one. With no hosts left the index is reset to 0.
    def increment_current_host_index
      if @hosts.empty?
        @current_host_index = 0
      else
        @current_host_index = (@current_host_index + 1) % @hosts.length
      end
    end
  end
end
21 changes: 21 additions & 0 deletions spec/part_queue_spec.rb
@@ -0,0 +1,21 @@
require 'spec/spec_helper'
require 'spec/queue_spec'

describe Rider::HostPartitionedQueue do
  # Runs the generic push/pop expectations shared by every queue implementation.
  it_should_behave_like "queue"

  before do
    @q = Rider::HostPartitionedQueue.new('test')
  end

  it "should alternate among hosts when popping" do
    # Two URLs on example.com, one each on example.net and localhost.
    pushed = [
      'http://example.com/path1',
      'http://example.com/path2',
      'http://example.net/',
      'http://localhost/path'
    ]
    pushed.each { |url| @q.push(url) }

    # One URL per host per round; the second example.com URL comes last.
    popped = Array.new(4) { @q.pop }
    popped.should == [
      'http://example.com/path1',
      'http://example.net/',
      'http://localhost/path',
      'http://example.com/path2'
    ]
  end

  it "should return the same host if only one distinct host exists" do
    same_host_urls = [
      'http://example.com/path1',
      'http://example.com/path2',
      'http://example.com/path3'
    ]
    same_host_urls.each { |url| @q.push(url) }

    # With a single host the queue degrades to plain FIFO order.
    popped = Array.new(3) { @q.pop }
    popped.should == same_host_urls
  end
end
35 changes: 18 additions & 17 deletions spec/queue_spec.rb
@@ -1,16 +1,6 @@
require 'spec/spec_helper'

describe Rider::Queue do

before do
@q = Rider::Queue.new('tmp/colors.q')
@q.clear
end

after do
@q.clear
end

shared_examples_for "queue" do
it "must not have a blank or nil name" do
lambda { Rider::Queue.new(nil) }.should raise_error(ArgumentError)
lambda { Rider::Queue.new('') }.should raise_error(ArgumentError)
Expand All @@ -26,17 +16,28 @@
@q.pop.should == 'blue'
end

it "should push then pop multiple items" do
%w(red green orange).each { |color| @q.push(color) }
puts "POP x 3"
[@q.pop, @q.pop, @q.pop].should == %w(red green orange)
end

describe "when empty" do
it "should return nil if popped" do
@q.pop.should == nil
end
end

it "should not clobber the queue upon initialization"
end

describe Rider::Queue do
  # Start every example from an empty queue file.
  before do
    @q = Rider::Queue.new('tmp/colors.q')
    @q.clear
  end

  # Leave nothing behind for the next example.
  after do
    @q.clear
  end

  it "should push then pop multiple items" do
    %w(red green orange).each { |color| @q.push(color) }
    # Items come back in the order they were pushed.
    [@q.pop, @q.pop, @q.pop].should == %w(red green orange)
  end
end

0 comments on commit 6cd00b8

Please sign in to comment.