Skip to content

Commit

Permalink
some tests
Browse files Browse the repository at this point in the history
  • Loading branch information
jeremybmerrill committed Feb 16, 2014
1 parent d76514e commit c73319b
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 5 deletions.
10 changes: 7 additions & 3 deletions lib/upton/scraper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -206,9 +206,13 @@ def scrape_to_tsv filename, &blk
end

# Combines this scraper with another into a brand-new scraper, leaving both
# operands untouched.
#
# other_scraper - an Upton::Scraper (or an instance of a subclass).
#
# Returns a new Scraper whose @indexes and @instance_urls hold this scraper's
# entries followed by other_scraper's.
# Raises ArgumentError if other_scraper is not an Upton::Scraper.
def +(other_scraper)
  unless other_scraper.is_a?(Upton::Scraper)
    raise ArgumentError, "#{other_scraper.class} can't be coerced into Upton::Scraper"
  end

  combined = Scraper.new
  # Array#+ builds new arrays, so neither operand's state is modified.
  [:@indexes, :@instance_urls].each do |ivar|
    merged = instance_variable_get(ivar) + other_scraper.instance_variable_get(ivar)
    combined.instance_variable_set(ivar, merged)
  end
  combined
end

protected
Expand Down
14 changes: 13 additions & 1 deletion spec/unit/pagination_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,31 @@

# Shared fixtures for the pagination examples: a base URL, the name of the
# query-string parameter carrying the page number, and two scrapers that
# differ only in their :paginated option.
let(:page_url){ 'http://www.propublica.org/search.php?q=test' }
let(:pagination_param){ 'page' }
# let(:unpaginated_scraper){ Upton::Scraper.index(@page_url, "a", {:paginated => false, :pagination_param => pagination_param})}
# NOTE(review): both scrapers below are built from @page_url (an instance
# variable), not the page_url fixture defined above. Unless a hook outside
# this excerpt assigns @page_url, it is nil here -- confirm whether page_url
# was intended.
let(:unpaginated_scraper){ Upton::Scraper.index(@page_url, "a", {:paginated => false, :pagination_param => pagination_param})}
let(:u){ Upton::Scraper.index(@page_url, "a", {:paginated => true, :pagination_param => pagination_param}) }

#obsolete, since get_index_pages controls whether next_index_page_url gets called
# it "should return an empty string by default" do
# expect(u.next_index_page_url(page_url, pagination_param, 1)).to be_empty
# end
# Registers a negative message expectation: the unpaginated scraper must not
# receive next_index_page_url.
# NOTE(review): nothing in this example calls into the scraper after the
# expectation is registered, so it looks like it passes without exercising
# any code -- presumably the index-fetching path should be invoked here;
# confirm against the scraper's API.
it "should not call next_index_page_url if paginated is false" do
unpaginated_scraper.should_not_receive(:next_index_page_url)
end

context "@paginated is true" do
# For a paginated scraper, the next index page URL is expected to be the base
# URL with "&<pagination_param>=<page number>" appended.
it "should use pagination_param to specify the current page in the query string" do
  expect(u.next_index_page_url(page_url, pagination_param, 2)).to eq "#{page_url}&#{pagination_param}=2"
end

# Placeholder: the body only calls `pending`, so no behaviour is checked yet.
it "should increment the page number by @pagination_interval" do
pending
end

# Placeholder: the body only calls `pending`, so no behaviour is checked yet.
it "should not make any requests when the pagination_index is greater than the set pagination_max_pages" do
pending
end


# outdated, since max_pages is tested in get_index_pages
# it "should return an empty string if pagination_index argument is greater than @pagination_max_pages" do
# u.pagination_max_pages = 10
Expand Down
15 changes: 14 additions & 1 deletion spec/upton_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -300,10 +300,23 @@
heds.should eql @headlines
end

# Adding two index-based scrapers should yield a scraper whose @indexes lists
# the first operand's entries followed by the second's.
it "should allow Scrapers to be added (indexes)" do
  u = Upton::Scraper.index("http://www.example1.com", '.link')
  w = Upton::Scraper.index("http://www.example2.com", '.link')
  new_scraper = u + w
  # Element 0 of each @indexes entry is expected to be the index URL.
  new_scraper.instance_variable_get(:@indexes).map{|a| a[0]}.should eql ["http://www.example1.com", "http://www.example2.com"]
end

# Adding two instance-based scrapers: the result is expected to have an empty
# @indexes and the operands' @instance_urls concatenated in order.
# The leading `pending` call marks this example as pending.
# NOTE(review): the URLs are handed to .instances as plain strings; if
# @instance_urls stores them unchanged, `a[0]` yields each string's first
# character rather than the URL -- verify before removing `pending`.
it "should allow Scrapers to be added (instances)" do
pending
u = Upton::Scraper.instances(["http://www.example1.com"])
w = Upton::Scraper.instances(["http://www.example2.com"])
new_scraper = u + w
new_scraper.instance_variable_get(:@indexes).should eql []
new_scraper.instance_variable_get(:@instance_urls).map{|a| a[0]}.should eql ["http://www.example1.com", "http://www.example2.com"]
end


# Stub Upton::Scraper.puts ahead of every example (presumably to keep the
# spec output free of the scraper's own printing).
before(:each) { Upton::Scraper.stub(:puts) }
Expand Down

0 comments on commit c73319b

Please sign in to comment.