some tests

propublica · Feb 16, 2014 · c73319b
1 parent d76514e
commit c73319b
Show file tree

Hide file tree

Showing 3 changed files with 34 additions and 5 deletions.
diff --git a/lib/upton/scraper.rb b/lib/upton/scraper.rb
@@ -206,9 +206,13 @@ def scrape_to_tsv filename, &blk
     end
 
     def +(other_scraper)
-      raise ArgumentError, "#{other_scraper.class} can't be coerced into Upton::Scraper" unless other_scraper.class < Upton::Scraper
-      @indexes += other_scraper.instance_variable_get(:@indexes)
-      @instances += other_scraper.instance_variable_get(:@instance_urls)
+      raise ArgumentError, "#{other_scraper.class} can't be coerced into Upton::Scraper" unless other_scraper.class <= Upton::Scraper
+      new_scraper = Scraper.new
+      new_indexes = @indexes + other_scraper.instance_variable_get(:@indexes)
+      new_instances = @instance_urls + other_scraper.instance_variable_get(:@instance_urls)
+      new_scraper.instance_variable_set(:@indexes, new_indexes)
+      new_scraper.instance_variable_set(:@instance_urls, new_instances)
+      new_scraper
     end
 
     protected

diff --git a/spec/unit/pagination_spec.rb b/spec/unit/pagination_spec.rb
@@ -8,19 +8,31 @@
 
         let(:page_url){ 'http://www.propublica.org/search.php?q=test' }
         let(:pagination_param){ 'page' }
-        # let(:unpaginated_scraper){ Upton::Scraper.index(@page_url, "a", {:paginated => false, :pagination_param => pagination_param})}
+        let(:unpaginated_scraper){ Upton::Scraper.index(@page_url, "a", {:paginated => false, :pagination_param => pagination_param})}
         let(:u){ Upton::Scraper.index(@page_url, "a", {:paginated => true, :pagination_param => pagination_param}) }
 
         #obsolete, since get_index_pages controls whether next_index_page_url gets called
         # it "should return an empty string by default" do
         #   expect(u.next_index_page_url(page_url, pagination_param, 1)).to be_empty
         # end
+        it "should not call next_index_page_url if paginated is false" do
+          unpaginated_scraper.should_not_receive(:next_index_page_url)
+        end
 
         context "@paginated is true" do
           it "should use use pagination_param to specify the current page in the query string" do
             expect(u.next_index_page_url(page_url, pagination_param, 2)).to eq "#{page_url}&#{pagination_param}=2"
           end
 
+          it "should increment the page number by @pagination_interval" do
+            pending
+          end
+
+          it "should not make any requests when the pagination_index is greater than the set pagination_max_pages" do
+            pending
+          end
+
+
           # outdated, since max_pages is tested in get_index_pages
           # it "should return an empty string if pagination_index argument is greater than @pagination_max_pages" do
           #   u.pagination_max_pages = 10

diff --git a/spec/upton_spec.rb b/spec/upton_spec.rb
@@ -300,10 +300,23 @@
     heds.should eql @headlines
   end
 
-  it "should allow Scrapers to be added" do
+  it "should allow Scrapers to be added (indexes)" do
+    u = Upton::Scraper.index("http://www.example1.com", '.link')
+    w = Upton::Scraper.index("http://www.example2.com", '.link')
+    new_scraper = u + w
+    new_scraper.instance_variable_get(:@indexes).map{|a| a[0]}.should eql ["http://www.example1.com", "http://www.example2.com"]
+  end
+
+  it "should allow Scrapers to be added (instances)" do
     pending
+    u = Upton::Scraper.instances(["http://www.example1.com"])
+    w = Upton::Scraper.instances(["http://www.example2.com"])
+    new_scraper = u + w
+    new_scraper.instance_variable_get(:@indexes).should eql []
+    new_scraper.instance_variable_get(:@instance_urls).map{|a| a[0]}.should eql ["http://www.example1.com", "http://www.example2.com"]
   end
 
+
   before do
     Upton::Scraper.stub(:puts)
   end