Skip to content

Commit

Permalink
Merge b74479a into 6e74516
Browse files Browse the repository at this point in the history
  • Loading branch information
yolile committed Oct 15, 2020
2 parents 6e74516 + b74479a commit 90ca634
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 0 deletions.
21 changes: 21 additions & 0 deletions kingfisher_scrapy/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,27 @@ def process_item(self, item, spider):
return item


class Sample:
    """
    Item pipeline that restricts a sample crawl to a single item.

    If the spider's ``sample`` attribute is falsy, every item passes through unchanged. Otherwise, the first item
    passes through, and each later item is dropped after the engine is asked to close the spider.
    """

    def __init__(self):
        # Number of items let through so far while sampling.
        self.item_count = 0

    def open_spider(self, spider):
        """
        Set the downloader's ``total_concurrency`` to 1 if the spider is sampling.
        """
        if not spider.sample:
            return
        spider.crawler.engine.downloader.total_concurrency = 1

    def process_item(self, item, spider):
        """
        Return the item, unless the spider is sampling and an item was already let through.

        In that case, ask the engine to close the spider (with the reason 'closespider_itemcount') and raise
        ``DropItem``.
        """
        if spider.sample:
            if self.item_count < 1:
                # Still within the sample: count the item before letting it through.
                self.item_count += 1
            else:
                spider.crawler.engine.close_spider(spider, 'closespider_itemcount')
                raise DropItem
        return item


class Pluck:
def __init__(self):
self.processed = set()
Expand Down
1 change: 1 addition & 0 deletions kingfisher_scrapy/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
# Maps each pipeline's import path to an integer. NOTE(review): per the Scrapy documentation linked above, pipelines
# run in ascending order of these values, so Sample (200) would run before Validate (300) and Pluck (301) - confirm.
ITEM_PIPELINES = {
'kingfisher_scrapy.pipelines.Sample': 200,
'kingfisher_scrapy.pipelines.Validate': 300,
'kingfisher_scrapy.pipelines.Pluck': 301,
}
Expand Down

0 comments on commit 90ca634

Please sign in to comment.