diff --git a/kingfisher_scrapy/base_spider.py b/kingfisher_scrapy/base_spider.py index e966864d3..0ee87526e 100644 --- a/kingfisher_scrapy/base_spider.py +++ b/kingfisher_scrapy/base_spider.py @@ -76,6 +76,8 @@ class BaseSpider(scrapy.Spider): ocds_version = '1.1' date_format = 'date' + # Set the `date_required` class attribute to True to always set the `from_date` and `until_date` spider arguments. + date_required = False def __init__(self, sample=None, note=None, from_date=None, until_date=None, crawl_time=None, keep_collection_open=None, package_pointer=None, release_pointer=None, truncate=None, qs=None, *args, @@ -96,6 +98,7 @@ def __init__(self, sample=None, note=None, from_date=None, until_date=None, craw self.qs = qs self.date_format = self.VALID_DATE_FORMATS[self.date_format] + self.date_required = self.date_required self.pluck = bool(package_pointer or release_pointer) if self.qs and hasattr(self, 'start_requests'): @@ -129,14 +132,14 @@ def from_crawler(cls, crawler, *args, **kwargs): except ValueError as e: raise SpiderArgumentError('spider argument crawl_time: invalid date value: {}'.format(e)) - # Checks Spider date ranges arguments - if spider.from_date or spider.until_date: + # Check the spider's date range arguments and the `date_required` class attribute. + if spider.from_date or spider.until_date or spider.date_required: if not spider.from_date: # Default to `default_from_date` class attribute. spider.from_date = spider.default_from_date try: if isinstance(spider.from_date, str): - # convert to date format, if needed + # Convert to date format, if needed. 
spider.from_date = datetime.strptime(spider.from_date, spider.date_format) except ValueError as e: raise SpiderArgumentError('spider argument from_date: invalid date value: {}'.format(e)) diff --git a/kingfisher_scrapy/spiders/nicaragua_solid_waste.py b/kingfisher_scrapy/spiders/nicaragua_solid_waste.py new file mode 100644 index 000000000..e2bd97fc9 --- /dev/null +++ b/kingfisher_scrapy/spiders/nicaragua_solid_waste.py @@ -0,0 +1,32 @@ +from kingfisher_scrapy.base_spider import SimpleSpider +from kingfisher_scrapy.util import components + + +class NicaraguaSolidWaste(SimpleSpider): + """ + Spider arguments + sample + Download only data released on 2013-01-23 + from_date + Download only data from this date onward (YYYY-MM-DD format). + Defaults to '2000-01-01'. + until_date + Download only data until this date (YYYY-MM-DD format). + Defaults to today. + """ + name = 'nicaragua_solid_waste' + data_type = 'release_package' + default_from_date = '2000-01-01' + date_required = True + url = 'http://www.gekoware.com/swmp/api/ocds/{}/{}' + + def start_requests(self): + url = self.url + if self.sample: + # Use a fixed date to get one release from 2013. + url = url.format('20130123', '20130123') + else: + # Use the `from_date` and `until_date` spider arguments. + url = url.format(self.from_date.strftime("%Y%m%d"), self.until_date.strftime("%Y%m%d")) + # The URL looks like http://www.gekoware.com/swmp/api/ocds/20190101/20201005 + yield self.build_request(url, formatter=components(-2))