Skip to content

Commit

Permalink
Merge pull request #511 from open-contracting/493-nicaragua-solid-waste
Browse files Browse the repository at this point in the history
Add Nicaragua Solid Waste spider
  • Loading branch information
yolile committed Oct 6, 2020
2 parents 8288383 + ea9716f commit f4e6d7b
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 4 deletions.
13 changes: 9 additions & 4 deletions kingfisher_scrapy/base_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ class BaseSpider(scrapy.Spider):
ocds_version = '1.1'
date_format = 'date'

# Set the `date_required` class attribute to True to always set the `from_date` and `until_date` parameters.
# If `date_required` is True, the `default_from_date` class attribute must be set too.
date_required = False

def __init__(self, sample=None, note=None, from_date=None, until_date=None, crawl_time=None,
keep_collection_open=None, package_pointer=None, release_pointer=None, truncate=None, *args,
**kwargs):
Expand Down Expand Up @@ -126,10 +130,11 @@ def from_crawler(cls, crawler, *args, **kwargs):
except ValueError as e:
raise SpiderArgumentError('spider argument crawl_time: invalid date value: {}'.format(e))

if spider.from_date or spider.until_date:
# If either `from_date` or `until_date` is set, then `from_date` defaults to the `default_from_date` class
# attribute and `until_date` defaults to the `get_default_until_date()` return value (now, by default). In
# other words, spiders that support `from_date` and `until_date` filters need to set `default_from_date`.
if spider.from_date or spider.until_date or spider.date_required:
# If either `from_date`, `until_date` or `date_required` is set, then `from_date` defaults to the
# `default_from_date` class attribute and `until_date` defaults to the `get_default_until_date()` return
# value (now, by default). In other words, spiders that support `from_date` and `until_date` filters need
# to set `default_from_date`.
if not spider.from_date:
spider.from_date = spider.default_from_date
try:
Expand Down
32 changes: 32 additions & 0 deletions kingfisher_scrapy/spiders/nicaragua_solid_waste.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from kingfisher_scrapy.base_spider import SimpleSpider
from kingfisher_scrapy.util import components


class NicaraguaSolidWaste(SimpleSpider):
    """
    Spider arguments
      sample
        Download only data released on 2013-01-23. ``from_date`` and ``until_date`` are ignored.
      from_date
        Download only data from this date onward (YYYY-MM-DD format). Defaults to '2000-01-01'.
      until_date
        Download only data until this date (YYYY-MM-DD format). Defaults to today.
    """
    name = 'nicaragua_solid_waste'
    data_type = 'release_package'
    default_from_date = '2000-01-01'
    # The API URL always needs a start and an end date, so the date defaults are applied even when neither
    # `from_date` nor `until_date` is passed as a spider argument.
    date_required = True
    # The two placeholders are the start and end dates, each formatted as YYYYMMDD.
    url = 'http://www.gekoware.com/swmp/api/ocds/{}/{}'

    def start_requests(self):
        """
        Yield the single request for the release package covering the selected date range.

        In sample mode the range is fixed to one day in 2013; otherwise it spans ``from_date`` to ``until_date``.
        """
        if self.sample:
            # A single-day range, to get one release from 2013.
            start = end = '20130123'
        else:
            # NOTE(review): assumes `from_date` and `until_date` were already parsed into date/datetime objects
            # by the base spider before this method runs - confirm against `BaseSpider.from_crawler`.
            start = self.from_date.strftime("%Y%m%d")
            end = self.until_date.strftime("%Y%m%d")

        # The URL looks like http://www.gekoware.com/swmp/api/ocds/20190101/20201005
        # NOTE(review): `components(-2)` presumably builds the file name from the last two URL path components
        # (the two dates) - confirm against `kingfisher_scrapy.util.components`.
        yield self.build_request(self.url.format(start, end), formatter=components(-2))

0 comments on commit f4e6d7b

Please sign in to comment.