Skip to content

Commit

Permalink
Merge 37d4763 into 156e5a6
Browse files Browse the repository at this point in the history
  • Loading branch information
aguilerapy committed Oct 5, 2020
2 parents 156e5a6 + 37d4763 commit b137438
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 3 deletions.
9 changes: 6 additions & 3 deletions kingfisher_scrapy/base_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ class BaseSpider(scrapy.Spider):

ocds_version = '1.1'
date_format = 'date'
# Set the `date_required` class attribute to True to always set the `from_date` and `until_date` spider arguments.
date_required = False

def __init__(self, sample=None, note=None, from_date=None, until_date=None, crawl_time=None,
keep_collection_open=None, package_pointer=None, release_pointer=None, truncate=None, qs=None, *args,
Expand All @@ -96,6 +98,7 @@ def __init__(self, sample=None, note=None, from_date=None, until_date=None, craw
self.qs = qs

self.date_format = self.VALID_DATE_FORMATS[self.date_format]
self.date_required = self.date_required
self.pluck = bool(package_pointer or release_pointer)

if self.qs and hasattr(self, 'start_requests'):
Expand Down Expand Up @@ -129,14 +132,14 @@ def from_crawler(cls, crawler, *args, **kwargs):
except ValueError as e:
raise SpiderArgumentError('spider argument crawl_time: invalid date value: {}'.format(e))

# Checks Spider date ranges arguments
if spider.from_date or spider.until_date:
# Checks Spider date ranges arguments and `date_required` class attribute.
if spider.from_date or spider.until_date or spider.date_required:
if not spider.from_date:
# Default to `default_from_date` class attribute.
spider.from_date = spider.default_from_date
try:
if isinstance(spider.from_date, str):
# convert to date format, if needed
# Convert to date format, if needed.
spider.from_date = datetime.strptime(spider.from_date, spider.date_format)
except ValueError as e:
raise SpiderArgumentError('spider argument from_date: invalid date value: {}'.format(e))
Expand Down
32 changes: 32 additions & 0 deletions kingfisher_scrapy/spiders/nicaragua_solid_waste.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from kingfisher_scrapy.base_spider import SimpleSpider
from kingfisher_scrapy.util import components


class NicaraguaSolidWaste(SimpleSpider):
    """
    Spider arguments
      sample
        Download only data released on 2013-01-23
      from_date
        Download only data from this date onward (YYYY-MM-DD format). Defaults to '2000-01-01'.
      until_date
        Download only data until this date (YYYY-MM-DD format). Defaults to today.
    """
    name = 'nicaragua_solid_waste'
    data_type = 'release_package'
    default_from_date = '2000-01-01'
    # Both dates are part of the URL path, so the date range must be set even if no date argument is given.
    date_required = True
    url = 'http://www.gekoware.com/swmp/api/ocds/{}/{}'

    def start_requests(self):
        """
        Yield a single request whose URL path ends with the start and end dates in YYYYMMDD format.

        The URL looks like http://www.gekoware.com/swmp/api/ocds/20190101/20201005
        """
        if self.sample:
            # A fixed one-day range, to get one release from 2013.
            from_date = until_date = '20130123'
        else:
            # `date_required` ensures that both dates are set before this method is called.
            from_date = self.from_date.strftime('%Y%m%d')
            until_date = self.until_date.strftime('%Y%m%d')

        yield self.build_request(self.url.format(from_date, until_date), formatter=components(-2))

0 comments on commit b137438

Please sign in to comment.