Add a 'year-month' entry to BaseSpider.VALID_DATE_FORMATS and make UruguayBase
take its month range from from_date/until_date (from_date defaults to '2017-11')
instead of dates hard-coded in start_requests.
NOTE(review): the wrap point and continuation indent of the two-line __init__
signature in the first hunk's trailing context were reconstructed - confirm
against base_spider.py before applying.

diff --git a/kingfisher_scrapy/base_spider.py b/kingfisher_scrapy/base_spider.py
index a3aa62c7..badf778d 100644
--- a/kingfisher_scrapy/base_spider.py
+++ b/kingfisher_scrapy/base_spider.py
@@ -42,7 +42,7 @@ class BaseSpider(scrapy.Spider):
 
     MAX_SAMPLE = 10
     MAX_RELEASES_PER_PACKAGE = 100
-    VALID_DATE_FORMATS = {'date': '%Y-%m-%d', 'datetime': '%Y-%m-%dT%H:%M:%S'}
+    VALID_DATE_FORMATS = {'date': '%Y-%m-%d', 'datetime': '%Y-%m-%dT%H:%M:%S', 'year-month': '%Y-%m'}
 
     def __init__(self, sample=None, note=None, from_date=None, until_date=None,
                  date_format='date', *args, **kwargs):
diff --git a/kingfisher_scrapy/spiders/uruguay_base.py b/kingfisher_scrapy/spiders/uruguay_base.py
index d554fe89..168708d7 100644
--- a/kingfisher_scrapy/spiders/uruguay_base.py
+++ b/kingfisher_scrapy/spiders/uruguay_base.py
@@ -1,19 +1,22 @@
-from datetime import date
-
 from kingfisher_scrapy.base_spider import SimpleSpider
 from kingfisher_scrapy.util import components, date_range_by_month
 
 
 class UruguayBase(SimpleSpider):
     download_delay = 0.9
+    default_from_date = '2017-11'
+
+    @classmethod
+    def from_crawler(cls, crawler, from_date=None, *args, **kwargs):
+        if not from_date:
+            from_date = cls.default_from_date
+
+        return super().from_crawler(crawler, date_format='year-month', from_date=from_date, *args, **kwargs)
 
     def start_requests(self):
         url = 'http://comprasestatales.gub.uy/ocds/rss/{0.year:d}/{0.month:02d}'
-
-        start = date(2017, 11, 1)
-        stop = date.today().replace(day=1)
         if self.sample:
-            start = stop
+            self.from_date = self.until_date
 
-        for d in date_range_by_month(start, stop):
+        for d in date_range_by_month(self.from_date, self.until_date):
             yield self.build_request(url.format(d), formatter=components(-2), callback=self.parse_list)