Skip to content

Commit

Permalink
Merge branch 'master' into 446-paraguay-until-date
Browse files Browse the repository at this point in the history
  • Loading branch information
jpmckinney committed Jul 13, 2020
2 parents edba9cb + 1f7fa1d commit 7c3d1f9
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 8 deletions.
2 changes: 1 addition & 1 deletion kingfisher_scrapy/base_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class BaseSpider(scrapy.Spider):

MAX_SAMPLE = 10
MAX_RELEASES_PER_PACKAGE = 100
- VALID_DATE_FORMATS = {'date': '%Y-%m-%d', 'datetime': '%Y-%m-%dT%H:%M:%S'}
+ VALID_DATE_FORMATS = {'date': '%Y-%m-%d', 'datetime': '%Y-%m-%dT%H:%M:%S', 'year-month': '%Y-%m'}

def __init__(self, sample=None, note=None, from_date=None, until_date=None,
date_format='date', *args, **kwargs):
Expand Down
17 changes: 10 additions & 7 deletions kingfisher_scrapy/spiders/uruguay_base.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
- from datetime import date

from kingfisher_scrapy.base_spider import SimpleSpider
from kingfisher_scrapy.util import components, date_range_by_month


class UruguayBase(SimpleSpider):
download_delay = 0.9
+ default_from_date = '2017-11'

+ @classmethod
+ def from_crawler(cls, crawler, from_date=None, *args, **kwargs):
+ if not from_date:
+ from_date = cls.default_from_date

+ return super().from_crawler(crawler, date_format='year-month', from_date=from_date, *args, **kwargs)

def start_requests(self):
url = 'http://comprasestatales.gub.uy/ocds/rss/{0.year:d}/{0.month:02d}'

- start = date(2017, 11, 1)
- stop = date.today().replace(day=1)
if self.sample:
- start = stop
+ self.from_date = self.until_date

- for d in date_range_by_month(start, stop):
+ for d in date_range_by_month(self.from_date, self.until_date):
yield self.build_request(url.format(d), formatter=components(-2), callback=self.parse_list)

0 comments on commit 7c3d1f9

Please sign in to comment.