Skip to content

Commit

Permalink
Update date format to allow just date or datetime
Browse files Browse the repository at this point in the history
Signed-off-by: Yohanna Lisnichuk <yohanitalisnichuk@gmail.com>
  • Loading branch information
yolile committed May 13, 2020
1 parent 5458901 commit 879a52e
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 7 deletions.
8 changes: 4 additions & 4 deletions kingfisher_scrapy/base_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,18 @@ class BaseSpider(scrapy.Spider):

MAX_SAMPLE = 10
MAX_RELEASES_PER_PACKAGE = 100
VALID_DATE_FORMATS = {'year_month_day': '%Y-%m-%d', 'year_month_day_time': '%Y-%m-%dT%H:%M:%S'}

def __init__(self, sample=None, note=None, from_date=None, until_date=None, date_format='%Y-%m-%d',
*args, **kwargs):
def __init__(self, sample=None, note=None, from_date=None, until_date=None,
date_format='year_month_day', *args, **kwargs):
super().__init__(*args, **kwargs)

# https://docs.scrapy.org/en/latest/topics/spiders.html#spider-arguments
self.sample = sample == 'true'
self.from_date = from_date
self.until_date = until_date
self.note = note
self.date_format = date_format
self.date_format = self.VALID_DATE_FORMATS[date_format]

spider_arguments = {
'sample': sample,
Expand All @@ -74,7 +75,6 @@ def from_crawler(cls, crawler, *args, **kwargs):
if not spider.until_date:
# 'until_date' defaults to today
spider.until_date = datetime.now().strftime(spider.date_format)

try:
spider.from_date = datetime.strptime(spider.from_date, spider.date_format)
except ValueError as e:
Expand Down
5 changes: 2 additions & 3 deletions kingfisher_scrapy/spiders/paraguay_dncp_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class ParaguayDNCPBaseSpider(BaseSpider):

@classmethod
def from_crawler(cls, crawler, *args, **kwargs):
spider = super(ParaguayDNCPBaseSpider, cls).from_crawler(crawler, date_format='%Y-%m-%dT%H:%M:%S',
spider = super(ParaguayDNCPBaseSpider, cls).from_crawler(crawler, date_format='year_month_day_time',
*args, **kwargs)

spider.request_token = crawler.settings.get('KINGFISHER_PARAGUAY_DNCP_REQUEST_TOKEN')
Expand All @@ -50,9 +50,8 @@ def from_crawler(cls, crawler, *args, **kwargs):

def start_requests(self):
if self.from_date:
self.from_date = self.from_date.strftime(self.date_format)
self.base_page_url = '{}/search/processes?tipo_fecha=fecha_release&fecha_desde={}'\
.format(self.base_url, self.from_date)
.format(self.base_url, self.from_date.strftime(self.date_format))
yield scrapy.Request(
self.base_page_url,
# send duplicate requests when the token expired and in the continuation of last_request saved.
Expand Down

0 comments on commit 879a52e

Please sign in to comment.