From 8224fb194477da5456961970c46d846c0f13e6a6 Mon Sep 17 00:00:00 2001
From: Yohanna Lisnichuk
Date: Tue, 19 Sep 2023 14:14:48 -0400
Subject: [PATCH] fix(spiders): use date filters as part of file names

---
 kingfisher_scrapy/spiders/australia.py              |  6 ++++--
 kingfisher_scrapy/spiders/colombia_api.py           |  6 ++++--
 kingfisher_scrapy/spiders/kyrgyzstan.py             |  7 +++++--
 kingfisher_scrapy/spiders/portugal_base.py          |  9 ++++++---
 .../spiders/south_africa_national_treasury_api.py   |  6 ++++--
 .../spiders/united_kingdom_contracts_finder_base.py | 10 +++++-----
 kingfisher_scrapy/spiders/united_kingdom_fts.py     |  4 +++-
 7 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/kingfisher_scrapy/spiders/australia.py b/kingfisher_scrapy/spiders/australia.py
index 404106db..33d18e11 100644
--- a/kingfisher_scrapy/spiders/australia.py
+++ b/kingfisher_scrapy/spiders/australia.py
@@ -32,7 +32,9 @@ class Australia(LinksSpider):
     formatter = staticmethod(parameters('cursor'))
 
     def start_requests(self):
+        from_date = self.from_date.strftime(self.date_format)
+        until_date = self.until_date.strftime(self.date_format)
         url = f'https://api.tenders.gov.au/ocds/findByDates/contractPublished/' \
-              f'{self.from_date.strftime(self.date_format)}Z/{self.until_date.strftime(self.date_format)}Z'
+              f'{from_date}Z/{until_date}Z'
 
-        yield scrapy.Request(url, meta={'file_name': 'start.json'})
+        yield scrapy.Request(url, meta={'file_name': f'{from_date}-{until_date}-start.json'})
diff --git a/kingfisher_scrapy/spiders/colombia_api.py b/kingfisher_scrapy/spiders/colombia_api.py
index dc8018e3..4171b0be 100644
--- a/kingfisher_scrapy/spiders/colombia_api.py
+++ b/kingfisher_scrapy/spiders/colombia_api.py
@@ -32,7 +32,9 @@ class ColombiaAPI(LinksSpider):
     formatter = staticmethod(parameters('_id'))
 
     def start_requests(self):
+        from_date = self.from_date.strftime(self.date_format)
+        until_date = self.until_date.strftime(self.date_format)
         url = 'https://apiocds.colombiacompra.gov.co/apiCCE2.0/rest/releases/dates/' \
-              f'{self.from_date.strftime(self.date_format)}/{self.until_date.strftime(self.date_format)}'
+              f'{from_date}/{until_date}'
 
-        yield scrapy.Request(url, meta={'file_name': 'page-1.json'})
+        yield scrapy.Request(url, meta={'file_name': f'{from_date}-{until_date}-start.json'})
diff --git a/kingfisher_scrapy/spiders/kyrgyzstan.py b/kingfisher_scrapy/spiders/kyrgyzstan.py
index 95372038..1313905a 100644
--- a/kingfisher_scrapy/spiders/kyrgyzstan.py
+++ b/kingfisher_scrapy/spiders/kyrgyzstan.py
@@ -32,7 +32,10 @@ class Kyrgyzstan(LinksSpider):
 
     def start_requests(self):
         url = 'http://ocds.zakupki.gov.kg/api/tendering'
+        file_name = 'start.json'
         if self.from_date:
+            from_date = self.from_date.strftime(self.date_format)
             # The API requires the timezone and seconds in the since parameter.
-            url = f'{url}?since={self.from_date.strftime(self.date_format)}.00%2B06:00'
-        yield scrapy.Request(url, meta={'file_name': 'start.json'})
+            url = f'{url}?since={from_date}.00%2B06:00'
+            file_name = f'{from_date}-{file_name}'
+        yield scrapy.Request(url, meta={'file_name': file_name})
diff --git a/kingfisher_scrapy/spiders/portugal_base.py b/kingfisher_scrapy/spiders/portugal_base.py
index dbf84ad0..89192302 100644
--- a/kingfisher_scrapy/spiders/portugal_base.py
+++ b/kingfisher_scrapy/spiders/portugal_base.py
@@ -19,11 +19,14 @@ class PortugalBase(LinksSpider):
 
     def start_requests(self):
         url = self.start_url
+        file_name = 'start.json'
         if self.from_date and self.until_date:
-            url = f'{url}?contractStartDate={self.from_date.strftime(self.date_format)}' \
-                  f'&contractEndDate={self.until_date.strftime(self.date_format)}'
+            from_date = self.from_date.strftime(self.date_format)
+            until_date = self.until_date.strftime(self.date_format)
+            url = f'{url}?contractStartDate={from_date}&contractEndDate={until_date}'
+            file_name = f'{from_date}-{until_date}-{file_name}'
 
-        yield scrapy.Request(url, meta={'file_name': 'offset-1.json'})
+        yield scrapy.Request(url, meta={'file_name': file_name})
 
     def is_http_retryable(self, response):
         return response.status != 404
diff --git a/kingfisher_scrapy/spiders/south_africa_national_treasury_api.py b/kingfisher_scrapy/spiders/south_africa_national_treasury_api.py
index 2284aa65..c32d6659 100644
--- a/kingfisher_scrapy/spiders/south_africa_national_treasury_api.py
+++ b/kingfisher_scrapy/spiders/south_africa_national_treasury_api.py
@@ -27,8 +27,10 @@ class SouthAfricaNationalTreasuryAPI(LinksSpider):
     data_type = 'release_package'
 
     # LinksSpider
-    formatter = staticmethod(parameters('PageNumber'))
+    formatter = staticmethod(parameters('PageNumber', 'dateFrom', 'dateTo'))
 
     def start_requests(self):
+
         yield scrapy.Request('https://ocds-api.etenders.gov.za/api/OCDSReleases?PageNumber=1&PageSize=50&'
-                             f'dateFrom={self.from_date}&dateTo={self.until_date}', meta={'file_name': 'start.json'})
+                             f'dateFrom={self.from_date}&dateTo={self.until_date}',
+                             meta={'file_name': f'{self.from_date}-{self.until_date}-start.json'})
diff --git a/kingfisher_scrapy/spiders/united_kingdom_contracts_finder_base.py b/kingfisher_scrapy/spiders/united_kingdom_contracts_finder_base.py
index 61fb50c2..992903ac 100644
--- a/kingfisher_scrapy/spiders/united_kingdom_contracts_finder_base.py
+++ b/kingfisher_scrapy/spiders/united_kingdom_contracts_finder_base.py
@@ -28,12 +28,12 @@ class UnitedKingdomContractsFinderBase(LinksSpider):
     def start_requests(self):
         # https://www.contractsfinder.service.gov.uk/apidocumentation/Notices/1/GET-Published-Notice-OCDS-Search
         url = f'{self.url_prefix}Notices/OCDS/Search?limit=100'
-        if self.from_date and self.until_date:
-            from_date = self.from_date.strftime(self.date_format)
-            until_date = self.until_date.strftime(self.date_format)
-            url = f'{url}&publishedFrom={from_date}&publishedTo={until_date}'
+        from_date = self.from_date.strftime(self.date_format)
+        until_date = self.until_date.strftime(self.date_format)
+        url = f'{url}&publishedFrom={from_date}&publishedTo={until_date}'
 
-        yield scrapy.Request(url, meta={'file_name': 'page-1.json'}, callback=self.parse_page)
+        yield scrapy.Request(url, meta={'file_name': f'{from_date}-{until_date}-page-1.json'},
+                             callback=self.parse_page)
 
     @handle_http_error
     def parse(self, response):
diff --git a/kingfisher_scrapy/spiders/united_kingdom_fts.py b/kingfisher_scrapy/spiders/united_kingdom_fts.py
index a44eb7cf..167c13e6 100644
--- a/kingfisher_scrapy/spiders/united_kingdom_fts.py
+++ b/kingfisher_scrapy/spiders/united_kingdom_fts.py
@@ -32,12 +32,14 @@ class UnitedKingdomFTS(LinksSpider):
 
     def start_requests(self):
         url = 'https://www.find-tender.service.gov.uk/api/1.0/ocdsReleasePackages'
+        file_name = 'start.json'
         if self.from_date and self.until_date:
             from_date = self.from_date.strftime(self.date_format)
             until_date = self.until_date.strftime(self.date_format)
             url = f'{url}?updatedFrom={from_date}&updatedTo={until_date}'
+            file_name = f'{from_date}-{until_date}-{file_name}'
 
-        yield scrapy.Request(url, meta={'file_name': 'start.json'}, headers={'Accept': 'application/json'})
+        yield scrapy.Request(url, meta={'file_name': file_name}, headers={'Accept': 'application/json'})
 
     @handle_http_error
     def parse(self, response):