From 8224fb194477da5456961970c46d846c0f13e6a6 Mon Sep 17 00:00:00 2001
From: Yohanna Lisnichuk <yohanitalisnichuk@gmail.com>
Date: Tue, 19 Sep 2023 14:14:48 -0400
Subject: [PATCH] fix(spiders): use date filters as part of file names

---
 kingfisher_scrapy/spiders/australia.py                 |  6 ++++--
 kingfisher_scrapy/spiders/colombia_api.py              |  6 ++++--
 kingfisher_scrapy/spiders/kyrgyzstan.py                |  7 +++++--
 kingfisher_scrapy/spiders/portugal_base.py             |  9 ++++++---
 .../spiders/south_africa_national_treasury_api.py      |  6 ++++--
 .../spiders/united_kingdom_contracts_finder_base.py    | 10 +++++-----
 kingfisher_scrapy/spiders/united_kingdom_fts.py        |  4 +++-
 7 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/kingfisher_scrapy/spiders/australia.py b/kingfisher_scrapy/spiders/australia.py
index 404106db..33d18e11 100644
--- a/kingfisher_scrapy/spiders/australia.py
+++ b/kingfisher_scrapy/spiders/australia.py
@@ -32,7 +32,9 @@ class Australia(LinksSpider):
     formatter = staticmethod(parameters('cursor'))
 
     def start_requests(self):
+        from_date = self.from_date.strftime(self.date_format)
+        until_date = self.until_date.strftime(self.date_format)
         url = f'https://api.tenders.gov.au/ocds/findByDates/contractPublished/' \
-              f'{self.from_date.strftime(self.date_format)}Z/{self.until_date.strftime(self.date_format)}Z'
+              f'{from_date}Z/{until_date}Z'
 
-        yield scrapy.Request(url, meta={'file_name': 'start.json'})
+        yield scrapy.Request(url, meta={'file_name': f'{from_date}-{until_date}-start.json'})
diff --git a/kingfisher_scrapy/spiders/colombia_api.py b/kingfisher_scrapy/spiders/colombia_api.py
index dc8018e3..4171b0be 100644
--- a/kingfisher_scrapy/spiders/colombia_api.py
+++ b/kingfisher_scrapy/spiders/colombia_api.py
@@ -32,7 +32,9 @@ class ColombiaAPI(LinksSpider):
     formatter = staticmethod(parameters('_id'))
 
     def start_requests(self):
+        from_date = self.from_date.strftime(self.date_format)
+        until_date = self.until_date.strftime(self.date_format)
         url = 'https://apiocds.colombiacompra.gov.co/apiCCE2.0/rest/releases/dates/' \
-              f'{self.from_date.strftime(self.date_format)}/{self.until_date.strftime(self.date_format)}'
+              f'{from_date}/{until_date}'
 
-        yield scrapy.Request(url, meta={'file_name': 'page-1.json'})
+        yield scrapy.Request(url, meta={'file_name': f'{from_date}-{until_date}-start.json'})
diff --git a/kingfisher_scrapy/spiders/kyrgyzstan.py b/kingfisher_scrapy/spiders/kyrgyzstan.py
index 95372038..1313905a 100644
--- a/kingfisher_scrapy/spiders/kyrgyzstan.py
+++ b/kingfisher_scrapy/spiders/kyrgyzstan.py
@@ -32,7 +32,10 @@ class Kyrgyzstan(LinksSpider):
 
     def start_requests(self):
         url = 'http://ocds.zakupki.gov.kg/api/tendering'
+        file_name = 'start.json'
         if self.from_date:
+            from_date = self.from_date.strftime(self.date_format)
             # The API requires the timezone and seconds in the since parameter.
-            url = f'{url}?since={self.from_date.strftime(self.date_format)}.00%2B06:00'
-        yield scrapy.Request(url, meta={'file_name': 'start.json'})
+            url = f'{url}?since={from_date}.00%2B06:00'
+            file_name = f'{from_date}-{file_name}'
+        yield scrapy.Request(url, meta={'file_name': file_name})
diff --git a/kingfisher_scrapy/spiders/portugal_base.py b/kingfisher_scrapy/spiders/portugal_base.py
index dbf84ad0..89192302 100644
--- a/kingfisher_scrapy/spiders/portugal_base.py
+++ b/kingfisher_scrapy/spiders/portugal_base.py
@@ -19,11 +19,14 @@ class PortugalBase(LinksSpider):
 
     def start_requests(self):
         url = self.start_url
+        file_name = 'start.json'
         if self.from_date and self.until_date:
-            url = f'{url}?contractStartDate={self.from_date.strftime(self.date_format)}' \
-                  f'&contractEndDate={self.until_date.strftime(self.date_format)}'
+            from_date = self.from_date.strftime(self.date_format)
+            until_date = self.until_date.strftime(self.date_format)
+            url = f'{url}?contractStartDate={from_date}&contractEndDate={until_date}'
+            file_name = f'{from_date}-{until_date}-{file_name}'
 
-        yield scrapy.Request(url, meta={'file_name': 'offset-1.json'})
+        yield scrapy.Request(url, meta={'file_name': file_name})
 
     def is_http_retryable(self, response):
         return response.status != 404
diff --git a/kingfisher_scrapy/spiders/south_africa_national_treasury_api.py b/kingfisher_scrapy/spiders/south_africa_national_treasury_api.py
index 2284aa65..c32d6659 100644
--- a/kingfisher_scrapy/spiders/south_africa_national_treasury_api.py
+++ b/kingfisher_scrapy/spiders/south_africa_national_treasury_api.py
@@ -27,8 +27,10 @@ class SouthAfricaNationalTreasuryAPI(LinksSpider):
     data_type = 'release_package'
 
     # LinksSpider
-    formatter = staticmethod(parameters('PageNumber'))
+    formatter = staticmethod(parameters('PageNumber', 'dateFrom', 'dateTo'))
 
     def start_requests(self):
+
         yield scrapy.Request('https://ocds-api.etenders.gov.za/api/OCDSReleases?PageNumber=1&PageSize=50&'
-                             f'dateFrom={self.from_date}&dateTo={self.until_date}', meta={'file_name': 'start.json'})
+                             f'dateFrom={self.from_date}&dateTo={self.until_date}',
+                             meta={'file_name': f'{self.from_date}-{self.until_date}-start.json'})
diff --git a/kingfisher_scrapy/spiders/united_kingdom_contracts_finder_base.py b/kingfisher_scrapy/spiders/united_kingdom_contracts_finder_base.py
index 61fb50c2..992903ac 100644
--- a/kingfisher_scrapy/spiders/united_kingdom_contracts_finder_base.py
+++ b/kingfisher_scrapy/spiders/united_kingdom_contracts_finder_base.py
@@ -28,12 +28,12 @@ class UnitedKingdomContractsFinderBase(LinksSpider):
     def start_requests(self):
         # https://www.contractsfinder.service.gov.uk/apidocumentation/Notices/1/GET-Published-Notice-OCDS-Search
         url = f'{self.url_prefix}Notices/OCDS/Search?limit=100'
-        if self.from_date and self.until_date:
-            from_date = self.from_date.strftime(self.date_format)
-            until_date = self.until_date.strftime(self.date_format)
-            url = f'{url}&publishedFrom={from_date}&publishedTo={until_date}'
+        from_date = self.from_date.strftime(self.date_format)
+        until_date = self.until_date.strftime(self.date_format)
+        url = f'{url}&publishedFrom={from_date}&publishedTo={until_date}'
 
-        yield scrapy.Request(url, meta={'file_name': 'page-1.json'}, callback=self.parse_page)
+        yield scrapy.Request(url, meta={'file_name': f'{from_date}-{until_date}-page-1.json'},
+                             callback=self.parse_page)
 
     @handle_http_error
     def parse(self, response):
diff --git a/kingfisher_scrapy/spiders/united_kingdom_fts.py b/kingfisher_scrapy/spiders/united_kingdom_fts.py
index a44eb7cf..167c13e6 100644
--- a/kingfisher_scrapy/spiders/united_kingdom_fts.py
+++ b/kingfisher_scrapy/spiders/united_kingdom_fts.py
@@ -32,12 +32,14 @@ class UnitedKingdomFTS(LinksSpider):
 
     def start_requests(self):
         url = 'https://www.find-tender.service.gov.uk/api/1.0/ocdsReleasePackages'
+        file_name = 'start.json'
         if self.from_date and self.until_date:
             from_date = self.from_date.strftime(self.date_format)
             until_date = self.until_date.strftime(self.date_format)
             url = f'{url}?updatedFrom={from_date}&updatedTo={until_date}'
+            file_name = f'{from_date}-{until_date}-{file_name}'
 
-        yield scrapy.Request(url, meta={'file_name': 'start.json'}, headers={'Accept': 'application/json'})
+        yield scrapy.Request(url, meta={'file_name': file_name}, headers={'Accept': 'application/json'})
 
     @handle_http_error
     def parse(self, response):