Skip to content

Commit

Permalink
fix(spiders): use date filters as part of file names
Browse files (browse the repository at this point in the history)
  • Loading branch information
yolile committed Sep 19, 2023
1 parent 78fddbb commit 8224fb1
Show file tree
Hide file tree
Showing 7 changed files with 31 additions and 17 deletions.
6 changes: 4 additions & 2 deletions kingfisher_scrapy/spiders/australia.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ class Australia(LinksSpider):
formatter = staticmethod(parameters('cursor'))

def start_requests(self):
from_date = self.from_date.strftime(self.date_format)
until_date = self.until_date.strftime(self.date_format)
url = f'https://api.tenders.gov.au/ocds/findByDates/contractPublished/' \
f'{self.from_date.strftime(self.date_format)}Z/{self.until_date.strftime(self.date_format)}Z'
f'{from_date}Z/{self.until_date.strftime(until_date)}Z'

yield scrapy.Request(url, meta={'file_name': 'start.json'})
yield scrapy.Request(url, meta={'file_name': f'{from_date}-{until_date}-start.json'})
6 changes: 4 additions & 2 deletions kingfisher_scrapy/spiders/colombia_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ class ColombiaAPI(LinksSpider):
formatter = staticmethod(parameters('_id'))

def start_requests(self):
from_date = self.from_date.strftime(self.date_format)
until_date = self.until_date.strftime(self.date_format)
url = 'https://apiocds.colombiacompra.gov.co/apiCCE2.0/rest/releases/dates/' \
f'{self.from_date.strftime(self.date_format)}/{self.until_date.strftime(self.date_format)}'
f'{from_date}/{until_date}'

yield scrapy.Request(url, meta={'file_name': 'page-1.json'})
yield scrapy.Request(url, meta={'file_name': f'{from_date}-{until_date}-start.json'})
7 changes: 5 additions & 2 deletions kingfisher_scrapy/spiders/kyrgyzstan.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@ class Kyrgyzstan(LinksSpider):

def start_requests(self):
url = 'http://ocds.zakupki.gov.kg/api/tendering'
file_name = 'start.json'
if self.from_date:
from_date = self.from_date.strftime(self.date_format)
# The API requires the timezone and seconds in the since parameter.
url = f'{url}?since={self.from_date.strftime(self.date_format)}.00%2B06:00'
yield scrapy.Request(url, meta={'file_name': 'start.json'})
url = f'{url}?since={from_date}.00%2B06:00'
file_name = f'{from_date}-{file_name}'
yield scrapy.Request(url, meta={'file_name': file_name})
9 changes: 6 additions & 3 deletions kingfisher_scrapy/spiders/portugal_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,14 @@ class PortugalBase(LinksSpider):

def start_requests(self):
url = self.start_url
file_name = 'start.json'
if self.from_date and self.until_date:
url = f'{url}?contractStartDate={self.from_date.strftime(self.date_format)}' \
f'&contractEndDate={self.until_date.strftime(self.date_format)}'
from_date = self.from_date.strftime(self.date_format)
until_date = self.until_date.strftime(self.date_format)
url = f'{url}?contractStartDate={from_date}&contractEndDate={until_date}'
file_name = f'{from_date}-{until_date}-{file_name}'

yield scrapy.Request(url, meta={'file_name': 'offset-1.json'})
yield scrapy.Request(url, meta={'file_name': file_name})

def is_http_retryable(self, response):
return response.status != 404
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@ class SouthAfricaNationalTreasuryAPI(LinksSpider):
data_type = 'release_package'

# LinksSpider
formatter = staticmethod(parameters('PageNumber'))
formatter = staticmethod(parameters('PageNumber', 'dateFrom', 'dateTo'))

def start_requests(self):

yield scrapy.Request('https://ocds-api.etenders.gov.za/api/OCDSReleases?PageNumber=1&PageSize=50&'
f'dateFrom={self.from_date}&dateTo={self.until_date}', meta={'file_name': 'start.json'})
f'dateFrom={self.from_date}&dateTo={self.until_date}',
meta={'file_name': f'{self.from_date}-{self.until_date}-start.json'})
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,12 @@ class UnitedKingdomContractsFinderBase(LinksSpider):
def start_requests(self):
# https://www.contractsfinder.service.gov.uk/apidocumentation/Notices/1/GET-Published-Notice-OCDS-Search
url = f'{self.url_prefix}Notices/OCDS/Search?limit=100'
if self.from_date and self.until_date:
from_date = self.from_date.strftime(self.date_format)
until_date = self.until_date.strftime(self.date_format)
url = f'{url}&publishedFrom={from_date}&publishedTo={until_date}'
from_date = self.from_date.strftime(self.date_format)
until_date = self.until_date.strftime(self.date_format)
url = f'{url}&publishedFrom={from_date}&publishedTo={until_date}'

yield scrapy.Request(url, meta={'file_name': 'page-1.json'}, callback=self.parse_page)
yield scrapy.Request(url, meta={'file_name': f'{from_date}-{until_date}-page-1.json'},
callback=self.parse_page)

@handle_http_error
def parse(self, response):
Expand Down
4 changes: 3 additions & 1 deletion kingfisher_scrapy/spiders/united_kingdom_fts.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,14 @@ class UnitedKingdomFTS(LinksSpider):

def start_requests(self):
url = 'https://www.find-tender.service.gov.uk/api/1.0/ocdsReleasePackages'
file_name = 'start.json'
if self.from_date and self.until_date:
from_date = self.from_date.strftime(self.date_format)
until_date = self.until_date.strftime(self.date_format)
url = f'{url}?updatedFrom={from_date}&updatedTo={until_date}'
file_name = f'{from_date}-{until_date}-{file_name}'

yield scrapy.Request(url, meta={'file_name': 'start.json'}, headers={'Accept': 'application/json'})
yield scrapy.Request(url, meta={'file_name': file_name}, headers={'Accept': 'application/json'})

@handle_http_error
def parse(self, response):
Expand Down

0 comments on commit 8224fb1

Please sign in to comment.