Skip to content

Commit

Permalink
Merge pull request #1026 from open-contracting/fix-incremental-file-names
Browse files Browse the repository at this point in the history

fix(spiders): use date filters as part of file names
  • Loading branch information
yolile authored Sep 20, 2023
2 parents 49528bf + ef95e65 commit e45e405
Show file tree
Hide file tree
Showing 7 changed files with 37 additions and 19 deletions.
7 changes: 4 additions & 3 deletions kingfisher_scrapy/spiders/australia.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ class Australia(LinksSpider):
formatter = staticmethod(parameters('cursor'))

def start_requests(self):
url = f'https://api.tenders.gov.au/ocds/findByDates/contractPublished/' \
f'{self.from_date.strftime(self.date_format)}Z/{self.until_date.strftime(self.date_format)}Z'
from_date = self.from_date.strftime(self.date_format)
until_date = self.until_date.strftime(self.date_format)
url = f'https://api.tenders.gov.au/ocds/findByDates/contractPublished/{from_date}Z/{until_date}Z'

yield scrapy.Request(url, meta={'file_name': 'start.json'})
yield scrapy.Request(url, meta={'file_name': f'{until_date}.json'}) # reverse chronological order
7 changes: 4 additions & 3 deletions kingfisher_scrapy/spiders/colombia_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ class ColombiaAPI(LinksSpider):
formatter = staticmethod(parameters('_id'))

def start_requests(self):
url = 'https://apiocds.colombiacompra.gov.co/apiCCE2.0/rest/releases/dates/' \
f'{self.from_date.strftime(self.date_format)}/{self.until_date.strftime(self.date_format)}'
from_date = self.from_date.strftime(self.date_format)
until_date = self.until_date.strftime(self.date_format)
url = f'https://apiocds.colombiacompra.gov.co/apiCCE2.0/rest/releases/dates/{from_date}/{until_date}'

yield scrapy.Request(url, meta={'file_name': 'page-1.json'})
yield scrapy.Request(url, meta={'file_name': f'{from_date}.json'})
8 changes: 6 additions & 2 deletions kingfisher_scrapy/spiders/kyrgyzstan.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ class Kyrgyzstan(LinksSpider):
def start_requests(self):
url = 'http://ocds.zakupki.gov.kg/api/tendering'
if self.from_date:
from_date = self.from_date.strftime(self.date_format)
# The API requires the timezone and seconds in the since parameter.
url = f'{url}?since={self.from_date.strftime(self.date_format)}.00%2B06:00'
yield scrapy.Request(url, meta={'file_name': 'start.json'})
url = f'{url}?since={from_date}.00%2B06:00'
self.formatter = staticmethod(parameters('offset', 'since'))
else:
from_date = '1970-01-01T00:00:00'
yield scrapy.Request(url, meta={'file_name': f'{from_date}.json'})
11 changes: 7 additions & 4 deletions kingfisher_scrapy/spiders/portugal_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,13 @@ class PortugalBase(LinksSpider):
def start_requests(self):
url = self.start_url
if self.from_date and self.until_date:
url = f'{url}?contractStartDate={self.from_date.strftime(self.date_format)}' \
f'&contractEndDate={self.until_date.strftime(self.date_format)}'

yield scrapy.Request(url, meta={'file_name': 'offset-1.json'})
from_date = self.from_date.strftime(self.date_format)
until_date = self.until_date.strftime(self.date_format)
url = f'{url}?contractStartDate={from_date}&contractEndDate={until_date}'
self.formatter = staticmethod(parameters('offset', 'contractStartDate'))
else:
from_date = self.default_from_date
yield scrapy.Request(url, meta={'file_name': f'{from_date}.json'})

def is_http_retryable(self, response):
return response.status != 404
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@ class SouthAfricaNationalTreasuryAPI(LinksSpider):
data_type = 'release_package'

# LinksSpider
formatter = staticmethod(parameters('PageNumber'))
formatter = staticmethod(parameters('PageNumber', 'dateFrom'))

def start_requests(self):

yield scrapy.Request('https://ocds-api.etenders.gov.za/api/OCDSReleases?PageNumber=1&PageSize=50&'
f'dateFrom={self.from_date}&dateTo={self.until_date}', meta={'file_name': 'start.json'})
f'dateFrom={self.from_date}&dateTo={self.until_date}',
meta={'file_name': f'{self.from_date}.json'})
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from datetime import datetime

import scrapy

from kingfisher_scrapy.base_spiders import LinksSpider
Expand All @@ -12,7 +14,6 @@ class UnitedKingdomContractsFinderBase(LinksSpider):

# BaseSpider
date_format = 'datetime'
date_required = True
default_from_date = '2014-01-01T00:00:00'
encoding = 'iso-8859-1'
max_attempts = 5
Expand All @@ -32,8 +33,10 @@ def start_requests(self):
from_date = self.from_date.strftime(self.date_format)
until_date = self.until_date.strftime(self.date_format)
url = f'{url}&publishedFrom={from_date}&publishedTo={until_date}'

yield scrapy.Request(url, meta={'file_name': 'page-1.json'}, callback=self.parse_page)
else:
until_date = datetime.utcnow().strftime(self.date_format)
yield scrapy.Request(url, meta={'file_name': f'{until_date}.json'}, # reverse chronological order
callback=self.parse_page)

@handle_http_error
def parse(self, response):
Expand Down
8 changes: 6 additions & 2 deletions kingfisher_scrapy/spiders/united_kingdom_fts.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from datetime import datetime

import scrapy

from kingfisher_scrapy.base_spiders import LinksSpider
Expand Down Expand Up @@ -36,8 +38,10 @@ def start_requests(self):
from_date = self.from_date.strftime(self.date_format)
until_date = self.until_date.strftime(self.date_format)
url = f'{url}?updatedFrom={from_date}&updatedTo={until_date}'

yield scrapy.Request(url, meta={'file_name': 'start.json'}, headers={'Accept': 'application/json'})
else:
until_date = datetime.utcnow().strftime(self.date_format)
yield scrapy.Request(url, meta={'file_name': f'{until_date}.json'}, # reverse chronological order
headers={'Accept': 'application/json'})

@handle_http_error
def parse(self, response):
Expand Down

0 comments on commit e45e405

Please sign in to comment.