Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(spiders): use date filters as part of file names #1026

Merged
merged 11 commits into from
Sep 20, 2023
6 changes: 4 additions & 2 deletions kingfisher_scrapy/spiders/australia.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ class Australia(LinksSpider):
formatter = staticmethod(parameters('cursor'))

def start_requests(self):
# Rendered diff hunk: removed and added lines are shown together without +/- markers.
# Builds the contractPublished URL from the formatted from/until dates and yields the
# first request; one version of the yield embeds both dates in the file name.
# NOTE(review): in the last URL fragment, `self.until_date.strftime(until_date)` passes
# the already-formatted `until_date` string as the format argument, while `from_date`
# is interpolated directly. Presumably plain `{until_date}` was intended - confirm.
from_date = self.from_date.strftime(self.date_format)
until_date = self.until_date.strftime(self.date_format)
url = f'https://api.tenders.gov.au/ocds/findByDates/contractPublished/' \
f'{self.from_date.strftime(self.date_format)}Z/{self.until_date.strftime(self.date_format)}Z'
f'{from_date}Z/{self.until_date.strftime(until_date)}Z'
jpmckinney marked this conversation as resolved.
Show resolved Hide resolved

yield scrapy.Request(url, meta={'file_name': 'start.json'})
yield scrapy.Request(url, meta={'file_name': f'{from_date}-{until_date}-start.json'})
jpmckinney marked this conversation as resolved.
Show resolved Hide resolved
6 changes: 4 additions & 2 deletions kingfisher_scrapy/spiders/colombia_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ class ColombiaAPI(LinksSpider):
formatter = staticmethod(parameters('_id'))

def start_requests(self):
# Rendered diff hunk: removed and added lines are shown together without +/- markers.
# Formats from_date and until_date once with self.date_format, appends them to the
# releases/dates URL, and yields the first request. Two file names are shown:
# the fixed 'page-1.json' and one prefixed with both formatted dates.
from_date = self.from_date.strftime(self.date_format)
until_date = self.until_date.strftime(self.date_format)
url = 'https://apiocds.colombiacompra.gov.co/apiCCE2.0/rest/releases/dates/' \
f'{self.from_date.strftime(self.date_format)}/{self.until_date.strftime(self.date_format)}'
f'{from_date}/{until_date}'
jpmckinney marked this conversation as resolved.
Show resolved Hide resolved

yield scrapy.Request(url, meta={'file_name': 'page-1.json'})
yield scrapy.Request(url, meta={'file_name': f'{from_date}-{until_date}-start.json'})
jpmckinney marked this conversation as resolved.
Show resolved Hide resolved
7 changes: 5 additions & 2 deletions kingfisher_scrapy/spiders/kyrgyzstan.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@ class Kyrgyzstan(LinksSpider):

def start_requests(self):
# Rendered diff hunk: removed and added lines are shown together without +/- markers.
# Yields the first request for the tendering endpoint. When from_date is set, the URL
# gains a `since` parameter; in the lines that use `file_name`, the formatted date is
# also prefixed to the default 'start.json'.
jpmckinney marked this conversation as resolved.
Show resolved Hide resolved
url = 'http://ocds.zakupki.gov.kg/api/tendering'
file_name = 'start.json'
if self.from_date:
from_date = self.from_date.strftime(self.date_format)
# The API requires the timezone and seconds in the since parameter.
url = f'{url}?since={self.from_date.strftime(self.date_format)}.00%2B06:00'
yield scrapy.Request(url, meta={'file_name': 'start.json'})
url = f'{url}?since={from_date}.00%2B06:00'
file_name = f'{from_date}-{file_name}'
yield scrapy.Request(url, meta={'file_name': file_name})
9 changes: 6 additions & 3 deletions kingfisher_scrapy/spiders/portugal_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,14 @@ class PortugalBase(LinksSpider):

def start_requests(self):
# Rendered diff hunk: removed and added lines are shown together without +/- markers.
# Yields the first request for self.start_url. Only when both from_date and until_date
# are set are contractStartDate/contractEndDate added to the URL; in the lines that use
# `file_name`, both formatted dates are then prefixed to the default 'start.json'.
jpmckinney marked this conversation as resolved.
Show resolved Hide resolved
url = self.start_url
file_name = 'start.json'
if self.from_date and self.until_date:
url = f'{url}?contractStartDate={self.from_date.strftime(self.date_format)}' \
f'&contractEndDate={self.until_date.strftime(self.date_format)}'
from_date = self.from_date.strftime(self.date_format)
until_date = self.until_date.strftime(self.date_format)
url = f'{url}?contractStartDate={from_date}&contractEndDate={until_date}'
file_name = f'{from_date}-{until_date}-{file_name}'

yield scrapy.Request(url, meta={'file_name': 'offset-1.json'})
yield scrapy.Request(url, meta={'file_name': file_name})

def is_http_retryable(self, response):
# Returns True for every response status other than 404.
return response.status != 404
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@ class SouthAfricaNationalTreasuryAPI(LinksSpider):
data_type = 'release_package'

# LinksSpider
formatter = staticmethod(parameters('PageNumber'))
formatter = staticmethod(parameters('PageNumber', 'dateFrom', 'dateTo'))
jpmckinney marked this conversation as resolved.
Show resolved Hide resolved

def start_requests(self):
# Rendered diff hunk: removed and added lines are shown together without +/- markers.
# Yields the first request (PageNumber=1, PageSize=50) with dateFrom/dateTo query values.
# NOTE(review): self.from_date and self.until_date are interpolated as-is here (no
# strftime call, unlike the other spiders in this change), so their default string form
# ends up in both the URL and the date-prefixed file name - confirm that is intended.

yield scrapy.Request('https://ocds-api.etenders.gov.za/api/OCDSReleases?PageNumber=1&PageSize=50&'
f'dateFrom={self.from_date}&dateTo={self.until_date}', meta={'file_name': 'start.json'})
f'dateFrom={self.from_date}&dateTo={self.until_date}',
meta={'file_name': f'{self.from_date}-{self.until_date}-start.json'})
jpmckinney marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,12 @@ class UnitedKingdomContractsFinderBase(LinksSpider):
def start_requests(self):
# Rendered diff hunk: removed and added lines are shown together without +/- markers.
# Yields the first search request (limit=100) with publishedFrom/publishedTo taken from
# the formatted dates, handled by self.parse_page.
# NOTE(review): the `if self.from_date and self.until_date:` line is shown next to a
# second, apparently unguarded copy of the strftime calls. Without the guard both dates
# must always be set; presumably defaults are defined outside this hunk - confirm.
# https://www.contractsfinder.service.gov.uk/apidocumentation/Notices/1/GET-Published-Notice-OCDS-Search
url = f'{self.url_prefix}Notices/OCDS/Search?limit=100'
if self.from_date and self.until_date:
jpmckinney marked this conversation as resolved.
Show resolved Hide resolved
from_date = self.from_date.strftime(self.date_format)
until_date = self.until_date.strftime(self.date_format)
url = f'{url}&publishedFrom={from_date}&publishedTo={until_date}'
from_date = self.from_date.strftime(self.date_format)
until_date = self.until_date.strftime(self.date_format)
url = f'{url}&publishedFrom={from_date}&publishedTo={until_date}'

yield scrapy.Request(url, meta={'file_name': 'page-1.json'}, callback=self.parse_page)
yield scrapy.Request(url, meta={'file_name': f'{from_date}-{until_date}-page-1.json'},
jpmckinney marked this conversation as resolved.
Show resolved Hide resolved
callback=self.parse_page)

@handle_http_error
def parse(self, response):
Expand Down
4 changes: 3 additions & 1 deletion kingfisher_scrapy/spiders/united_kingdom_fts.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,14 @@ class UnitedKingdomFTS(LinksSpider):

def start_requests(self):
# Rendered diff hunk: removed and added lines are shown together without +/- markers.
# Yields the first ocdsReleasePackages request with an 'Accept: application/json' header.
# Only when both from_date and until_date are set are updatedFrom/updatedTo added to the
# URL and both formatted dates prefixed to the default 'start.json' file name.
url = 'https://www.find-tender.service.gov.uk/api/1.0/ocdsReleasePackages'
file_name = 'start.json'
if self.from_date and self.until_date:
from_date = self.from_date.strftime(self.date_format)
until_date = self.until_date.strftime(self.date_format)
url = f'{url}?updatedFrom={from_date}&updatedTo={until_date}'
file_name = f'{from_date}-{until_date}-{file_name}'

yield scrapy.Request(url, meta={'file_name': 'start.json'}, headers={'Accept': 'application/json'})
yield scrapy.Request(url, meta={'file_name': file_name}, headers={'Accept': 'application/json'})

@handle_http_error
def parse(self, response):
Expand Down