-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add new spider italy_ministry_of_infrastructure_and_transport
- Loading branch information
Showing
2 changed files
with
65 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
58 changes: 58 additions & 0 deletions
58
kingfisher_scrapy/spiders/italy_ministry_of_infrastructure_and_transport.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
import scrapy | ||
|
||
from kingfisher_scrapy.base_spiders import SimpleSpider | ||
from kingfisher_scrapy.util import handle_http_error, parameters, replace_parameters | ||
|
||
|
||
class ItalyMinistryOfInfrastructureAndTransport(SimpleSpider):
    """
    Domain
      Public Contracts Service (SCP) of the Ministry of Infrastructure and Transport
    Spider arguments
      from_date
        Download only data from this time onward (YYYY-MM-DD format).
        If ``until_date`` is provided, defaults to '2022-01-01'.
      until_date
        Download only data until this time (YYYY-MM-DD format).
        If ``from_date`` is provided, defaults to today.
    Swagger API documentation
      https://www.serviziocontrattipubblici.it/ocds-ms/swagger-ui.html
    """
    name = 'italy_ministry_of_infrastructure_and_transport'

    # BaseSpider
    date_format = 'date'
    default_from_date = '2022-01-01'

    # SimpleSpider
    data_type = 'release_package'

    def start_requests(self):
        """
        Yield the request for the first page of release packages.

        The ``dataInvioDa`` and ``dataInvioA`` query parameters are added only if both ``from_date`` and
        ``until_date`` are set.
        """
        # The URL must not contain whitespace: the date filters below are appended directly to it.
        url = (
            'https://www.serviziocontrattipubblici.it/ocdsReleasePackages-ms/v1.0/ocdsReleasePackages?page=1'
            '&pageSize=5'
        )
        if self.from_date and self.until_date:
            # NOTE(review): assumes ``self.date_format`` holds a strftime pattern by the time this runs (i.e. the
            # base spider has replaced the 'date' key set on the class) - confirm.
            from_date = self.from_date.strftime(self.date_format)
            until_date = self.until_date.strftime(self.date_format)
            url = f'{url}&dataInvioDa={from_date}&dataInvioA={until_date}'
        yield scrapy.Request(url, meta={'file_name': 'page-1.json', 'page': 1})

    @handle_http_error
    def parse(self, response):
        """
        Yield the items for the current page, then a request for the next page.

        Yields a file error and stops if the payload reports an error. Stops without yielding anything if the
        payload has no ``releases`` key.
        """
        data = response.json()
        # A 200 HTTP response with a dict like the below is returned instead of 404, for example for not available
        # date periods
        # {
        #   "esito": false,
        #   "errorData": "Si è verificato un errore durante la creazione di OCDS"
        # }
        if "errorData" in data:
            data['http_code'] = response.status
            yield self.build_file_error_from_response(response, errors=data)
            # Stop here, so that an error payload is never also treated as data or used to paginate further.
            return

        # Once the last page has been passed, an empty release package (without 'releases') is returned.
        if 'releases' not in data:
            return

        yield from super().parse(response)

        next_page = response.request.meta['page'] + 1
        yield self.build_request(
            replace_parameters(response.url, page=next_page),
            meta={'page': next_page},
            formatter=parameters('page'),
        )