From c367d6fb4a47592e11066cbbcd64cf498c07f861 Mon Sep 17 00:00:00 2001 From: nativaldezt Date: Tue, 17 Nov 2020 10:12:26 -0300 Subject: [PATCH 1/3] Use ocid instead of id --- kingfisher_scrapy/spiders/spain_zaragoza.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kingfisher_scrapy/spiders/spain_zaragoza.py b/kingfisher_scrapy/spiders/spain_zaragoza.py index 7c1ab824..86ca1c15 100644 --- a/kingfisher_scrapy/spiders/spain_zaragoza.py +++ b/kingfisher_scrapy/spiders/spain_zaragoza.py @@ -43,5 +43,5 @@ def start_requests(self): def parse_list(self, response): ids = json.loads(response.text) for contracting_process_id in ids: - url = self.url + contracting_process_id['id'] + url = self.url + contracting_process_id['ocid'] yield self.build_request(url, formatter=components(-1)) From a76339a3fd8237837b5f5cf055840b5e166fd311 Mon Sep 17 00:00:00 2001 From: nativaldezt Date: Tue, 17 Nov 2020 11:31:30 -0300 Subject: [PATCH 2/3] Fix data_type; remove date range option because the API ignores it --- kingfisher_scrapy/spiders/spain_zaragoza.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/kingfisher_scrapy/spiders/spain_zaragoza.py b/kingfisher_scrapy/spiders/spain_zaragoza.py index 86ca1c15..c133c7c3 100644 --- a/kingfisher_scrapy/spiders/spain_zaragoza.py +++ b/kingfisher_scrapy/spiders/spain_zaragoza.py @@ -21,21 +21,12 @@ class SpainZaragoza(SimpleSpider): https://www.zaragoza.es/docs-api_sede/ """ name = 'spain_zaragoza' - data_type = 'release_list' - date_format = 'datetime' - default_from_date = '2000-01-01T00:00:00' + data_type = 'release_package' url = 'https://www.zaragoza.es/sede/servicio/contratacion-publica/ocds/contracting-process/' def start_requests(self): # row parameter setting to 100000 to get all releases - url = self.url + '?rf=html&rows=100000' - - # check date parameters and set "yyyy-MM-dd'T'HH:mm:ss'Z'" format - if self.from_date and self.until_date: - # `before` and `after` query string parameters 
behave opposite in API - after = self.until_date.strftime("%Y-%m-%dT%H:%M:%SZ") - before = self.from_date.strftime("%Y-%m-%dT%H:%M:%SZ") - url = f'{url}&before={before}&after={after}' + url = f'{self.url}?rf=html&rows=100000' yield scrapy.Request(url, meta={'file_name': 'list.json'}, callback=self.parse_list) From 8318da6847e2a2d0168587bb016bd8bf21edd3f7 Mon Sep 17 00:00:00 2001 From: nativaldezt Date: Tue, 17 Nov 2020 13:08:37 -0300 Subject: [PATCH 3/3] Spider documentation fix --- kingfisher_scrapy/spiders/spain_zaragoza.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/kingfisher_scrapy/spiders/spain_zaragoza.py b/kingfisher_scrapy/spiders/spain_zaragoza.py index c133c7c3..695a3ac6 100644 --- a/kingfisher_scrapy/spiders/spain_zaragoza.py +++ b/kingfisher_scrapy/spiders/spain_zaragoza.py @@ -10,13 +10,6 @@ class SpainZaragoza(SimpleSpider): """ Domain Ayuntamiento de Zaragoza - Spider arguments - from_date - Download only data from this date onward (YYYY-MM-DDTHH:mm:ss format). - If ``until_date`` is provided, defaults to '2000-01-01T00:00:00'. - until_date - Download only data until this date (YYYY-MM-DDTHH:mm:ss format). - If ``from_date`` is provided, defaults to today. Swagger API documentation https://www.zaragoza.es/docs-api_sede/ """ @@ -34,5 +27,6 @@ def start_requests(self): def parse_list(self, response): ids = json.loads(response.text) for contracting_process_id in ids: - url = self.url + contracting_process_id['ocid'] + ocid = contracting_process_id['ocid'] + url = f'{self.url}{ocid}' yield self.build_request(url, formatter=components(-1))