diff --git a/kingfisher_scrapy/item_schema/item.json b/kingfisher_scrapy/item_schema/item.json index 9f6fba66d..3004142be 100644 --- a/kingfisher_scrapy/item_schema/item.json +++ b/kingfisher_scrapy/item_schema/item.json @@ -44,7 +44,8 @@ "release_package_json_lines", "record_package_json_lines", "release_package_in_ocdsReleasePackage_in_list_in_results", - "release_in_Release" + "release_in_Release", + "zip" ] }, "encoding": { diff --git a/kingfisher_scrapy/spiders/chile_base.py b/kingfisher_scrapy/spiders/chile_base.py index 356192418..268fe7476 100644 --- a/kingfisher_scrapy/spiders/chile_base.py +++ b/kingfisher_scrapy/spiders/chile_base.py @@ -42,6 +42,11 @@ def start_requests(self): @handle_http_error def parse_list(self, response): data = json.loads(response.text) + # some files contain invalid packages, eg: + # { + # "status": 500, + # "detail": "error" + # } if 'status' in data and data['status'] != 200: yield self.build_file_error_from_response(response, errors={'http_code': data['status']}) return diff --git a/kingfisher_scrapy/spiders/chile_compra_bulk.py b/kingfisher_scrapy/spiders/chile_compra_bulk.py index 54fb273e7..07b4f9d43 100644 --- a/kingfisher_scrapy/spiders/chile_compra_bulk.py +++ b/kingfisher_scrapy/spiders/chile_compra_bulk.py @@ -1,6 +1,8 @@ +import json from datetime import date from kingfisher_scrapy.base_spider import ZipSpider +from kingfisher_scrapy.items import FileError from kingfisher_scrapy.util import components, date_range_by_month @@ -10,7 +12,7 @@ class ChileCompraBulk(ZipSpider): https://desarrolladores.mercadopublico.cl/OCDS/DescargaMasiva Spider arguments sample - Download only data released on February 2017. + Download only data released this month. 
def build_file(self, file_name=None, url=None, data=None, data_type=None, encoding='utf-8', post_to_api=True):
    """
    Build the item for one file, reporting error payloads as a ``FileError``.

    Some files contain an error response instead of a record package, e.g.::

        {
            "status": 500,
            "detail": "error"
        }

    :param file_name: passed through to the parent class's ``build_file``
    :param url: passed through to the parent class's ``build_file``; also recorded on the ``FileError``
    :param data: the file's content as JSON text (it is parsed with ``json.loads``)
    :param data_type: passed through to the parent class's ``build_file``
    :param encoding: passed through to the parent class's ``build_file``
    :param post_to_api: passed through to the parent class's ``build_file``
    :returns: a ``FileError`` with ``url`` and ``errors={'http_code': <status>}`` if the parsed
              content is an object whose ``status`` is not 200; otherwise, the return value of
              the parent class's ``build_file``
    :raises json.JSONDecodeError: if ``data`` is not valid JSON
    """
    json_data = json.loads(data)

    # Only a JSON object can carry a "status" member. Checking the type first avoids a TypeError
    # from the ``in`` test if the content is a JSON scalar (e.g. a number or null).
    if isinstance(json_data, dict) and 'status' in json_data and json_data['status'] != 200:
        return FileError({
            'url': url,
            'errors': {'http_code': json_data['status']},
        })

    # Forward every argument, including post_to_api, so that the caller's choice isn't silently
    # replaced by the parent's default.
    return super().build_file(
        file_name=file_name,
        url=url,
        data=data,
        data_type=data_type,
        encoding=encoding,
        post_to_api=post_to_api,
    )