From ad61829abbc94cc4ad5253bf9b96961bb09a70a2 Mon Sep 17 00:00:00 2001
From: nativaldezt
Date: Tue, 29 Dec 2020 10:01:27 -0300
Subject: [PATCH] Download only the newest json

---
 kingfisher_scrapy/spiders/ecuador_emergency.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/kingfisher_scrapy/spiders/ecuador_emergency.py b/kingfisher_scrapy/spiders/ecuador_emergency.py
index fc18ddd4b..726ff95a6 100644
--- a/kingfisher_scrapy/spiders/ecuador_emergency.py
+++ b/kingfisher_scrapy/spiders/ecuador_emergency.py
@@ -21,5 +21,8 @@ def start_requests(self):
     @handle_http_error
     def parse_list(self, response):
         html_urls = response.xpath('//a/@href').getall()
-        for html_url in html_urls:
-            yield self.build_request(response.request.url + html_url, formatter=components(-1))
+        if html_urls:
+            # Each link contains different versions of SERCOP's emergency dataset, only the newest should be downloaded
+            # URL format: ./archivos/ocds-YYYY-MM-DD.json
+            html_urls.sort(reverse=True)
+            yield self.build_request(f'{response.request.url}{html_urls[0]}', formatter=components(-1))