From 4c72dda61b54f98412b376c570c325115d4f9539 Mon Sep 17 00:00:00 2001
From: James McKinney <26463+jpmckinney@users.noreply.github.com>
Date: Fri, 26 Feb 2021 19:57:13 -0500
Subject: [PATCH 1/2] nepal_dhangadhi: Don't follow redirect to https://admin.ims.susasan.org/login

---
 kingfisher_scrapy/spiders/nepal_dhangadhi.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/kingfisher_scrapy/spiders/nepal_dhangadhi.py b/kingfisher_scrapy/spiders/nepal_dhangadhi.py
index 6cc22c71e..74dfb399a 100644
--- a/kingfisher_scrapy/spiders/nepal_dhangadhi.py
+++ b/kingfisher_scrapy/spiders/nepal_dhangadhi.py
@@ -28,4 +28,6 @@ def parse_list(self, response):
         pattern = 'https://admin.ims.susasan.org/ocds/json/dhangadhi-{}.json'
         data = response.json()
         for item in data['data']['fiscal_years']:
-            yield self.build_request(pattern.format(item['name']), formatter=components(-1))
+            # A URL might redirect to https://admin.ims.susasan.org/login
+            yield self.build_request(pattern.format(item['name']), formatter=components(-1),
+                                     meta={'dont_redirect': True})

From d3dca0a4fb4cfbc415e2e22194c979344288cdfe Mon Sep 17 00:00:00 2001
From: Yohanna Lisnichuk
Date: Tue, 2 Mar 2021 15:29:48 -0300
Subject: [PATCH 2/2] nepal_dhangadhi: add retry for 403 errors

Signed-off-by: Yohanna Lisnichuk
---
 kingfisher_scrapy/spiders/nepal_dhangadhi.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/kingfisher_scrapy/spiders/nepal_dhangadhi.py b/kingfisher_scrapy/spiders/nepal_dhangadhi.py
index 74dfb399a..0e1a6490b 100644
--- a/kingfisher_scrapy/spiders/nepal_dhangadhi.py
+++ b/kingfisher_scrapy/spiders/nepal_dhangadhi.py
@@ -31,3 +31,11 @@ def parse_list(self, response):
             # A URL might redirect to https://admin.ims.susasan.org/login
             yield self.build_request(pattern.format(item['name']), formatter=components(-1),
                                      meta={'dont_redirect': True})
+
+    def parse(self, response):
+        # if we got a redirect response we try it again to download that file
+        if response.status == 302:
+            yield self.build_request(response.request.url, formatter=components(-1),
+                                     dont_filter=True)
+        else:
+            yield from super().parse(response)