Skip to content

Commit

Permalink
Merge pull request #636 from open-contracting/599-remove-retry-colombia
Browse files Browse the repository at this point in the history
colombia: remove retry
  • Loading branch information
yolile committed Feb 25, 2021
2 parents 0fabbcd + 3af7241 commit dcb0877
Showing 1 changed file with 0 additions and 28 deletions.
28 changes: 0 additions & 28 deletions kingfisher_scrapy/spiders/colombia.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,3 @@
import time
from json import JSONDecodeError

import scrapy

from kingfisher_scrapy.base_spider import LinksSpider
from kingfisher_scrapy.util import parameters

Expand Down Expand Up @@ -49,26 +44,3 @@ def start_requests(self):
if hasattr(self, 'start_page'):
start_page = int(self.start_page)
yield self.build_request(base_url.format(start_page), formatter=parameters('page'))

def retry(self, response, reason):
    """
    Log why the response is being retried, wait two hours, then yield a new request for the same URL.

    :param response: the response whose request is re-issued
    :param str reason: a log message template, formatted with the ``url`` and ``status`` keywords

    This is a generator: the caller must iterate it for the log entry, the wait and the request to occur.
    ``time.sleep`` blocks the calling thread for the whole wait.
    """
    failed_request = response.request
    target = failed_request.url
    message = reason.format(url=target, status=response.status)
    self.logger.info(message)
    # 120 minutes, expressed in seconds.
    time.sleep(120 * 60)
    # dont_filter=True is required because this URL has already been requested once.
    yield scrapy.Request(target, dont_filter=True, meta=failed_request.meta)

def parse(self, response):
    """
    Yield the file and the next-page request for a response, retrying when the service is down.

    Every day at a certain hour, Colombia's system runs a process that drops the database and makes the
    services unavailable for about 120 minutes. Because Colombia has a lot of data, the spider takes more
    than one day to scrape everything, so it will eventually hit this outage. When that happens (an HTTP 503
    status or invalid JSON), the request is handed to ``self.retry``, which waits and then re-issues it.

    :param response: the response to process
    :returns: a generator of the items and requests produced for this response
    """
    try:
        if self.is_http_success(response):
            yield self.build_file_from_response(response, data_type=self.data_type)
            yield self.next_link(response)
        elif response.status == 503:
            # retry() is a generator: a bare call would create it and discard it without logging, sleeping
            # or re-requesting anything, so its output has to be re-yielded here.
            yield from self.retry(response, 'Sleeping due to HTTP error {status} from {url}')
        else:
            yield self.build_file_error_from_response(response)
    except JSONDecodeError:
        # Invalid JSON is treated like an outage. Same as above: delegate to the generator.
        yield from self.retry(response, 'Sleeping due to JSONDecodeError from {url}')

0 comments on commit dcb0877

Please sign in to comment.