Merge pull request #636 from open-contracting/599-remove-retry-colombia

colombia: remove retry
open-contracting · Feb 25, 2021 · dcb0877
2 parents 0fabbcd + 3af7241
commit dcb0877
Showing 1 changed file with 0 additions and 28 deletions.
diff --git a/kingfisher_scrapy/spiders/colombia.py b/kingfisher_scrapy/spiders/colombia.py
@@ -1,8 +1,3 @@
-import time
-from json import JSONDecodeError
-
-import scrapy
-
 from kingfisher_scrapy.base_spider import LinksSpider
 from kingfisher_scrapy.util import parameters
 
@@ -49,26 +44,3 @@ def start_requests(self):
         if hasattr(self, 'start_page'):
             start_page = int(self.start_page)
         yield self.build_request(base_url.format(start_page), formatter=parameters('page'))
-
-    def retry(self, response, reason):
-        url = response.request.url
-        self.logger.info(reason.format(url=url, status=response.status))
-        time.sleep(120 * 60)
-        yield scrapy.Request(url, dont_filter=True, meta=response.request.meta)
-
-    def parse(self, response):
-        # In Colombia, every day at certain hour they run a process in their system that drops the database and make
-        # the services unavailable for about 120 minutes, as Colombia has a lot of data,
-        # the spider takes more than one day to scrape all the data,
-        # so eventually the spider will always face the service problems. For that, when the problem occurs, (503
-        # status or invalid json) we wait 120 minutes and then continue
-        try:
-            if self.is_http_success(response):
-                yield self.build_file_from_response(response, data_type=self.data_type)
-                yield self.next_link(response)
-            elif response.status == 503:
-                self.retry(response, 'Sleeping due to HTTP error {status} from {url}')
-            else:
-                yield self.build_file_error_from_response(response)
-        except JSONDecodeError:
-            self.retry(response, 'Sleeping due to JSONDecodeError from {url}')