Skip to content

Commit

Permalink
Update Ecuador scraper
Browse files Browse the repository at this point in the history
  • Loading branch information
aguilerapy committed Sep 17, 2020
1 parent 279e79c commit f5860fb
Showing 1 changed file with 6 additions and 33 deletions.
39 changes: 6 additions & 33 deletions kingfisher_scrapy/spiders/ecuador_emergency.py
Expand Up @@ -14,43 +14,16 @@ class EcuadorEmergency(SimpleSpider):
"""
# Spider attributes. `name` is the identifier of this spider.
name = 'ecuador_emergency'
# NOTE(review): presumably tells the base spider how to label the downloaded files — confirm.
data_type = 'release_package'
# Only one request in flight at a time, so the cookie set by an HTML page request is still
# valid when the matching data request is sent (see the comment in request_cookie).
custom_settings = {
'CONCURRENT_REQUESTS': 1,
}
# Pending (html_url, data_url) pairs: appended in parse_list, popped in request_cookie.
# NOTE(review): a mutable list defined next to the other class attributes would be shared by
# every instance of the spider — confirm this is intended.
urls = []

# Entry point of the crawl: requests the index page and routes its response to parse_list.
# NOTE(review): `url` is assigned twice in a row, so only the second value
# (datosabiertos.compraspublicas.gob.ec) is ever requested. The first assignment looks like
# the pre-commit line of this diff view — confirm and drop it.
def start_requests(self):
url = 'https://portal.compraspublicas.gob.ec/sercop/data-estandar-ocds/'
url = 'https://datosabiertos.compraspublicas.gob.ec/OCDS/'
yield scrapy.Request(url, meta={'file_name': 'list.html'}, callback=self.parse_list)

# Callback for the index page requested by start_requests.
# NOTE(review): two implementations appear back to back below (a table-row walk that queues
# URL pairs, then a plain link walk that yields requests directly). This text is a diff view
# with its +/- markers and indentation stripped, so they are presumably the old and new
# bodies — confirm which lines are live before relying on either.
@handle_http_error
def parse_list(self, response):
# Variant 1: for each table row, read the link target and the bold file label.
for row in response.xpath('//tr'):
html_url = row.xpath('td/strong/a/@href').extract_first()
filename = row.xpath('td/p/strong/text()').extract_first()
# Rows without a link are skipped.
if html_url:
# The data URL is the page URL with "sharing" replaced by "fsdownload", plus the file name.
data_url = f'{html_url.replace("sharing", "fsdownload")}/ocds-{filename}.json'
self.urls.append((html_url, data_url))
# Stop early when the spider's `sample` attribute is truthy.
if self.sample:
break
# Variant 2: request every link found on the page.
html_urls = response.xpath('//a/@href').getall()
for html_url in html_urls:
# Plain string concatenation: assumes each href is relative to the index URL — TODO confirm.
yield self.build_request(response.request.url + html_url, formatter=components(-1))

# Starts the cookie-then-download chain for the queued URL pairs. There is no emptiness
# check here, unlike the `if self.urls:` guard in parse.
yield self.request_cookie()

# Builds the request for the next queued HTML page, carrying the matching data URL in
# meta['next'] so that parse_page can request it afterwards.
# NOTE(review): list.pop() takes the most recently appended pair and raises IndexError when
# self.urls is empty.
def request_cookie(self):
# This request sets a cookie, which must be used immediately to download the data. So, we set
# `CONCURRENT_REQUESTS` to 1, and yield the requests in order.
html_url, data_url = self.urls.pop()
return self.build_request(html_url, meta={'next': data_url}, formatter=components(-1),
callback=self.parse_page)

# Callback for the HTML page request built by request_cookie: follows up with a request for
# the data URL stored in meta['next'].
@handle_http_error
def parse_page(self, response):
# If there is an error, a request for the data URL redirects to the html URL. To treat this as an error, we set
# `dont_redirect`.
yield self.build_request(response.meta['next'], meta={'dont_redirect': True}, formatter=components(-1))

# Callback for a data response: delegates to the parent class's parse, then issues the next
# cookie request while URL pairs remain queued.
def parse(self, response):
yield from super().parse(response)

# Only continue the chain when there is something left to pop.
if self.urls:
yield self.request_cookie()
# NOTE(review): `break` has no enclosing loop in parse. These two lines look like additions
# that belong inside the `for html_url in html_urls` loop of parse_list in this flattened
# diff view — confirm.
if self.sample:
break

0 comments on commit f5860fb

Please sign in to comment.