Skip to content

Commit

Permalink
Merge 6fa5649 into ec0953b
Browse files Browse the repository at this point in the history
  • Loading branch information
yolile committed Jul 20, 2020
2 parents ec0953b + 6fa5649 commit fcce55d
Showing 1 changed file with 41 additions and 0 deletions.
41 changes: 41 additions & 0 deletions kingfisher_scrapy/spiders/mexico_inai_portal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import scrapy

from kingfisher_scrapy.base_spider import SimpleSpider
from kingfisher_scrapy.util import components, handle_http_error


class MexicoINAIPortal(SimpleSpider):
    """
    Spider arguments
      sample
        Downloads the records listed on the first page at
        http://contratacionesabiertas.inai.org.mx/contratacionesabiertas/contratos/.

    The spider POSTs a search form to the portal's ``pagination`` endpoint, reads the download link of
    each listed contract from the returned HTML, and then follows the "Next" pagination control.
    """
    name = 'mexico_inai_portal'
    data_type = 'record'

    def build_url(self, page):
        """
        Build the POST request for one page of the contract listing.

        Despite its name, this returns a ``scrapy.FormRequest``, not a URL string.

        :param page: the page to request; sent as the ``npage`` form field and used to name the
                     listing file (``<page>.html``)
        :returns: a form request whose response is handled by :meth:`parse_list`
        """
        return scrapy.FormRequest(
            'http://contratacionesabiertas.inai.org.mx/contratacionesabiertas/pagination',
            meta={'file_name': f'{page}.html'},
            callback=self.parse_list,
            formdata={
                # Form values must be strings; coerce so that an int page number is accepted too.
                'npage': str(page),
                # The remaining filters are sent empty, i.e. no filter values are supplied.
                'keyword': '',
                'process': '',
                'stage': '',
                'status': '',
                'year': '',
                'orderby': 'datesigned',
            },
        )

    def start_requests(self):
        """Start the crawl at the first page of the listing."""
        yield self.build_url('1')

    @handle_http_error
    def parse_list(self, response):
        """
        Yield one download request per listed contract, then the request for the next page.

        :param response: the HTML response of a listing page
        """
        for row in response.xpath('//div[@class="contract-download col-md-1"]'):
            url = row.xpath('div/a/@href').extract_first()
            if not url:
                # A row without a download link has nothing to fetch. Skip it rather than pass None
                # on, which would abort this callback and lose the rest of the page.
                continue
            # Resolve against the page URL so relative hrefs work; absolute hrefs are unchanged.
            yield self.build_request(response.urljoin(url), formatter=components(-1))

        next_page = response.xpath(
            '//ul[@class="pagination"]/li/a[@aria-label="Next"]/@data-page'
        ).extract_first()
        # With the ``sample`` argument set, stop after the first page.
        if next_page and not self.sample:
            yield self.build_url(next_page)

0 comments on commit fcce55d

Please sign in to comment.