Skip to content

Commit

Permalink
feat: spiders: add all available publishers from
Browse files Browse the repository at this point in the history
Mexico Plataforma Digital Nacional
  • Loading branch information
yolile committed Apr 15, 2024
1 parent 29a56fa commit 2303f2a
Show file tree
Hide file tree
Showing 9 changed files with 174 additions and 33 deletions.
42 changes: 42 additions & 0 deletions docs/spiders.rst
Original file line number Diff line number Diff line change
Expand Up @@ -768,6 +768,20 @@ Mexico
scrapy crawl mexico_mexico_state_infoem
.. autoclass:: kingfisher_scrapy.spiders.mexico_mexico_state_sesaemm_plataforma_digital_nacional.MexicoMexicoStateSESAEMMPlataformaDigitalNacional
:no-members:

.. code-block:: bash
scrapy crawl mexico_mexico_state_sesaemm_plataforma_digital_nacional
.. autoclass:: kingfisher_scrapy.spiders.mexico_michoacan_sesea_plataforma_digital_nacional.MexicoMichoacanSESEAPlataformaDigitalNacional
:no-members:

.. code-block:: bash
scrapy crawl mexico_michoacan_sesea_plataforma_digital_nacional
.. autoclass:: kingfisher_scrapy.spiders.mexico_nuevo_leon_cotai.MexicoNuevoLeonCOTAI
:no-members:

Expand All @@ -789,6 +803,13 @@ Mexico
scrapy crawl mexico_nuevo_leon_releases
.. autoclass:: kingfisher_scrapy.spiders.mexico_puebla_state_seseap_plataforma_digital_nacional.MexicoPueblaStateSESEAPlataformaDigitalNacional
:no-members:

.. code-block:: bash
scrapy crawl mexico_puebla_state_seseap_plataforma_digital_nacional
.. autoclass:: kingfisher_scrapy.spiders.mexico_quien_es_quien_releases.MexicoQuienEsQuienReleases
:no-members:

Expand All @@ -803,6 +824,20 @@ Mexico
scrapy crawl mexico_quintana_roo_idaip
.. autoclass:: kingfisher_scrapy.spiders.mexico_quintana_roo_sesaeqroo_plataforma_digital_nacional.MexicoQuintanaRooSESAEQROOPlataformaDigitalNacional
:no-members:

.. code-block:: bash
scrapy crawl mexico_quintana_roo_sesaeqroo_plataforma_digital_nacional
.. autoclass:: kingfisher_scrapy.spiders.mexico_shcp_plataforma_digital_nacional.MexicoSHCPPlataformaDigitalNacional
:no-members:

.. code-block:: bash
scrapy crawl mexico_shcp_plataforma_digital_nacional
.. autoclass:: kingfisher_scrapy.spiders.mexico_sinaloa_ceaip.MexicoSinaloaCEAIP
:no-members:

Expand All @@ -817,6 +852,13 @@ Mexico
scrapy crawl mexico_veracruz_ivai
.. autoclass:: kingfisher_scrapy.spiders.mexico_veracruz_state_sesea_plataforma_digital_nacional.MexicoVeracruzStateSESEAPlataformaDigitalNacional
:no-members:

.. code-block:: bash
scrapy crawl mexico_veracruz_state_sesea_plataforma_digital_nacional
.. autoclass:: kingfisher_scrapy.spiders.mexico_yucatan_inaip.MexicoYucatanINAIP
:no-members:

Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,7 @@
import json
from kingfisher_scrapy.spiders.mexico_plataforma_digital_nacional_base import MexicoPlataformaDigitalNacionalBase

import scrapy

from kingfisher_scrapy.base_spiders import IndexSpider
from kingfisher_scrapy.util import handle_http_error


class MexicoAguascalientesSESEAPlataformaDigitalNacional(IndexSpider):
class MexicoAguascalientesSESEAPlataformaDigitalNacional(MexicoPlataformaDigitalNacionalBase):
"""
Domain
Secretaría Ejecutiva del Sistema Estatal Anticorrupción de Aguascalientes (SESEA) - Plataforma Digital Nacional
Expand All @@ -15,29 +10,5 @@ class MexicoAguascalientesSESEAPlataformaDigitalNacional(IndexSpider):
"""
name = 'mexico_aguascalientes_sesea_plataforma_digital_nacional'

# BaseSpider
root_path = 'data.item'

# SimpleSpider
data_type = 'release'

# IndexSpider
limit = '/pagination/pageSize'
result_count_pointer = '/pagination/total'
start_page = 0
use_page = True

# Local
url = 'https://api.plataformadigitalnacional.org/s6/api/v1/search?supplier_id=SESEA_AGS'

def start_requests(self):
yield scrapy.Request(self.url, meta={'file_name': 'page-0.json'}, callback=self.parse_list, method='POST')

@handle_http_error
def parse_list(self, response):
data = self.parse_list_loader(response)
yield from self.parse(response)
for value in self.range_generator(data, response):
payload = json.dumps({'page': value, 'pageSize': 10})
yield scrapy.Request(self.url, body=payload, meta={'file_name': f'page-{value}.json'}, method='POST',
headers={'Accept': 'application/json', 'Content-Type': 'application/json'})
# MexicoPlataformaDigitalNacionalBase
publisher_id = 'SESEA_AGS'
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from kingfisher_scrapy.spiders.mexico_plataforma_digital_nacional_base import MexicoPlataformaDigitalNacionalBase


class MexicoMexicoStateSESAEMMPlataformaDigitalNacional(MexicoPlataformaDigitalNacionalBase):
    """
    Domain
      Secretaría Ejecutiva del Sistema Estatal Anticorrupción del Estado de México y Municipios (SESAEMM) (Mexico) -
      Plataforma Digital Nacional
    Bulk download documentation
      https://plataformadigitalnacional.org/contrataciones
    """

    # Spider name, as passed to `scrapy crawl`.
    name = 'mexico_mexico_state_sesaemm_plataforma_digital_nacional'

    # MexicoPlataformaDigitalNacionalBase
    # Value the base class formats into its search URL as the supplier_id query parameter.
    publisher_id = 'SESAEMM_EDOMEX'
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from kingfisher_scrapy.spiders.mexico_plataforma_digital_nacional_base import MexicoPlataformaDigitalNacionalBase


class MexicoMichoacanSESEAPlataformaDigitalNacional(MexicoPlataformaDigitalNacionalBase):
    """
    Domain
      Secretaría Ejecutiva del Sistema Estatal Anticorrupción del Estado de Michoacán (SESEA) (Mexico) -
      Plataforma Digital Nacional
    Bulk download documentation
      https://plataformadigitalnacional.org/contrataciones
    """

    # Spider name, as passed to `scrapy crawl`.
    name = 'mexico_michoacan_sesea_plataforma_digital_nacional'

    # MexicoPlataformaDigitalNacionalBase
    # Value the base class formats into its search URL as the supplier_id query parameter.
    publisher_id = 'SESEA_MCH'
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import json

import scrapy

from kingfisher_scrapy.base_spiders import IndexSpider
from kingfisher_scrapy.util import handle_http_error


class MexicoPlataformaDigitalNacionalBase(IndexSpider):
    """
    Shared crawl logic for publishers served by the Plataforma Digital Nacional search API.

    Subclasses set ``publisher_id``, which is formatted into ``base_url`` as the ``supplier_id`` query parameter.
    The first request is a POST without a body. Its response is parsed, then handed to ``range_generator``; each
    value that generator yields is requested as a further page using a JSON POST body.
    """

    # BaseSpider
    root_path = 'data.item'

    # SimpleSpider
    data_type = 'release'

    # IndexSpider
    limit = '/pagination/pageSize'
    result_count_pointer = '/pagination/total'
    start_page = 1
    use_page = True

    # Local
    base_url = 'https://api.plataformadigitalnacional.org/s6/api/v1/search?supplier_id={}'

    # publisher_id must be provided by subclasses.

    def start_requests(self):
        """Yield the initial, body-less POST request, whose response is handled by ``parse_list``."""
        search_url = self.base_url.format(self.publisher_id)
        # NOTE(review): this response is stored as page-0.json although start_page is 1 - confirm this is intended.
        yield scrapy.Request(
            search_url,
            method='POST',
            callback=self.parse_list,
            meta={'file_name': 'page-0.json'},
        )

    @handle_http_error
    def parse_list(self, response):
        """Parse the first response, then yield one JSON POST request per remaining page."""
        data = self.parse_list_loader(response)
        yield from self.parse(response)

        search_url = self.base_url.format(self.publisher_id)
        json_headers = {'Accept': 'application/json', 'Content-Type': 'application/json'}
        for page in self.range_generator(data, response):
            # NOTE(review): pageSize is fixed at 10, while `limit` points at /pagination/pageSize in the response;
            # presumably the API's default page size is also 10 - confirm.
            yield scrapy.Request(
                search_url,
                method='POST',
                body=json.dumps({'page': page, 'pageSize': 10}),
                headers=json_headers,
                meta={'file_name': f'page-{page}.json'},
            )
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from kingfisher_scrapy.spiders.mexico_plataforma_digital_nacional_base import MexicoPlataformaDigitalNacionalBase


class MexicoPueblaStateSESEAPlataformaDigitalNacional(MexicoPlataformaDigitalNacionalBase):
    """
    Domain
      Secretaría Ejecutiva del Sistema Estatal Anticorrupción del Estado de Puebla (SESEAP) (Mexico) -
      Plataforma Digital Nacional
    Bulk download documentation
      https://plataformadigitalnacional.org/contrataciones
    """

    # Spider name, as passed to `scrapy crawl`.
    name = 'mexico_puebla_state_seseap_plataforma_digital_nacional'

    # MexicoPlataformaDigitalNacionalBase
    # Value the base class formats into its search URL as the supplier_id query parameter.
    # NOTE(review): spelled SESAE_PUE here, whereas the docstring and spider name use SESEAP - confirm against the API.
    publisher_id = 'SESAE_PUE'
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from kingfisher_scrapy.spiders.mexico_plataforma_digital_nacional_base import MexicoPlataformaDigitalNacionalBase


class MexicoQuintanaRooSESAEQROOPlataformaDigitalNacional(MexicoPlataformaDigitalNacionalBase):
    """
    Domain
      Secretaría Ejecutiva del Sistema Anticorrupción del Estado de Quintana Roo (SESAEQROO) (Mexico) -
      Plataforma Digital Nacional
    Bulk download documentation
      https://plataformadigitalnacional.org/contrataciones
    """

    # Spider name, as passed to `scrapy crawl`.
    name = 'mexico_quintana_roo_sesaeqroo_plataforma_digital_nacional'

    # MexicoPlataformaDigitalNacionalBase
    # Value the base class formats into its search URL as the supplier_id query parameter.
    publisher_id = 'SESAE_QROO'
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from kingfisher_scrapy.spiders.mexico_plataforma_digital_nacional_base import MexicoPlataformaDigitalNacionalBase


class MexicoSHCPPlataformaDigitalNacional(MexicoPlataformaDigitalNacionalBase):
    """
    Domain
      Secretaría de Hacienda y Crédito Público (SHCP) (Mexico) - Plataforma Digital Nacional
    Bulk download documentation
      https://plataformadigitalnacional.org/contrataciones
    """

    # Spider name, as passed to `scrapy crawl`.
    name = 'mexico_shcp_plataforma_digital_nacional'

    # MexicoPlataformaDigitalNacionalBase
    # Value the base class formats into its search URL as the supplier_id query parameter.
    publisher_id = 'SHCP'
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from kingfisher_scrapy.spiders.mexico_plataforma_digital_nacional_base import MexicoPlataformaDigitalNacionalBase


class MexicoVeracruzStateSESEAPlataformaDigitalNacional(MexicoPlataformaDigitalNacionalBase):
    """
    Domain
      Secretaría Ejecutiva del Sistema Estatal Anticorrupción de Veracruz de Ignacio de la Llave (SESEA) (Mexico) -
      Plataforma Digital Nacional
    Bulk download documentation
      https://plataformadigitalnacional.org/contrataciones
    """

    # Spider name, as passed to `scrapy crawl`.
    name = 'mexico_veracruz_state_sesea_plataforma_digital_nacional'

    # MexicoPlataformaDigitalNacionalBase
    # Value the base class formats into its search URL as the supplier_id query parameter.
    publisher_id = 'SESEA_VER'

0 comments on commit 2303f2a

Please sign in to comment.