Skip to content

Commit

Permalink
Merge pull request #1089 from open-contracting/1088-guatemala
Browse files Browse the repository at this point in the history
feat: add guatemala_bulk
  • Loading branch information
yolile committed May 9, 2024
2 parents f8d9033 + 8cb6805 commit c2dee22
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 3 deletions.
10 changes: 10 additions & 0 deletions docs/spiders.rst
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,16 @@ Greece
scrapy crawl greece_digiwhist
Guatemala
~~~~~~~~~

.. autoclass:: kingfisher_scrapy.spiders.guatemala_bulk.GuatemalaBulk
   :no-members:

.. code-block:: bash

   scrapy crawl guatemala_bulk

Honduras
~~~~~~~~

Expand Down
65 changes: 65 additions & 0 deletions kingfisher_scrapy/spiders/guatemala_bulk.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
from datetime import datetime

import scrapy

from kingfisher_scrapy.base_spiders import SimpleSpider
from kingfisher_scrapy.util import components, handle_http_error


class GuatemalaBulk(SimpleSpider):
    """
    Domain
      Ministerio de Finanzas Públicas - Dirección General de Adquisiciones del Estado
    Spider arguments
      from_date
        Download only data from this month onward (YYYY-MM format).
        If ``until_date`` is provided, defaults to '2020-01'.
      until_date
        Download only data until this month (YYYY-MM format).
        If ``from_date`` is provided, defaults to the current month.
    API documentation
      https://ocds.guatecompras.gt/api-ocds
    Bulk download documentation
      https://ocds.guatecompras.gt/descarga-datos
    """

    name = 'guatemala_bulk'

    # BaseSpider
    date_format = 'year-month'
    default_from_date = '2020-01'

    # SimpleSpider
    data_type = 'record_package'

    def start_requests(self):
        """Request the JSON listing of bulk files and hand the response to ``parse_list``."""
        yield scrapy.Request(
            'https://ocds.guatecompras.gt/files',
            meta={'file_name': 'list.json'},
            callback=self.parse_list,
        )

    @handle_http_error
    def parse_list(self, response):
        """Yield one request per listed JSON file, skipping months outside the requested date range."""
        # Shape of the listing, as far as this method relies on it:
        # {
        #   "result": [
        #     {
        #       "files": {
        #         "csv": "...",
        #         "sha": "...",
        #         "json": "...",   <- the only file URL that is requested
        #         "xlsx": "..."
        #       },
        #       "year": "values between 2020 to the current year",
        #       "month": "values between 1 and 12",
        #       "monthName": "values between enero to diciembre",
        #       "source": "Guatecompras",
        #       "timestamp": "last updated date in timestamp with time zone format"
        #     }, ...
        #   ]
        # }
        # NOTE(review): an earlier version of this sample named the list "results" and showed a
        # top-level "id" of the form "gc-{year}-{month}"; the code reads "result" - confirm
        # against the live API.
        for entry in response.json()["result"]:
            # The date filter applies only when both bounds are set.
            if self.from_date and self.until_date:
                # Each entry covers a whole month, so it is represented by the month's first day.
                month_start = datetime(int(entry['year']), int(entry['month']), 1)
                if self.from_date > month_start or month_start > self.until_date:
                    continue

            yield self.build_request(entry['files']['json'], formatter=components(-2))
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ class ItalyMinistryOfInfrastructureAndTransport(SimpleSpider):
Public Contracts Service (SCP) of the Ministry of Infrastructure and Transport
Spider arguments
from_date
Download only data from this time onward (YYYY-MM-DD format).
Download only data from this date onward (YYYY-MM-DD format).
If ``until_date`` is provided, defaults to '2022-01-01'.
until_date
Download only data until this time (YYYY-MM-DD format).
Download only data until this date (YYYY-MM-DD format).
If ``from_date`` is provided, defaults to today.
Swagger API documentation
https://www.serviziocontrattipubblici.it/ocds-ms/swagger-ui.html
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from kingfisher_scrapy.spiders.mexico_plataforma_digital_nacional_base import MexicoPlataformaDigitalNacionalBase


class MexicoPueblaStateSESEAPlataformaDigitalNacional(MexicoPlataformaDigitalNacionalBase):
class MexicoPueblaStateSESEAPPlataformaDigitalNacional(MexicoPlataformaDigitalNacionalBase):
"""
Domain
Secretaría Ejecutiva del Sistema Estatal Anticorrupción del Estado de Puebla (SESEAP) (Mexico) -
Expand Down

0 comments on commit c2dee22

Please sign in to comment.