Skip to content

Commit

Permalink
Merge c1e73ee into 8508ac4
Browse files Browse the repository at this point in the history
  • Loading branch information
cecicasco committed Mar 19, 2021
2 parents 8508ac4 + c1e73ee commit a500424
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 0 deletions.
7 changes: 7 additions & 0 deletions docs/spiders.rst
Original file line number Diff line number Diff line change
Expand Up @@ -627,6 +627,13 @@ Indonesia
scrapy crawl indonesia_bandung
.. autoclass:: kingfisher_scrapy.spiders.indonesia_opentender.IndonesiaOpentender
   :no-members:

.. code-block:: bash

   scrapy crawl indonesia_opentender

Italy
-----

Expand Down
49 changes: 49 additions & 0 deletions kingfisher_scrapy/spiders/indonesia_opentender.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from kingfisher_scrapy.base_spider import CompressedFileSpider, PeriodicSpider
from kingfisher_scrapy.util import components, handle_http_error, join, parameters


class IndonesiaOpentender(CompressedFileSpider, PeriodicSpider):
    """
    Domain
      Open Tender
    Spider arguments
      from_date
        Download only releases from this date onward (YYYY format).
        If ``from_date`` is not provided defaults to 2008.
      until_date
        Download only releases until this date (YYYY format).
        If ``until_date`` is not provided defaults to current year.
    Bulk download documentation
      https://v3.opentender.net/#/ocds
    """

    name = 'indonesia_opentender'

    # BaseSpider
    date_format = 'year'
    default_from_date = '2008'

    # SimpleSpider
    data_type = 'release_package'

    base_url = 'https://opentender.net/api/'
    # PeriodicSpider
    # One list request is made per year; ``{}`` is filled in with the year.
    pattern = base_url + 'master/lpse?year={}'
    start_requests_callback = 'parse_list'

    @handle_http_error
    def parse_list(self, response):
        """
        Yield one batch-export request per distinct ``code`` in a yearly list response.

        The response body is read as JSON and is expected to contain a ``data``
        array whose items each have a ``code`` key. Items with an empty code and
        codes already requested for this response are skipped.

        :param response: the response to a request built from ``pattern``
        :returns: a generator of requests to the ``export-ocds-batch`` endpoint
        """
        data = response.json()
        # The list URL ends with ``?year=<year>``, so the text after the first
        # ``=`` is the requested year.
        year = response.request.url.split('=')[1]
        # A set gives constant-time membership checks when skipping duplicates.
        requested_codes = set()
        for item in data['data']:
            code = item['code']
            # there are some duplicated codes
            if code and code not in requested_codes:
                requested_codes.add(code)
                url = f'{self.base_url}tender/export-ocds-batch?year={year}&lpse={code}'
                # NOTE(review): the formatter presumably names the file from the last
                # path component plus the ``year`` and ``lpse`` query parameters, with a
                # ``zip`` extension - confirm against kingfisher_scrapy.util.
                yield self.build_request(url, formatter=join(components(-1),
                                                             parameters('year', 'lpse'), extension='zip'))

    def get_formatter(self):
        """
        Return the ``components(-1)`` formatter.

        NOTE(review): presumably used by PeriodicSpider to name the requests built
        from ``pattern`` - confirm against the base class.
        """
        return components(-1)

0 comments on commit a500424

Please sign in to comment.