Skip to content

Commit

Permalink
spiders: add Ukraine
Browse files Browse the repository at this point in the history
Signed-off-by: Yohanna Lisnichuk <yohanitalisnichuk@gmail.com>
  • Loading branch information
yolile committed Oct 22, 2021
1 parent cfe9132 commit 4c52e34
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 0 deletions.
10 changes: 10 additions & 0 deletions docs/spiders.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1180,6 +1180,16 @@ Uk
scrapy crawl uk_fts_test
Ukraine
-------

.. autoclass:: kingfisher_scrapy.spiders.ukraine.Ukraine
   :no-members:

.. code-block:: bash

   scrapy crawl ukraine

Uruguay
-------

Expand Down
62 changes: 62 additions & 0 deletions kingfisher_scrapy/spiders/ukraine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import scrapy

from kingfisher_scrapy.base_spider import SimpleSpider
from kingfisher_scrapy.util import handle_http_error, join, components, parameters, replace_parameters, \
append_path_components


class Ukraine(SimpleSpider):
    """
    Domain
      ProZorro OpenProcurement API
    Caveats
      The API returns OCDS-like tenders and contracts objects, however an ocid is not set. Therefore, as part of this
      spider, the tender.id is used and added as the ocid.
    API documentation
      https://prozorro-api-docs.readthedocs.io/uk/latest/tendering/index.html
    """
    name = 'ukraine'
    base_url = 'https://public.api.openprocurement.org/api/0/'

    # BaseSpider
    # NOTE(review): the API is expected to serve JSON; confirm that 'utf-16' is the intended output encoding.
    encoding = 'utf-16'
    data_type = 'release'
    ocds_version = '1.0'

    def start_requests(self):
        """Yield the first list-page request for each of the ``tenders`` and ``contracts`` feeds."""
        for stage in ['tenders', 'contracts']:
            yield scrapy.Request(f'{self.base_url}{stage}', meta={'file_name': 'list.json'}, callback=self.parse_list)

    @handle_http_error
    def parse_list(self, response):
        """
        Yield one detail request per item of a list page, followed by a request for the next list page.

        The list response is read as ``{"data": [{"id": ...}, ...], "next_page": {"uri": ...}}``.
        """
        data = response.json()
        items = data['data']

        # The detail URL is the list URL without its pagination offset, plus the item's id as a path component.
        list_url = replace_parameters(response.request.url, offset=None)
        for item in items:
            yield self.build_request(append_path_components(list_url, item['id']), formatter=components(-2))

        # Only paginate while the feed keeps returning items: an empty page is treated as the end of the feed, and a
        # missing ``next_page`` must not raise a KeyError.
        # NOTE(review): confirm against the API's pagination semantics that an empty page marks the end.
        next_page = data.get('next_page')
        if items and next_page:
            yield self.build_request(next_page['uri'], formatter=join(components(-1), parameters('offset')),
                                     callback=self.parse_list)

    @handle_http_error
    def parse(self, response):
        """
        Wrap a single tender or contract response in a release-like object and yield it as a file.

        The response is read as ``{"data": { tender or contract fields }}``. The ocid is set on the release itself
        (not inside the tender or contract object), since that is where OCDS defines it.
        """
        record = response.json()['data']
        if 'tenders' in response.request.url:
            # The Ukraine publication doesn't have an ocid, but the id field in the tender JSON can be used as one.
            release = {'ocid': record['id'], 'tender': record}
        else:
            # The Ukraine publication doesn't have an ocid, but the tender_id field in the contract JSON can be used
            # as one: it is the same as tender.id in the tender JSON, and therefore links both.
            release = {'ocid': record['tender_id'], 'contracts': [record]}
        yield self.build_file_from_response(response, data=release, data_type=self.data_type)

0 comments on commit 4c52e34

Please sign in to comment.