Skip to content

Commit

Permalink
Merge 2c4c0cc into 2dde5cc
Browse files Browse the repository at this point in the history
  • Loading branch information
yolile committed Feb 26, 2021
2 parents 2dde5cc + 2c4c0cc commit b687640
Showing 1 changed file with 9 additions and 14 deletions.
23 changes: 9 additions & 14 deletions kingfisher_scrapy/spiders/moldova.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import scrapy

from kingfisher_scrapy.base_spider import SimpleSpider
from kingfisher_scrapy.util import components, handle_http_error, join, parameters, replace_parameters

Expand All @@ -10,31 +12,24 @@ class Moldova(SimpleSpider):
name = 'moldova'

# SimpleSpider
data_type = 'record_package'
data_type = 'release_package'

def start_requests(self):
endpoints = {
'budgets': 'https://public.mtender.gov.md/budgets/',
# From https://github.com/open-contracting/kingfisher-collect/issues/192#issuecomment-529928683
# The /tenders/plans endpoint appeared to return exactly the same data as the /tenders endpoint except
# that when given an OCID parameter it returned an error message. It may be that /tenders/plans just
# lists a subset of /tenders but this isn't clear.
# 'plans': 'https://public.mtender.gov.md/tenders/plan/',
'tenders': 'https://public.mtender.gov.md/tenders/',
}

for endpoint, url in endpoints.items():
yield self.build_request(url, formatter=components(-1), callback=self.parse_list)
# This URL lists all the OCIDs. Each one is then fetched from http://public.eprocurement.systems/ocds/tenders/,
# which returns the actual valid OCDS data (one OCID per process).
url = 'https://public.mtender.gov.md/tenders/'
yield scrapy.Request(url, meta={'file_name': 'list.json'}, callback=self.parse_list)

@handle_http_error
def parse_list(self, response):
base_url = 'http://public.eprocurement.systems/ocds/tenders/'
data = response.json()
# The last page returns an empty JSON object.
if not data:
return

for item in data['data']:
url = replace_parameters(response.request.url, offset=None) + item['ocid']
url = replace_parameters(base_url, offset=None) + item['ocid']
yield self.build_request(url, formatter=components(-2))

url = replace_parameters(response.request.url, offset=data['offset'])
Expand Down

0 comments on commit b687640

Please sign in to comment.