Skip to content

Commit

Permalink
Merge pull request #503 from open-contracting/502-fix-portugal
Browse files Browse the repository at this point in the history
Update portugal to download records and jsons
  • Loading branch information
yolile committed Sep 21, 2020
2 parents a87a3a4 + f6d3233 commit 93aef51
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 6 deletions.
17 changes: 14 additions & 3 deletions kingfisher_scrapy/spiders/portugal_base.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
from io import BytesIO

import ijson
Expand All @@ -20,9 +21,19 @@ def start_requests(self):
@handle_http_error
def parse_data(self, response):
json_array = []
for data in ijson.items(BytesIO(response.body), '', multiple_values=True, use_float=True):
json_array.append(data)
yield self.build_file_from_response(response, data=json_array, data_type=self.data_type)
for number, data in enumerate(ijson.items(BytesIO(response.body), '', multiple_values=True, use_float=True)):
if number == 10 and self.sample:
break
# the GetRecords service returns release packages rather than record packages
if self.data_type == 'record_package':
# the service returns one release per package
ocid = data['releases'][0]['ocid']
url = f'http://www.base.gov.pt/api/Record/GetRecordByOCID?ocid={ocid}'
yield self.build_request(url, formatter=parameters('ocid'))
else:
json_array.append(data)
if json_array:
yield self.build_file_from_response(response, data=json.dumps(json_array), data_type=self.data_type)

if not self.sample:
next_url = response.request.url
Expand Down
4 changes: 2 additions & 2 deletions kingfisher_scrapy/spiders/portugal_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ class PortugalRecords(PortugalBase):
http://www.base.gov.pt/swagger/index.html
Spider arguments
sample
Download one list of 100 record packages.
Download 10 record packages.
from_date
Download only the data with the contract signing date from this date onward (YYYY-MM-DD format).
If ``until_date`` is provided, defaults to '2010-01-01'.
Expand All @@ -16,5 +16,5 @@ class PortugalRecords(PortugalBase):
If ``from_date`` is provided, defaults to today.
"""
name = 'portugal_records'
data_type = 'record_package_list'
data_type = 'record_package'
url = 'http://www.base.gov.pt/api/Record/GetRecords?offset=1'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/portugal_releases.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ class PortugalReleases(PortugalBase):
http://www.base.gov.pt/swagger/index.html
Spider arguments
sample
Download one list of 100 release packages.
Download 10 release packages.
from_date
Download only the data with the contract signing date from this date onward (YYYY-MM-DD format).
If ``until_date`` is provided, defaults to '2010-01-01'.
Expand Down

0 comments on commit 93aef51

Please sign in to comment.