Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update portugal to download records and jsons #503

Merged
merged 1 commit into from
Sep 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
17 changes: 14 additions & 3 deletions kingfisher_scrapy/spiders/portugal_base.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
from io import BytesIO

import ijson
Expand All @@ -20,9 +21,19 @@ def start_requests(self):
@handle_http_error
def parse_data(self, response):
json_array = []
for data in ijson.items(BytesIO(response.body), '', multiple_values=True, use_float=True):
json_array.append(data)
yield self.build_file_from_response(response, data=json_array, data_type=self.data_type)
for number, data in enumerate(ijson.items(BytesIO(response.body), '', multiple_values=True, use_float=True)):
if number == 10 and self.sample:
break
# The GetRecords service returns release packages rather than record packages, so request each record by its OCID
if self.data_type == 'record_package':
# Each release package returned by the service contains exactly one release
ocid = data['releases'][0]['ocid']
url = f'http://www.base.gov.pt/api/Record/GetRecordByOCID?ocid={ocid}'
yield self.build_request(url, formatter=parameters('ocid'))
else:
json_array.append(data)
if json_array:
yield self.build_file_from_response(response, data=json.dumps(json_array), data_type=self.data_type)

if not self.sample:
next_url = response.request.url
Expand Down
4 changes: 2 additions & 2 deletions kingfisher_scrapy/spiders/portugal_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ class PortugalRecords(PortugalBase):
http://www.base.gov.pt/swagger/index.html
Spider arguments
sample
Download one list of 100 record packages.
Download 10 record packages.
from_date
Download only the data with the contract signing date from this date onward (YYYY-MM-DD format).
If ``until_date`` is provided, defaults to '2010-01-01'.
Expand All @@ -16,5 +16,5 @@ class PortugalRecords(PortugalBase):
If ``from_date`` is provided, defaults to today.
"""
name = 'portugal_records'
data_type = 'record_package_list'
data_type = 'record_package'
url = 'http://www.base.gov.pt/api/Record/GetRecords?offset=1'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/portugal_releases.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ class PortugalReleases(PortugalBase):
http://www.base.gov.pt/swagger/index.html
Spider arguments
sample
Download one list of 100 release packages.
Download 10 release packages.
from_date
Download only the data with the contract signing date from this date onward (YYYY-MM-DD format).
If ``until_date`` is provided, defaults to '2010-01-01'.
Expand Down