Skip to content

Commit

Permalink
Merge 04c1456 into 895116a
Browse files (browse the repository at this point in the history)
  • Loading branch information
yolile committed Nov 10, 2020
2 parents (895116a + 04c1456), commit 4bc56b4
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 31 deletions.
33 changes: 5 additions & 28 deletions kingfisher_scrapy/spiders/portugal_base.py
Original file line number | Diff line number | Diff line change
@@ -1,39 +1,16 @@
import json
from io import BytesIO

import ijson
import scrapy

from kingfisher_scrapy.base_spider import SimpleSpider
from kingfisher_scrapy.util import get_parameter_value, handle_http_error, parameters, replace_parameters
from kingfisher_scrapy.base_spider import LinksSpider
from kingfisher_scrapy.util import parameters


class PortugalBase(SimpleSpider):
class PortugalBase(LinksSpider):
default_from_date = '2010-01-01'
download_delay = 1
next_page_formatter = staticmethod(parameters('offset'))

def start_requests(self):
url = self.url
if self.from_date and self.until_date:
url = f'{url}&contractStartDate={self.from_date}&contractEndDate={self.until_date}'
yield scrapy.Request(url, meta={'file_name': 'offset-1.json'}, callback=self.parse_data)

@handle_http_error
def parse_data(self, response):
json_array = []
for number, data in enumerate(ijson.items(BytesIO(response.body), '', multiple_values=True, use_float=True)):
# get records service returns release packages
if self.data_type == 'record_package':
# the service returns one release per package
ocid = data['releases'][0]['ocid']
url = f'http://www.base.gov.pt/api/Record/GetRecordByOCID?ocid={ocid}'
yield self.build_request(url, formatter=parameters('ocid'))
else:
json_array.append(data)
if json_array:
yield self.build_file_from_response(response, data=json.dumps(json_array), data_type=self.data_type)

next_url = response.request.url
offset = int(get_parameter_value(next_url, 'offset'))
url = replace_parameters(next_url, offset=offset + 1)
yield self.build_request(url, formatter=parameters('offset'), callback=self.parse_data)
yield scrapy.Request(url, meta={'file_name': 'offset-1.json'})
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/portugal_records.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -17,4 +17,4 @@ class PortugalRecords(PortugalBase):
"""
name = 'portugal_records'
data_type = 'record_package'
url = 'http://www.base.gov.pt/api/Record/GetRecords?offset=1'
url = 'http://www.base.gov.pt/api/Record/GetRecords'
4 changes: 2 additions & 2 deletions kingfisher_scrapy/spiders/portugal_releases.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -16,5 +16,5 @@ class PortugalReleases(PortugalBase):
http://www.base.gov.pt/swagger/index.html
"""
name = 'portugal_releases'
data_type = 'release_package_list'
url = 'http://www.base.gov.pt/api/Release/GetReleases?offset=1'
data_type = 'release_package'
url = 'http://www.base.gov.pt/api/Release/GetReleases'

0 comments on commit 4bc56b4

Please sign in to comment.