Skip to content

Commit

Permalink
Merge df87a8c into ebe94d5
Browse files Browse the repository at this point in the history
  • Loading branch information
aguilerapy committed Aug 26, 2020
2 parents ebe94d5 + df87a8c commit e939151
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 0 deletions.
33 changes: 33 additions & 0 deletions kingfisher_scrapy/spiders/portugal_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from io import BytesIO
from urllib.parse import parse_qs, urlsplit

import ijson
import scrapy

from kingfisher_scrapy.base_spider import SimpleSpider
from kingfisher_scrapy.util import handle_http_error, parameters, replace_parameter


class PortugalBase(SimpleSpider):
    """
    Shared crawling logic for the Portugal spiders.

    Subclasses are expected to define ``url`` (the first page, which already
    carries an ``offset`` query parameter) and ``data_type``.
    """
    default_from_date = '2010-01-01'
    download_delay = 1

    def start_requests(self):
        """
        Yield the request for the first page.

        The date filter is appended to the URL only when both ``from_date``
        and ``until_date`` are set.
        """
        first_url = self.url
        if self.from_date and self.until_date:
            first_url += '&contractStartDate={}&contractEndDate={}'.format(self.from_date, self.until_date)
        yield scrapy.Request(first_url, meta={'file_name': 'offset-1.json'}, callback=self.parse_data)

    @handle_http_error
    def parse_data(self, response):
        """
        Yield the file built from this page and, unless sampling, the request
        for the next page.
        """
        # ``multiple_values=True`` lets ijson read several top-level JSON
        # values from one body; they are gathered into a single list.
        packages = list(ijson.items(BytesIO(response.body), '', multiple_values=True, use_float=True))
        yield self.build_file_from_response(response, data=packages, data_type=self.data_type)

        # A sample run stops after the first page.
        if self.sample:
            return

        # Derive the next page from the URL that was just requested by
        # incrementing its ``offset`` query parameter.
        current_url = response.request.url
        current_offset = int(parse_qs(urlsplit(current_url).query)['offset'][0])
        following_url = replace_parameter(current_url, 'offset', current_offset + 1)
        yield self.build_request(following_url, formatter=parameters('offset'), callback=self.parse_data)
20 changes: 20 additions & 0 deletions kingfisher_scrapy/spiders/portugal_records.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from kingfisher_scrapy.spiders.portugal_base import PortugalBase


class PortugalRecords(PortugalBase):
    """
    Swagger API documentation
      http://www.base.gov.pt/swagger/index.html
    Spider arguments
      sample
        Download one list of 100 record packages.
      from_date
        Download only the data whose contract signing date is on or after this date (YYYY-MM-DD format).
        Defaults to '2010-01-01' when ``until_date`` is provided.
      until_date
        Download only the data whose contract signing date is on or before this date (YYYY-MM-DD format).
        Defaults to today when ``from_date`` is provided.
    """
    # First page of the records endpoint; the ``offset`` parameter is the page counter.
    url = 'http://www.base.gov.pt/api/Record/GetRecords?offset=1'
    # Each response is stored as a list of record packages.
    data_type = 'record_package_list'
    name = 'portugal_records'
20 changes: 20 additions & 0 deletions kingfisher_scrapy/spiders/portugal_releases.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from kingfisher_scrapy.spiders.portugal_base import PortugalBase


class PortugalReleases(PortugalBase):
    """
    Swagger API documentation
      http://www.base.gov.pt/swagger/index.html
    Spider arguments
      sample
        Download one list of 100 release packages.
      from_date
        Download only the data whose contract signing date is on or after this date (YYYY-MM-DD format).
        Defaults to '2010-01-01' when ``until_date`` is provided.
      until_date
        Download only the data whose contract signing date is on or before this date (YYYY-MM-DD format).
        Defaults to today when ``from_date`` is provided.
    """
    # First page of the releases endpoint; the ``offset`` parameter is the page counter.
    url = 'http://www.base.gov.pt/api/Release/GetReleases?offset=1'
    # Each response is stored as a list of release packages.
    data_type = 'release_package_list'
    name = 'portugal_releases'

0 comments on commit e939151

Please sign in to comment.