Skip to content

Commit

Permalink
Merge 88e13d9 into e07e00f
Browse files Browse the repository at this point in the history
  • Loading branch information
aguilerapy committed Aug 20, 2020
2 parents e07e00f + 88e13d9 commit feba74a
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 0 deletions.
27 changes: 27 additions & 0 deletions kingfisher_scrapy/spiders/portugal_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import scrapy
from urllib.parse import parse_qs, urlsplit

from kingfisher_scrapy.base_spider import SimpleSpider
from kingfisher_scrapy.util import handle_http_error, parameters, replace_parameter


class PortugalBase(SimpleSpider):
    """
    Common behaviour for spiders that page through an ``offset``-indexed API.

    Subclasses are expected to set ``url`` to the first page's URL, which must
    already contain a query string with an ``offset`` parameter.
    """

    # Used as the start date when only ``until_date`` is supplied (per the
    # subclasses' docstrings).
    default_from_date = '2010-01-01'
    # Scrapy's per-spider download delay, in seconds.
    download_delay = 1

    def start_requests(self):
        """
        Yield the request for the first page.

        When both ``from_date`` and ``until_date`` are set, they are appended
        to the URL as the ``contractStartDate`` and ``contractEndDate`` query
        string parameters.
        """
        first_url = self.url
        if self.from_date and self.until_date:
            # NOTE(review): the dates are interpolated with str(); confirm they
            # are in the YYYY-MM-DD form the API expects at this point.
            date_filter = '&contractStartDate={}&contractEndDate={}'.format(self.from_date, self.until_date)
            first_url = first_url + date_filter
        yield scrapy.Request(first_url, meta={'file_name': 'offset-1.json'}, callback=self.parse_data)

    @handle_http_error
    def parse_data(self, response):
        """
        Yield whatever ``parse`` produces for this page, then request the next page.

        The next page is the current request's URL with its ``offset`` query
        string parameter incremented by one. No further page is requested when
        ``sample`` is set.
        """
        yield from self.parse(response)

        if self.sample:
            return

        current_url = response.request.url
        query_string = urlsplit(current_url).query
        # parse_qs maps each parameter to a list of values; use the first one.
        current_offset = int(parse_qs(query_string)['offset'][0])
        following_url = replace_parameter(current_url, 'offset', current_offset + 1)
        yield self.build_request(following_url, formatter=parameters('offset'), callback=self.parse_data)
20 changes: 20 additions & 0 deletions kingfisher_scrapy/spiders/portugal_records.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from kingfisher_scrapy.spiders.portugal_base import PortugalBase


class PortugalRecords(PortugalBase):
    """
    Swagger API documentation
      http://www.base.gov.pt/swagger/index.html
    Spider arguments
      sample
        Download only one record.
      from_date
        Download only data from this date onward (YYYY-MM-DD format).
        If ``until_date`` is provided, defaults to '2010-01-01'.
      until_date
        Download only data until this date (YYYY-MM-DD format).
        If ``from_date`` is provided, defaults to today.
    """

    # Spider name.
    name = 'portugal_records'

    # First page of the records endpoint; the base class increments ``offset``.
    url = 'http://www.base.gov.pt/api/Record/GetRecords?offset=1'

    # Data type label for the downloaded content.
    data_type = 'record_package_json_lines'
20 changes: 20 additions & 0 deletions kingfisher_scrapy/spiders/portugal_releases.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from kingfisher_scrapy.spiders.portugal_base import PortugalBase


class PortugalReleases(PortugalBase):
    """
    Swagger API documentation
      http://www.base.gov.pt/swagger/index.html
    Spider arguments
      sample
        Download only one release.
      from_date
        Download only data from this date onward (YYYY-MM-DD format).
        If ``until_date`` is provided, defaults to '2010-01-01'.
      until_date
        Download only data until this date (YYYY-MM-DD format).
        If ``from_date`` is provided, defaults to today.
    """

    # Spider name.
    name = 'portugal_releases'

    # First page of the releases endpoint; the base class increments ``offset``.
    url = 'http://www.base.gov.pt/api/Release/GetReleases?offset=1'

    # Data type label for the downloaded content.
    data_type = 'release_package_json_lines'

0 comments on commit feba74a

Please sign in to comment.