diff --git a/kingfisher_scrapy/spiders/nigeria_budeshi_base.py b/kingfisher_scrapy/spiders/nigeria_budeshi_base.py new file mode 100644 index 00000000..417c1c05 --- /dev/null +++ b/kingfisher_scrapy/spiders/nigeria_budeshi_base.py @@ -0,0 +1,23 @@ +import json + +import scrapy + +from kingfisher_scrapy.base_spider import SimpleSpider +from kingfisher_scrapy.util import components, handle_http_error + + +class NigeriaBudeshiBase(SimpleSpider): + def start_requests(self): + yield scrapy.Request( + 'https://budeshi.ng/api/project_list', + meta={'file_name': 'project_list.json'}, + callback=self.parse_list + ) + + @handle_http_error + def parse_list(self, response): + project_list = json.loads(response.text) + for project in project_list: + yield self.build_request(self.url.format(project['id']), formatter=components(-2)) + if self.sample: + return diff --git a/kingfisher_scrapy/spiders/nigeria_budeshi_records.py b/kingfisher_scrapy/spiders/nigeria_budeshi_records.py new file mode 100644 index 00000000..a5d8d39c --- /dev/null +++ b/kingfisher_scrapy/spiders/nigeria_budeshi_records.py @@ -0,0 +1,14 @@ +from kingfisher_scrapy.spiders.nigeria_budeshi_base import NigeriaBudeshiBase + + +class NigeriaBudeshiRecords(NigeriaBudeshiBase): + """ + API documentation + https://budeshi.ng/Api + Spider arguments + sample + Download only the record package for the first project listed at https://budeshi.ng/api/project_list. 
+ """ + name = 'nigeria_budeshi_records' + data_type = 'record_package' + url = 'https://budeshi.ng/api/record/{}' diff --git a/kingfisher_scrapy/spiders/nigeria_budeshi_releases.py b/kingfisher_scrapy/spiders/nigeria_budeshi_releases.py new file mode 100644 index 00000000..52cc36b4 --- /dev/null +++ b/kingfisher_scrapy/spiders/nigeria_budeshi_releases.py @@ -0,0 +1,14 @@ +from kingfisher_scrapy.spiders.nigeria_budeshi_base import NigeriaBudeshiBase + + +class NigeriaBudeshiReleases(NigeriaBudeshiBase): + """ + API documentation + https://budeshi.ng/Api + Spider arguments + sample + Download only the release package for the first project listed at https://budeshi.ng/api/project_list. + """ + name = 'nigeria_budeshi_releases' + data_type = 'release_package' + url = 'https://budeshi.ng/api/releases/{}' diff --git a/kingfisher_scrapy/spiders/scotland_base.py b/kingfisher_scrapy/spiders/scotland_base.py index 56aa38eb..2f8ac806 100644 --- a/kingfisher_scrapy/spiders/scotland_base.py +++ b/kingfisher_scrapy/spiders/scotland_base.py @@ -5,10 +5,17 @@ class ScotlandBase(SimpleSpider): + default_from_date = '2019-01' date_format = 'year-month' - def parse_requests(self, pattern): + @classmethod + def from_crawler(cls, crawler, from_date=None, *args, **kwargs): + if not from_date: + from_date = cls.default_from_date + return super().from_crawler(crawler, from_date=from_date, *args, **kwargs) + + def start_requests(self): notice_types = [ 1, # OJEU - F1 - Prior Information Notice 2, # OJEU - F2 - Contract Notice @@ -33,18 +40,12 @@ def parse_requests(self, pattern): 104, # Site Notice - Quick Quote Award ] - now = date.today() - if self.from_date: - start = date(self.from_date.year, self.from_date.month, 1) - else: - start = date(now.year - 1, now.month, 1) - if self.sample: - start = now - - for d in date_range_by_month(start, now): - date_string = '{:02d}-{:04d}'.format(d.month, d.year) + for year_month in date_range_by_month(self.from_date, date.today()): + date_string = 
year_month.strftime('%m-%Y') for notice_type in notice_types: yield self.build_request( - pattern.format(date_string, notice_type), + self.url.format(date_string, notice_type), formatter=parameters('noticeType', 'dateFrom') ) + if self.sample: + return diff --git a/kingfisher_scrapy/spiders/scotland_proactis.py b/kingfisher_scrapy/spiders/scotland_proactis.py index d3599c0c..a44c0d78 100644 --- a/kingfisher_scrapy/spiders/scotland_proactis.py +++ b/kingfisher_scrapy/spiders/scotland_proactis.py @@ -7,13 +7,10 @@ class ScotlandProactis(ScotlandBase): https://sandbox4.proactislabs.com/v1 Spider arguments sample - Downloads packages for releases dated one year ago, for each notice type available. + Download this month's release packages for each notice type available. from_date - Download only data from this month onward (YYYY-MM format). Defaults to one year back. + Download only data from this month onward (YYYY-MM format). Defaults to '2019-01'. """ name = 'scotland_proactis' data_type = 'release_package' - - def start_requests(self): - pattern = 'https://sandbox4.proactislabs.com/v1/Notices?dateFrom={}&outputType=0&noticeType={}' - return self.parse_requests(pattern) + url = 'https://sandbox4.proactislabs.com/v1/Notices?dateFrom={}&outputType=0&noticeType={}' diff --git a/kingfisher_scrapy/spiders/scotland_public_contracts.py b/kingfisher_scrapy/spiders/scotland_public_contracts.py index 0a4166a5..2be9fb68 100644 --- a/kingfisher_scrapy/spiders/scotland_public_contracts.py +++ b/kingfisher_scrapy/spiders/scotland_public_contracts.py @@ -7,13 +7,10 @@ class ScotlandPublicContracts(ScotlandBase): https://api.publiccontractsscotland.gov.uk/v1 Spider arguments sample - Downloads packages for releases dated one year ago, for each notice type available. + Download this month's release packages for each notice type available. from_date - Download only data from this month onward (YYYY-MM format). Defaults to one year back. 
+ Download only data from this month onward (YYYY-MM format). Defaults to '2019-01'. """ name = 'scotland_public_contracts' data_type = 'release_package' - - def start_requests(self): - pattern = 'https://api.publiccontractsscotland.gov.uk/v1/Notices?dateFrom={}&outputType=0&noticeType={}' - return self.parse_requests(pattern) + url = 'https://api.publiccontractsscotland.gov.uk/v1/Notices?dateFrom={}&outputType=0&noticeType={}'