Skip to content

Commit

Permalink
Merge branch 'master' into 439-new-portugal
Browse files Browse the repository at this point in the history
# Conflicts:
#	kingfisher_scrapy/spiders/portugal_base.py
#	kingfisher_scrapy/spiders/portugal_records.py
#	kingfisher_scrapy/spiders/portugal_releases.py
  • Loading branch information
aguilerapy committed Aug 26, 2020
2 parents 4095667 + ebe94d5 commit f04f4f7
Show file tree
Hide file tree
Showing 6 changed files with 70 additions and 24 deletions.
23 changes: 23 additions & 0 deletions kingfisher_scrapy/spiders/nigeria_budeshi_base.py
@@ -0,0 +1,23 @@
import json

import scrapy

from kingfisher_scrapy.base_spider import SimpleSpider
from kingfisher_scrapy.util import components, handle_http_error


class NigeriaBudeshiBase(SimpleSpider):
    """
    Shared crawl logic for the Budeshi (Nigeria) spiders.

    Subclasses set ``url`` to a format string with one placeholder, which is filled with each project's ``id``.
    """

    def start_requests(self):
        """Request the project list; its entries are followed up one by one in ``parse_list``."""
        list_url = 'https://budeshi.ng/api/project_list'
        request_meta = {'file_name': 'project_list.json'}
        yield scrapy.Request(list_url, meta=request_meta, callback=self.parse_list)

    @handle_http_error
    def parse_list(self, response):
        """Yield one request per project in the response, stopping after the first when ``sample`` is set."""
        for entry in json.loads(response.text):
            project_url = self.url.format(entry['id'])
            yield self.build_request(project_url, formatter=components(-2))
            # In sample mode a single project is enough.
            if self.sample:
                break
14 changes: 14 additions & 0 deletions kingfisher_scrapy/spiders/nigeria_budeshi_records.py
@@ -0,0 +1,14 @@
from kingfisher_scrapy.spiders.nigeria_budeshi_base import NigeriaBudeshiBase


class NigeriaBudeshiRecords(NigeriaBudeshiBase):
    """
    API documentation
      https://budeshi.ng/Api
    Spider arguments
      sample
        Download only the record package of the first project listed at https://budeshi.ng/api/project_list.
    """
    name = 'nigeria_budeshi_records'
    url = 'https://budeshi.ng/api/record/{}'
    data_type = 'record_package'
14 changes: 14 additions & 0 deletions kingfisher_scrapy/spiders/nigeria_budeshi_releases.py
@@ -0,0 +1,14 @@
from kingfisher_scrapy.spiders.nigeria_budeshi_base import NigeriaBudeshiBase


class NigeriaBudeshiReleases(NigeriaBudeshiBase):
    """
    API documentation
      https://budeshi.ng/Api
    Spider arguments
      sample
        Download only the first release package from https://budeshi.ng/api/project_list.
    """
    name = 'nigeria_budeshi_releases'
    data_type = 'release_package'
    url = 'https://budeshi.ng/api/releases/{}'
25 changes: 13 additions & 12 deletions kingfisher_scrapy/spiders/scotland_base.py
Expand Up @@ -5,10 +5,17 @@


class ScotlandBase(SimpleSpider):
default_from_date = '2019-01'
date_format = 'year-month'

def parse_requests(self, pattern):
@classmethod
def from_crawler(cls, crawler, from_date=None, *args, **kwargs):
if not from_date:
from_date = cls.default_from_date

return super().from_crawler(crawler, from_date=from_date, *args, **kwargs)

def start_requests(self):
notice_types = [
1, # OJEU - F1 - Prior Information Notice
2, # OJEU - F2 - Contract Notice
Expand All @@ -33,18 +40,12 @@ def parse_requests(self, pattern):
104, # Site Notice - Quick Quote Award
]

now = date.today()
if self.from_date:
start = date(self.from_date.year, self.from_date.month, 1)
else:
start = date(now.year - 1, now.month, 1)
if self.sample:
start = now

for d in date_range_by_month(start, now):
date_string = '{:02d}-{:04d}'.format(d.month, d.year)
for year_month in date_range_by_month(self.from_date, date.today()):
date_string = year_month.strftime('%m-%Y')
for notice_type in notice_types:
yield self.build_request(
pattern.format(date_string, notice_type),
self.url.format(date_string, notice_type),
formatter=parameters('noticeType', 'dateFrom')
)
if self.sample:
return
9 changes: 3 additions & 6 deletions kingfisher_scrapy/spiders/scotland_proactis.py
Expand Up @@ -7,13 +7,10 @@ class ScotlandProactis(ScotlandBase):
https://sandbox4.proactislabs.com/v1
Spider arguments
sample
Downloads packages for releases dated one year ago, for each notice type available.
Download this month's release packages for each notice type available.
from_date
Download only data from this month onward (YYYY-MM format). Defaults to one year back.
Download only data from this month onward (YYYY-MM format). Defaults to '2019-01'.
"""
name = 'scotland_proactis'
data_type = 'release_package'

def start_requests(self):
pattern = 'https://sandbox4.proactislabs.com/v1/Notices?dateFrom={}&outputType=0&noticeType={}'
return self.parse_requests(pattern)
url = 'https://sandbox4.proactislabs.com/v1/Notices?dateFrom={}&outputType=0&noticeType={}'
9 changes: 3 additions & 6 deletions kingfisher_scrapy/spiders/scotland_public_contracts.py
Expand Up @@ -7,13 +7,10 @@ class ScotlandPublicContracts(ScotlandBase):
https://api.publiccontractsscotland.gov.uk/v1
Spider arguments
sample
Downloads packages for releases dated one year ago, for each notice type available.
Download this month's release packages for each notice type available.
from_date
Download only data from this month onward (YYYY-MM format). Defaults to one year back.
Download only data from this month onward (YYYY-MM format). Defaults to '2019-01'.
"""
name = 'scotland_public_contracts'
data_type = 'release_package'

def start_requests(self):
pattern = 'https://api.publiccontractsscotland.gov.uk/v1/Notices?dateFrom={}&outputType=0&noticeType={}'
return self.parse_requests(pattern)
url = 'https://api.publiccontractsscotland.gov.uk/v1/Notices?dateFrom={}&outputType=1&noticeType={}'

0 comments on commit f04f4f7

Please sign in to comment.