Skip to content

Commit

Permalink
Update URLs, add date filters and increase download delay
Browse files Browse the repository at this point in the history
  • Loading branch information
aguilerapy committed May 4, 2021
1 parent 7ffafec commit 437154f
Showing 1 changed file with 22 additions and 4 deletions.
26 changes: 22 additions & 4 deletions kingfisher_scrapy/spiders/uganda_releases.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,37 +8,55 @@ class Uganda(IndexSpider):
"""
Domain
Government Procurement Portal (GPP) of Public Procurement and Disposal of Public Assets Authority (PPDA)
Spider arguments
from_date
Download only data from this year onward (YYYY format). Defaults to '2017'.
The year refers to the start of the fiscal year range, e.g. if from_date='2017' then fiscal_year='2017-2018'
until_date
Download only data until this year (YYYY format). Defaults to the current year.
The year refers to the start of the fiscal year range, e.g. if until_date='2017' then fiscal_year='2017-2018'
API documentation
https://docs.google.com/spreadsheets/d/10tVioy-VOQa1FwWoRl5e1pMbGpiymA0iycNcoDFkvks/edit#gid=365266172
"""
# Spider identifier used by Scrapy to select this spider.
name = 'uganda_releases'
# Throttle requests heavily (Scrapy interprets this value in seconds) to avoid
# the API's HTTP 429 "Too Many Requests" error.
download_delay = 30
# from_date / until_date are given as years (YYYY), as described in the class docstring.
date_format = 'year'
# Issue one request at a time; together with download_delay this keeps the
# request rate low enough for the rate-limited API.
custom_settings = {
    'CONCURRENT_REQUESTS': 1,
}

# BaseSpider
# Earliest fiscal-year start used when no from_date argument is supplied.
default_from_date = '2017'

# SimpleSpider
data_type = 'release_package'

# IndexSpider
# JSON pointer into the first list response; presumably the total number of
# pages to request -- confirm against IndexSpider.
total_pages_pointer = '/data/last_page'
# Derives file names from the 'page' query-string parameter
# (NOTE(review): behaviour of `parameters` is defined outside this view).
formatter = staticmethod(parameters('page'))
# Single source of truth for the PDE list endpoint; start_requests uses it too.
base_url = 'https://gpppapi.com/adminapi/public/api/pdes'
yield_list_results = False

def start_requests(self):
    """
    Yield the request for the first page of the PDE (procuring entity) list.

    The response is handled by ``parse_list``, which receives ``parse_data``
    as the callback to use for the list pages.
    """
    # Pass the URL exactly once: a second positional argument would bind to
    # Request's ``callback`` parameter and clash with the keyword below.
    yield scrapy.Request(
        self.base_url,
        meta={'file_name': 'page-1.json'},
        callback=self.parse_list,
        cb_kwargs={'callback': self.parse_data},
    )

@handle_http_error
def parse_data(self, response):
pattern = 'https://gpp.ppda.go.ug/adminapi/public/api/open-data/v1/releases/{}?fy={}&pde={}'
pattern = 'https://gpppapi.com/adminapi/public/api/open-data/v1/releases/{}?fy={}&pde={}'

data = response.json()
for pdes in data['data']['data']:
for plans in pdes['procurement_plans']:
for tag in ('planning', 'tender', 'award', 'contract'):
if self.from_date and self.until_date:
start_year = int(plans['financial_year'].split("-")[0])
if not (self.from_date.year <= start_year <= self.until_date.year):
continue
yield self.build_request(
pattern.format(tag, plans['financial_year'], plans['pde_id']),
formatter=join(components(-1), parameters('fy', 'pde'))
Expand Down

0 comments on commit 437154f

Please sign in to comment.