Skip to content

Commit

Permalink
add suggest changes
Browse files Browse the repository at this point in the history
  • Loading branch information
cecicasco committed Dec 29, 2021
1 parent b9b5a01 commit a96d80c
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 9 deletions.
29 changes: 26 additions & 3 deletions kingfisher_scrapy/spiders/europe_dynamic_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,30 @@
from kingfisher_scrapy.util import components, handle_http_error, join


class EuropeDynamicBase(CompressedFileSpider):
class EuropeanDynamicsBase(CompressedFileSpider):

"""
This class makes it easy to collect data from a European Dynamics API:
#. Inherit from ``EuropeanDynamicsBase``
#. Set a ``base_url`` class attribute with the portal's scheme and domain, without a trailing slash (for example, ``https://www.ghaneps.gov.gh``)
#. Set a ``default_from_date`` class attribute with the first month to scrape, in ``YYYY-MM`` format (for example, ``2019-07``)
.. code-block:: python
from kingfisher_scrapy.spiders.europe_dynamic_base import EuropeanDynamicsBase
class MySpider(EuropeanDynamicsBase):
name = 'my_spider'
# BaseSpider
default_from_date = '2019-07'
# EuropeanDynamicsBase
base_url = 'http://base-url'
"""

# SimpleSpider
data_type = 'record_package'
date_format = 'year-month'
Expand All @@ -22,12 +45,12 @@ def start_requests(self):
@handle_http_error
def parse_list(self, response):
urls = response.json()['packagesPerMonth']
for url in reversed(urls):
for number, url in enumerate(reversed(urls)):
path = urlsplit(url).path
if self.from_date and self.until_date:
# URL looks like https://www.zppa.org.zm/ocds/services/recordpackage/getrecordpackage/2016/7
year, month = map(int, url.rsplit('/', 2)[1:])
url_date = datetime.datetime(year, month, 1)
if not (self.from_date <= url_date <= self.until_date):
continue
yield self.build_request(self.base_url+path, formatter=join(components(-2), extension='zip'))
yield self.build_request(f'{self.base_url}{path}', formatter=join(components(-2), extension='zip'), priority=number * -1)
5 changes: 3 additions & 2 deletions kingfisher_scrapy/spiders/ghana.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from kingfisher_scrapy.spiders.europe_dynamic_base import EuropeDynamicBase
from kingfisher_scrapy.spiders.europe_dynamic_base import EuropeanDynamicsBase


class Ghana(EuropeDynamicBase):
class Ghana(EuropeanDynamicsBase):
"""
Domain
Ghana Electronic Procurement System (GHANEPS)
Expand All @@ -18,4 +18,5 @@ class Ghana(EuropeDynamicBase):
# BaseSpider
default_from_date = '2019-07'

# EuropeanDynamicsBase
base_url = 'https://www.ghaneps.gov.gh'
5 changes: 3 additions & 2 deletions kingfisher_scrapy/spiders/malta.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from kingfisher_scrapy.spiders.europe_dynamic_base import EuropeDynamicBase
from kingfisher_scrapy.spiders.europe_dynamic_base import EuropeanDynamicsBase


class Malta(EuropeDynamicBase):
class Malta(EuropeanDynamicsBase):
"""
Domain
Malta
Expand All @@ -20,4 +20,5 @@ class Malta(EuropeDynamicBase):
# BaseSpider
default_from_date = '2019-10'

# EuropeanDynamicsBase
base_url = 'http://demowww.etenders.gov.mt'
5 changes: 3 additions & 2 deletions kingfisher_scrapy/spiders/zambia.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from kingfisher_scrapy.spiders.europe_dynamic_base import EuropeDynamicBase
from kingfisher_scrapy.spiders.europe_dynamic_base import EuropeanDynamicsBase


class Zambia(EuropeDynamicBase):
class Zambia(EuropeanDynamicsBase):
"""
Domain
Zambia Public Procurement Authority (ZPPA)
Expand All @@ -19,4 +19,5 @@ class Zambia(EuropeDynamicBase):
# BaseSpider
ocds_version = '1.0'

# EuropeanDynamicsBase
base_url = 'https://www.zppa.org.zm'

0 comments on commit a96d80c

Please sign in to comment.