diff --git a/kingfisher_scrapy/spiders/armenia.py b/kingfisher_scrapy/spiders/armenia.py index 98a680431..24a85c6a3 100644 --- a/kingfisher_scrapy/spiders/armenia.py +++ b/kingfisher_scrapy/spiders/armenia.py @@ -7,6 +7,11 @@ class Armenia(BaseSpider): + """ + Spider arguments + sample + Download only the first release package in the dataset. + """ name = 'armenia' start_urls = ['https://armeps.am/ocds/release'] @@ -23,7 +28,7 @@ def parse(self, response): data_type='release_package') json_data = json.loads(response.text) - if not (self.sample): + if not self.sample: if 'next_page' in json_data and 'uri' in json_data['next_page']: url = json_data['next_page']['uri'] yield scrapy.Request( diff --git a/kingfisher_scrapy/spiders/australia.py b/kingfisher_scrapy/spiders/australia.py index 85ac50769..802b5140b 100644 --- a/kingfisher_scrapy/spiders/australia.py +++ b/kingfisher_scrapy/spiders/australia.py @@ -6,7 +6,13 @@ class Australia(LinksSpider): - + """ + API documentation + https://data.gov.au/dataset/ds-dga-5c7fa69b-b0e9-4553-b8df-2a022dd2e982/distribution/dist-dga-a7f471ad-e085-49b5-bd6b-1b270ea46e99/details?q= + Spider arguments + sample + Download only data released in 2018. + """ name = 'australia' def start_requests(self): diff --git a/kingfisher_scrapy/spiders/australia_nsw.py b/kingfisher_scrapy/spiders/australia_nsw.py index b16b270e3..9ca3449c4 100644 --- a/kingfisher_scrapy/spiders/australia_nsw.py +++ b/kingfisher_scrapy/spiders/australia_nsw.py @@ -7,6 +7,13 @@ class AustraliaNSW(BaseSpider): + """ + API documentation + https://github.com/NSW-eTendering/NSW-eTendering-API/blob/master/README.md + Spider arguments + sample + Download only 30 releases. 
+ """ name = 'australia_nsw' start_urls = ['https://tenders.nsw.gov.au'] diff --git a/kingfisher_scrapy/spiders/chile_compra_bulk.py b/kingfisher_scrapy/spiders/chile_compra_bulk.py index ba80c0c38..4aa0ab443 100644 --- a/kingfisher_scrapy/spiders/chile_compra_bulk.py +++ b/kingfisher_scrapy/spiders/chile_compra_bulk.py @@ -6,6 +6,11 @@ class ChileCompraBulk(ZipSpider): + """ + Spider arguments + sample + Download only data released in February 2017. + """ name = 'chile_compra_bulk' download_warnsize = 0 download_timeout = 99999 diff --git a/kingfisher_scrapy/spiders/chile_compra_records.py b/kingfisher_scrapy/spiders/chile_compra_records.py index d1329e5a9..a602a3d81 100644 --- a/kingfisher_scrapy/spiders/chile_compra_records.py +++ b/kingfisher_scrapy/spiders/chile_compra_records.py @@ -2,6 +2,13 @@ class ChileCompraRecords(ChileCompraBaseSpider): + """ + Swagger API documentation + https://desarrolladores.mercadopublico.cl/docs/services/5a9ed02f718ed712f4403e75/operations/data-listaa-omes-agno-mes? + Spider arguments + sample + Download only data released in October 2017. + """ name = 'chile_compra_records' def parse(self, response): diff --git a/kingfisher_scrapy/spiders/chile_compra_releases.py b/kingfisher_scrapy/spiders/chile_compra_releases.py index acd16e0ca..fd8f6fca6 100644 --- a/kingfisher_scrapy/spiders/chile_compra_releases.py +++ b/kingfisher_scrapy/spiders/chile_compra_releases.py @@ -2,6 +2,13 @@ class ChileCompraReleases(ChileCompraBaseSpider): + """ + Swagger API documentation + https://desarrolladores.mercadopublico.cl/docs/services/5a9ed02f718ed712f4403e75/operations/data-listaa-omes-agno-mes? + Spider arguments + sample + Download only data released in October 2017. 
+ """ name = 'chile_compra_releases' def parse(self, response): diff --git a/kingfisher_scrapy/spiders/colombia.py b/kingfisher_scrapy/spiders/colombia.py index 5f6328dad..2a5768b2a 100644 --- a/kingfisher_scrapy/spiders/colombia.py +++ b/kingfisher_scrapy/spiders/colombia.py @@ -9,6 +9,19 @@ class Colombia(LinksSpider): + """ + API documentation + https://www.colombiacompra.gov.co/transparencia/api + Swagger API documentation + https://apiocds.colombiacompra.gov.co:8443/apiCCE2.0/ + Spider arguments + sample + Download only the first page of results. + page + The page number from which to start crawling. + year + The year to crawl. See API documentation for valid values. + """ name = 'colombia' sleep = 120 * 60 diff --git a/kingfisher_scrapy/spiders/honduras_portal_records.py b/kingfisher_scrapy/spiders/honduras_portal_records.py index 35dbf90b8..326328775 100644 --- a/kingfisher_scrapy/spiders/honduras_portal_records.py +++ b/kingfisher_scrapy/spiders/honduras_portal_records.py @@ -7,6 +7,13 @@ class HondurasPortalRecords(BaseSpider): + """ + API documentation + http://www.contratacionesabiertas.gob.hn/servicio/ + Spider arguments + sample + Download only the first record package in the dataset. + """ name = 'honduras_portal_records' download_delay = 0.9 diff --git a/kingfisher_scrapy/spiders/honduras_portal_releases.py b/kingfisher_scrapy/spiders/honduras_portal_releases.py index cd1216dd1..c511809d2 100644 --- a/kingfisher_scrapy/spiders/honduras_portal_releases.py +++ b/kingfisher_scrapy/spiders/honduras_portal_releases.py @@ -7,6 +7,13 @@ class HondurasPortalReleases(BaseSpider): + """ + API documentation + http://www.contratacionesabiertas.gob.hn/servicio/ + Spider arguments + sample + Download only the first release package in the dataset. 
+ """ name = 'honduras_portal_releases' download_delay = 0.9 diff --git a/kingfisher_scrapy/spiders/kenya_makueni.py b/kingfisher_scrapy/spiders/kenya_makueni.py index b35a02229..6c099f247 100644 --- a/kingfisher_scrapy/spiders/kenya_makueni.py +++ b/kingfisher_scrapy/spiders/kenya_makueni.py @@ -7,6 +7,13 @@ class KenyaMakueni(BaseSpider): + """ + Swagger API documentation + https://opencontracting.makueni.go.ke/swagger-ui.html#/ocds-controller + Spider arguments + sample + Download only the first 10 release packages in the dataset. + """ name = 'kenya_makueni' url = 'https://opencontracting.makueni.go.ke/api/ocds/package/all?pageSize={}&pageNumber={}' diff --git a/kingfisher_scrapy/spiders/malta.py b/kingfisher_scrapy/spiders/malta.py index a16e282a0..44ce13810 100644 --- a/kingfisher_scrapy/spiders/malta.py +++ b/kingfisher_scrapy/spiders/malta.py @@ -8,6 +8,13 @@ class Malta(ZipSpider): + """ + API documentation + https://docs.google.com/document/d/1VnCEywKkkQ7BcVbT7HlW2s_N_QI8W0KE/edit + Spider arguments + sample + Download only data released in October 2019. + """ name = 'malta' def start_requests(self): diff --git a/kingfisher_scrapy/spiders/mexico_quien_es_quien.py b/kingfisher_scrapy/spiders/mexico_quien_es_quien.py index 38d930058..a17ce4f54 100644 --- a/kingfisher_scrapy/spiders/mexico_quien_es_quien.py +++ b/kingfisher_scrapy/spiders/mexico_quien_es_quien.py @@ -8,6 +8,15 @@ class MexicoQuienEsQuien(BaseSpider): + """ + API documentation + https://quienesquienapi.readthedocs.io/es/latest/ + Swagger API documentation + https://api.quienesquien.wiki/v2/docs/ + Spider arguments + sample + Download a single record package with 10 records. 
+ """ name = 'mexico_quien_es_quien' download_delay = 0.9 url = 'https://api.quienesquien.wiki/v2/contracts?limit={}&offset={}' diff --git a/kingfisher_scrapy/spiders/nepal_dhangadhi.py b/kingfisher_scrapy/spiders/nepal_dhangadhi.py index db5049194..88671c52f 100644 --- a/kingfisher_scrapy/spiders/nepal_dhangadhi.py +++ b/kingfisher_scrapy/spiders/nepal_dhangadhi.py @@ -7,6 +7,11 @@ class NepalDhangadhi(BaseSpider): + """ + Spider arguments + sample + Download only the first release package in the dataset. + """ name = "nepal_dhangadhi" def start_requests(self): diff --git a/kingfisher_scrapy/spiders/nepal_portal.py b/kingfisher_scrapy/spiders/nepal_portal.py index 3af259431..514158952 100644 --- a/kingfisher_scrapy/spiders/nepal_portal.py +++ b/kingfisher_scrapy/spiders/nepal_portal.py @@ -7,6 +7,11 @@ class NepalPortal(BaseSpider): + """ + Spider arguments + sample + Download only data released in 2018. + """ name = 'nepal_portal' def start_requests(self): diff --git a/kingfisher_scrapy/spiders/nigeria_portal.py b/kingfisher_scrapy/spiders/nigeria_portal.py index ad4e2d298..a546a7845 100644 --- a/kingfisher_scrapy/spiders/nigeria_portal.py +++ b/kingfisher_scrapy/spiders/nigeria_portal.py @@ -6,6 +6,11 @@ class NigeriaPortal(BaseSpider): + """ + Spider arguments + sample + Download only the first release package in the dataset. + """ name = 'nigeria_portal' start_urls = ['http://nocopo.bpp.gov.ng/OpenData.aspx'] download_delay = 0.9 diff --git a/kingfisher_scrapy/spiders/paraguay_hacienda.py b/kingfisher_scrapy/spiders/paraguay_hacienda.py index 87ead8a0e..500b6c29b 100644 --- a/kingfisher_scrapy/spiders/paraguay_hacienda.py +++ b/kingfisher_scrapy/spiders/paraguay_hacienda.py @@ -8,6 +8,18 @@ class ParaguayHacienda(BaseSpider): + """ + Swagger API documentation + https://datos.hacienda.gov.py/odmh-api-v1/api-docs/ + Spider arguments + sample + Download only 11 releases. 
+ Environment variables + KINGFISHER_PARAGUAY_HACIENDA_REQUEST_TOKEN + To get an API account and request token go to https://datos.hacienda.gov.py/aplicaciones/new. + KINGFISHER_PARAGUAY_HACIENDA_CLIENT_SECRET + Your generated client secret. + """ name = 'paraguay_hacienda' start_time = None diff --git a/kingfisher_scrapy/spiders/portugal.py b/kingfisher_scrapy/spiders/portugal.py index 81216c14b..b939162e5 100644 --- a/kingfisher_scrapy/spiders/portugal.py +++ b/kingfisher_scrapy/spiders/portugal.py @@ -7,6 +7,13 @@ class Portugal(ZipSpider): + """ + API documentation + https://dados.gov.pt/pt/apidoc/ + Spider arguments + sample + Download only the first file in the dataset. + """ name = 'portugal' download_warnsize = 0 download_timeout = 9999 diff --git a/kingfisher_scrapy/spiders/uganda_releases.py b/kingfisher_scrapy/spiders/uganda_releases.py index c8d38a759..02e770c39 100644 --- a/kingfisher_scrapy/spiders/uganda_releases.py +++ b/kingfisher_scrapy/spiders/uganda_releases.py @@ -7,6 +7,13 @@ class Uganda(BaseSpider): + """ + API documentation + https://docs.google.com/spreadsheets/d/10tVioy-VOQa1FwWoRl5e1pMbGpiymA0iycNcoDFkvks/edit#gid=365266172 + Spider arguments + sample + Download only 19 releases. + """ name = 'uganda_releases' download_delay = 0.9 diff --git a/kingfisher_scrapy/spiders/uruguay_historical.py b/kingfisher_scrapy/spiders/uruguay_historical.py index 0988b2371..310061363 100644 --- a/kingfisher_scrapy/spiders/uruguay_historical.py +++ b/kingfisher_scrapy/spiders/uruguay_historical.py @@ -4,6 +4,13 @@ class UruguayHistorical(ZipSpider): + """ + API documentation + https://www.gub.uy/agencia-compras-contrataciones-estado/datos-y-estadisticas/datos/open-contracting + Spider arguments + sample + Download only data released in 2002. 
+ """ name = 'uruguay_historical' # the files takes too long to be downloaded, so we increase the download timeout download_timeout = 1000 diff --git a/kingfisher_scrapy/spiders/uruguay_records.py b/kingfisher_scrapy/spiders/uruguay_records.py index 8584f44ff..4645f8771 100644 --- a/kingfisher_scrapy/spiders/uruguay_records.py +++ b/kingfisher_scrapy/spiders/uruguay_records.py @@ -6,6 +6,13 @@ class UruguayRecords(UruguayBase): + """ + API documentation + https://www.gub.uy/agencia-compras-contrataciones-estado/datos-y-estadisticas/datos/open-contracting + Spider arguments + sample + Download only 1 record. + """ name = 'uruguay_records' base_record_url = 'https://www.comprasestatales.gub.uy/ocds/record/{}' diff --git a/kingfisher_scrapy/spiders/uruguay_releases.py b/kingfisher_scrapy/spiders/uruguay_releases.py index d980f064f..3e06347c8 100644 --- a/kingfisher_scrapy/spiders/uruguay_releases.py +++ b/kingfisher_scrapy/spiders/uruguay_releases.py @@ -6,6 +6,13 @@ class UruguayReleases(UruguayBase): + """ + API documentation + https://www.gub.uy/agencia-compras-contrataciones-estado/datos-y-estadisticas/datos/open-contracting + Spider arguments + sample + Download only 1 release. + """ name = 'uruguay_releases' def parse_list(self, response): diff --git a/kingfisher_scrapy/spiders/zambia.py b/kingfisher_scrapy/spiders/zambia.py index 4f1de5943..a1bbc723e 100644 --- a/kingfisher_scrapy/spiders/zambia.py +++ b/kingfisher_scrapy/spiders/zambia.py @@ -6,6 +6,11 @@ class Zambia(ZipSpider): + """ + Spider arguments + sample + Download only data released in July 2016. 
+ """ name = 'zambia' def start_requests(self): diff --git a/tests/test_extensions.py b/tests/test_extensions.py index 216dff325..332001430 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -6,7 +6,7 @@ import pytest from scrapy.exceptions import NotConfigured -from kingfisher_scrapy.extensions import KingfisherProcessAPI, KingfisherFilesStore +from kingfisher_scrapy.extensions import KingfisherFilesStore, KingfisherProcessAPI from kingfisher_scrapy.items import FileError from tests import spider_with_crawler