import scrapy

from kingfisher_scrapy.base_spider import SimpleSpider
from kingfisher_scrapy.util import components, handle_http_error


class Slovenia(SimpleSpider):
    """
    Domain
      Ministry of Public Administration Slovenia
    """
    name = 'slovenia'

    # SimpleSpider
    data_type = 'release_package'

    # Index page whose anchors are scanned for data files; each matching href is appended to this URL.
    url = 'http://tbfy.ijs.si/public/ocds/mju/'

    def start_requests(self):
        """Request the index page at ``url`` and pass its response to ``parse_list``."""
        yield scrapy.Request(self.url, meta={'file_name': 'list.html'}, callback=self.parse_list)

    @handle_http_error
    def parse_list(self, response):
        """
        Yield one request for every link on the index page whose href contains both 'ocds' and 'json'.

        :param response: the response for the index page
        """
        for href in response.xpath('//a/@href').getall():
            # Each substring needs its own ``in`` test: ``'ocds' and 'json' in href`` reduces to
            # ``'json' in href``, because a non-empty string literal is always truthy.
            if 'ocds' in href and 'json' in href:
                yield self.build_request(f'{self.url}{href}', formatter=components(-1))
code-block:: bash + + scrapy crawl slovenia + Spain ----- From f8fdfe5379ece70ecda54ff9bf51e57e405d230d Mon Sep 17 00:00:00 2001 From: Yohanna Lisnichuk Date: Wed, 17 Mar 2021 13:27:20 -0300 Subject: [PATCH 3/3] Update kingfisher_scrapy/spiders/slovenia.py --- kingfisher_scrapy/spiders/slovenia.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kingfisher_scrapy/spiders/slovenia.py b/kingfisher_scrapy/spiders/slovenia.py index 24006739..cbab3969 100644 --- a/kingfisher_scrapy/spiders/slovenia.py +++ b/kingfisher_scrapy/spiders/slovenia.py @@ -24,4 +24,4 @@ def parse_list(self, response): html_urls = response.xpath('//a/@href').getall() for url in html_urls: if 'ocds' and 'json' in url: - yield self.build_request(f"{self.url}{url}", formatter=components(-1)) + yield self.build_request(f'{self.url}{url}', formatter=components(-1))