import scrapy

from kingfisher_scrapy.base_spider import SimpleSpider
from kingfisher_scrapy.util import components, handle_http_error


class Slovenia(SimpleSpider):
    """
    Domain
      Ministry of Public Administration Slovenia
    """
    name = 'slovenia'

    # SimpleSpider
    data_type = 'release_package'

    # Directory listing page; file links found on it are appended to this URL.
    url = 'http://tbfy.ijs.si/public/ocds/mju/'

    def start_requests(self):
        """Request the directory listing page and hand the response to ``parse_list``."""
        yield scrapy.Request(self.url, meta={'file_name': 'list.html'}, callback=self.parse_list)

    @handle_http_error
    def parse_list(self, response):
        """
        Yield one request per link on the listing page whose href contains both 'ocds' and 'json'.

        :param response: the response for the listing page requested in ``start_requests``
        """
        for href in response.xpath('//a/@href').getall():
            # Each substring needs its own ``in`` test: ``'ocds' and 'json' in href`` evaluates as
            # ``'ocds' and ('json' in href)``, and the non-empty literal 'ocds' is always truthy,
            # so that form only ever checked for 'json'.
            # NOTE(review): confirm the published file names contain 'ocds'; otherwise nothing is yielded.
            if 'ocds' in href and 'json' in href:
                # hrefs are appended to the listing URL as-is, i.e. they are assumed to be relative
                # file names - TODO confirm against the live listing.
                # components(-1) presumably derives the file name from the last URL path component.
                yield self.build_request(f'{self.url}{href}', formatter=components(-1))