Merge branch 'master' into 347-add_docstrings
# Conflicts:
#	kingfisher_scrapy/spiders/armenia.py
#	kingfisher_scrapy/spiders/australia_nsw.py
#	kingfisher_scrapy/spiders/honduras_portal_records.py
#	kingfisher_scrapy/spiders/honduras_portal_releases.py
#	kingfisher_scrapy/spiders/kenya_makueni.py
#	kingfisher_scrapy/spiders/nepal_dhangadhi.py
#	kingfisher_scrapy/spiders/nepal_portal.py
#	kingfisher_scrapy/spiders/nigeria_portal.py
#	kingfisher_scrapy/spiders/uganda_releases.py
aguilerapy committed Jun 4, 2020
2 parents 8c12a73 + 996614a commit 0029767
Showing 88 changed files with 1,351 additions and 1,529 deletions.
1 change: 0 additions & 1 deletion docs/api/base_spider.rst
@@ -4,4 +4,3 @@ Base Spider
.. automodule:: kingfisher_scrapy.base_spider
:members:
:undoc-members:

1 change: 0 additions & 1 deletion docs/api/exceptions.rst
@@ -4,4 +4,3 @@ Exceptions
.. automodule:: kingfisher_scrapy.exceptions
:members:
:undoc-members:

6 changes: 6 additions & 0 deletions docs/api/extensions.rst
@@ -0,0 +1,6 @@
Extensions
==========

.. automodule:: kingfisher_scrapy.extensions
:members:
:undoc-members:
2 changes: 2 additions & 0 deletions docs/api/index.rst
@@ -4,4 +4,6 @@ API Reference
.. toctree::

base_spider.rst
extensions.rst
util.rst
exceptions.rst
6 changes: 6 additions & 0 deletions docs/api/util.rst
@@ -0,0 +1,6 @@
Utilities
=========

.. automodule:: kingfisher_scrapy.util
:members:
:undoc-members:
1 change: 1 addition & 0 deletions docs/index.rst
@@ -7,6 +7,7 @@ You can:

- :doc:`Download data to your computer, by installing Kingfisher Collect<local>`
- :doc:`Download data to a remote server, by using Scrapyd<scrapyd>`
- :doc:`Integrate with Kingfisher Process<kingfisher_process>`

You can also try using Kingfisher Collect with `Scrapy Cloud <https://scrapinghub.com/scrapy-cloud>`_.

19 changes: 19 additions & 0 deletions docs/kingfisher_process.rst
@@ -0,0 +1,19 @@
Integrate with Kingfisher Process
=================================

Besides storing the scraped data on disk, you can also send them to an instance of `Kingfisher Process <https://kingfisher-process.readthedocs.io/>`_ for processing.

To do that, you need to deploy an instance of Kingfisher Process, including its `web app <https://kingfisher-process.readthedocs.io/en/latest/web.html#web-app>`__. Then, set the following either as environment variables or as Scrapy settings in ``kingfisher_scrapy/settings.py``:

``KINGFISHER_API_URI``
The URL from which Kingfisher Process' `web app <https://kingfisher-process.readthedocs.io/en/latest/web.html#web-app>`_ is served. Do not include a trailing slash.
``KINGFISHER_API_KEY``
One of the API keys in Kingfisher Process' `API_KEYS <https://kingfisher-process.readthedocs.io/en/latest/config.html#web-api>`__ setting.

For example, set the environment variables, then run ``scrapy crawl`` commands:

.. code-block:: bash

   export KINGFISHER_API_URI='http://127.0.0.1:5000'
   export KINGFISHER_API_KEY=1234
   scrapy crawl my_spider
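
The same two values can instead be set as Scrapy settings. A minimal sketch, assuming a local deployment on the default port used above (the values are placeholders, not a real deployment):

```python
# kingfisher_scrapy/settings.py (sketch; values are assumptions for a local deployment)
KINGFISHER_API_URI = 'http://127.0.0.1:5000'  # no trailing slash
KINGFISHER_API_KEY = '1234'
```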
20 changes: 8 additions & 12 deletions docs/writing-spiders.rst
@@ -52,21 +52,17 @@ Here is a sample:

.. code-block:: python

   -from kingfisher_scrapy.util import handle_error
   +from kingfisher_scrapy.base_spider import SimpleSpider
   +from kingfisher_scrapy.util import components, handle_http_error


   -class VerySimple(BaseSpider):
   -    name = "very_simple"
   +class VerySimple(SimpleSpider):
   +    name = 'very_simple'
   +    data_type = 'release_package'

        def start_requests(self):
   -        # This API only has one URL to get. Make a request for that, and set a filename
   -        yield scrapy.Request(
   -            url='https://buyandsell.gc.ca/cds/public/ocds/tpsgc-pwgsc_ocds_EF-FY-13-14.json',
   -            meta={'kf_filename': '13-14.json'}
   -        )
   -
   -    @handle_error
   -    def parse(self, response):
   -        yield self.build_file_from_response(response, response.request.meta['kf_filename'], data_type='release_package')
   +        # Request the source's only URL, and transform the URL to a file name using ``basename``.
   +        url = 'https://buyandsell.gc.ca/cds/public/ocds/tpsgc-pwgsc_ocds_EF-FY-13-14.json'
   +        yield self.build_request(url, formatter=components(-1))
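
For context, ``components(-1)`` returns a callable that builds a file name from the last component of the URL's path. A simplified sketch of such a formatter (not the library's actual implementation, which may differ in details):

```python
from urllib.parse import urlsplit


def components(start, stop=None):
    """Return a formatter that joins a URL's path components [start:stop].

    Simplified sketch of ``kingfisher_scrapy.util.components``.
    """
    def formatter(url):
        # Split the path, drop empty segments, keep the requested slice.
        parts = [p for p in urlsplit(url).path.split('/') if p]
        return '/'.join(parts[start:stop])
    return formatter


url = 'https://buyandsell.gc.ca/cds/public/ocds/tpsgc-pwgsc_ocds_EF-FY-13-14.json'
print(components(-1)(url))  # tpsgc-pwgsc_ocds_EF-FY-13-14.json
```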
Spider properties
-----------------
