diff --git a/kingfisher_scrapy/base_spider.py b/kingfisher_scrapy/base_spider.py index 5a9f9567..a3aa62c7 100644 --- a/kingfisher_scrapy/base_spider.py +++ b/kingfisher_scrapy/base_spider.py @@ -8,7 +8,7 @@ from jsonpointer import resolve_pointer from kingfisher_scrapy import util -from kingfisher_scrapy.exceptions import KingfisherScrapyError, SpiderArgumentError +from kingfisher_scrapy.exceptions import MissingNextLinkError, SpiderArgumentError from kingfisher_scrapy.items import File, FileError, FileItem from kingfisher_scrapy.util import handle_http_error @@ -379,4 +379,4 @@ def next_link(self, response): return self.build_request(url, formatter=self.next_page_formatter) if response.meta['depth'] == 0: - raise KingfisherScrapyError('next link not found on the first page: {}'.format(response.url)) + raise MissingNextLinkError('next link not found on the first page: {}'.format(response.url)) diff --git a/kingfisher_scrapy/exceptions.py b/kingfisher_scrapy/exceptions.py index da83a3a5..f1f0e191 100644 --- a/kingfisher_scrapy/exceptions.py +++ b/kingfisher_scrapy/exceptions.py @@ -12,3 +12,7 @@ class SpiderArgumentError(KingfisherScrapyError): class MissingRequiredFieldError(KingfisherScrapyError, KeyError): """Raised when an item is missing a required field""" + + +class MissingNextLinkError(KingfisherScrapyError): + """Raised when a next link is not found on the first page of results""" diff --git a/tests/__init__.py b/tests/__init__.py index 3db92b32..13d38bad 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -7,8 +7,10 @@ from kingfisher_scrapy.base_spider import BaseSpider -def response_fixture(**kwargs): - request = Request('http://example.com', meta={'file_name': 'test', 'depth': 0}) +def response_fixture(meta=None, **kwargs): + if meta is None: + meta = {'file_name': 'test'} + request = Request('http://example.com', meta=meta) if 'status' not in kwargs: kwargs['status'] = 200 if 'body' not in kwargs: diff --git a/tests/test_links_spider.py b/tests/test_links_spider.py index 2fde90fd..65b889ac 100644 --- a/tests/test_links_spider.py +++ b/tests/test_links_spider.py @@ -2,7 +2,7 @@ from scrapy.http import Request from kingfisher_scrapy.base_spider import LinksSpider -from kingfisher_scrapy.exceptions import KingfisherScrapyError +from kingfisher_scrapy.exceptions import MissingNextLinkError from kingfisher_scrapy.items import File, FileError from tests import response_fixture, spider_with_crawler @@ -65,7 +65,13 @@ def test_parse_200(): def test_next_link_not_found(): spider = spider_with_crawler(spider_class=LinksSpider) spider.next_page_formatter = lambda url: 'next.json' + body = '{"links": {"next": ""}}' - with pytest.raises(KingfisherScrapyError) as e: - assert spider.next_link(response_fixture(body='{"links": {"next": ""}}')) + with pytest.raises(MissingNextLinkError) as e: + meta = {'file_name': 'test', 'depth': 0} + spider.next_link(response_fixture(meta=meta, body=body)) assert str(e.value) == 'next link not found on the first page: http://example.com' + + meta = {'file_name': 'test', 'depth': 10} + response = spider.next_link(response_fixture(meta=meta, body=body)) + assert response is None