diff --git a/docs/topics/signals.rst b/docs/topics/signals.rst
index 3f29aa32330..886d1b866f7 100644
--- a/docs/topics/signals.rst
+++ b/docs/topics/signals.rst
@@ -295,6 +295,23 @@ request_reached_downloader
     :param spider: the spider that yielded the request
     :type spider: :class:`~scrapy.spiders.Spider` object
 
+request_left_downloader
+-----------------------
+
+.. signal:: request_left_downloader
+.. function:: request_left_downloader(request, spider)
+
+    Sent when a :class:`~scrapy.http.Request` leaves the downloader, even in
+    case of failure.
+
+    This signal does not support returning deferreds from its handlers.
+
+    :param request: the request that left the downloader
+    :type request: :class:`~scrapy.http.Request` object
+
+    :param spider: the spider that yielded the request
+    :type spider: :class:`~scrapy.spiders.Spider` object
+
 response_received
 -----------------
 
diff --git a/scrapy/core/downloader/__init__.py b/scrapy/core/downloader/__init__.py
index 157dc341873..5a2fdadf53b 100644
--- a/scrapy/core/downloader/__init__.py
+++ b/scrapy/core/downloader/__init__.py
@@ -181,6 +181,9 @@ def _downloaded(response):
         def finish_transferring(_):
             slot.transferring.remove(request)
             self._process_queue(spider, slot)
+            self.signals.send_catch_log(signal=signals.request_left_downloader,
+                                        request=request,
+                                        spider=spider)
             return _
 
         return dfd.addBoth(finish_transferring)
diff --git a/scrapy/signals.py b/scrapy/signals.py
index 6b91253029b..cd7ed7fb167 100644
--- a/scrapy/signals.py
+++ b/scrapy/signals.py
@@ -14,6 +14,7 @@
 request_scheduled = object()
 request_dropped = object()
 request_reached_downloader = object()
+request_left_downloader = object()
 response_received = object()
 response_downloaded = object()
 item_scraped = object()
diff --git a/tests/test_request_left.py b/tests/test_request_left.py
new file mode 100644
index 00000000000..5cfef8e7d5f
--- /dev/null
+++ b/tests/test_request_left.py
@@ -0,0 +1,60 @@
+from twisted.internet import defer
+from twisted.trial.unittest import TestCase
+from scrapy.signals import request_left_downloader
+from scrapy.spiders import Spider
+from scrapy.utils.test import get_crawler
+from tests.mockserver import MockServer
+
+
+class SignalCatcherSpider(Spider):
+    name = 'signal_catcher'
+
+    def __init__(self, crawler, url, *args, **kwargs):
+        super(SignalCatcherSpider, self).__init__(*args, **kwargs)
+        crawler.signals.connect(self.on_request_left,
+                                signal=request_left_downloader)
+        self.caught_times = 0
+        self.start_urls = [url]
+
+    @classmethod
+    def from_crawler(cls, crawler, *args, **kwargs):
+        spider = cls(crawler, *args, **kwargs)
+        return spider
+
+    def on_request_left(self, request, spider):
+        self.caught_times += 1
+
+
+class TestCatching(TestCase):
+
+    def setUp(self):
+        self.mockserver = MockServer()
+        self.mockserver.__enter__()
+
+    def tearDown(self):
+        self.mockserver.__exit__(None, None, None)
+
+    @defer.inlineCallbacks
+    def test_success(self):
+        crawler = get_crawler(SignalCatcherSpider)
+        yield crawler.crawl(self.mockserver.url("/status?n=200"))
+        self.assertEqual(crawler.spider.caught_times, 1)
+
+    @defer.inlineCallbacks
+    def test_timeout(self):
+        crawler = get_crawler(SignalCatcherSpider,
+                              {'DOWNLOAD_TIMEOUT': 0.1})
+        yield crawler.crawl(self.mockserver.url("/delay?n=0.2"))
+        self.assertEqual(crawler.spider.caught_times, 1)
+
+    @defer.inlineCallbacks
+    def test_disconnect(self):
+        crawler = get_crawler(SignalCatcherSpider)
+        yield crawler.crawl(self.mockserver.url("/drop"))
+        self.assertEqual(crawler.spider.caught_times, 1)
+
+    @defer.inlineCallbacks
+    def test_noconnect(self):
+        crawler = get_crawler(SignalCatcherSpider)
+        yield crawler.crawl('http://thereisdefinetelynosuchdomain.com')
+        self.assertEqual(crawler.spider.caught_times, 1)
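
Usage sketch (not part of the diff): a minimal example of how a handler for the
new signal could be wired up once this change lands. The spider name, start URL
and handler name below are hypothetical, chosen for illustration;
``crawler.signals.connect``, ``Spider.from_crawler`` and
``scrapy.signals.request_left_downloader`` (the attribute this diff adds) are
the only APIs relied on::

    import scrapy
    from scrapy import signals


    class LeftDownloaderLoggerSpider(scrapy.Spider):
        # Hypothetical spider used only to demonstrate the new signal.
        name = 'left_downloader_logger'
        start_urls = ['http://example.com']

        @classmethod
        def from_crawler(cls, crawler, *args, **kwargs):
            spider = super(LeftDownloaderLoggerSpider, cls).from_crawler(
                crawler, *args, **kwargs)
            # Handlers of request_left_downloader must not return deferreds
            # (see the docs above), so keep the callback synchronous.
            crawler.signals.connect(spider.on_request_left,
                                    signal=signals.request_left_downloader)
            return spider

        def on_request_left(self, request, spider):
            # Fired once per request leaving the downloader, whether the
            # download succeeded or failed.
            self.logger.info('Request left the downloader: %s', request)

        def parse(self, response):
            pass

Because the signal also fires on failures (timeouts, dropped connections and
unresolvable hosts, as the tests above exercise), a handler like this sees each
request exactly once, which is what the ``caught_times == 1`` assertions check.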