request_left_downloader signal #4303

Merged: 13 commits, Feb 7, 2020
17 changes: 17 additions & 0 deletions docs/topics/signals.rst
@@ -295,6 +295,23 @@ request_reached_downloader
:param spider: the spider that yielded the request
:type spider: :class:`~scrapy.spiders.Spider` object

request_left_downloader
-----------------------

.. signal:: request_left_downloader
.. function:: request_left_downloader(request, spider)

Sent when a :class:`~scrapy.http.Request` leaves the downloader, even in the
case of failure.

This signal does not support returning deferreds from its handlers.

:param request: the request that left the downloader
:type request: :class:`~scrapy.http.Request` object

:param spider: the spider that yielded the request
:type spider: :class:`~scrapy.spiders.Spider` object
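
For instance, a handler can be connected from an extension's
``from_crawler`` method. A minimal sketch (the ``RequestLogger``
extension name is hypothetical, for illustration only)::

    from scrapy import signals


    class RequestLogger:
        """Hypothetical extension: log every request leaving the downloader."""

        @classmethod
        def from_crawler(cls, crawler):
            ext = cls()
            crawler.signals.connect(ext.on_request_left,
                                    signal=signals.request_left_downloader)
            return ext

        def on_request_left(self, request, spider):
            spider.logger.debug('Request left the downloader: %s', request)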

response_received
-----------------

3 changes: 3 additions & 0 deletions scrapy/core/downloader/__init__.py
@@ -181,6 +181,9 @@ def _downloaded(response):
def finish_transferring(_):
    slot.transferring.remove(request)
    self._process_queue(spider, slot)
    self.signals.send_catch_log(signal=signals.request_left_downloader,
                                request=request,
                                spider=spider)
    return _

return dfd.addBoth(finish_transferring)
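
Note that ``send_catch_log`` calls handlers synchronously, logs (rather than
propagates) any exception a handler raises, and does not wait on returned
Deferreds, which is why the documentation above states that this signal does
not support returning deferreds from its handlers. A handler is therefore a
plain callable; a hedged sketch, with illustrative names:

    def on_request_left(request, spider):
        # called for every request that leaves the downloader,
        # whether it succeeded or failed
        spider.logger.debug('%s left the downloader', request)

    crawler.signals.connect(on_request_left,
                            signal=signals.request_left_downloader)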
1 change: 1 addition & 0 deletions scrapy/signals.py
@@ -14,6 +14,7 @@
request_scheduled = object()
request_dropped = object()
request_reached_downloader = object()
request_left_downloader = object()
response_received = object()
response_downloaded = object()
item_scraped = object()
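
Scrapy signals are plain ``object()`` sentinels compared by identity, so
registering a new one is just another module-level assignment, and user code
can define custom signals the same way. A small sketch
(``spider_custom_event`` is a made-up signal name):

    # myproject/signals.py
    spider_custom_event = object()

    # somewhere with access to the crawler:
    crawler.signals.send_catch_log(signal=spider_custom_event,
                                   spider=spider)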
60 changes: 60 additions & 0 deletions tests/test_request_left.py
@@ -0,0 +1,60 @@
from twisted.internet import defer
from twisted.trial.unittest import TestCase

from scrapy.signals import request_left_downloader
from scrapy.spiders import Spider
from scrapy.utils.test import get_crawler
from tests.mockserver import MockServer


class SignalCatcherSpider(Spider):
    name = 'signal_catcher'

    def __init__(self, crawler, url, *args, **kwargs):
        super(SignalCatcherSpider, self).__init__(*args, **kwargs)
        crawler.signals.connect(self.on_request_left,
                                signal=request_left_downloader)
        self.caught_times = 0
        self.start_urls = [url]

    @classmethod
    def from_crawler(cls, crawler, *args, **kwargs):
        spider = cls(crawler, *args, **kwargs)
        return spider

    def on_request_left(self, request, spider):
        self.caught_times += 1


class TestCatching(TestCase):

    def setUp(self):
        self.mockserver = MockServer()
        self.mockserver.__enter__()

    def tearDown(self):
        self.mockserver.__exit__(None, None, None)

    @defer.inlineCallbacks
    def test_success(self):
        crawler = get_crawler(SignalCatcherSpider)
        yield crawler.crawl(self.mockserver.url("/status?n=200"))
        self.assertEqual(crawler.spider.caught_times, 1)

    @defer.inlineCallbacks
    def test_timeout(self):
        crawler = get_crawler(SignalCatcherSpider,
                              {'DOWNLOAD_TIMEOUT': 0.1})
        yield crawler.crawl(self.mockserver.url("/delay?n=0.2"))
        self.assertEqual(crawler.spider.caught_times, 1)

    @defer.inlineCallbacks
    def test_disconnect(self):
        crawler = get_crawler(SignalCatcherSpider)
        yield crawler.crawl(self.mockserver.url("/drop"))
        self.assertEqual(crawler.spider.caught_times, 1)

    @defer.inlineCallbacks
    def test_noconnect(self):
        crawler = get_crawler(SignalCatcherSpider)
        yield crawler.crawl('http://thereisdefinetelynosuchdomain.com')
        self.assertEqual(crawler.spider.caught_times, 1)