Merge pull request #6064 from wRAR/signals-proper
Refactor installing signals.
wRAR committed Oct 17, 2023
2 parents c655679 + 029a563 commit aa95ada
Showing 8 changed files with 97 additions and 18 deletions.
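In short: CrawlerProcess.start() used to install Scrapy's shutdown signal handlers itself and then start the reactor with installSignalHandlers=False; after this refactor it lets Twisted install its own OS signal handling and registers Scrapy's handlers in an "after startup" reactor trigger, so they are layered on top of Twisted's. A minimal standalone sketch of that pattern outside Scrapy (the handler and function names below are illustrative, not Scrapy's own):

import signal

from twisted.internet import reactor


def shutdown_handler(signum, frame):
    # Illustrative handler; Scrapy's real one is CrawlerProcess._signal_shutdown.
    print(f"received signal {signum}, stopping the reactor")
    reactor.callFromThread(reactor.stop)


def install_handlers():
    # Runs after reactor startup, i.e. after Twisted has installed its own
    # signal handling, so these handlers take over SIGINT/SIGTERM.
    signal.signal(signal.SIGTERM, shutdown_handler)
    signal.signal(signal.SIGINT, shutdown_handler)


reactor.addSystemEventTrigger("after", "startup", install_handlers)
reactor.callLater(10, reactor.stop)  # fallback stop so the sketch exits on its own
reactor.run(installSignalHandlers=True)  # blocking call, as in CrawlerProcess.start()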
12 changes: 7 additions & 5 deletions scrapy/crawler.py
@@ -404,8 +404,8 @@ def start(
 
         :param bool stop_after_crawl: stop or not the reactor when all
             crawlers have finished
-        :param bool install_signal_handlers: whether to install the shutdown
-            handlers (default: True)
+        :param bool install_signal_handlers: whether to install the OS signal
+            handlers from Twisted and Scrapy (default: True)
         """
         from twisted.internet import reactor

@@ -416,15 +416,17 @@ def start(
                 return
             d.addBoth(self._stop_reactor)
 
-        if install_signal_handlers:
-            install_shutdown_handlers(self._signal_shutdown)
         resolver_class = load_object(self.settings["DNS_RESOLVER"])
         resolver = create_instance(resolver_class, self.settings, self, reactor=reactor)
         resolver.install_on_reactor()
         tp = reactor.getThreadPool()
         tp.adjustPoolsize(maxthreads=self.settings.getint("REACTOR_THREADPOOL_MAXSIZE"))
         reactor.addSystemEventTrigger("before", "shutdown", self.stop)
-        reactor.run(installSignalHandlers=False)  # blocking call
+        if install_signal_handlers:
+            reactor.addSystemEventTrigger(
+                "after", "startup", install_shutdown_handlers, self._signal_shutdown
+            )
+        reactor.run(installSignalHandlers=install_signal_handlers)  # blocking call
 
     def _graceful_stop_reactor(self) -> Deferred:
         d = self.stop()
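For callers, the install_signal_handlers flag keeps its documented meaning: pass False when embedding Scrapy in an application that manages OS signals itself. A short usage sketch under that assumption (the spider name and project settings are placeholders):

from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

process = CrawlerProcess(get_project_settings())
process.crawl("example")  # placeholder spider name from the enclosing project

# Skip both Twisted's and Scrapy's OS signal handlers; the host application
# is then responsible for stopping the crawl (e.g. by calling process.stop()).
process.start(install_signal_handlers=False)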
9 changes: 3 additions & 6 deletions scrapy/utils/ossignal.py
@@ -19,13 +19,10 @@ def install_shutdown_handlers(
     function: SignalHandlerT, override_sigint: bool = True
 ) -> None:
     """Install the given function as a signal handler for all common shutdown
-    signals (such as SIGINT, SIGTERM, etc). If override_sigint is ``False`` the
-    SIGINT handler won't be install if there is already a handler in place
-    (e.g. Pdb)
+    signals (such as SIGINT, SIGTERM, etc). If ``override_sigint`` is ``False`` the
+    SIGINT handler won't be installed if there is already a handler in place
+    (e.g. Pdb)
     """
-    from twisted.internet import reactor
-
-    reactor._handleSignals()
     signal.signal(signal.SIGTERM, function)
     if signal.getsignal(signal.SIGINT) == signal.default_int_handler or override_sigint:
         signal.signal(signal.SIGINT, function)
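install_shutdown_handlers itself stays a thin wrapper over the signal module; the change is that it no longer reaches into the reactor (reactor._handleSignals()), since its caller now runs it after reactor startup. A rough sketch of calling it directly in a plain POSIX script, with an illustrative handler:

import signal

from scrapy.utils.ossignal import install_shutdown_handlers


def on_shutdown(signum, frame):
    print(f"shutdown signal {signum} received")


# Installs the handler for SIGTERM, and for SIGINT only if the current SIGINT
# handler is still the default one (override_sigint=False leaves e.g. Pdb's
# handler in place).
install_shutdown_handlers(on_shutdown, override_sigint=False)
signal.pause()  # POSIX only: block until a signal arrives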
7 changes: 4 additions & 3 deletions scrapy/utils/testproc.py
@@ -2,7 +2,7 @@
 
 import os
 import sys
-from typing import Iterable, Optional, Tuple, cast
+from typing import Iterable, List, Optional, Tuple, cast
 
 from twisted.internet.defer import Deferred
 from twisted.internet.error import ProcessTerminated
@@ -26,14 +26,15 @@ def execute(
         env = os.environ.copy()
         if settings is not None:
             env["SCRAPY_SETTINGS_MODULE"] = settings
+        assert self.command
         cmd = self.prefix + [self.command] + list(args)
         pp = TestProcessProtocol()
-        pp.deferred.addBoth(self._process_finished, cmd, check_code)
+        pp.deferred.addCallback(self._process_finished, cmd, check_code)
         reactor.spawnProcess(pp, cmd[0], cmd, env=env, path=self.cwd)
         return pp.deferred
 
     def _process_finished(
-        self, pp: TestProcessProtocol, cmd: str, check_code: bool
+        self, pp: TestProcessProtocol, cmd: List[str], check_code: bool
     ) -> Tuple[int, bytes, bytes]:
         if pp.exitcode and check_code:
             msg = f"process {cmd} exit with code {pp.exitcode}"
3 changes: 1 addition & 2 deletions setup.py
@@ -6,8 +6,7 @@
 
 
 install_requires = [
-    # 23.8.0 incompatibility: https://github.com/scrapy/scrapy/issues/6024
-    "Twisted>=18.9.0,<23.8.0",
+    "Twisted>=18.9.0",
     "cryptography>=36.0.0",
     "cssselect>=0.9.1",
     "itemloaders>=1.0.1",
24 changes: 24 additions & 0 deletions tests/CrawlerProcess/sleeping.py
@@ -0,0 +1,24 @@
+from twisted.internet.defer import Deferred
+
+import scrapy
+from scrapy.crawler import CrawlerProcess
+from scrapy.utils.defer import maybe_deferred_to_future
+
+
+class SleepingSpider(scrapy.Spider):
+    name = "sleeping"
+
+    start_urls = ["data:,;"]
+
+    async def parse(self, response):
+        from twisted.internet import reactor
+
+        d = Deferred()
+        reactor.callLater(3, d.callback, None)
+        await maybe_deferred_to_future(d)
+
+
+process = CrawlerProcess(settings={})
+
+process.crawl(SleepingSpider)
+process.start()
1 change: 1 addition & 0 deletions tests/requirements.txt
@@ -1,5 +1,6 @@
 # Tests requirements
 attrs
+pexpect >= 4.8.0
 pyftpdlib >= 1.5.8
 pytest
 pytest-cov==4.0.0
26 changes: 26 additions & 0 deletions tests/test_command_shell.py
@@ -1,11 +1,15 @@
+import sys
+from io import BytesIO
 from pathlib import Path
 
+from pexpect.popen_spawn import PopenSpawn
 from twisted.internet import defer
 from twisted.trial import unittest
 
 from scrapy.utils.testproc import ProcessTest
 from scrapy.utils.testsite import SiteTest
 from tests import NON_EXISTING_RESOLVABLE, tests_datadir
+from tests.mockserver import MockServer
 
 
 class ShellTest(ProcessTest, SiteTest, unittest.TestCase):
@@ -133,3 +137,25 @@ def test_shell_fetch_async(self):
         args = ["-c", code, "--set", f"TWISTED_REACTOR={reactor_path}"]
         _, _, err = yield self.execute(args, check_code=True)
         self.assertNotIn(b"RuntimeError: There is no current event loop in thread", err)
+
+
+class InteractiveShellTest(unittest.TestCase):
+    def test_fetch(self):
+        args = (
+            sys.executable,
+            "-m",
+            "scrapy.cmdline",
+            "shell",
+        )
+        logfile = BytesIO()
+        p = PopenSpawn(args, timeout=5)
+        p.logfile_read = logfile
+        p.expect_exact("Available Scrapy objects")
+        with MockServer() as mockserver:
+            p.sendline(f"fetch('{mockserver.url('/')}')")
+            p.sendline("type(response)")
+            p.expect_exact("HtmlResponse")
+        p.sendeof()
+        p.wait()
+        logfile.seek(0)
+        self.assertNotIn("Traceback", logfile.read().decode())
33 changes: 31 additions & 2 deletions tests/test_crawler.py
@@ -1,13 +1,16 @@
 import logging
 import os
 import platform
+import signal
 import subprocess
 import sys
 import warnings
 from pathlib import Path
+from typing import List
 
 import pytest
 from packaging.version import parse as parse_version
+from pexpect.popen_spawn import PopenSpawn
 from pytest import mark, raises
 from twisted.internet import defer
 from twisted.trial import unittest
@@ -289,9 +292,12 @@ class ScriptRunnerMixin:
     script_dir: Path
     cwd = os.getcwd()
 
-    def run_script(self, script_name: str, *script_args):
+    def get_script_args(self, script_name: str, *script_args: str) -> List[str]:
         script_path = self.script_dir / script_name
-        args = [sys.executable, str(script_path)] + list(script_args)
+        return [sys.executable, str(script_path)] + list(script_args)
+
+    def run_script(self, script_name: str, *script_args: str) -> str:
+        args = self.get_script_args(script_name, *script_args)
         p = subprocess.Popen(
             args,
             env=get_mockserver_env(),
@@ -517,6 +523,29 @@ def test_args_change_settings(self):
         self.assertIn("Spider closed (finished)", log)
         self.assertIn("The value of FOO is 42", log)
 
+    def test_shutdown_graceful(self):
+        sig = signal.SIGINT if sys.platform != "win32" else signal.SIGBREAK
+        args = self.get_script_args("sleeping.py")
+        p = PopenSpawn(args, timeout=5)
+        p.expect_exact("Spider opened")
+        p.expect_exact("Crawled (200)")
+        p.kill(sig)
+        p.expect_exact("shutting down gracefully")
+        p.expect_exact("Spider closed (shutdown)")
+        p.wait()
+
+    def test_shutdown_forced(self):
+        sig = signal.SIGINT if sys.platform != "win32" else signal.SIGBREAK
+        args = self.get_script_args("sleeping.py")
+        p = PopenSpawn(args, timeout=5)
+        p.expect_exact("Spider opened")
+        p.expect_exact("Crawled (200)")
+        p.kill(sig)
+        p.expect_exact("shutting down gracefully")
+        p.kill(sig)
+        p.expect_exact("forcing unclean shutdown")
+        p.wait()
+
 
 class CrawlerRunnerSubprocess(ScriptRunnerMixin, unittest.TestCase):
     script_dir = Path(__file__).parent.resolve() / "CrawlerRunner"
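The two new tests encode the intended operator behaviour: a first SIGINT (SIGBREAK on Windows) asks the crawl to shut down gracefully, a second one forces an unclean shutdown. Roughly the same check can be done by hand against the sleeping.py script added above; a hedged sketch using pexpect on a POSIX system (the path and timeout are illustrative):

import signal
import sys

from pexpect.popen_spawn import PopenSpawn

p = PopenSpawn([sys.executable, "tests/CrawlerProcess/sleeping.py"], timeout=15)
p.expect_exact("Spider opened")
p.kill(signal.SIGINT)
p.expect_exact("shutting down gracefully")  # first signal: graceful stop
p.kill(signal.SIGINT)
p.expect_exact("forcing unclean shutdown")  # second signal: forced, unclean stop
p.wait()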
