diff --git a/scrapy_playwright/handler.py b/scrapy_playwright/handler.py
index 63e9127f..05340a11 100644
--- a/scrapy_playwright/handler.py
+++ b/scrapy_playwright/handler.py
@@ -39,7 +39,8 @@
 PlaywrightHandler = TypeVar("PlaywrightHandler", bound="ScrapyPlaywrightDownloadHandler")
 
 
-logger = logging.getLogger("scrapy-playwright")
+LOGGER_NAME = "scrapy-playwright"
+logger = logging.getLogger(LOGGER_NAME)
 
 
 DEFAULT_BROWSER_TYPE = "chromium"
@@ -60,6 +61,7 @@ def __init__(self, crawler: Crawler) -> None:
         super().__init__(settings=settings, crawler=crawler)
         verify_installed_reactor("twisted.internet.asyncioreactor.AsyncioSelectorReactor")
         crawler.signals.connect(self._engine_started, signals.engine_started)
+        crawler.signals.connect(self._spider_opened, signals.spider_opened)
         self.stats = crawler.stats
 
         # browser
@@ -109,6 +111,24 @@ def _engine_started(self) -> Deferred:
         """Launch the browser. Use the engine_started signal as it supports returning deferreds."""
         return deferred_from_coro(self._launch())
 
+    def _spider_opened(self, spider: Spider) -> None:
+        """Add the spider to the log records emitted by this package's logger.
+
+        Wraps the current log record factory so that records created for the
+        LOGGER_NAME logger get a ``spider`` attribute.
+        """
+        # NOTE(review): the factory is process-wide and is not restored anywhere
+        # in this change; each call wraps the previous factory, so wrappers nest.
+        old_factory = logging.getLogRecordFactory()
+
+        def record_factory(name: str, *args, **kwargs):
+            record = old_factory(name, *args, **kwargs)
+            if name == LOGGER_NAME:
+                record.spider = spider
+            return record
+
+        logging.setLogRecordFactory(record_factory)
+
     async def _launch(self) -> None:
         """Launch Playwright manager and configured startup context(s)."""
         logger.info("Starting download handler")
diff --git a/tests/test_playwright_requests.py b/tests/test_playwright_requests.py
index 3a592b70..b71e4fe2 100644
--- a/tests/test_playwright_requests.py
+++ b/tests/test_playwright_requests.py
@@ -295,7 +295,7 @@ def should_abort_request_sync(request):
                 assert handler.stats.get_value(f"{req_prefix}/aborted") == 3
 
     @pytest.mark.asyncio
-    async def test_page_initialization_ok(self, caplog):
+    async def test_page_initialization_ok(self):
         async def init_page(page, request):
             await page.set_extra_http_headers({"Extra-Header": "Qwerty"})
 
@@ -360,6 +360,27 @@ async def test_redirect(self):
                 server.urljoin("/redirect"),
             ]
 
+    @pytest.mark.asyncio
+    async def test_logging_record_spider(self, caplog):
+        """Make sure at least one log record has the spider as an attribute
+        (records sent before opening the spider will not have it).
+        """
+        caplog.set_level(logging.INFO)
+        spider = Spider("spider_name")
+        # _spider_opened replaces the process-wide log record factory: put the
+        # original back afterwards so the wrapper does not leak into other tests
+        original_factory = logging.getLogRecordFactory()
+        try:
+            async with make_handler({"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}) as handler:
+                handler._spider_opened(spider)
+                with MockServer() as server:
+                    req = Request(url=server.urljoin("/index.html"), meta={"playwright": True})
+                    await handler._download_request(req, spider)
+        finally:
+            logging.setLogRecordFactory(original_factory)
+
+        assert any(getattr(rec, "spider", None) is spider for rec in caplog.records)
+
 
 class TestCaseChromium(MixinTestCase):
     browser_type = "chromium"