Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion scrapy_playwright/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@
PlaywrightHandler = TypeVar("PlaywrightHandler", bound="ScrapyPlaywrightDownloadHandler")


logger = logging.getLogger("scrapy-playwright")
LOGGER_NAME = "scrapy-playwright"
logger = logging.getLogger(LOGGER_NAME)


DEFAULT_BROWSER_TYPE = "chromium"
Expand All @@ -60,6 +61,7 @@ def __init__(self, crawler: Crawler) -> None:
super().__init__(settings=settings, crawler=crawler)
verify_installed_reactor("twisted.internet.asyncioreactor.AsyncioSelectorReactor")
crawler.signals.connect(self._engine_started, signals.engine_started)
crawler.signals.connect(self._spider_opened, signals.spider_opened)
self.stats = crawler.stats

# browser
Expand Down Expand Up @@ -109,6 +111,17 @@ def _engine_started(self) -> Deferred:
"""Launch the browser. Use the engine_started signal as it supports returning deferreds."""
return deferred_from_coro(self._launch())

def _spider_opened(self, spider: Spider) -> None:
    """Attach ``spider`` to the log records created for this package's logger.

    Connected to the ``spider_opened`` signal. Installs a log record factory
    (a process-wide setting of the ``logging`` module) that delegates to the
    previously installed factory and then sets ``record.spider`` on records
    whose logger name equals ``LOGGER_NAME``. Records created before this
    method runs do not get the attribute.
    """
    current_factory = logging.getLogRecordFactory()
    # If the active factory was installed by an earlier call of this method,
    # wrap the factory *it* wrapped rather than the wrapper itself. Otherwise
    # each opened spider would add one more layer to a global chain that is
    # never unwound.
    base_factory = getattr(
        current_factory, "_scrapy_playwright_base_factory", current_factory
    )

    def record_factory(name: str, *args, **kwargs):
        record = base_factory(name, *args, **kwargs)
        # Exact-name match: only records from this package's logger are tagged.
        if name == LOGGER_NAME:
            record.spider = spider
        return record

    # Remember what was wrapped so a later call can unwrap this layer.
    record_factory._scrapy_playwright_base_factory = base_factory  # type: ignore[attr-defined]
    logging.setLogRecordFactory(record_factory)

async def _launch(self) -> None:
"""Launch Playwright manager and configured startup context(s)."""
logger.info("Starting download handler")
Expand Down
17 changes: 16 additions & 1 deletion tests/test_playwright_requests.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ def should_abort_request_sync(request):
assert handler.stats.get_value(f"{req_prefix}/aborted") == 3

@pytest.mark.asyncio
async def test_page_initialization_ok(self, caplog):
async def test_page_initialization_ok(self):
async def init_page(page, request):
await page.set_extra_http_headers({"Extra-Header": "Qwerty"})

Expand Down Expand Up @@ -360,6 +360,21 @@ async def test_redirect(self):
server.urljoin("/redirect"),
]

@pytest.mark.asyncio
async def test_logging_record_spider(self, caplog):
    """Make sure at least one log record has the spider as an attribute
    (records sent before opening the spider will not have it).
    """
    caplog.set_level(logging.INFO)
    spider = Spider("spider_name")
    # handler._spider_opened replaces the process-wide log record factory.
    # Remember the current one and put it back afterwards so the change does
    # not leak into tests that run later in the same process.
    original_factory = logging.getLogRecordFactory()
    try:
        async with make_handler({"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}) as handler:
            handler._spider_opened(spider)
            with MockServer() as server:
                req = Request(url=server.urljoin("/index.html"), meta={"playwright": True})
                await handler._download_request(req, spider)
    finally:
        logging.setLogRecordFactory(original_factory)

    assert any(getattr(rec, "spider", None) is spider for rec in caplog.records)


class TestCaseChromium(MixinTestCase):
browser_type = "chromium"
Expand Down