diff --git a/scrapy_playwright/handler.py b/scrapy_playwright/handler.py
index 9ebcfcec..c41702d0 100644
--- a/scrapy_playwright/handler.py
+++ b/scrapy_playwright/handler.py
@@ -11,6 +11,7 @@
     Page,
     PlaywrightContextManager,
     Request as PlaywrightRequest,
+    Response as PlaywrightResponse,
     Route,
 )
 from scrapy import Spider, signals
@@ -35,6 +36,26 @@
 logger = logging.getLogger("scrapy-playwright")
 
 
+def _make_request_logger(context_name: str) -> Callable:
+    def _log_request(request: PlaywrightRequest) -> None:
+        logger.debug(
+            f"[Context={context_name}] Request: <{request.method.upper()} {request.url}> "
+            f"(resource type: {request.resource_type}, referrer: {request.headers.get('referer')})"
+        )
+
+    return _log_request
+
+
+def _make_response_logger(context_name: str) -> Callable:
+    def _log_response(response: PlaywrightResponse) -> None:
+        logger.debug(
+            f"[Context={context_name}] Response: <{response.status} {response.url}> "
+            f"(referrer: {response.headers.get('referer')})"
+        )
+
+    return _log_response
+
+
 class ScrapyPlaywrightDownloadHandler(HTTPDownloadHandler):
     def __init__(self, crawler: Crawler) -> None:
         super().__init__(settings=crawler.settings, crawler=crawler)
@@ -107,6 +128,8 @@ async def _create_page(self, request: Request) -> Page:
         context = await self._create_browser_context(context_name, context_kwargs)
         self.contexts[context_name] = context
         page = await context.new_page()
+        page.on("request", _make_request_logger(context_name))
+        page.on("response", _make_response_logger(context_name))
        self.stats.inc_value("playwright/page_count")
         if self.default_navigation_timeout:
             page.set_default_navigation_timeout(self.default_navigation_timeout)
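
For context, a minimal sketch (not part of this diff) of a spider that would surface the new per-context log lines. The DOWNLOAD_HANDLERS / TWISTED_REACTOR settings and the "playwright" meta key follow the usual scrapy-playwright setup; the spider name, URL, and the sample log output shown in the comments are illustrative assumptions.

# Hypothetical spider used only to illustrate the new logging added above.
import scrapy


class LoggingDemoSpider(scrapy.Spider):
    name = "logging_demo"
    custom_settings = {
        # Standard scrapy-playwright wiring (from the project's README, not this diff).
        "DOWNLOAD_HANDLERS": {
            "http": "scrapy_playwright.handler.ScrapyPlaywrightDownloadHandler",
            "https": "scrapy_playwright.handler.ScrapyPlaywrightDownloadHandler",
        },
        "TWISTED_REACTOR": "twisted.internet.asyncioreactor.AsyncioSelectorReactor",
        # The new messages are emitted at DEBUG level on the "scrapy-playwright" logger.
        "LOG_LEVEL": "DEBUG",
    }

    def start_requests(self):
        # With this change, every request/response issued by the page is logged with
        # its context name, e.g. (assuming the default context):
        #   [Context=default] Request: <GET https://example.org/> (resource type: document, referrer: None)
        #   [Context=default] Response: <200 https://example.org/> (referrer: None)
        yield scrapy.Request("https://example.org", meta={"playwright": True})

    def parse(self, response):
        yield {"url": response.url, "status": response.status}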