Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions scrapy_playwright/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
Page,
PlaywrightContextManager,
Request as PlaywrightRequest,
Response as PlaywrightResponse,
Route,
)
from scrapy import Spider, signals
Expand All @@ -35,6 +36,26 @@
# Named logger used by the request/response logging callbacks defined in this module.
logger = logging.getLogger("scrapy-playwright")


def _make_request_logger(context_name: str) -> Callable:
    """Build a Playwright "request" event callback bound to a context name.

    Args:
        context_name: Name of the browser context; it is included in every
            log line as ``[Context=<name>]``.

    Returns:
        A callable that takes a Playwright request and emits a single DEBUG
        record with the upper-cased HTTP method, the URL, the resource type
        and the value of the ``referer`` request header (``None`` if absent).
    """

    def _log_request(request: PlaywrightRequest) -> None:
        # Hand the values to the logger as arguments instead of pre-rendering
        # an f-string: the message is then only interpolated when a handler
        # actually emits the record, which matters for a per-request callback.
        logger.debug(
            "[Context=%s] Request: <%s %s> (resource type: %s, referrer: %s)",
            context_name,
            request.method.upper(),
            request.url,
            request.resource_type,
            request.headers.get("referer"),
        )

    return _log_request


def _make_response_logger(context_name: str) -> Callable:
    """Build a Playwright "response" event callback bound to a context name.

    Args:
        context_name: Name of the browser context; it is included in every
            log line as ``[Context=<name>]``.

    Returns:
        A callable that takes a Playwright response and emits a single DEBUG
        record with the status code, the URL and the referrer of the request
        that produced the response (``None`` if the header was not sent).
    """

    def _log_response(response: PlaywrightResponse) -> None:
        # "Referer" is a request header, so it is read from the request that
        # originated this response; looking it up in the response headers
        # would almost always yield None.
        logger.debug(
            "[Context=%s] Response: <%s %s> (referrer: %s)",
            context_name,
            response.status,
            response.url,
            response.request.headers.get("referer"),
        )

    return _log_response


class ScrapyPlaywrightDownloadHandler(HTTPDownloadHandler):
def __init__(self, crawler: Crawler) -> None:
super().__init__(settings=crawler.settings, crawler=crawler)
Expand Down Expand Up @@ -107,6 +128,8 @@ async def _create_page(self, request: Request) -> Page:
context = await self._create_browser_context(context_name, context_kwargs)
self.contexts[context_name] = context
page = await context.new_page()
page.on("request", _make_request_logger(context_name))
page.on("response", _make_response_logger(context_name))
self.stats.inc_value("playwright/page_count")
if self.default_navigation_timeout:
page.set_default_navigation_timeout(self.default_navigation_timeout)
Expand Down