diff --git a/scrapy_playwright/handler.py b/scrapy_playwright/handler.py
index 842feabf..f3f4b1bb 100644
--- a/scrapy_playwright/handler.py
+++ b/scrapy_playwright/handler.py
@@ -291,6 +291,15 @@ async def _download_request_with_page(self, request: Request, page: Page) -> Res
         page_goto_kwargs = request.meta.get("playwright_page_goto_kwargs") or {}
         page_goto_kwargs.pop("url", None)
         response = await page.goto(url=request.url, **page_goto_kwargs)
+        if response is None:
+            logger.warning(
+                f"Navigating to {request} returned None, the response"
+                " will have empty headers and status 200"
+            )
+            headers = Headers()
+        else:
+            headers = Headers(await response.all_headers())
+            headers.pop("Content-Encoding", None)
         await self._apply_page_methods(page, request)
         body_str = await page.content()
         request.meta["download_latency"] = time() - start_time
@@ -307,13 +316,11 @@ async def _download_request_with_page(self, request: Request, page: Page) -> Res
         with suppress(AttributeError):
             request.meta["playwright_security_details"] = await response.security_details()
 
-        headers = Headers(await response.all_headers())
-        headers.pop("Content-Encoding", None)
         body, encoding = _encode_body(headers=headers, text=body_str)
         respcls = responsetypes.from_args(headers=headers, url=page.url, body=body)
         return respcls(
             url=page.url,
-            status=response.status,
+            status=response.status if response is not None else 200,
             headers=headers,
             body=body,
             request=request,
diff --git a/tests/test_playwright_requests.py b/tests/test_playwright_requests.py
index 59cb64d5..beef4fdc 100644
--- a/tests/test_playwright_requests.py
+++ b/tests/test_playwright_requests.py
@@ -349,6 +349,22 @@ async def test_page_goto_kwargs_referer(self):
         headers = json.loads(response.css("pre::text").get())
         assert headers["Referer"] == fake_referer
 
+    @pytest.mark.asyncio
+    async def test_navigation_returns_none(self, caplog):
+        async with make_handler({"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}) as handler:
+            with MockServer():
+                req = Request(url="about:blank", meta={"playwright": True})
+                response = await handler._download_request(req, Spider("spider_name"))
+
+        assert (
+            "scrapy-playwright",
+            logging.WARNING,
+            f"Navigating to {req!r} returned None, the response"
+            " will have empty headers and status 200",
+        ) in caplog.record_tuples
+        assert not response.headers
+        assert response.status == 200
+
     @pytest.mark.asyncio
     async def test_abort_requests(self):
         async def should_abort_request_async(request):