Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 18 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -316,16 +316,15 @@ This is useful when you need to perform certain actions on a page, like scrollin
down or clicking links, and you want everything to count as a single Scrapy
Response, containing the final result.

### Supported actions
### `PageCoroutine` class

* `scrapy_playwright.page.PageCoroutine(method: str, *args, **kwargs)`:

_Represents a coroutine to be awaited on a `playwright.page.Page` object,
Represents a coroutine to be awaited on a `playwright.page.Page` object,
such as "click", "screenshot", "evaluate", etc.
`method` should be the name of the coroutine, `*args` and `**kwargs`
are passed to the function call._

_The coroutine result will be stored in the `PageCoroutine.result` attribute_
are passed to the function call. The return value of the coroutine call
will be stored in the `PageCoroutine.result` attribute.

For instance,
```python
Expand All @@ -339,8 +338,21 @@ Response, containing the final result.
```


### Supported coroutines

Please refer to the [upstream docs for the `Page` class](https://playwright.dev/python/docs/api/class-page)
to see available coroutines.

### Impact on Response objects

Certain `Response` attributes (e.g. `url`, `ip_address`) reflect the state after the last
action performed on a page. If you issue a `PageCoroutine` with an action that results in
a navigation (e.g. a `click` on a link), the `Response.url` attribute will point to the
new URL, which might be different from the request's URL.


## Page events
A dictionary of Page event handlers can be specified in the `playwright_page_event_handlers`
[Request.meta](https://docs.scrapy.org/en/latest/topics/request-response.html#scrapy.http.Request.meta) key.
Keys are the name of the event to be handled (`dialog`, `download`, etc).
Values can be either callables or strings (in which case a spider method with the name will be looked up).
Expand Down
8 changes: 6 additions & 2 deletions examples/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

class StorageSpider(Spider):
"""
Set and get storage state
Set and get storage state. Also get the server's IP address.
"""

name = "storage"
Expand All @@ -24,7 +24,11 @@ def start_requests(self):

async def parse(self, response):
page = response.meta["playwright_page"]
return {"url": response.url, "storage_state": await page.context.storage_state()}
return {
"url": response.url,
"storage_state": await page.context.storage_state(),
"ip_address": response.ip_address,
}


if __name__ == "__main__":
Expand Down
7 changes: 7 additions & 0 deletions scrapy_playwright/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import warnings
from collections import defaultdict
from contextlib import suppress
from ipaddress import ip_address
from time import time
from typing import Callable, Dict, Optional, Type, TypeVar

Expand Down Expand Up @@ -231,6 +232,11 @@ async def _download_request_with_page(self, request: Request, page: Page) -> Res
await page.close()
self.stats.inc_value("playwright/page_count/closed")

server_ip_address = None
with suppress(AttributeError, KeyError, ValueError):
server_addr = await response.server_addr()
server_ip_address = ip_address(server_addr["ipAddress"])

headers = Headers(response.headers)
headers.pop("Content-Encoding", None)
encoding = _get_response_encoding(headers, body_str) or "utf-8"
Expand All @@ -244,6 +250,7 @@ async def _download_request_with_page(self, request: Request, page: Page) -> Res
request=request,
flags=["playwright"],
encoding=encoding,
ip_address=server_ip_address,
)

def _increment_request_stats(self, request: PlaywrightRequest) -> None:
Expand Down
14 changes: 14 additions & 0 deletions tests/test_playwright_requests.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
import platform
import subprocess
from ipaddress import ip_address
from tempfile import NamedTemporaryFile

import pytest
Expand Down Expand Up @@ -372,6 +373,19 @@ async def test_event_handler_dialog_missing(self, caplog):
) in caplog.record_tuples
assert getattr(spider, "dialog_message", None) is None

@pytest.mark.asyncio
async def test_response_attributes(self):
async with make_handler({"PLAYWRIGHT_BROWSER_TYPE": self.browser_type}) as handler:
with MockServer() as server:
spider = DialogSpider()
req = Request(
url=server.urljoin("/index.html"),
meta={"playwright": True},
)
response = await handler._download_request(req, spider)

assert response.ip_address == ip_address(server.address)


class TestCaseChromium(MixinTestCase):
browser_type = "chromium"
Expand Down