From 84d94a3bf5f55a08a9d30705f983548168b09c03 Mon Sep 17 00:00:00 2001 From: Ross Wollman Date: Mon, 27 Jun 2022 13:22:15 -0700 Subject: [PATCH] chore: port record_har_* options (content, mode, url_filter) (#1382) This is part 3/n of the 1.23 port. Relates #1308, #1374, #1376. Ports: - [x] https://github.com/microsoft/playwright/commit/fdcdd58d7fb128de2a26828eca4afa78c70047a8 (feat(har): introduce urlFilter (#14693)) - [x] https://github.com/microsoft/playwright/commit/c349c1d57f6a60813aa433fcfc39e0bce4c164c5 (feat: newContext.har (#14892)) - [x] https://github.com/microsoft/playwright/commit/245c33a5d44e6acd93b532e1166587fa716c560d (feat(har): allow storing content as separate files (#14934)) - [x] https://github.com/microsoft/playwright/commit/be64e9ce66bdd1e5b40c79b85a77b61a0c93b00a (chore(har): attach resources for .zip hars (#14938)) - [x] https://github.com/microsoft/playwright/commit/7bd72716f960ed11b48447c22f49a3b4ee453eb5 (feat(har): introduce the slim mode (#15053)) --- playwright/_impl/_browser.py | 34 +++++- playwright/_impl/_browser_type.py | 7 +- playwright/_impl/_helper.py | 2 + playwright/async_api/_generated.py | 48 +++++++- playwright/sync_api/_generated.py | 48 +++++++- scripts/expected_api_mismatch.txt | 3 - setup.py | 2 +- tests/async/test_har.py | 157 +++++++++++++++++++++++++- tests/async/test_request_intercept.py | 8 +- tests/server.py | 6 +- tests/sync/test_har.py | 154 ++++++++++++++++++++++++- tests/sync/test_request_intercept.py | 4 +- 12 files changed, 451 insertions(+), 22 deletions(-) diff --git a/playwright/_impl/_browser.py b/playwright/_impl/_browser.py index dc04e8cf7..4f5a72878 100644 --- a/playwright/_impl/_browser.py +++ b/playwright/_impl/_browser.py @@ -16,7 +16,7 @@ import json from pathlib import Path from types import SimpleNamespace -from typing import TYPE_CHECKING, Any, Dict, List, Union, cast +from typing import TYPE_CHECKING, Any, Dict, List, Pattern, Union, cast from playwright._impl._api_structures import ( 
Geolocation, @@ -31,6 +31,8 @@ from playwright._impl._helper import ( ColorScheme, ForcedColors, + HarContentPolicy, + HarMode, ReducedMotion, ServiceWorkersPolicy, async_readfile, @@ -40,6 +42,7 @@ from playwright._impl._local_utils import LocalUtils from playwright._impl._network import serialize_headers from playwright._impl._page import Page +from playwright._impl._str_utils import escape_regex_flags if TYPE_CHECKING: # pragma: no cover from playwright._impl._browser_type import BrowserType @@ -116,6 +119,9 @@ async def new_context( baseURL: str = None, strictSelectors: bool = None, serviceWorkers: ServiceWorkersPolicy = None, + recordHarUrlFilter: Union[Pattern, str] = None, + recordHarMode: HarMode = None, + recordHarContent: HarContentPolicy = None, ) -> BrowserContext: params = locals_to_params(locals()) await normalize_context_params(self._connection._is_sync, params) @@ -160,6 +166,9 @@ async def new_page( baseURL: str = None, strictSelectors: bool = None, serviceWorkers: ServiceWorkersPolicy = None, + recordHarUrlFilter: Union[Pattern, str] = None, + recordHarMode: HarMode = None, + recordHarContent: HarContentPolicy = None, ) -> Page: params = locals_to_params(locals()) context = await self.new_context(**params) @@ -217,9 +226,30 @@ async def normalize_context_params(is_sync: bool, params: Dict) -> None: if "recordHarPath" in params: recordHar: Dict[str, Any] = {"path": str(params["recordHarPath"])} params["recordHar"] = recordHar + if "recordHarUrlFilter" in params: + opt = params["recordHarUrlFilter"] + if isinstance(opt, str): + params["recordHar"]["urlGlob"] = opt + if isinstance(opt, Pattern): + params["recordHar"]["urlRegexSource"] = opt.pattern + params["recordHar"]["urlRegexFlags"] = escape_regex_flags(opt) + del params["recordHarUrlFilter"] + if "recordHarMode" in params: + params["recordHar"]["mode"] = params["recordHarMode"] + del params["recordHarMode"] + + new_content_api = None + old_content_api = None + if "recordHarContent" in params: + 
new_content_api = params["recordHarContent"] + del params["recordHarContent"] if "recordHarOmitContent" in params: - params["recordHar"]["omitContent"] = params["recordHarOmitContent"] + old_content_api = params["recordHarOmitContent"] del params["recordHarOmitContent"] + content = new_content_api or ("omit" if old_content_api else None) + if content: + params["recordHar"]["content"] = content + del params["recordHarPath"] if "recordVideoDir" in params: params["recordVideo"] = {"dir": str(params["recordVideoDir"])} diff --git a/playwright/_impl/_browser_type.py b/playwright/_impl/_browser_type.py index 500e8b138..761fb57c9 100644 --- a/playwright/_impl/_browser_type.py +++ b/playwright/_impl/_browser_type.py @@ -15,7 +15,7 @@ import asyncio import pathlib from pathlib import Path -from typing import TYPE_CHECKING, Dict, List, Optional, Union, cast +from typing import TYPE_CHECKING, Dict, List, Optional, Pattern, Union, cast from playwright._impl._api_structures import ( Geolocation, @@ -36,6 +36,8 @@ ColorScheme, Env, ForcedColors, + HarContentPolicy, + HarMode, ReducedMotion, ServiceWorkersPolicy, locals_to_params, @@ -139,6 +141,9 @@ async def launch_persistent_context( baseURL: str = None, strictSelectors: bool = None, serviceWorkers: ServiceWorkersPolicy = None, + recordHarUrlFilter: Union[Pattern, str] = None, + recordHarMode: HarMode = None, + recordHarContent: HarContentPolicy = None, ) -> BrowserContext: userDataDir = str(Path(userDataDir)) params = locals_to_params(locals()) diff --git a/playwright/_impl/_helper.py b/playwright/_impl/_helper.py index 51f15f437..71d55b917 100644 --- a/playwright/_impl/_helper.py +++ b/playwright/_impl/_helper.py @@ -65,6 +65,8 @@ KeyboardModifier = Literal["Alt", "Control", "Meta", "Shift"] MouseButton = Literal["left", "middle", "right"] ServiceWorkersPolicy = Literal["allow", "block"] +HarMode = Literal["full", "minimal"] +HarContentPolicy = Literal["attach", "embed", "omit"] class ErrorPayload(TypedDict, total=False): 
diff --git a/playwright/async_api/_generated.py b/playwright/async_api/_generated.py index a47af60a0..1eca729b8 100644 --- a/playwright/async_api/_generated.py +++ b/playwright/async_api/_generated.py @@ -10647,7 +10647,10 @@ async def new_context( storage_state: typing.Union[StorageState, str, pathlib.Path] = None, base_url: str = None, strict_selectors: bool = None, - service_workers: Literal["allow", "block"] = None + service_workers: Literal["allow", "block"] = None, + record_har_url_filter: typing.Union[str, typing.Pattern] = None, + record_har_mode: Literal["full", "minimal"] = None, + record_har_content: Literal["attach", "embed", "omit"] = None ) -> "BrowserContext": """Browser.new_context @@ -10756,6 +10759,14 @@ async def new_context( Whether to allow sites to register Service workers. Defaults to `'allow'`. - `'allow'`: [Service Workers](https://developer.mozilla.org/en-US/docs/Web/API/Service_Worker_API) can be registered. - `'block'`: Playwright will block all registration of Service Workers. + record_har_url_filter : Union[Pattern, str, NoneType] + record_har_mode : Union["full", "minimal", NoneType] + When set to `minimal`, only record information necessary for routing from HAR. This omits sizes, timing, page, cookies, + security and other types of HAR information that are not used when replaying from HAR. Defaults to `full`. + record_har_content : Union["attach", "embed", "omit", NoneType] + Optional setting to control resource content management. If `omit` is specified, content is not persisted. If `attach` + is specified, resources are persisted as separate files and all of these files are archived along with the HAR file. + Defaults to `embed`, which stores content inline the HAR file as per HAR specification. 
Returns ------- @@ -10795,6 +10806,9 @@ async def new_context( baseURL=base_url, strictSelectors=strict_selectors, serviceWorkers=service_workers, + recordHarUrlFilter=record_har_url_filter, + recordHarMode=record_har_mode, + recordHarContent=record_har_content, ) ) @@ -10831,7 +10845,10 @@ async def new_page( storage_state: typing.Union[StorageState, str, pathlib.Path] = None, base_url: str = None, strict_selectors: bool = None, - service_workers: Literal["allow", "block"] = None + service_workers: Literal["allow", "block"] = None, + record_har_url_filter: typing.Union[str, typing.Pattern] = None, + record_har_mode: Literal["full", "minimal"] = None, + record_har_content: Literal["attach", "embed", "omit"] = None ) -> "Page": """Browser.new_page @@ -10935,6 +10952,14 @@ async def new_page( Whether to allow sites to register Service workers. Defaults to `'allow'`. - `'allow'`: [Service Workers](https://developer.mozilla.org/en-US/docs/Web/API/Service_Worker_API) can be registered. - `'block'`: Playwright will block all registration of Service Workers. + record_har_url_filter : Union[Pattern, str, NoneType] + record_har_mode : Union["full", "minimal", NoneType] + When set to `minimal`, only record information necessary for routing from HAR. This omits sizes, timing, page, cookies, + security and other types of HAR information that are not used when replaying from HAR. Defaults to `full`. + record_har_content : Union["attach", "embed", "omit", NoneType] + Optional setting to control resource content management. If `omit` is specified, content is not persisted. If `attach` + is specified, resources are persisted as separate files and all of these files are archived along with the HAR file. + Defaults to `embed`, which stores content inline the HAR file as per HAR specification. 
Returns ------- @@ -10974,6 +10999,9 @@ async def new_page( baseURL=base_url, strictSelectors=strict_selectors, serviceWorkers=service_workers, + recordHarUrlFilter=record_har_url_filter, + recordHarMode=record_har_mode, + recordHarContent=record_har_content, ) ) @@ -11269,7 +11297,10 @@ async def launch_persistent_context( record_video_size: ViewportSize = None, base_url: str = None, strict_selectors: bool = None, - service_workers: Literal["allow", "block"] = None + service_workers: Literal["allow", "block"] = None, + record_har_url_filter: typing.Union[str, typing.Pattern] = None, + record_har_mode: Literal["full", "minimal"] = None, + record_har_content: Literal["attach", "embed", "omit"] = None ) -> "BrowserContext": """BrowserType.launch_persistent_context @@ -11413,6 +11444,14 @@ async def launch_persistent_context( Whether to allow sites to register Service workers. Defaults to `'allow'`. - `'allow'`: [Service Workers](https://developer.mozilla.org/en-US/docs/Web/API/Service_Worker_API) can be registered. - `'block'`: Playwright will block all registration of Service Workers. + record_har_url_filter : Union[Pattern, str, NoneType] + record_har_mode : Union["full", "minimal", NoneType] + When set to `minimal`, only record information necessary for routing from HAR. This omits sizes, timing, page, cookies, + security and other types of HAR information that are not used when replaying from HAR. Defaults to `full`. + record_har_content : Union["attach", "embed", "omit", NoneType] + Optional setting to control resource content management. If `omit` is specified, content is not persisted. If `attach` + is specified, resources are persisted as separate files and all of these files are archived along with the HAR file. + Defaults to `embed`, which stores content inline the HAR file as per HAR specification. 
Returns ------- @@ -11466,6 +11505,9 @@ async def launch_persistent_context( baseURL=base_url, strictSelectors=strict_selectors, serviceWorkers=service_workers, + recordHarUrlFilter=record_har_url_filter, + recordHarMode=record_har_mode, + recordHarContent=record_har_content, ) ) diff --git a/playwright/sync_api/_generated.py b/playwright/sync_api/_generated.py index 7882d2282..88c3aca7d 100644 --- a/playwright/sync_api/_generated.py +++ b/playwright/sync_api/_generated.py @@ -10669,7 +10669,10 @@ def new_context( storage_state: typing.Union[StorageState, str, pathlib.Path] = None, base_url: str = None, strict_selectors: bool = None, - service_workers: Literal["allow", "block"] = None + service_workers: Literal["allow", "block"] = None, + record_har_url_filter: typing.Union[str, typing.Pattern] = None, + record_har_mode: Literal["full", "minimal"] = None, + record_har_content: Literal["attach", "embed", "omit"] = None ) -> "BrowserContext": """Browser.new_context @@ -10778,6 +10781,14 @@ def new_context( Whether to allow sites to register Service workers. Defaults to `'allow'`. - `'allow'`: [Service Workers](https://developer.mozilla.org/en-US/docs/Web/API/Service_Worker_API) can be registered. - `'block'`: Playwright will block all registration of Service Workers. + record_har_url_filter : Union[Pattern, str, NoneType] + record_har_mode : Union["full", "minimal", NoneType] + When set to `minimal`, only record information necessary for routing from HAR. This omits sizes, timing, page, cookies, + security and other types of HAR information that are not used when replaying from HAR. Defaults to `full`. + record_har_content : Union["attach", "embed", "omit", NoneType] + Optional setting to control resource content management. If `omit` is specified, content is not persisted. If `attach` + is specified, resources are persisted as separate files and all of these files are archived along with the HAR file. 
+ Defaults to `embed`, which stores content inline the HAR file as per HAR specification. Returns ------- @@ -10818,6 +10829,9 @@ def new_context( baseURL=base_url, strictSelectors=strict_selectors, serviceWorkers=service_workers, + recordHarUrlFilter=record_har_url_filter, + recordHarMode=record_har_mode, + recordHarContent=record_har_content, ) ) ) @@ -10855,7 +10869,10 @@ def new_page( storage_state: typing.Union[StorageState, str, pathlib.Path] = None, base_url: str = None, strict_selectors: bool = None, - service_workers: Literal["allow", "block"] = None + service_workers: Literal["allow", "block"] = None, + record_har_url_filter: typing.Union[str, typing.Pattern] = None, + record_har_mode: Literal["full", "minimal"] = None, + record_har_content: Literal["attach", "embed", "omit"] = None ) -> "Page": """Browser.new_page @@ -10959,6 +10976,14 @@ def new_page( Whether to allow sites to register Service workers. Defaults to `'allow'`. - `'allow'`: [Service Workers](https://developer.mozilla.org/en-US/docs/Web/API/Service_Worker_API) can be registered. - `'block'`: Playwright will block all registration of Service Workers. + record_har_url_filter : Union[Pattern, str, NoneType] + record_har_mode : Union["full", "minimal", NoneType] + When set to `minimal`, only record information necessary for routing from HAR. This omits sizes, timing, page, cookies, + security and other types of HAR information that are not used when replaying from HAR. Defaults to `full`. + record_har_content : Union["attach", "embed", "omit", NoneType] + Optional setting to control resource content management. If `omit` is specified, content is not persisted. If `attach` + is specified, resources are persisted as separate files and all of these files are archived along with the HAR file. + Defaults to `embed`, which stores content inline the HAR file as per HAR specification. 
Returns ------- @@ -10999,6 +11024,9 @@ def new_page( baseURL=base_url, strictSelectors=strict_selectors, serviceWorkers=service_workers, + recordHarUrlFilter=record_har_url_filter, + recordHarMode=record_har_mode, + recordHarContent=record_har_content, ) ) ) @@ -11299,7 +11327,10 @@ def launch_persistent_context( record_video_size: ViewportSize = None, base_url: str = None, strict_selectors: bool = None, - service_workers: Literal["allow", "block"] = None + service_workers: Literal["allow", "block"] = None, + record_har_url_filter: typing.Union[str, typing.Pattern] = None, + record_har_mode: Literal["full", "minimal"] = None, + record_har_content: Literal["attach", "embed", "omit"] = None ) -> "BrowserContext": """BrowserType.launch_persistent_context @@ -11443,6 +11474,14 @@ def launch_persistent_context( Whether to allow sites to register Service workers. Defaults to `'allow'`. - `'allow'`: [Service Workers](https://developer.mozilla.org/en-US/docs/Web/API/Service_Worker_API) can be registered. - `'block'`: Playwright will block all registration of Service Workers. + record_har_url_filter : Union[Pattern, str, NoneType] + record_har_mode : Union["full", "minimal", NoneType] + When set to `minimal`, only record information necessary for routing from HAR. This omits sizes, timing, page, cookies, + security and other types of HAR information that are not used when replaying from HAR. Defaults to `full`. + record_har_content : Union["attach", "embed", "omit", NoneType] + Optional setting to control resource content management. If `omit` is specified, content is not persisted. If `attach` + is specified, resources are persisted as separate files and all of these files are archived along with the HAR file. + Defaults to `embed`, which stores content inline the HAR file as per HAR specification. 
Returns ------- @@ -11497,6 +11536,9 @@ def launch_persistent_context( baseURL=base_url, strictSelectors=strict_selectors, serviceWorkers=service_workers, + recordHarUrlFilter=record_har_url_filter, + recordHarMode=record_har_mode, + recordHarContent=record_har_content, ) ) ) diff --git a/scripts/expected_api_mismatch.txt b/scripts/expected_api_mismatch.txt index e362d28d4..5fad86551 100644 --- a/scripts/expected_api_mismatch.txt +++ b/scripts/expected_api_mismatch.txt @@ -20,9 +20,6 @@ Method not implemented: Error.message Method not implemented: PlaywrightAssertions.expect # Pending 1.23 ports -Parameter not implemented: BrowserType.launch_persistent_context(record_har_url_filter=) Method not implemented: BrowserContext.route_from_har Method not implemented: Route.fallback -Parameter not implemented: Browser.new_page(record_har_url_filter=) Method not implemented: Page.route_from_har -Parameter not implemented: Browser.new_context(record_har_url_filter=) diff --git a/setup.py b/setup.py index 2023d03be..c3376f354 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ InWheel = None from wheel.bdist_wheel import bdist_wheel as BDistWheelCommand -driver_version = "1.23.0-beta-1656026605000" +driver_version = "1.23.0-beta-1656093125000" def extractall(zip: zipfile.ZipFile, path: str) -> None: diff --git a/tests/async/test_har.py b/tests/async/test_har.py index 6cb6f2472..00d02d32d 100644 --- a/tests/async/test_har.py +++ b/tests/async/test_har.py @@ -14,6 +14,11 @@ import json import os +import re +import zipfile + +from playwright.async_api import Browser +from tests.server import Server async def test_should_work(browser, server, tmpdir): @@ -28,6 +33,24 @@ async def test_should_work(browser, server, tmpdir): async def test_should_omit_content(browser, server, tmpdir): + path = os.path.join(tmpdir, "log.har") + context = await browser.new_context( + record_har_path=path, + record_har_content="omit", + ) + page = await context.new_page() + await 
page.goto(server.PREFIX + "/har.html") + await context.close() + with open(path) as f: + data = json.load(f) + assert "log" in data + log = data["log"] + content1 = log["entries"][0]["response"]["content"] + assert "text" not in content1 + assert "encoding" not in content1 + + +async def test_should_omit_content_legacy(browser, server, tmpdir): path = os.path.join(tmpdir, "log.har") context = await browser.new_context( record_har_path=path, record_har_omit_content=True @@ -40,10 +63,67 @@ async def test_should_omit_content(browser, server, tmpdir): assert "log" in data log = data["log"] content1 = log["entries"][0]["response"]["content"] - assert "text" in content1 + assert "text" not in content1 assert "encoding" not in content1 +async def test_should_attach_content(browser, server, tmpdir, is_firefox): + path = os.path.join(tmpdir, "log.har.zip") + context = await browser.new_context( + record_har_path=path, + record_har_content="attach", + ) + page = await context.new_page() + await page.goto(server.PREFIX + "/har.html") + await page.evaluate("() => fetch('/pptr.png').then(r => r.arrayBuffer())") + await context.close() + with zipfile.ZipFile(path) as z: + with z.open("har.har") as har: + entries = json.load(har)["log"]["entries"] + + assert "encoding" not in entries[0]["response"]["content"] + assert ( + entries[0]["response"]["content"]["mimeType"] + == "text/html; charset=utf-8" + ) + assert ( + "75841480e2606c03389077304342fac2c58ccb1b" + in entries[0]["response"]["content"]["_file"] + ) + assert entries[0]["response"]["content"]["size"] >= 96 + assert entries[0]["response"]["content"]["compression"] == 0 + + assert "encoding" not in entries[1]["response"]["content"] + assert ( + entries[1]["response"]["content"]["mimeType"] + == "text/css; charset=utf-8" + ) + assert ( + "79f739d7bc88e80f55b9891a22bf13a2b4e18adb" + in entries[1]["response"]["content"]["_file"] + ) + assert entries[1]["response"]["content"]["size"] >= 37 + assert 
entries[1]["response"]["content"]["compression"] == 0 + + assert "encoding" not in entries[2]["response"]["content"] + assert entries[2]["response"]["content"]["mimeType"] == "image/png" + assert ( + "a4c3a18f0bb83f5d9fe7ce561e065c36205762fa" + in entries[2]["response"]["content"]["_file"] + ) + assert entries[2]["response"]["content"]["size"] >= 6000 + assert entries[2]["response"]["content"]["compression"] == 0 + + with z.open("75841480e2606c03389077304342fac2c58ccb1b.html") as f: + assert b"HAR Page" in f.read() + + with z.open("79f739d7bc88e80f55b9891a22bf13a2b4e18adb.css") as f: + assert b"pink" in f.read() + + with z.open("a4c3a18f0bb83f5d9fe7ce561e065c36205762fa.png") as f: + assert len(f.read()) == entries[2]["response"]["content"]["size"] + + async def test_should_not_omit_content(browser, server, tmpdir): path = os.path.join(tmpdir, "log.har") context = await browser.new_context( @@ -70,5 +150,78 @@ async def test_should_include_content(browser, server, tmpdir): log = data["log"] content1 = log["entries"][0]["response"]["content"] - assert content1["mimeType"] == "text/html" + assert content1["mimeType"] == "text/html; charset=utf-8" assert "HAR Page" in content1["text"] + + +async def test_should_default_to_full_mode(browser, server, tmpdir): + path = os.path.join(tmpdir, "log.har") + context = await browser.new_context( + record_har_path=path, + ) + page = await context.new_page() + await page.goto(server.PREFIX + "/har.html") + await context.close() + with open(path) as f: + data = json.load(f) + assert "log" in data + log = data["log"] + assert log["entries"][0]["request"]["bodySize"] >= 0 + + +async def test_should_support_minimal_mode(browser, server, tmpdir): + path = os.path.join(tmpdir, "log.har") + context = await browser.new_context( + record_har_path=path, + record_har_mode="minimal", + ) + page = await context.new_page() + await page.goto(server.PREFIX + "/har.html") + await context.close() + with open(path) as f: + data = json.load(f) + 
assert "log" in data + log = data["log"] + assert log["entries"][0]["request"]["bodySize"] == -1 + + +async def test_should_filter_by_glob( + browser: Browser, server: Server, tmpdir: str +) -> None: + path = os.path.join(tmpdir, "log.har") + context = await browser.new_context( + base_url=server.PREFIX, + record_har_path=path, + record_har_url_filter="/*.css", + ignore_https_errors=True, + ) + page = await context.new_page() + await page.goto(server.PREFIX + "/har.html") + await context.close() + with open(path) as f: + data = json.load(f) + assert "log" in data + log = data["log"] + assert len(log["entries"]) == 1 + assert log["entries"][0]["request"]["url"].endswith("one-style.css") + + +async def test_should_filter_by_regexp( + browser: Browser, server: Server, tmpdir: str +) -> None: + path = os.path.join(tmpdir, "log.har") + context = await browser.new_context( + base_url=server.PREFIX, + record_har_path=path, + record_har_url_filter=re.compile("HAR.X?HTML", re.I), + ignore_https_errors=True, + ) + page = await context.new_page() + await page.goto(server.PREFIX + "/har.html") + await context.close() + with open(path) as f: + data = json.load(f) + assert "log" in data + log = data["log"] + assert len(log["entries"]) == 1 + assert log["entries"][0]["request"]["url"].endswith("har.html") diff --git a/tests/async/test_request_intercept.py b/tests/async/test_request_intercept.py index 1e5fca4fb..39ccf3d3f 100644 --- a/tests/async/test_request_intercept.py +++ b/tests/async/test_request_intercept.py @@ -43,7 +43,9 @@ async def handle(route: Route): async def test_should_fulfill_response_with_empty_body(page: Page, server: Server): async def handle(route: Route): response = await page.request.fetch(route.request) - await route.fulfill(response=response, status=201, body="") + await route.fulfill( + response=response, status=201, body="", headers={"content-length": "0"} + ) await page.route("**/*", handle) response = await page.goto(server.PREFIX + "/title.html") @@ 
-131,13 +133,13 @@ async def test_should_give_access_to_the_intercepted_response( assert response.status_text == "OK" assert response.ok is True assert response.url.endswith("/title.html") is True - assert response.headers["content-type"] == "text/html" + assert response.headers["content-type"] == "text/html; charset=utf-8" assert list( filter( lambda header: header["name"].lower() == "content-type", response.headers_array, ) - ) == [{"name": "Content-Type", "value": "text/html"}] + ) == [{"name": "Content-Type", "value": "text/html; charset=utf-8"}] await asyncio.gather( route.fulfill(response=response), diff --git a/tests/server.py b/tests/server.py index f63ea5ad8..75a0631d2 100644 --- a/tests/server.py +++ b/tests/server.py @@ -134,12 +134,16 @@ def process(self) -> None: file_content = None try: file_content = (static_path / path[1:]).read_bytes() - request.setHeader(b"Content-Type", mimetypes.guess_type(path)[0]) + content_type = mimetypes.guess_type(path)[0] + if content_type and content_type.startswith("text/"): + content_type += "; charset=utf-8" + request.setHeader(b"Content-Type", content_type) request.setHeader(b"Cache-Control", "no-cache, no-store") if path in gzip_routes: request.setHeader("Content-Encoding", "gzip") request.write(gzip.compress(file_content)) else: + request.setHeader(b"Content-Length", str(len(file_content))) request.write(file_content) self.setResponseCode(HTTPStatus.OK) except (FileNotFoundError, IsADirectoryError, PermissionError): diff --git a/tests/sync/test_har.py b/tests/sync/test_har.py index 2561c7fd6..479c97e0a 100644 --- a/tests/sync/test_har.py +++ b/tests/sync/test_har.py @@ -14,6 +14,8 @@ import json import os +import re +import zipfile from pathlib import Path from playwright.sync_api import Browser @@ -32,6 +34,24 @@ def test_should_work(browser: Browser, server: Server, tmpdir: Path) -> None: def test_should_omit_content(browser: Browser, server: Server, tmpdir: Path) -> None: + path = os.path.join(tmpdir, "log.har") 
+ context = browser.new_context(record_har_path=path, record_har_content="omit") + page = context.new_page() + page.goto(server.PREFIX + "/har.html") + context.close() + with open(path) as f: + data = json.load(f) + assert "log" in data + log = data["log"] + + content1 = log["entries"][0]["response"]["content"] + assert "text" not in content1 + assert "encoding" not in content1 + + +def test_should_omit_content_legacy( + browser: Browser, server: Server, tmpdir: Path +) -> None: path = os.path.join(tmpdir, "log.har") context = browser.new_context(record_har_path=path, record_har_omit_content=True) page = context.new_page() @@ -43,10 +63,67 @@ def test_should_omit_content(browser: Browser, server: Server, tmpdir: Path) -> log = data["log"] content1 = log["entries"][0]["response"]["content"] - assert "text" in content1 + assert "text" not in content1 assert "encoding" not in content1 +def test_should_attach_content(browser: Browser, server: Server, tmpdir: Path) -> None: + path = os.path.join(tmpdir, "log.har.zip") + context = browser.new_context( + record_har_path=path, + record_har_content="attach", + ) + page = context.new_page() + page.goto(server.PREFIX + "/har.html") + page.evaluate("() => fetch('/pptr.png').then(r => r.arrayBuffer())") + context.close() + with zipfile.ZipFile(path) as z: + with z.open("har.har") as har: + entries = json.load(har)["log"]["entries"] + + assert "encoding" not in entries[0]["response"]["content"] + assert ( + entries[0]["response"]["content"]["mimeType"] + == "text/html; charset=utf-8" + ) + assert ( + "75841480e2606c03389077304342fac2c58ccb1b" + in entries[0]["response"]["content"]["_file"] + ) + assert entries[0]["response"]["content"]["size"] >= 96 + assert entries[0]["response"]["content"]["compression"] == 0 + + assert "encoding" not in entries[1]["response"]["content"] + assert ( + entries[1]["response"]["content"]["mimeType"] + == "text/css; charset=utf-8" + ) + assert ( + "79f739d7bc88e80f55b9891a22bf13a2b4e18adb" + in 
entries[1]["response"]["content"]["_file"] + ) + assert entries[1]["response"]["content"]["size"] >= 37 + assert entries[1]["response"]["content"]["compression"] == 0 + + assert "encoding" not in entries[2]["response"]["content"] + assert entries[2]["response"]["content"]["mimeType"] == "image/png" + assert ( + "a4c3a18f0bb83f5d9fe7ce561e065c36205762fa" + in entries[2]["response"]["content"]["_file"] + ) + assert entries[2]["response"]["content"]["size"] >= 6000 + assert entries[2]["response"]["content"]["compression"] == 0 + + with z.open("75841480e2606c03389077304342fac2c58ccb1b.html") as f: + assert b"HAR Page" in f.read() + + with z.open("79f739d7bc88e80f55b9891a22bf13a2b4e18adb.css") as f: + assert b"pink" in f.read() + + with z.open("a4c3a18f0bb83f5d9fe7ce561e065c36205762fa.png") as f: + assert len(f.read()) == entries[2]["response"]["content"]["size"] + + def test_should_include_content(browser: Browser, server: Server, tmpdir: Path) -> None: path = os.path.join(tmpdir, "log.har") context = browser.new_context(record_har_path=path) @@ -59,5 +136,78 @@ def test_should_include_content(browser: Browser, server: Server, tmpdir: Path) log = data["log"] content1 = log["entries"][0]["response"]["content"] - assert content1["mimeType"] == "text/html" + assert content1["mimeType"] == "text/html; charset=utf-8" assert "HAR Page" in content1["text"] + + +def test_should_default_to_full_mode( + browser: Browser, server: Server, tmpdir: Path +) -> None: + path = os.path.join(tmpdir, "log.har") + context = browser.new_context( + record_har_path=path, + ) + page = context.new_page() + page.goto(server.PREFIX + "/har.html") + context.close() + with open(path) as f: + data = json.load(f) + assert "log" in data + log = data["log"] + assert log["entries"][0]["request"]["bodySize"] >= 0 + + +def test_should_support_minimal_mode( + browser: Browser, server: Server, tmpdir: Path +) -> None: + path = os.path.join(tmpdir, "log.har") + context = browser.new_context( + 
record_har_path=path, + record_har_mode="minimal", + ) + page = context.new_page() + page.goto(server.PREFIX + "/har.html") + context.close() + with open(path) as f: + data = json.load(f) + assert "log" in data + log = data["log"] + assert log["entries"][0]["request"]["bodySize"] == -1 + + +def test_should_filter_by_glob(browser: Browser, server: Server, tmpdir: str) -> None: + path = os.path.join(tmpdir, "log.har") + context = browser.new_context( + base_url=server.PREFIX, + record_har_path=path, + record_har_url_filter="/*.css", + ignore_https_errors=True, + ) + page = context.new_page() + page.goto(server.PREFIX + "/har.html") + context.close() + with open(path) as f: + data = json.load(f) + assert "log" in data + log = data["log"] + assert len(log["entries"]) == 1 + assert log["entries"][0]["request"]["url"].endswith("one-style.css") + + +def test_should_filter_by_regexp(browser: Browser, server: Server, tmpdir: str) -> None: + path = os.path.join(tmpdir, "log.har") + context = browser.new_context( + base_url=server.PREFIX, + record_har_path=path, + record_har_url_filter=re.compile("HAR.X?HTML", re.I), + ignore_https_errors=True, + ) + page = context.new_page() + page.goto(server.PREFIX + "/har.html") + context.close() + with open(path) as f: + data = json.load(f) + assert "log" in data + log = data["log"] + assert len(log["entries"]) == 1 + assert log["entries"][0]["request"]["url"].endswith("har.html") diff --git a/tests/sync/test_request_intercept.py b/tests/sync/test_request_intercept.py index dc66000e7..dc714e832 100644 --- a/tests/sync/test_request_intercept.py +++ b/tests/sync/test_request_intercept.py @@ -43,7 +43,9 @@ def handle(route: Route) -> None: def test_should_fulfill_response_with_empty_body(page: Page, server: Server) -> None: def handle(route: Route) -> None: response = page.request.fetch(route.request) - route.fulfill(response=response, status=201, body="") + route.fulfill( + response=response, status=201, body="", headers={"content-length": 
"0"} + ) page.route("**/*", handle) response = page.goto(server.PREFIX + "/title.html")