From 1d4927f1a119c47c7ff45f4b4e522ea9b484dbbf Mon Sep 17 00:00:00 2001 From: benoit74 Date: Tue, 30 Sep 2025 15:02:23 +0000 Subject: [PATCH 1/4] Backport https://github.com/webrecorder/wabac.js/commit/588954f1f540d21aafd7a166c73e30a1f047a91d --- src/zimscraperlib/rewriting/js.py | 2 +- tests/rewriting/test_js_rewriting.py | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/zimscraperlib/rewriting/js.py b/src/zimscraperlib/rewriting/js.py index 2faf56c..a9c5977 100644 --- a/src/zimscraperlib/rewriting/js.py +++ b/src/zimscraperlib/rewriting/js.py @@ -162,7 +162,7 @@ def create_js_rules() -> list[TransformationRule]: (re.compile(r"\.postMessage\b\("), add_prefix(".__WB_pmw(self)")), # rewriting `location = ` to custom expression `(...).href =` assignement ( - re.compile(r"(?:^|[^$.+*/%^-])\s?\blocation\b\s*[=]\s*(?![\s\d=])"), + re.compile(r"(?:^|[^$.+*/%^-])\s?\blocation\b\s*[=]\s*(?![\s\d=>])"), add_suffix_non_prop(check_loc), ), # rewriting `return this` diff --git a/tests/rewriting/test_js_rewriting.py b/tests/rewriting/test_js_rewriting.py index 6ab7177..4a4e082 100644 --- a/tests/rewriting/test_js_rewriting.py +++ b/tests/rewriting/test_js_rewriting.py @@ -186,6 +186,10 @@ def wrap_script(text: str) -> str: "self.__WB_check_loc(location, argument" "s)) || {}).href = http://example.com/", ), + WrappedTestContent( + input_='location => "http://example.com/"', + expected='location => "http://example.com/"', + ), WrappedTestContent( input_=" location = http://example.com/2", expected=" location = ((self.__WB_check_loc && " @@ -220,6 +224,24 @@ def wrap_script(text: str) -> str: expected="if (self.foo) { console.log('blah') }", ), WrappedTestContent(input_="window.x = 5", expected="window.x = 5"), + WrappedTestContent( + input_=""" + class A {} + const B = 5; + let C = 4; + var D = 3; + + location = "http://example.com/2" """, + expected=""" + class A {} + const B = 5; + let C = 4; + var D = 3; + + location = ((self.__WB_check_loc && """ + "self.__WB_check_loc(location, arguments)) || {}).href " + """= "http://example.com/2" """, + ), WrappedTestContent(input_=" var self ", expected=" let self "), ] ) From d5ba369b68d583ccae3bc639bfe62acbc673b4b3 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Tue, 30 Sep 2025 15:10:16 +0000 Subject: [PATCH 2/4] Backport https://github.com/webrecorder/wabac.js/commit/bc4efe3759db371c5ae02cbe91d559d8c508e9b3 --- src/zimscraperlib/rewriting/js.py | 12 ++++++------ tests/rewriting/test_js_rewriting.py | 14 ++++++++++++++ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/zimscraperlib/rewriting/js.py b/src/zimscraperlib/rewriting/js.py index a9c5977..4c1b129 100644 --- a/src/zimscraperlib/rewriting/js.py +++ b/src/zimscraperlib/rewriting/js.py @@ -29,13 +29,13 @@ from zimscraperlib.rewriting.url_rewriting import ArticleUrlRewriter, ZimPath # The regex used to rewrite `import ...` in module code. -IMPORT_MATCH_RX = re.compile( - r"""^\s*?import(?:['"\s]*(?:[\w*${}\s,]+from\s*)?['"\s]?['"\s])(?:.*?)['"\s]""", +IMPORT_EXPORT_MATCH_RX = re.compile( + r"""(^|;)\s*?(?:im|ex)port(?:['"\s]*(?:[\w*${}\s,]+from\s*)?['"\s]?['"\s])(?:.*?)['"\s]""", ) # A sub regex used inside `import ...` rewrite to rewrite http url imported -IMPORT_HTTP_RX = re.compile( - r"""(import(?:['"\s]*(?:[\w*${}\s,]+from\s*)?['"\s]?['"\s]))((?:https?|[./]).*?)(['"\s])""", +IMPORT_EXPORT_HTTP_RX = re.compile( + r"""((?:im|ex)port(?:['"\s]*(?:[\w*${}\s,]+from\s*)?['"\s]?['"\s]))((?:https?|[./]).*?)(['"\s])""", ) # This list of global variables we want to wrap. @@ -312,8 +312,8 @@ def sub_funct(match: re.Match[str]) -> str: f"{match.group(3)}" ) - return IMPORT_HTTP_RX.sub(sub_funct, m_object[0]) + return IMPORT_EXPORT_HTTP_RX.sub(sub_funct, m_object[0]) return func - return (IMPORT_MATCH_RX, rewrite_import()) + return (IMPORT_EXPORT_MATCH_RX, rewrite_import()) diff --git a/tests/rewriting/test_js_rewriting.py b/tests/rewriting/test_js_rewriting.py index 4a4e082..5fa8731 100644 --- a/tests/rewriting/test_js_rewriting.py +++ b/tests/rewriting/test_js_rewriting.py @@ -310,6 +310,20 @@ def wrap_import(text: str) -> str: export { a }; """, ), + # rewrite import same line + ImportTestContent( + input_='import{A, B} from "https://example.com/";' + 'import{C, D} from "https://example.org"', + expected='import{A, B} from "../../../example.com/";' + 'import{C, D} from "../../../example.org/"', + ), + # rewrite import / export same line + ImportTestContent( + input_='import{A, B} from "https://example.com/";' + 'export{C, D} from "/another/path/to/file"', + expected='import{A, B} from "../../../example.com/";' + 'export{C, D} from "../../another/path/to/file"', + ), # rewrite ESM module import ImportTestContent( input_='import "https://example.com/file.js"', From 43d81ee501be8afba96019470510cc110aec8e07 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Tue, 30 Sep 2025 18:38:05 +0000 Subject: [PATCH 3/4] Backport https://github.com/webrecorder/wabac.js/commit/b244c30642cc42232ceab6c196ffcb213e59a90a --- src/zimscraperlib/rewriting/js.py | 5 ++++- tests/rewriting/test_js_rewriting.py | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/zimscraperlib/rewriting/js.py b/src/zimscraperlib/rewriting/js.py index 4c1b129..78b1649 100644 --- a/src/zimscraperlib/rewriting/js.py +++ b/src/zimscraperlib/rewriting/js.py @@ -153,7 +153,10 @@ def create_js_rules() -> list[TransformationRule]: return [ # rewriting `eval(...)` - invocation - (re.compile(r"(?:^|\s)\beval\s*\("), replace_prefix_from(eval_str, "eval")), + ( + re.compile(r"(? Date: Thu, 2 Oct 2025 08:19:43 +0000 Subject: [PATCH 4/4] Add details about upstream commit backported --- src/zimscraperlib/rewriting/js.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/zimscraperlib/rewriting/js.py b/src/zimscraperlib/rewriting/js.py index 78b1649..22ffbed 100644 --- a/src/zimscraperlib/rewriting/js.py +++ b/src/zimscraperlib/rewriting/js.py @@ -11,6 +11,10 @@ it appropriately - a specific JS file (provided in `statics` folder) for JS modules is included in the ZIM at `_zim_static/__wb_module_decl.js` + +This code is based on https://github.com/webrecorder/wabac.js/blob/main/src/rewrite/jsrewriter.ts +Last backport of upstream changes is from Sept 13, 2025 +Commit 6dd2d9ae664cfcd2ea8637d7d6c7ed7a0ca332a0 """ import re