diff --git a/src/zimscraperlib/rewriting/js.py b/src/zimscraperlib/rewriting/js.py index 2faf56c..22ffbed 100644 --- a/src/zimscraperlib/rewriting/js.py +++ b/src/zimscraperlib/rewriting/js.py @@ -11,6 +11,10 @@ it appropriately - a specific JS file (provided in `statics` folder) for JS modules is included in the ZIM at `_zim_static/__wb_module_decl.js` + +This code is based on https://github.com/webrecorder/wabac.js/blob/main/src/rewrite/jsrewriter.ts +Last backport of upstream changes is from Sept 13, 2025 +Commit 6dd2d9ae664cfcd2ea8637d7d6c7ed7a0ca332a0 """ import re @@ -29,13 +33,13 @@ from zimscraperlib.rewriting.url_rewriting import ArticleUrlRewriter, ZimPath # The regex used to rewrite `import ...` in module code. -IMPORT_MATCH_RX = re.compile( - r"""^\s*?import(?:['"\s]*(?:[\w*${}\s,]+from\s*)?['"\s]?['"\s])(?:.*?)['"\s]""", +IMPORT_EXPORT_MATCH_RX = re.compile( + r"""(^|;)\s*?(?:im|ex)port(?:['"\s]*(?:[\w*${}\s,]+from\s*)?['"\s]?['"\s])(?:.*?)['"\s]""", ) # A sub regex used inside `import ...` rewrite to rewrite http url imported -IMPORT_HTTP_RX = re.compile( - r"""(import(?:['"\s]*(?:[\w*${}\s,]+from\s*)?['"\s]?['"\s]))((?:https?|[./]).*?)(['"\s])""", +IMPORT_EXPORT_HTTP_RX = re.compile( + r"""((?:im|ex)port(?:['"\s]*(?:[\w*${}\s,]+from\s*)?['"\s]?['"\s]))((?:https?|[./]).*?)(['"\s])""", ) # This list of global variables we want to wrap. @@ -153,7 +157,10 @@ def create_js_rules() -> list[TransformationRule]: return [ # rewriting `eval(...)` - invocation - (re.compile(r"(?:^|\s)\beval\s*\("), replace_prefix_from(eval_str, "eval")), + ( + re.compile(r"(? list[TransformationRule]: (re.compile(r"\.postMessage\b\("), add_prefix(".__WB_pmw(self)")), # rewriting `location = ` to custom expression `(...).href =` assignement ( - re.compile(r"(?:^|[^$.+*/%^-])\s?\blocation\b\s*[=]\s*(?![\s\d=])"), + re.compile(r"(?:^|[^$.+*/%^-])\s?\blocation\b\s*[=]\s*(?![\s\d=>])"), add_suffix_non_prop(check_loc), ), # rewriting `return this` @@ -312,8 +319,8 @@ def sub_funct(match: re.Match[str]) -> str: f"{match.group(3)}" ) - return IMPORT_HTTP_RX.sub(sub_funct, m_object[0]) + return IMPORT_EXPORT_HTTP_RX.sub(sub_funct, m_object[0]) return func - return (IMPORT_MATCH_RX, rewrite_import()) + return (IMPORT_EXPORT_MATCH_RX, rewrite_import()) diff --git a/tests/rewriting/test_js_rewriting.py b/tests/rewriting/test_js_rewriting.py index 6ab7177..c75e90e 100644 --- a/tests/rewriting/test_js_rewriting.py +++ b/tests/rewriting/test_js_rewriting.py @@ -186,6 +186,10 @@ def wrap_script(text: str) -> str: "self.__WB_check_loc(location, argument" "s)) || {}).href = http://example.com/", ), + WrappedTestContent( + input_='location => "http://example.com/"', + expected='location => "http://example.com/"', + ), WrappedTestContent( input_=" location = http://example.com/2", expected=" location = ((self.__WB_check_loc && " @@ -220,6 +224,24 @@ def wrap_script(text: str) -> str: expected="if (self.foo) { console.log('blah') }", ), WrappedTestContent(input_="window.x = 5", expected="window.x = 5"), + WrappedTestContent( + input_=""" + class A {} + const B = 5; + let C = 4; + var D = 3; + + location = "http://example.com/2" """, + expected=""" + class A {} + const B = 5; + let C = 4; + var D = 3; + + location = ((self.__WB_check_loc && """ + "self.__WB_check_loc(location, arguments)) || {}).href " + """= "http://example.com/2" """, + ), WrappedTestContent(input_=" var self ", expected=" let self "), ] ) @@ -288,6 +310,20 @@ def wrap_import(text: str) -> str: export { a }; """, ), + # rewrite import same line + ImportTestContent( + input_='import{A, B} from "https://example.com/";' + 'import{C, D} from "https://example.org"', + expected='import{A, B} from "../../../example.com/";' + 'import{C, D} from "../../../example.org/"', + ), + # rewrite import / export same line + ImportTestContent( + input_='import{A, B} from "https://example.com/";' + 'export{C, D} from "/another/path/to/file"', + expected='import{A, B} from "../../../example.com/";' + 'export{C, D} from "../../another/path/to/file"', + ), # rewrite ESM module import ImportTestContent( input_='import "https://example.com/file.js"', @@ -387,6 +423,9 @@ def test_import_rewrite(rewrite_import_content: ImportTestContent): ",eval(a)", "this.$eval(a)", "x = $eval; x(a);", + "static eval(a,b){ }", + "function eval(a,b){ }", + "} eval(a,b){ }", "obj = { eval : 1 }", "x = obj.eval", "x = obj.eval(a)",