From 70e02dcfc7501004c717906bf90b0bb2c5e7bc84 Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Mon, 3 Nov 2025 20:21:46 +0800 Subject: [PATCH 1/4] Refactor the ScriptManager This PR introduces two major changes to the ScriptManager. 1 - Simplification. Rather than having a `Script`, `PendingScript`, `AsyncModule` and `SyncModule`, there is only a `Script`, with an added `mode` union. All of the previous objects had the same behavior (collect the response in a buffer), up to the point of execution, which is where the mode comes in. 2 - Correctness Whether or not the previous version was "incorrect", it was difficult to use correctly. Specifically, the previous version would execute async scripts and async modules as soon as they're done. That seems allowed, but it caused issues with module loading in Context.js. Specifically, between compiling and instantiating a module, or between instantiation and evaluation, an async script or module could be evaluated. It isn't clear whether v8 allows that, but if it does, it introduces a lot of new potential states (specifically, unexpected changes to the v8.Module's status) that we have to handle. This version only evaluate scripts in the `evaluate`, which doesn't allow recursive calls (so a waitForImport, which continues to pump the HTTP loop, can never result in `evaluate` being called again). This undoes the change made in https://github.com/lightpanda-io/browser/pull/1158 because I do not think it's possible to have multiple waiters waiting for the same (or even different) modules. The linked issue points to a crash in https://www.nytimes.com which doesn't crash with this version. --- src/browser/ScriptManager.zig | 772 +++++++++++++--------------------- src/browser/js/Context.zig | 20 +- 2 files changed, 305 insertions(+), 487 deletions(-) diff --git a/src/browser/ScriptManager.zig b/src/browser/ScriptManager.zig index a23ba068c..664e3e28c 100644 --- a/src/browser/ScriptManager.zig +++ b/src/browser/ScriptManager.zig @@ -43,103 +43,103 @@ static_scripts_done: bool, // List of async scripts. We don't care about the execution order of these, but // on shutdown/abort, we need to cleanup any pending ones. -asyncs: OrderList, +async_scripts: std.DoublyLinkedList, // Normal scripts (non-deferred & non-async). These must be executed in order -scripts: OrderList, +normal_scripts: std.DoublyLinkedList, // List of deferred scripts. These must be executed in order, but only once // dom_loaded == true, -deferreds: OrderList, +defer_scripts: std.DoublyLinkedList, + +// When an async script is ready, it's queued here. We played with executing +// them as they complete, but it can cause timing issues with v8 module loading. +ready_scripts: std.DoublyLinkedList, shutdown: bool = false, client: *Http.Client, allocator: Allocator, buffer_pool: BufferPool, -script_pool: std.heap.MemoryPool(PendingScript), -sync_module_pool: std.heap.MemoryPool(SyncModule), -async_module_pool: std.heap.MemoryPool(AsyncModule), + +script_pool: std.heap.MemoryPool(Script), // We can download multiple sync modules in parallel, but we want to process -// then in order. We can't use an OrderList, like the other script types, +// them in order. We can't use an std.DoublyLinkedList, like the other script types, // because the order we load them might not be the order we want to process // them in (I'm not sure this is true, but as far as I can tell, v8 doesn't // make any guarantees about the list of sub-module dependencies it gives us -// So this is more like a cache. When a SyncModule is complete, it's put here -// and can be requested as needed. -sync_modules: std.StringHashMapUnmanaged(*SyncModule), +// So this is more like a cache. When an imported module is completed, its +// source is placed here (keyed by the full url) for some point in the future +// when v8 asks for it. +// The type is confusing (too confusing? move to a union). Starts of as `null` +// then transitions to either an error (from errorCalback) or the completed +// buffer from doneCallback +imported_modules: std.StringHashMapUnmanaged(?error{Failed}!std.ArrayList(u8)), // Mapping between module specifier and resolution. // see https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/script/type/importmap // importmap contains resolved urls. importmap: std.StringHashMapUnmanaged([:0]const u8), -const OrderList = std.DoublyLinkedList; - pub fn init(browser: *Browser, page: *Page) ScriptManager { // page isn't fully initialized, we can setup our reference, but that's it. const allocator = browser.allocator; return .{ .page = page, - .asyncs = .{}, - .scripts = .{}, - .deferreds = .{}, + .async_scripts = .{}, + .normal_scripts = .{}, + .defer_scripts = .{}, + .ready_scripts = .{}, .importmap = .empty, - .sync_modules = .empty, .is_evaluating = false, .allocator = allocator, + .imported_modules = .empty, .client = browser.http_client, .static_scripts_done = false, .buffer_pool = BufferPool.init(allocator, 5), - .script_pool = std.heap.MemoryPool(PendingScript).init(allocator), - .sync_module_pool = std.heap.MemoryPool(SyncModule).init(allocator), - .async_module_pool = std.heap.MemoryPool(AsyncModule).init(allocator), + .script_pool = std.heap.MemoryPool(Script).init(allocator), }; } pub fn deinit(self: *ScriptManager) void { self.reset(); - var it = self.sync_modules.valueIterator(); - while (it.next()) |value_ptr| { - value_ptr.*.buffer.deinit(self.allocator); - self.sync_module_pool.destroy(value_ptr.*); - } - self.buffer_pool.deinit(); self.script_pool.deinit(); - self.sync_module_pool.deinit(); - self.async_module_pool.deinit(); - - self.sync_modules.deinit(self.allocator); + self.imported_modules.deinit(self.allocator); // we don't deinit self.importmap b/c we use the page's arena for its // allocations. } pub fn reset(self: *ScriptManager) void { - var it = self.sync_modules.valueIterator(); - while (it.next()) |value_ptr| { - value_ptr.*.buffer.deinit(self.allocator); - self.sync_module_pool.destroy(value_ptr.*); + { + var it = self.imported_modules.valueIterator(); + while (it.next()) |value_ptr| { + // might have not been loaded yet (null) + const result = value_ptr.* orelse continue; + // might have loaded an error, in which case there's nothing to free + var buf = result catch continue; + buf.deinit(self.allocator); + } + self.imported_modules.clearRetainingCapacity(); } - self.sync_modules.clearRetainingCapacity(); + // Our allocator is the page arena, it's been reset. We cannot use // clearAndRetainCapacity, since that space is no longer ours self.importmap = .empty; - self.clearList(&self.asyncs); - self.clearList(&self.scripts); - self.clearList(&self.deferreds); + clearList(&self.normal_scripts); + clearList(&self.defer_scripts); + clearList(&self.async_scripts); + clearList(&self.ready_scripts); self.static_scripts_done = false; } -fn clearList(_: *const ScriptManager, list: *OrderList) void { - while (list.first) |node| { - const pending_script: *PendingScript = @fieldParentPtr("node", node); - // this removes it from the list - pending_script.deinit(); +fn clearList(list: *std.DoublyLinkedList) void { + while (list.popFirst()) |n| { + const script: *Script = @fieldParentPtr("node", n); + script.deinit(true); } - std.debug.assert(list.first == null); } pub fn addFromElement(self: *ScriptManager, element: *parser.Element, comptime ctx: []const u8) !void { @@ -200,39 +200,51 @@ pub fn addFromElement(self: *ScriptManager, element: *parser.Element, comptime c source = .{ .@"inline" = inline_source }; } - var script = Script{ + const script = try self.script_pool.create(); + errdefer self.script_pool.destroy(script); + + script.* = .{ .kind = kind, - .element = element, + .node = .{}, + .manager = self, .source = source, + .element = element, + .complete = source == .@"inline", .url = remote_url orelse page.url.raw, - .is_defer = if (remote_url == null) false else try parser.elementGetAttribute(element, "defer") != null, - .is_async = if (remote_url == null) false else try parser.elementGetAttribute(element, "async") != null, + .mode = blk: { + if (source == .@"inline") { + // inline modules are deferred, all other inline scripts have a + // normal execution flow + break :blk if (kind == .module) .@"defer" else .normal; + } + if (try parser.elementGetAttribute(element, "async") != null) { + break :blk .async; + } + if (try parser.elementGetAttribute(element, "defer") != null) { + break :blk .@"defer"; + } + break :blk .normal; + }, }; - if (source == .@"inline" and self.scripts.first == null) { - // inline script with no pending scripts, execute it immediately. - // (if there is a pending script, then we cannot execute this immediately - // as it needs to be executed in order) - return script.eval(page); - } + if (remote_url) |url| { + var headers = try self.client.newHeaders(); + try page.requestCookie(.{}).headersForRequest(page.arena, url, &headers); - const pending_script = try self.script_pool.create(); - errdefer self.script_pool.destroy(pending_script); - pending_script.* = .{ - .script = script, - .complete = false, - .manager = self, - .node = .{}, - }; + try self.client.request(.{ + .url = url, + .ctx = script, + .method = .GET, + .headers = headers, + .cookie_jar = page.cookie_jar, + .resource_type = .script, + .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null, + .header_callback = Script.headerCallback, + .data_callback = Script.dataCallback, + .done_callback = Script.doneCallback, + .error_callback = Script.errorCallback, + }); - if (source == .@"inline") { - // if we're here, it means that we have pending scripts (i.e. self.scripts - // is not empty). Because the script is inline, it's complete/ready, but - // we need to process them in order. - pending_script.complete = true; - pending_script.getList().append(&pending_script.node); - return; - } else { log.debug(.http, "script queue", .{ .ctx = ctx, .url = remote_url.?, @@ -240,26 +252,15 @@ pub fn addFromElement(self: *ScriptManager, element: *parser.Element, comptime c }); } - pending_script.getList().append(&pending_script.node); - - errdefer pending_script.deinit(); - - var headers = try self.client.newHeaders(); - try page.requestCookie(.{}).headersForRequest(page.arena, remote_url.?, &headers); + self.scriptList(script).append(&script.node); +} - try self.client.request(.{ - .url = remote_url.?, - .ctx = pending_script, - .method = .GET, - .headers = headers, - .cookie_jar = page.cookie_jar, - .resource_type = .script, - .start_callback = if (log.enabled(.http, .debug)) startCallback else null, - .header_callback = headerCallback, - .data_callback = dataCallback, - .done_callback = doneCallback, - .error_callback = errorCallback, - }); +fn scriptList(self: *ScriptManager, script: *const Script) *std.DoublyLinkedList { + return switch (script.mode) { + .normal => if (script.kind == .module) &self.defer_scripts else &self.normal_scripts, + .@"defer" => &self.defer_scripts, + .async, .import_async, .import => &self.async_scripts, + }; } // Resolve a module specifier to an valid URL. @@ -277,19 +278,28 @@ pub fn resolveSpecifier(self: *ScriptManager, arena: Allocator, specifier: []con ); } -pub fn getModule(self: *ScriptManager, url: [:0]const u8, referrer: []const u8) !void { - const gop = try self.sync_modules.getOrPut(self.allocator, url); +pub fn preloadImport(self: *ScriptManager, url: [:0]const u8, referrer: []const u8) !void { + const gop = try self.imported_modules.getOrPut(self.allocator, url); if (gop.found_existing) { - // already requested return; } - errdefer _ = self.sync_modules.remove(url); + errdefer _ = self.imported_modules.remove(url); - const sync = try self.sync_module_pool.create(); - errdefer self.sync_module_pool.destroy(sync); + const script = try self.script_pool.create(); + errdefer self.script_pool.destroy(script); - sync.* = .{ .manager = self }; - gop.value_ptr.* = sync; + script.* = .{ + .kind = .module, + .url = url, + .node = .{}, + .manager = self, + .element = null, + .complete = false, + .source = .{ .remote = .{} }, + .mode = .import, + }; + + gop.value_ptr.* = null; var headers = try self.client.newHeaders(); try self.page.requestCookie(.{}).headersForRequest(self.page.arena, url, &headers); @@ -303,73 +313,69 @@ pub fn getModule(self: *ScriptManager, url: [:0]const u8, referrer: []const u8) try self.client.request(.{ .url = url, - .ctx = sync, + .ctx = script, .method = .GET, .headers = headers, .cookie_jar = self.page.cookie_jar, .resource_type = .script, - .start_callback = if (log.enabled(.http, .debug)) SyncModule.startCallback else null, - .header_callback = SyncModule.headerCallback, - .data_callback = SyncModule.dataCallback, - .done_callback = SyncModule.doneCallback, - .error_callback = SyncModule.errorCallback, + .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null, + .header_callback = Script.headerCallback, + .data_callback = Script.dataCallback, + .done_callback = Script.doneCallback, + .error_callback = Script.errorCallback, }); + + // This seems wrong since we're not dealing with an async import (unlike + // getAsyncModule below), but all we're trying to do here is pre-load the + // script for execution at some point in the future (when waitForModule is + // called). + self.async_scripts.append(&script.node); } -pub fn waitForModule(self: *ScriptManager, url: [:0]const u8) !GetResult { - // Normally it's dangerous to hold on to map pointers. But here, the map - // can't change. It's possible that by calling `tick`, other entries within - // the map will have their value change, but the map itself is immutable - // during this tick. - const entry = self.sync_modules.getEntry(url) orelse { +pub fn waitForImport(self: *ScriptManager, url: [:0]const u8) !ModuleSource { + const entry = self.imported_modules.getEntry(url) orelse { + // It shouldn't be possible for v8 to ask for a module that we didn't + // `preloadImport` above. return error.UnknownModule; }; - const sync = entry.value_ptr.*; - // We can have multiple scripts waiting for the same module in concurrency. - // We use the waiters to ensures only the last waiter deinit the resources. - sync.waiters += 1; - defer sync.waiters -= 1; + const was_evaluating = self.is_evaluating; + self.is_evaluating = true; + defer self.is_evaluating = was_evaluating; var client = self.client; - while (true) { - switch (sync.state) { - .loading => {}, - .done => { - if (sync.waiters == 1) { - // Our caller has its own higher level cache (caching the - // actual compiled module). There's no reason for us to keep - // this if we are the last waiter. - defer self.sync_module_pool.destroy(sync); - defer self.sync_modules.removeByPtr(entry.key_ptr); - return .{ - .shared = false, - .buffer = sync.buffer, - .buffer_pool = &self.buffer_pool, - }; - } - - return .{ - .shared = true, - .buffer = sync.buffer, - .buffer_pool = &self.buffer_pool, - }; - }, - .err => |err| return err, - } + while (entry.value_ptr.* == null) { // rely on http's timeout settings to avoid an endless/long loop. _ = try client.tick(200); } + + defer self.imported_modules.removeByPtr(entry.key_ptr); + + // it's possible we stored an error in the map, if so, we'll return it now + const buf = try (entry.value_ptr.*.?); + + return .{ + .buffer = buf, + .buffer_pool = &self.buffer_pool, + }; } -pub fn getAsyncModule(self: *ScriptManager, url: [:0]const u8, cb: AsyncModule.Callback, cb_data: *anyopaque, referrer: []const u8) !void { - const async = try self.async_module_pool.create(); - errdefer self.async_module_pool.destroy(async); +pub fn getAsyncImport(self: *ScriptManager, url: [:0]const u8, cb: ImportAsync.Callback, cb_data: *anyopaque, referrer: []const u8) !void { + const script = try self.script_pool.create(); + errdefer self.script_pool.destroy(script); - async.* = .{ - .cb = cb, + script.* = .{ + .kind = .module, + .url = url, + .node = .{}, .manager = self, - .cb_data = cb_data, + .element = null, + .complete = false, + .source = .{ .remote = .{} }, + .mode = .{ .import_async = .{ + .callback = cb, + .data = cb_data, + } }, }; var headers = try self.client.newHeaders(); @@ -386,30 +392,30 @@ pub fn getAsyncModule(self: *ScriptManager, url: [:0]const u8, cb: AsyncModule.C .url = url, .method = .GET, .headers = headers, - .cookie_jar = self.page.cookie_jar, - .ctx = async, + .ctx = script, .resource_type = .script, - .start_callback = if (log.enabled(.http, .debug)) AsyncModule.startCallback else null, - .header_callback = AsyncModule.headerCallback, - .data_callback = AsyncModule.dataCallback, - .done_callback = AsyncModule.doneCallback, - .error_callback = AsyncModule.errorCallback, + .cookie_jar = self.page.cookie_jar, + .start_callback = if (log.enabled(.http, .debug)) Script.startCallback else null, + .header_callback = Script.headerCallback, + .data_callback = Script.dataCallback, + .done_callback = Script.doneCallback, + .error_callback = Script.errorCallback, }); + + self.async_scripts.append(&script.node); } + +// Called from the Page to let us know it's done parsing the HTML. Necessary that +// we know this so that we know that we can start evaluating deferred scripts. pub fn staticScriptsDone(self: *ScriptManager) void { std.debug.assert(self.static_scripts_done == false); self.static_scripts_done = true; self.evaluate(); } -// try to evaluate completed scripts (in order). This is called whenever a script -// is completed. fn evaluate(self: *ScriptManager) void { if (self.is_evaluating) { // It's possible for a script.eval to cause evaluate to be called again. - // This is particularly true with blockingGet, but even without this, - // it's theoretically possible (but unlikely). We could make this work - // but there's little reason to support the complexity. return; } @@ -417,13 +423,33 @@ fn evaluate(self: *ScriptManager) void { self.is_evaluating = true; defer self.is_evaluating = false; - while (self.scripts.first) |n| { - var pending_script: *PendingScript = @fieldParentPtr("node", n); - if (pending_script.complete == false) { + while (self.ready_scripts.popFirst()) |n| { + var script: *Script = @fieldParentPtr("node", n); + switch (script.mode) { + .async => { + defer script.deinit(true); + script.eval(page); + }, + .import_async => |ia| { + defer script.deinit(false); + ia.callback(ia.data, .{ + .buffer = script.source.remote, + .buffer_pool = &self.buffer_pool, + }); + }, + else => unreachable, // no other script is put in this list + } + } + + while (self.normal_scripts.first) |n| { + // These need to be processed in-order + var script: *Script = @fieldParentPtr("node", n); + if (script.complete == false) { return; } - defer pending_script.deinit(); - pending_script.script.eval(page); + defer script.deinit(true); + defer _ = self.normal_scripts.popFirst(); + script.eval(page); } if (self.static_scripts_done == false) { @@ -437,70 +463,37 @@ fn evaluate(self: *ScriptManager) void { return; } - while (self.deferreds.first) |n| { - var pending_script: *PendingScript = @fieldParentPtr("node", n); - if (pending_script.complete == false) { + while (self.defer_scripts.first) |n| { + var script: *Script = @fieldParentPtr("node", n); + if (script.complete == false) { return; } - defer pending_script.deinit(); - pending_script.script.eval(page); + defer script.deinit(true); + defer _ = self.defer_scripts.popFirst(); + script.eval(page); } + // At this point all normal scripts and deferred scripts are done, PLUS + // the page has signaled that it's done parsing HTML (static_scripts_done == true). + // + // When all scripts (normal and deferred) are done loading, the document - // state changes (this ultimately triggers the DOMContentLoaded event) + // state changes (this ultimately triggers the DOMContentLoaded event). + // Page makes this safe to call multiple times. page.documentIsLoaded(); - if (self.asyncs.first == null) { - // 1 - there are no async scripts pending - // 2 - we checkecked static_scripts_done == true above - // 3 - we drained self.scripts above - // 4 - we drained self.deferred above + if (self.async_scripts.first == null) { + // Looks like all async scripts are done too! + // Page makes this safe to call multiple times. page.documentIsComplete(); } } pub fn isDone(self: *const ScriptManager) bool { - return self.asyncs.first == null and // there are no more async scripts - self.static_scripts_done and // and we've finished parsing the HTML to queue all - self.scripts.first == null and // and there are no more