diff --git a/CMakeLists.txt b/CMakeLists.txt index 446bef16848cb..53a870f996d35 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -307,6 +307,7 @@ option(USE_CUSTOM_TINYCC "Use Bun's recommended version of tinycc" ON) option(USE_CUSTOM_LIBUV "Use Bun's recommended version of libuv (Windows only)" ON) option(USE_CUSTOM_LSHPACK "Use Bun's recommended version of ls-hpack" ON) option(USE_BASELINE_BUILD "Build Bun for baseline (older) CPUs" OFF) +option(USE_SYSTEM_ICU "Use the system-provided libicu. May fix startup crashes when building WebKit yourself." OFF) option(USE_VALGRIND "Build Bun with Valgrind support (Linux only)" OFF) @@ -320,7 +321,6 @@ option(USE_LTO "Enable Link-Time Optimization" ${DEFAULT_LTO}) option(BUN_TIDY_ONLY "Only run clang-tidy" OFF) option(BUN_TIDY_ONLY_EXTRA " Only run clang-tidy, with extra checks for local development" OFF) - if(NOT ZIG_LIB_DIR) cmake_path(SET ZIG_LIB_DIR NORMALIZE "${CMAKE_CURRENT_SOURCE_DIR}/src/deps/zig/lib") endif() @@ -1179,9 +1179,15 @@ if(UNIX AND NOT APPLE) target_link_libraries(${bun} PRIVATE "libatomic.a") endif() - target_link_libraries(${bun} PRIVATE "${WEBKIT_LIB_DIR}/libicudata.a") - target_link_libraries(${bun} PRIVATE "${WEBKIT_LIB_DIR}/libicui18n.a") - target_link_libraries(${bun} PRIVATE "${WEBKIT_LIB_DIR}/libicuuc.a") + if(USE_SYSTEM_ICU) + target_link_libraries(${bun} PRIVATE "libicudata.a") + target_link_libraries(${bun} PRIVATE "libicui18n.a") + target_link_libraries(${bun} PRIVATE "libicuuc.a") + else() + target_link_libraries(${bun} PRIVATE "${WEBKIT_LIB_DIR}/libicudata.a") + target_link_libraries(${bun} PRIVATE "${WEBKIT_LIB_DIR}/libicui18n.a") + target_link_libraries(${bun} PRIVATE "${WEBKIT_LIB_DIR}/libicuuc.a") + endif() set_target_properties(${bun} PROPERTIES LINK_DEPENDS "${BUN_SRC}/linker.lds") set_target_properties(${bun} PROPERTIES LINK_DEPENDS "${BUN_SRC}/symbols.dyn") @@ -1446,7 +1452,7 @@ if(BUN_TIDY_ONLY) set_target_properties(${bun} PROPERTIES CXX_CLANG_TIDY "${CLANG_TIDY_COMMAND}") 
endif() -if (BUN_TIDY_ONLY_EXTRA) +if(BUN_TIDY_ONLY_EXTRA) find_program(CLANG_TIDY_EXE NAMES "clang-tidy") set(CLANG_TIDY_COMMAND "${CLANG_TIDY_EXE}" "-checks=-*,clang-analyzer-*,performance-*,-clang-analyzer-webkit.UncountedLambdaCapturesChecker" "--fix" "--fix-errors" "--format-style=webkit" "--warnings-as-errors=*") set_target_properties(${bun} PROPERTIES CXX_CLANG_TIDY "${CLANG_TIDY_COMMAND}") diff --git a/packages/bun-internal-test/src/runner.node.mjs b/packages/bun-internal-test/src/runner.node.mjs index 9eba40bb8eddb..70b7b372f2150 100644 --- a/packages/bun-internal-test/src/runner.node.mjs +++ b/packages/bun-internal-test/src/runner.node.mjs @@ -186,7 +186,7 @@ function checkSlowTests() { ); proc?.stdout?.destroy?.(); proc?.stderr?.destroy?.(); - proc?.kill?.(); + proc?.kill?.(9); } else if (now - start > SHORT_TIMEOUT_DURATION) { console.error( `\x1b[33mwarning\x1b[0;2m:\x1b[0m Test ${JSON.stringify(path)} has been running for ${Math.ceil( diff --git a/src/bun.js/ConsoleObject.zig b/src/bun.js/ConsoleObject.zig index 2a702d5c73009..0f4b9a6db13c1 100644 --- a/src/bun.js/ConsoleObject.zig +++ b/src/bun.js/ConsoleObject.zig @@ -630,9 +630,18 @@ pub fn writeTrace(comptime Writer: type, writer: Writer, global: *JSGlobalObject defer holder.deinit(vm); const exception = holder.zigException(); + var source_code_slice: ?ZigString.Slice = null; + defer if (source_code_slice) |slice| slice.deinit(); + var err = ZigString.init("trace output").toErrorInstance(global); err.toZigException(global, exception); - vm.remapZigException(exception, err, null, &holder.need_to_clear_parser_arena_on_deinit); + vm.remapZigException( + exception, + err, + null, + &holder.need_to_clear_parser_arena_on_deinit, + &source_code_slice, + ); if (Output.enable_ansi_colors_stderr) VirtualMachine.printStackTrace( diff --git a/src/bun.js/WebKit b/src/bun.js/WebKit index ddc47cfa03b87..413f6fc119cfc 160000 --- a/src/bun.js/WebKit +++ b/src/bun.js/WebKit @@ -1 +1 @@ -Subproject commit 
ddc47cfa03b87d48217079b7aaa64d23ebd3c3a2 +Subproject commit 413f6fc119cfca98d5063049ce444cc4eb56f0a4 diff --git a/src/bun.js/bindings/BunString.cpp b/src/bun.js/bindings/BunString.cpp index c156d6a57f412..bf8013ee7309a 100644 --- a/src/bun.js/bindings/BunString.cpp +++ b/src/bun.js/bindings/BunString.cpp @@ -1,4 +1,5 @@ +#include "helpers.h" #include "root.h" #include "headers-handwritten.h" #include @@ -196,6 +197,13 @@ BunString toStringRef(WTF::StringImpl* wtfString) return { BunStringTag::WTFStringImpl, { .wtf = wtfString } }; } +BunString toStringView(StringView view) { + return { + BunStringTag::ZigString, + { .zig = toZigString(view) } + }; +} + } extern "C" JSC::EncodedJSValue BunString__toJS(JSC::JSGlobalObject* globalObject, const BunString* bunString) @@ -582,4 +590,4 @@ extern "C" void JSC__JSValue__putBunString( WTF::String str = key->tag == BunStringTag::Empty ? WTF::String(""_s) : key->toWTFString(); Identifier id = Identifier::fromString(vm, str); target->putDirect(vm, id, value, 0); -} \ No newline at end of file +} diff --git a/src/bun.js/bindings/ZigGlobalObject.cpp b/src/bun.js/bindings/ZigGlobalObject.cpp index c89cbc72164fa..d8c5f78d94bdd 100644 --- a/src/bun.js/bindings/ZigGlobalObject.cpp +++ b/src/bun.js/bindings/ZigGlobalObject.cpp @@ -400,7 +400,7 @@ WTF::String Bun::formatStackTrace(JSC::VM& vm, JSC::JSGlobalObject* globalObject sb.append(" at ("_s); - sb.append(sourceURLForFrame); + sb.append(remappedFrame.source_url.toWTFString()); if (remappedFrame.remapped) { errorInstance->putDirect(vm, Identifier::fromString(vm, "originalLine"_s), jsNumber(originalLine.oneBasedInt()), 0); diff --git a/src/bun.js/bindings/ZigSourceProvider.cpp b/src/bun.js/bindings/ZigSourceProvider.cpp index b5a6eae7728f4..48ddef08ea6bb 100644 --- a/src/bun.js/bindings/ZigSourceProvider.cpp +++ b/src/bun.js/bindings/ZigSourceProvider.cpp @@ -66,10 +66,15 @@ JSC::SourceID sourceIDForSourceURL(const WTF::String& sourceURL) } extern "C" bool 
BunTest__shouldGenerateCodeCoverage(BunString sourceURL); - -Ref SourceProvider::create(Zig::GlobalObject* globalObject, ResolvedSource& resolvedSource, JSC::SourceProviderSourceType sourceType, bool isBuiltin) -{ - +extern "C" void Bun__addSourceProviderSourceMap(void* bun_vm, SourceProvider* opaque_source_provider, BunString* specifier); +extern "C" void Bun__removeSourceProviderSourceMap(void* bun_vm, SourceProvider* opaque_source_provider, BunString* specifier); + +Ref SourceProvider::create( + Zig::GlobalObject* globalObject, + ResolvedSource& resolvedSource, + JSC::SourceProviderSourceType sourceType, + bool isBuiltin +) { auto string = resolvedSource.source_code.toWTFString(BunString::ZeroCopy); auto sourceURLString = resolvedSource.source_url.toWTFString(BunString::ZeroCopy); @@ -99,9 +104,20 @@ Ref SourceProvider::create(Zig::GlobalObject* globalObject, Reso ByteRangeMapping__generate(Bun::toString(provider->sourceURL()), Bun::toString(provider->source().toStringWithoutCopying()), provider->asID()); } + if (resolvedSource.already_bundled) { + Bun__addSourceProviderSourceMap(globalObject->bunVM(), provider.ptr(), &resolvedSource.source_url); + } + return provider; } +SourceProvider::~SourceProvider() { + if(m_resolvedSource.already_bundled) { + BunString str = Bun::toString(sourceURL()); + Bun__removeSourceProviderSourceMap(m_globalObject->bunVM(), this, &str); + } +} + unsigned SourceProvider::hash() const { if (m_hash) { @@ -139,9 +155,8 @@ void SourceProvider::cacheBytecode(const BytecodeCacheGenerator& generator) if (update) m_cachedBytecode->addGlobalUpdate(*update); } -SourceProvider::~SourceProvider() -{ -} + + void SourceProvider::commitCachedBytecode() { // if (!m_resolvedSource.bytecodecache_fd || !m_cachedBytecode || !m_cachedBytecode->hasUpdates()) @@ -229,4 +244,9 @@ int SourceProvider::readCache(JSC::VM& vm, const JSC::SourceCode& sourceCode) // return 0; // } } + +extern "C" BunString ZigSourceProvider__getSourceSlice(SourceProvider* 
provider) { + return Bun::toStringView(provider->source()); +} + }; // namespace Zig diff --git a/src/bun.js/bindings/ZigSourceProvider.h b/src/bun.js/bindings/ZigSourceProvider.h index 6c04c14b26dc6..d8ccaa69ee528 100644 --- a/src/bun.js/bindings/ZigSourceProvider.h +++ b/src/bun.js/bindings/ZigSourceProvider.h @@ -36,17 +36,18 @@ class SourceProvider final : public JSC::SourceProvider { using SourceOrigin = JSC::SourceOrigin; public: - static Ref create(Zig::GlobalObject*, ResolvedSource& resolvedSource, JSC::SourceProviderSourceType sourceType = JSC::SourceProviderSourceType::Module, bool isBuiltIn = false); + static Ref create( + Zig::GlobalObject*, + ResolvedSource& resolvedSource, + JSC::SourceProviderSourceType sourceType = JSC::SourceProviderSourceType::Module, + bool isBuiltIn = false); ~SourceProvider(); unsigned hash() const override; StringView source() const override { return StringView(m_source.get()); } + RefPtr cachedBytecode() { - // if (m_resolvedSource.bytecodecache_fd == 0) { return nullptr; - // } - - // return m_cachedBytecode; }; void updateCache(const UnlinkedFunctionExecutable* executable, const SourceCode&, @@ -65,19 +66,16 @@ class SourceProvider final : public JSC::SourceProvider { const SourceOrigin& sourceOrigin, WTF::String&& sourceURL, const TextPosition& startPosition, JSC::SourceProviderSourceType sourceType) : Base(sourceOrigin, WTFMove(sourceURL), String(), taintedness, startPosition, sourceType) + , m_globalObject(globalObject) , m_source(sourceImpl) { - m_resolvedSource = resolvedSource; } + Zig::GlobalObject* m_globalObject; RefPtr m_cachedBytecode; Ref m_source; - bool did_free_source_code = false; - Zig::GlobalObject* m_globalObjectForSourceProviderMap; unsigned m_hash = 0; - - // JSC::SourceCodeKey key; }; -} // namespace Zig \ No newline at end of file +} // namespace Zig diff --git a/src/bun.js/bindings/exports.zig b/src/bun.js/bindings/exports.zig index 11a1a42e97844..31375ae373e96 100644 --- 
a/src/bun.js/bindings/exports.zig +++ b/src/bun.js/bindings/exports.zig @@ -218,6 +218,7 @@ pub const ResolvedSource = extern struct { /// This is for source_code source_code_needs_deref: bool = true, + already_bundled: bool = false, pub const Tag = @import("ResolvedSourceTag").ResolvedSourceTag; }; diff --git a/src/bun.js/bindings/headers-handwritten.h b/src/bun.js/bindings/headers-handwritten.h index c68a1d3dab752..bef00c36bdeab 100644 --- a/src/bun.js/bindings/headers-handwritten.h +++ b/src/bun.js/bindings/headers-handwritten.h @@ -99,6 +99,7 @@ typedef struct ResolvedSource { JSC::EncodedJSValue jsvalue_for_export; uint32_t tag; bool needsDeref; + bool already_bundled; } ResolvedSource; static const uint32_t ResolvedSourceTagPackageJSONTypeModule = 1; typedef union ErrorableResolvedSourceResult { @@ -278,6 +279,10 @@ BunString toStringRef(JSC::JSGlobalObject* globalObject, JSC::JSValue value); BunString toStringRef(WTF::String& wtfString); BunString toStringRef(const WTF::String& wtfString); BunString toStringRef(WTF::StringImpl* wtfString); + +// This creates a detached string view, which cannot be ref/unref. +// Be very careful using this, and ensure the memory owner does not get destroyed. 
+BunString toStringView(WTF::StringView view); } using Uint8Array_alias = JSC::JSUint8Array; diff --git a/src/bun.js/javascript.zig b/src/bun.js/javascript.zig index 98fffea38a97e..04e9c38f9ff31 100644 --- a/src/bun.js/javascript.zig +++ b/src/bun.js/javascript.zig @@ -114,13 +114,18 @@ export var has_bun_garbage_collector_flag_enabled = false; const SourceMap = @import("../sourcemap/sourcemap.zig"); const ParsedSourceMap = SourceMap.Mapping.ParsedSourceMap; const MappingList = SourceMap.Mapping.List; +const SourceProviderMap = SourceMap.SourceProviderMap; const uv = bun.windows.libuv; pub const SavedSourceMap = struct { + /// This is a pointer to the map located on the VirtualMachine struct + map: *HashTable, + mutex: bun.Lock = bun.Lock.init(), + pub const vlq_offset = 24; - // For bun.js, we store the number of mappings and how many bytes the final list is at the beginning of the array + // For the runtime, we store the number of mappings and how many bytes the final list is at the beginning of the array // The first 8 bytes are the length of the array // The second 8 bytes are the number of mappings pub const SavedMappings = struct { @@ -172,16 +177,58 @@ pub const SavedSourceMap = struct { } }; + /// ParsedSourceMap is the canonical form for sourcemaps, + /// + /// but `SavedMappings` and `SourceProviderMap` are much cheaper to construct. 
+ /// In `fn get`, this value gets converted to ParsedSourceMap always pub const Value = TaggedPointerUnion(.{ ParsedSourceMap, SavedMappings, + SourceProviderMap, }); - pub const HashTable = std.HashMap(u64, *anyopaque, IdentityContext(u64), 80); - /// This is a pointer to the map located on the VirtualMachine struct - map: *HashTable, + pub const MissingSourceMapNoteInfo = struct { + pub var storage: bun.PathBuffer = undefined; + pub var path: ?[]const u8 = null; - mutex: bun.Lock = bun.Lock.init(), + pub fn print() void { + if (path) |note| { + Output.note( + "missing sourcemaps for {s}", + .{note}, + ); + Output.note("consider bundling with '--sourcemap' to get an unminified traces", .{}); + } + } + }; + + pub fn putZigSourceProvider(this: *SavedSourceMap, opaque_source_provider: *anyopaque, path: []const u8) void { + const source_provider: *SourceProviderMap = @ptrCast(opaque_source_provider); + this.putValue(path, Value.init(source_provider)) catch bun.outOfMemory(); + } + + pub fn removeZigSourceProvider(this: *SavedSourceMap, opaque_source_provider: *anyopaque, path: []const u8) void { + this.mutex.lock(); + defer this.mutex.unlock(); + + const entry = this.map.getEntry(bun.hash(path)) orelse return; + const old_value = Value.from(entry.value_ptr.*); + if (old_value.get(SourceProviderMap)) |prov| { + if (@intFromPtr(prov) == @intFromPtr(opaque_source_provider)) { + // there is nothing to unref or deinit + this.map.removeByPtr(entry.key_ptr); + } + } else if (old_value.get(ParsedSourceMap)) |map| { + if (map.underlying_provider.provider()) |prov| { + if (@intFromPtr(prov) == @intFromPtr(opaque_source_provider)) { + map.deinit(default_allocator); + this.map.removeByPtr(entry.key_ptr); + } + } + } + } + + pub const HashTable = std.HashMap(u64, *anyopaque, IdentityContext(u64), 80); pub fn onSourceMapChunk(this: *SavedSourceMap, chunk: SourceMap.Chunk, source: logger.Source) anyerror!void { try this.putMappings(source, chunk.buffer); @@ -192,6 +239,8 @@ pub 
const SavedSourceMap = struct { pub fn deinit(this: *SavedSourceMap) void { { this.mutex.lock(); + defer this.mutex.unlock(); + var iter = this.map.valueIterator(); while (iter.next()) |val| { var value = Value.from(val.*); @@ -201,39 +250,48 @@ pub const SavedSourceMap = struct { } else if (value.get(SavedMappings)) |saved_mappings| { var saved = SavedMappings{ .data = @as([*]u8, @ptrCast(saved_mappings)) }; saved.deinit(); + } else if (value.get(SourceProviderMap)) |provider| { + _ = provider; // do nothing, we did not hold a ref to ZigSourceProvider } } - - this.mutex.unlock(); } this.map.deinit(); } pub fn putMappings(this: *SavedSourceMap, source: logger.Source, mappings: MutableString) !void { + try this.putValue(source.path.text, Value.init(bun.cast(*SavedMappings, mappings.list.items.ptr))); + } + + fn putValue(this: *SavedSourceMap, path: []const u8, value: Value) !void { this.mutex.lock(); defer this.mutex.unlock(); - const entry = try this.map.getOrPut(bun.hash(source.path.text)); + const entry = try this.map.getOrPut(bun.hash(path)); if (entry.found_existing) { - var value = Value.from(entry.value_ptr.*); - if (value.get(ParsedSourceMap)) |source_map_| { - var source_map: *ParsedSourceMap = source_map_; + var old_value = Value.from(entry.value_ptr.*); + if (old_value.get(ParsedSourceMap)) |parsed_source_map| { + var source_map: *ParsedSourceMap = parsed_source_map; source_map.deinit(default_allocator); - } else if (value.get(SavedMappings)) |saved_mappings| { + } else if (old_value.get(SavedMappings)) |saved_mappings| { var saved = SavedMappings{ .data = @as([*]u8, @ptrCast(saved_mappings)) }; - saved.deinit(); + } else if (old_value.get(SourceProviderMap)) |provider| { + _ = provider; // do nothing, we did not hold a ref to ZigSourceProvider } } - - entry.value_ptr.* = Value.init(bun.cast(*SavedMappings, mappings.list.items.ptr)).ptr(); + entry.value_ptr.* = value.ptr(); } - pub fn get(this: *SavedSourceMap, path: string) ?ParsedSourceMap { - const 
mapping = this.map.getEntry(bun.hash(path)) orelse return null; + pub fn getWithContent( + this: *SavedSourceMap, + path: string, + hint: SourceMap.ParseUrlResultHint, + ) SourceMap.ParseUrl { + const hash = bun.hash(path); + const mapping = this.map.getEntry(hash) orelse return .{}; switch (Value.from(mapping.value_ptr.*).tag()) { Value.Tag.ParsedSourceMap => { - return Value.from(mapping.value_ptr.*).as(ParsedSourceMap).*; + return .{ .map = Value.from(mapping.value_ptr.*).as(ParsedSourceMap) }; }, Value.Tag.SavedMappings => { var saved = SavedMappings{ .data = @as([*]u8, @ptrCast(Value.from(mapping.value_ptr.*).as(ParsedSourceMap))) }; @@ -241,26 +299,66 @@ pub const SavedSourceMap = struct { const result = default_allocator.create(ParsedSourceMap) catch unreachable; result.* = saved.toMapping(default_allocator, path) catch { _ = this.map.remove(mapping.key_ptr.*); - return null; + return .{}; }; mapping.value_ptr.* = Value.init(result).ptr(); - return result.*; + return .{ .map = result }; + }, + Value.Tag.SourceProviderMap => { + var ptr = Value.from(mapping.value_ptr.*).as(SourceProviderMap); + + if (ptr.getSourceMap(path, .none, hint)) |parse| + if (parse.map) |map| { + mapping.value_ptr.* = Value.init(map).ptr(); + return parse; + }; + + // does not have a valid source map. let's not try again + _ = this.map.remove(hash); + + // Store path for a user note. 
+ const storage = MissingSourceMapNoteInfo.storage[0..path.len]; + @memcpy(storage, path); + MissingSourceMapNoteInfo.path = storage; + return .{}; + }, + else => { + if (Environment.allow_assert) { + @panic("Corrupt pointer tag"); + } + return .{}; }, - else => return null, } } + pub fn get(this: *SavedSourceMap, path: string) ?*ParsedSourceMap { + return this.getWithContent(path, .mappings_only).map; + } + pub fn resolveMapping( this: *SavedSourceMap, path: []const u8, line: i32, column: i32, - ) ?SourceMap.Mapping { + source_handling: SourceMap.SourceContentHandling, + ) ?SourceMap.Mapping.Lookup { this.mutex.lock(); defer this.mutex.unlock(); - const parsed_mappings = this.get(path) orelse return null; - return SourceMap.Mapping.find(parsed_mappings.mappings, line, column); + const parse = this.getWithContent(path, switch (source_handling) { + .no_source_contents => .mappings_only, + .source_contents => .{ .all = .{ .line = line, .column = column } }, + }); + const map = parse.map orelse return null; + const mapping = parse.mapping orelse + SourceMap.Mapping.find(map.mappings, line, column) orelse + return null; + + return .{ + .mapping = mapping, + .source_map = map, + .prefetched_source_code = parse.source_contents, + }; } }; const uws = bun.uws; @@ -542,6 +640,7 @@ pub const VirtualMachine = struct { /// only use it through /// source_mappings saved_source_map_table: SavedSourceMap.HashTable = undefined, + source_mappings: SavedSourceMap = undefined, arena: *Arena = undefined, has_loaded: bool = false, @@ -591,8 +690,6 @@ pub const VirtualMachine = struct { ref_strings: JSC.RefString.Map = undefined, ref_strings_mutex: Lock = undefined, - source_mappings: SavedSourceMap = undefined, - active_tasks: usize = 0, rare_data: ?*JSC.RareData = null, @@ -2730,7 +2827,13 @@ pub const VirtualMachine = struct { sourceURL.slice(), @max(frame.position.line, 0), @max(frame.position.column_start, 0), - )) |mapping| { + .no_source_contents, + )) |lookup| { + if 
(lookup.displaySourceURLIfNeeded(sourceURL.slice())) |source_url| { + frame.source_url.deref(); + frame.source_url = source_url; + } + const mapping = lookup.mapping; frame.position.line = mapping.original.lines; frame.position.column_start = mapping.original.columns; frame.remapped = true; @@ -2741,8 +2844,16 @@ pub const VirtualMachine = struct { } } - pub fn remapZigException(this: *VirtualMachine, exception: *ZigException, error_instance: JSValue, exception_list: ?*ExceptionList, must_reset_parser_arena_later: *bool) void { + pub fn remapZigException( + this: *VirtualMachine, + exception: *ZigException, + error_instance: JSValue, + exception_list: ?*ExceptionList, + must_reset_parser_arena_later: *bool, + source_code_slice: *?ZigString.Slice, + ) void { error_instance.toZigException(this.global, exception); + // defer this so that it copies correctly defer { if (exception_list) |list| { @@ -2819,28 +2930,51 @@ pub const VirtualMachine = struct { var top_source_url = top.source_url.toUTF8(bun.default_allocator); defer top_source_url.deinit(); - const mapping_ = if (top.remapped) - SourceMap.Mapping{ - .generated = .{}, - .original = .{ - .lines = @max(top.position.line, 0), - .columns = @max(top.position.column_start, 0), + const maybe_lookup = if (top.remapped) + SourceMap.Mapping.Lookup{ + .mapping = .{ + .generated = .{}, + .original = .{ + .lines = @max(top.position.line, 0), + .columns = @max(top.position.column_start, 0), + }, + .source_index = 0, }, - .source_index = 0, + // undefined is fine, because these two values are never read if `top.remapped == true` + .source_map = undefined, + .prefetched_source_code = undefined, } else this.source_mappings.resolveMapping( top_source_url.slice(), @max(top.position.line, 0), @max(top.position.column_start, 0), + .source_contents, ); - if (mapping_) |mapping| { - var log = logger.Log.init(default_allocator); - var original_source = fetchWithoutOnLoadPlugins(this, this.global, top.source_url, bun.String.empty, 
&log, .print_source) catch return; - must_reset_parser_arena_later.* = true; - const code = original_source.source_code.toUTF8(bun.default_allocator); - defer code.deinit(); + if (maybe_lookup) |lookup| { + const mapping = lookup.mapping; + + if (!top.remapped) { + if (lookup.displaySourceURLIfNeeded(top_source_url.slice())) |src| { + top.source_url.deref(); + top.source_url = src; + } + } + + const code = code: { + if (!top.remapped and lookup.source_map.isExternal()) { + if (lookup.getSourceCode(top_source_url.slice())) |src| { + break :code src; + } + } + + var log = logger.Log.init(default_allocator); + var original_source = fetchWithoutOnLoadPlugins(this, this.global, top.source_url, bun.String.empty, &log, .print_source) catch return; + must_reset_parser_arena_later.* = true; + break :code original_source.source_code.toUTF8(bun.default_allocator); + }; + source_code_slice.* = code; top.position.line = mapping.original.lines; top.position.line_start = mapping.original.lines; @@ -2895,7 +3029,13 @@ pub const VirtualMachine = struct { source_url.slice(), @max(frame.position.line, 0), @max(frame.position.column_start, 0), - )) |mapping| { + .no_source_contents, + )) |lookup| { + if (lookup.displaySourceURLIfNeeded(source_url.slice())) |src| { + frame.source_url.deref(); + frame.source_url = src; + } + const mapping = lookup.mapping; frame.position.line = mapping.original.lines; frame.remapped = true; frame.position.column_start = mapping.original.columns; @@ -2908,7 +3048,17 @@ pub const VirtualMachine = struct { var exception_holder = ZigException.Holder.init(); var exception = exception_holder.zigException(); defer exception_holder.deinit(this); - this.remapZigException(exception, error_instance, exception_list, &exception_holder.need_to_clear_parser_arena_on_deinit); + + var source_code_slice: ?ZigString.Slice = null; + defer if (source_code_slice) |slice| slice.deinit(); + + this.remapZigException( + exception, + error_instance, + exception_list, + 
&exception_holder.need_to_clear_parser_arena_on_deinit, + &source_code_slice, + ); const prev_had_errors = this.had_errors; this.had_errors = true; defer this.had_errors = prev_had_errors; @@ -3827,4 +3977,18 @@ pub fn NewHotReloader(comptime Ctx: type, comptime EventLoopType: type, comptime }; } +export fn Bun__addSourceProviderSourceMap(vm: *VirtualMachine, opaque_source_provider: *anyopaque, specifier: *bun.String) void { + var sfb = std.heap.stackFallback(4096, bun.default_allocator); + const slice = specifier.toUTF8(sfb.get()); + defer slice.deinit(); + vm.source_mappings.putZigSourceProvider(opaque_source_provider, slice.slice()); +} + +export fn Bun__removeSourceProviderSourceMap(vm: *VirtualMachine, opaque_source_provider: *anyopaque, specifier: *bun.String) void { + var sfb = std.heap.stackFallback(4096, bun.default_allocator); + const slice = specifier.toUTF8(sfb.get()); + defer slice.deinit(); + vm.source_mappings.removeZigSourceProvider(opaque_source_provider, slice.slice()); +} + pub export var isBunTest: bool = false; diff --git a/src/bun.js/module_loader.zig b/src/bun.js/module_loader.zig index 1e08e4b6e4dc4..7d03dd286fe53 100644 --- a/src/bun.js/module_loader.zig +++ b/src/bun.js/module_loader.zig @@ -592,6 +592,7 @@ pub const RuntimeTranspilerStore = struct { .source_code = bun.String.createLatin1(parse_result.source.contents), .specifier = duped, .source_url = duped.createIfDifferent(path.text), + .already_bundled = true, .hash = 0, }; this.resolved_source.source_code.ensureHash(); @@ -1796,7 +1797,7 @@ pub const ModuleLoader = struct { .source_code = bun.String.createLatin1(parse_result.source.contents), .specifier = input_specifier, .source_url = input_specifier.createIfDifferent(path.text), - + .already_bundled = true, .hash = 0, }; } diff --git a/src/bun_js.zig b/src/bun_js.zig index 8ccb36c6be8ed..1147d08d72248 100644 --- a/src/bun_js.zig +++ b/src/bun_js.zig @@ -284,6 +284,8 @@ pub const Run = struct { vm.onExit(); if (run.any_unhandled) { + 
bun.JSC.SavedSourceMap.MissingSourceMapNoteInfo.print(); + Output.prettyErrorln( "\n{s}", .{Global.unhandled_error_bun_version_string}, @@ -315,6 +317,8 @@ pub const Run = struct { vm.exit_handler.exit_code = 1; vm.onExit(); if (run.any_unhandled) { + bun.JSC.SavedSourceMap.MissingSourceMapNoteInfo.print(); + Output.prettyErrorln( "\n{s}", .{Global.unhandled_error_bun_version_string}, @@ -417,6 +421,8 @@ pub const Run = struct { if (this.any_unhandled and this.vm.exit_handler.exit_code == 0) { this.vm.exit_handler.exit_code = 1; + bun.JSC.SavedSourceMap.MissingSourceMapNoteInfo.print(); + Output.prettyErrorln( "\n{s}", .{Global.unhandled_error_bun_version_string}, diff --git a/src/js_ast.zig b/src/js_ast.zig index 6bf4496fceabb..414f17bfc2891 100644 --- a/src/js_ast.zig +++ b/src/js_ast.zig @@ -2429,7 +2429,7 @@ pub const E = struct { { s.resolveRopeIfNeeded(allocator); - const decoded = js_lexer.decodeUTF8(s.slice(allocator), allocator) catch unreachable; + const decoded = js_lexer.decodeStringLiteralEscapeSequencesToUTF16(s.slice(allocator), allocator) catch unreachable; defer allocator.free(decoded); var out, const chars = bun.String.createUninitialized(.utf16, decoded.len); diff --git a/src/js_lexer.zig b/src/js_lexer.zig index 96e396c8de7a9..50142410481a5 100644 --- a/src/js_lexer.zig +++ b/src/js_lexer.zig @@ -77,7 +77,7 @@ pub const JSONOptions = struct { always_decode_escape_sequences: bool = false, }; -pub fn decodeUTF8(bytes: string, allocator: std.mem.Allocator) ![]const u16 { +pub fn decodeStringLiteralEscapeSequencesToUTF16(bytes: string, allocator: std.mem.Allocator) ![]const u16 { var log = logger.Log.init(allocator); defer log.deinit(); const source = logger.Source.initEmptyFile(""); diff --git a/src/sourcemap/CodeCoverage.zig b/src/sourcemap/CodeCoverage.zig index 5bfef9cc1890c..b86a30059e24f 100644 --- a/src/sourcemap/CodeCoverage.zig +++ b/src/sourcemap/CodeCoverage.zig @@ -369,9 +369,7 @@ pub const ByteRangeMapping = struct { var 
executable_lines: Bitset = Bitset{}; var lines_which_have_executed: Bitset = Bitset{}; - const parsed_mappings_ = bun.JSC.VirtualMachine.get().source_mappings.get( - source_url.slice(), - ); + const parsed_mappings_ = bun.JSC.VirtualMachine.get().source_mappings.get(source_url.slice()); var functions = std.ArrayListUnmanaged(CodeCoverageReport.Block){}; try functions.ensureTotalCapacityPrecise(allocator, function_blocks.len); diff --git a/src/sourcemap/sourcemap.zig b/src/sourcemap/sourcemap.zig index be4eca94e6a49..7eb950d76a3b9 100644 --- a/src/sourcemap/sourcemap.zig +++ b/src/sourcemap/sourcemap.zig @@ -39,77 +39,201 @@ sources_content: []string, mapping: Mapping.List = .{}, allocator: std.mem.Allocator, -pub fn parse( - allocator: std.mem.Allocator, - json_source: *const Logger.Source, - log: *Logger.Log, -) !SourceMap { - var json = try bun.JSON.ParseJSONUTF8(json_source, log, allocator); - var mappings = bun.sourcemap.Mapping.List{}; +/// Dictates what parseUrl/parseJSON return. +pub const ParseUrlResultHint = union(enum) { + mappings_only, + /// Source Index to fetch + source_only: u32, + /// In order to fetch source contents, you need to know the + /// index, but you cant know the index until the mappings + /// are loaded. So pass in line+col. + all: struct { line: i32, column: i32 }, +}; + +pub const ParseUrl = struct { + /// Populated when `mappings_only` or `all`. + map: ?*Mapping.ParsedSourceMap = null, + /// Populated when `all` + /// May be `null` even when requested. + mapping: ?Mapping = null, + /// Populated when `source_only` or `all` + /// May be `null` even when requested, if did not exist in map. + source_contents: ?[]const u8 = null, +}; + +/// Parses an inline source map url like `data:application/json,....` +/// Currently does not handle non-inline source maps. +/// +/// `source` must be in UTF-8 and can be freed after this call. +/// The mappings are owned by the `alloc` allocator. 
+/// Temporary allocations are made to the `arena` allocator, which +/// should be an arena allocator (caller is assumed to call `deinit`). +pub fn parseUrl( + alloc: std.mem.Allocator, + arena: std.mem.Allocator, + source: []const u8, + hint: ParseUrlResultHint, +) !ParseUrl { + const json_bytes = json_bytes: { + const data_prefix = "data:application/json"; + if (bun.strings.hasPrefixComptime(source, data_prefix) and source.len > (data_prefix.len + 1)) try_data_url: { + switch (source[data_prefix.len]) { + ';' => { + const encoding = bun.sliceTo(source[data_prefix.len + 1 ..], ','); + if (!bun.strings.eqlComptime(encoding, "base64")) break :try_data_url; + const base64_data = source[data_prefix.len + ";base64,".len ..]; + + const len = bun.base64.decodeLen(base64_data); + const bytes = arena.alloc(u8, len) catch bun.outOfMemory(); + const decoded = bun.base64.decode(bytes, base64_data); + if (decoded.fail) { + return error.InvalidBase64; + } + break :json_bytes bytes[0..decoded.written]; + }, + ',' => break :json_bytes source[data_prefix.len + 1 ..], + else => break :try_data_url, + } + } + + return error.UnsupportedFormat; + }; + + return parseJSON(alloc, arena, json_bytes, hint); +} + +/// Parses a JSON source-map +/// +/// `source` must be in UTF-8 and can be freed after this call. +/// The mappings are owned by the `alloc` allocator. +/// Temporary allocations are made to the `arena` allocator, which +/// should be an arena allocator (caller is assumed to call `deinit`). 
+pub fn parseJSON( + alloc: std.mem.Allocator, + arena: std.mem.Allocator, + source: []const u8, + hint: ParseUrlResultHint, +) !ParseUrl { + const json_src = bun.logger.Source.initPathString("sourcemap.json", source); + var log = bun.logger.Log.init(arena); + defer log.deinit(); + + var json = bun.JSON.ParseJSON(&json_src, &log, arena) catch { + return error.InvalidJSON; + }; + + // the allocator given to the JS parser is not respected for all parts + // of the parse, so we need to remember to reset the ast store + defer { + bun.JSAst.Expr.Data.Store.reset(); + bun.JSAst.Stmt.Data.Store.reset(); + } if (json.get("version")) |version| { if (version.data != .e_number or version.data.e_number.value != 3.0) { - return error.@"Unsupported sourcemap version"; + return error.UnsupportedVersion; } } - if (json.get("mappings")) |mappings_str| { - if (mappings_str.data != .e_string) { - return error.@"Invalid sourcemap mappings"; - } + const mappings_str = json.get("mappings") orelse { + return error.UnsupportedVersion; + }; - var parsed = bun.sourcemap.Mapping.parse(allocator, try mappings_str.data.e_string.toUTF8(allocator), null, std.math.maxInt(i32)); - if (parsed == .fail) { - try log.addMsg(bun.logger.Msg{ - .data = parsed.fail.toData("sourcemap.json"), - .kind = .err, - }); - return error.@"Failed to parse sourcemap mappings"; - } + if (mappings_str.data != .e_string) { + return error.InvalidSourceMap; + } + + const sources_content = switch ((json.get("sourcesContent") orelse return error.InvalidSourceMap).data) { + .e_array => |arr| arr, + else => return error.InvalidSourceMap, + }; - mappings = parsed.success; + const sources_paths = switch ((json.get("sources") orelse return error.InvalidSourceMap).data) { + .e_array => |arr| arr, + else => return error.InvalidSourceMap, + }; + + if (sources_content.items.len != sources_paths.items.len) { + return error.InvalidSourceMap; } - var sources = std.ArrayList(bun.string).init(allocator); - var sources_content = 
std.ArrayList(string).init(allocator); + var i: usize = 0; - if (json.get("sourcesContent")) |mappings_str| { - if (mappings_str.data != .e_array) { - return error.@"Invalid sourcemap sources"; - } + const source_paths_slice = if (hint != .source_only) + alloc.alloc([]const u8, sources_content.items.len) catch bun.outOfMemory() + else + null; + errdefer if (hint != .source_only) { + for (source_paths_slice.?[0..i]) |item| alloc.free(item); + alloc.free(source_paths_slice.?); + }; - try sources_content.ensureTotalCapacityPrecise(mappings_str.data.e_array.items.len); - for (mappings_str.data.e_array.items.slice()) |source| { - if (source.data != .e_string) { - return error.@"Invalid sourcemap source"; - } + if (hint != .source_only) for (sources_paths.items.slice()) |item| { + if (item.data != .e_string) + return error.InvalidSourceMap; + + const utf16_decode = try bun.js_lexer.decodeStringLiteralEscapeSequencesToUTF16(item.data.e_string.string(arena) catch bun.outOfMemory(), arena); + defer arena.free(utf16_decode); + source_paths_slice.?[i] = bun.strings.toUTF8Alloc(alloc, utf16_decode) catch + return error.InvalidSourceMap; - try source.data.e_string.toUTF8(allocator); - sources_content.appendAssumeCapacity(source.data.e_string.slice()); + i += 1; + }; + + const map = if (hint != .source_only) map: { + const map_data = switch (Mapping.parse( + alloc, + mappings_str.data.e_string.slice(arena), + null, + std.math.maxInt(i32), + std.math.maxInt(i32), + )) { + .success => |x| x, + .fail => |fail| return fail.err, + }; + + const ptr = bun.default_allocator.create(Mapping.ParsedSourceMap) catch bun.outOfMemory(); + ptr.* = map_data; + ptr.external_source_names = source_paths_slice.?; + break :map ptr; + } else null; + errdefer if (map) |m| m.deinit(bun.default_allocator); + + const mapping, const source_index = switch (hint) { + .source_only => |index| .{ null, index }, + .all => |loc| brk: { + const mapping = Mapping.find(map.?.mappings, loc.line, loc.column) orelse + 
break :brk .{ null, null }; + break :brk .{ mapping, std.math.cast(u32, mapping.source_index) }; + }, + .mappings_only => .{ null, null }, + }; + + const content_slice: ?[]const u8 = if (hint != .mappings_only and + source_index != null and + source_index.? < sources_content.items.len) + content: { + const item = sources_content.items.slice()[source_index.?]; + if (item.data != .e_string) { + break :content null; } - } - if (json.get("sources")) |mappings_str| { - if (mappings_str.data != .e_array) { - return error.@"Invalid sourcemap sources"; + const str = item.data.e_string.string(arena) catch bun.outOfMemory(); + if (str.len == 0) { + break :content null; } - try sources.ensureTotalCapacityPrecise(mappings_str.data.e_array.items.len); - for (mappings_str.data.e_array.items.slice()) |source| { - if (source.data != .e_string) { - return error.@"Invalid sourcemap source"; - } + const utf16_decode = try bun.js_lexer.decodeStringLiteralEscapeSequencesToUTF16(str, arena); + defer arena.free(utf16_decode); - try source.data.e_string.toUTF8(allocator); - sources.appendAssumeCapacity(source.data.e_string.slice()); - } - } + break :content bun.strings.toUTF8Alloc(alloc, utf16_decode) catch + return error.InvalidSourceMap; + } else null; - return SourceMap{ - .mapping = mappings, - .allocator = allocator, - .sources_content = sources_content.items, - .sources = sources.items, + return .{ + .map = map, + .mapping = mapping, + .source_contents = content_slice, }; } @@ -118,6 +242,80 @@ pub const Mapping = struct { original: LineColumnOffset, source_index: i32, + pub const Lookup = struct { + mapping: Mapping, + source_map: *ParsedSourceMap, + /// Owned by default_allocator always + /// use `getSourceCode` to access this as a Slice + prefetched_source_code: ?[]const u8, + + /// This creates a bun.String if the source remap *changes* the source url, + /// a case that happens only when the source map points to another file. 
+ pub fn displaySourceURLIfNeeded(lookup: Lookup, base_filename: []const u8) ?bun.String { + // See doc comment on `external_source_names` + if (lookup.source_map.external_source_names.len == 0) + return null; + if (lookup.mapping.source_index >= lookup.source_map.external_source_names.len) + return null; + + const name = lookup.source_map.external_source_names[@intCast(lookup.mapping.source_index)]; + + if (std.fs.path.isAbsolute(base_filename)) { + const dir = bun.path.dirname(base_filename, .auto); + return bun.String.init(bun.path.joinAbs(dir, .auto, name)); + } + + return bun.String.init(name); + } + + /// Only valid if `lookup.source_map.isExternal()` + /// This has the possibility of invoking a call to the filesystem. + pub fn getSourceCode(lookup: Lookup, base_filename: []const u8) ?bun.JSC.ZigString.Slice { + const bytes = bytes: { + assert(lookup.source_map.isExternal()); + if (lookup.prefetched_source_code) |code| { + break :bytes code; + } + + const provider = lookup.source_map.underlying_provider.provider() orelse + return null; + + const index = lookup.mapping.source_index; + + if (provider.getSourceMap( + base_filename, + lookup.source_map.underlying_provider.load_hint, + .{ .source_only = @intCast(index) }, + )) |parsed| + if (parsed.source_contents) |contents| + break :bytes contents; + + if (index >= lookup.source_map.external_source_names.len) + return null; + + const name = lookup.source_map.external_source_names[@intCast(index)]; + + var buf: bun.PathBuffer = undefined; + const normalized = bun.path.joinAbsStringBufZ( + bun.path.dirname(base_filename, .auto), + &buf, + &.{name}, + .loose, + ); + switch (bun.sys.File.readFrom( + std.fs.cwd(), + normalized, + bun.default_allocator, + )) { + .result => |r| break :bytes r, + .err => return null, + } + }; + + return bun.JSC.ZigString.Slice.init(bun.default_allocator, bytes); + } + }; + pub const List = std.MultiArrayList(Mapping); pub inline fn generatedLine(mapping: Mapping) i32 { @@ -398,14 
+596,165 @@ pub const Mapping = struct { pub const ParsedSourceMap = struct { input_line_count: usize = 0, mappings: Mapping.List = .{}, + /// If this is empty, this implies that the source code is a single file + /// transpiled on-demand. If there are items, then it means this is a file + /// loaded without transpilation but with external sources. This array + /// maps `source_index` to the correct filename. + external_source_names: []const []const u8 = &.{}, + /// In order to load source contents from a source-map after the fact, + /// a handle to the underlying source provider is stored. Within this pointer, + /// a flag is stored if it is known to be an inline or external source map. + /// + /// Source contents are large, we don't preserve them in memory. This has + /// the downside of repeatedly re-decoding sourcemaps if multiple errors + /// are emitted (specifically with Bun.inspect / unhandled; the ones that + /// rely on source contents) + underlying_provider: SourceContentPtr = .{ .data = 0 }, + + const SourceContentPtr = packed struct(u64) { + load_hint: SourceMapLoadHint = .none, + data: u62, + + fn fromProvider(p: *SourceProviderMap) SourceContentPtr { + return .{ .data = @intCast(@intFromPtr(p)) }; + } + + pub fn provider(sc: SourceContentPtr) ?*SourceProviderMap { + return @ptrFromInt(sc.data); + } + }; + + pub fn isExternal(psm: *ParsedSourceMap) bool { + return psm.external_source_names.len != 0; + } pub fn deinit(this: *ParsedSourceMap, allocator: std.mem.Allocator) void { this.mappings.deinit(allocator); + + if (this.external_source_names.len > 0) { + for (this.external_source_names) |name| + allocator.free(name); + allocator.free(this.external_source_names); + } + allocator.destroy(this); } }; }; +/// For some sourcemap loading code, this enum is used as a hint if it should +/// bother loading source code into memory.
Most uses of source maps only care +/// about filenames and source mappings, and we should avoid loading contents +/// whenever possible. +pub const SourceContentHandling = enum { + no_source_contents, + source_contents, +}; + +/// For some sourcemap loading code, this enum is used as a hint if we already +/// know if the sourcemap is located on disk or inline in the source code. +pub const SourceMapLoadHint = enum { + none, + is_inline_map, + is_external_map, +}; + +/// This is a pointer to a ZigSourceProvider that may or may not have a `//# sourceMappingURL` comment +/// when we want to lookup this data, we will then resolve it to a ParsedSourceMap if it does. +/// +/// This is used for files that were pre-bundled with `bun build --target=bun --sourcemap` +pub const SourceProviderMap = opaque { + extern fn ZigSourceProvider__getSourceSlice(*SourceProviderMap) bun.String; + + fn findSourceMappingURL(comptime T: type, source: []const T, alloc: std.mem.Allocator) ?bun.JSC.ZigString.Slice { + const needle = comptime bun.strings.literal(T, "//# sourceMappingURL="); + const found = bun.strings.indexOfT(T, source, needle) orelse return null; + const end = std.mem.indexOfScalarPos(T, source, found + needle.len, '\n') orelse source.len; + const url = std.mem.trimRight(T, source[found + needle.len .. 
end], &.{ ' ', '\r' }); + return switch (T) { + u8 => bun.JSC.ZigString.Slice.fromUTF8NeverFree(url), + u16 => bun.JSC.ZigString.Slice.init( + alloc, + bun.strings.toUTF8Alloc(alloc, url) catch bun.outOfMemory(), + ), + else => @compileError("Not Supported"), + }; + } + + /// The last two arguments to this specify loading hints + pub fn getSourceMap( + provider: *SourceProviderMap, + source_filename: []const u8, + load_hint: SourceMapLoadHint, + result: ParseUrlResultHint, + ) ?SourceMap.ParseUrl { + var sfb = std.heap.stackFallback(65536, bun.default_allocator); + var arena = bun.ArenaAllocator.init(sfb.get()); + defer arena.deinit(); + + const new_load_hint: SourceMapLoadHint, const parsed = parsed: { + // try to get an inline source map + if (load_hint != .is_external_map) try_inline: { + const source = ZigSourceProvider__getSourceSlice(provider); + defer source.deref(); + bun.assert(source.tag == .ZigString); + + const found_url = (if (source.is8Bit()) + findSourceMappingURL(u8, source.latin1(), arena.allocator()) + else + findSourceMappingURL(u16, source.utf16(), arena.allocator())) orelse + break :try_inline; + defer found_url.deinit(); + + break :parsed .{ + .is_inline_map, + parseUrl( + bun.default_allocator, + arena.allocator(), + found_url.slice(), + result, + ) catch return null, + }; + } + + // try to load a .map file + if (load_hint != .is_inline_map) try_external: { + var load_path_buf: bun.PathBuffer = undefined; + if (source_filename.len + 4 > load_path_buf.len) + break :try_external; + @memcpy(load_path_buf[0..source_filename.len], source_filename); + @memcpy(load_path_buf[source_filename.len..][0..4], ".map"); + + const data = switch (bun.sys.File.readFrom( + std.fs.cwd(), + load_path_buf[0 .. 
source_filename.len + 4], + arena.allocator(), + )) { + .err => break :try_external, + .result => |data| data, + }; + + break :parsed .{ + .is_external_map, + parseJSON( + bun.default_allocator, + arena.allocator(), + data, + result, + ) catch return null, + }; + } + + return null; + }; + if (parsed.map) |ptr| { + ptr.underlying_provider = Mapping.ParsedSourceMap.SourceContentPtr.fromProvider(provider); + ptr.underlying_provider.load_hint = new_load_hint; + } + return parsed; + } +}; + pub const LineColumnOffset = struct { lines: i32 = 0, columns: i32 = 0, @@ -559,14 +908,14 @@ pub const SourceMapPieces = struct { const potential_start_of_run = current; - current = decodeVLQ(mappings, current).start; - current = decodeVLQ(mappings, current).start; - current = decodeVLQ(mappings, current).start; + current = decodeVLQAssumeValid(mappings, current).start; + current = decodeVLQAssumeValid(mappings, current).start; + current = decodeVLQAssumeValid(mappings, current).start; if (current < mappings.len) { const c = mappings[current]; if (c != ',' and c != ';') { - current = decodeVLQ(mappings, current).start; + current = decodeVLQAssumeValid(mappings, current).start; } } @@ -594,7 +943,8 @@ pub const SourceMapPieces = struct { assert(shift.before.lines == shift.after.lines); const shift_column_delta = shift.after.columns - shift.before.columns; - const encode = encodeVLQ(decode_result.value + shift_column_delta - prev_shift_column_delta); + const vlq_value = decode_result.value + shift_column_delta - prev_shift_column_delta; + const encode = encodeVLQ(vlq_value); j.push(encode.bytes[0..encode.len]); prev_shift_column_delta = shift_column_delta; @@ -637,14 +987,16 @@ pub fn appendSourceMapChunk(j: *Joiner, allocator: std.mem.Allocator, prev_end_s // Strip off the first mapping from the buffer. The first mapping should be // for the start of the original file (the printer always generates one for // the start of the file). 
+ // + // Bun has a 24-byte header for source map meta-data var i: usize = 0; - const generated_column_ = decodeVLQ(source_map, 0); + const generated_column_ = decodeVLQAssumeValid(source_map, i); i = generated_column_.start; - const source_index_ = decodeVLQ(source_map, i); + const source_index_ = decodeVLQAssumeValid(source_map, i); i = source_index_.start; - const original_line_ = decodeVLQ(source_map, i); + const original_line_ = decodeVLQAssumeValid(source_map, i); i = original_line_.start; - const original_column_ = decodeVLQ(source_map, i); + const original_column_ = decodeVLQAssumeValid(source_map, i); i = original_column_.start; source_map = source_map[i..]; @@ -658,7 +1010,12 @@ pub fn appendSourceMapChunk(j: *Joiner, allocator: std.mem.Allocator, prev_end_s start_state.original_column += original_column_.value; j.append( - appendMappingToBuffer(MutableString.initEmpty(allocator), j.lastByte(), prev_end_state, start_state).list.items, + appendMappingToBuffer( + MutableString.initEmpty(allocator), + j.lastByte(), + prev_end_state, + start_state, + ).list.items, 0, allocator, ); @@ -694,9 +1051,7 @@ pub const VLQ = struct { } }; -pub fn encodeVLQWithLookupTable( - value: i32, -) VLQ { +pub fn encodeVLQWithLookupTable(value: i32) VLQ { return if (value >= 0 and value <= 255) vlq_lookup_table[@as(usize, @intCast(value))] else @@ -797,6 +1152,39 @@ pub fn decodeVLQ(encoded: []const u8, start: usize) VLQResult { return VLQResult{ .start = start + encoded_.len, .value = 0 }; } +pub fn decodeVLQAssumeValid(encoded: []const u8, start: usize) VLQResult { + var shift: u8 = 0; + var vlq: u32 = 0; + + // hint to the compiler what the maximum value is + const encoded_ = encoded[start..][0..@min(encoded.len - start, comptime (vlq_max_in_bytes + 1))]; + + // inlining helps for the 1 or 2 byte case, hurts a little for larger + comptime var i: usize = 0; + inline while (i < vlq_max_in_bytes + 1) : (i += 1) { + bun.assert(encoded_[i] < std.math.maxInt(u7)); // invalid 
base64 character + const index = @as(u32, base64_lut[@as(u7, @truncate(encoded_[i]))]); + bun.assert(index != std.math.maxInt(u7)); // invalid base64 character + + // decode a byte + vlq |= (index & 31) << @as(u5, @truncate(shift)); + shift += 5; + + // Stop if there's no continuation bit + if ((index & 32) == 0) { + return VLQResult{ + .start = start + comptime (i + 1), + .value = if ((vlq & 1) == 0) + @as(i32, @intCast(vlq >> 1)) + else + -@as(i32, @intCast((vlq >> 1))), + }; + } + } + + return VLQResult{ .start = start + encoded_.len, .value = 0 }; +} + pub const LineOffsetTable = struct { /// The source map specification is very loose and does not specify what /// column numbers actually mean. The popular "source-map" library from Mozilla @@ -1368,9 +1756,9 @@ pub const Chunk = struct { pub fn addSourceMapping(b: *ThisBuilder, loc: Logger.Loc, output: []const u8) void { if ( - // exclude generated code from source + // don't insert mappings for same location twice b.prev_loc.eql(loc) or - // don't insert mappings for same location twice + // exclude generated code from source loc.start == Logger.Loc.Empty.start) return; diff --git a/src/string.zig b/src/string.zig index c3cd95d4287e1..d6814ca0f5219 100644 --- a/src/string.zig +++ b/src/string.zig @@ -266,10 +266,22 @@ pub const StringImplAllocator = struct { }; pub const Tag = enum(u8) { + /// String is not valid. Observed on some failed operations. + /// To prevent crashes, this value acts similarly to .Empty (such as length = 0) Dead = 0, + /// String is backed by a WTF::StringImpl from JavaScriptCore. + /// Can be in either `latin1` or `utf16le` encodings. WTFStringImpl = 1, + /// Memory has an unknown owner, likely in Bun's Zig codebase. If `isGloballyAllocated` + /// is set, then it is owned by mimalloc. When converted to JSValue it has to be cloned + /// into a WTF::String. + /// Can be in either `utf8` or `utf16le` encodings. ZigString = 2, + /// Static memory that is guaranteed to never be freed.
When converted to WTF::String, + /// the memory is not cloned, but instead referenced with WTF::ExternalStringImpl. + /// Can be in either `utf8` or `utf16le` encodings. StaticZigString = 3, + /// String is "" Empty = 4, }; diff --git a/src/string_immutable.zig b/src/string_immutable.zig index cf43e1f43c358..f47d0f174b985 100644 --- a/src/string_immutable.zig +++ b/src/string_immutable.zig @@ -312,6 +312,11 @@ pub inline fn indexOf(self: string, str: string) ?usize { return @as(usize, @intCast(i)); } +pub fn indexOfT(comptime T: type, haystack: []const T, needle: []const T) ?usize { + if (T == u8) return indexOf(haystack, needle); + return std.mem.indexOf(T, haystack, needle); +} + pub fn split(self: string, delimiter: string) SplitIterator { return SplitIterator{ .buffer = self, diff --git a/test/bundler/bundler_bun.test.ts b/test/bundler/bundler_bun.test.ts index 5f2413df54936..663186068445d 100644 --- a/test/bundler/bundler_bun.test.ts +++ b/test/bundler/bundler_bun.test.ts @@ -2,6 +2,7 @@ import assert from "assert"; import dedent from "dedent"; import { ESBUILD, itBundled, testForFile } from "./expectBundled"; import { Database } from "bun:sqlite"; +import { isWindows } from "harness"; var { describe, test, expect } = testForFile(import.meta.path); describe("bundler", () => { @@ -42,4 +43,60 @@ describe("bundler", () => { }, run: { stdout: "Hello, world!", setCwd: true }, }); + itBundled("bun/TargetBunNoSourcemapMessage", { + target: "bun", + files: { + "/entry.ts": /* js */ ` + // this file has comments and weird whitespace, intentionally + // to make it obvious if sourcemaps were generated and mapped properly + if (true) code(); + function code() { + // hello world + throw new + Error("Hello World"); + } + `, + }, + run: { + exitCode: 1, + validate({ stderr }) { + expect(stderr).toInclude("\nnote: missing sourcemaps for "); + expect(stderr).toInclude("\nnote: consider bundling with '--sourcemap' to get an unminified traces\n"); + }, + }, + }); + 
itBundled("bun/TargetBunSourcemapInline", { + target: "bun", + files: { + "/entry.ts": /* js */ ` + // this file has comments and weird whitespace, intentionally + // to make it obvious if sourcemaps were generated and mapped properly + if (true) code(); + function code() { + // hello world + throw new + Error("Hello World"); + } + `, + }, + sourceMap: "inline", + run: { + exitCode: 1, + validate({ stderr }) { + assert( + stderr.startsWith( + `1 | // this file has comments and weird whitespace, intentionally +2 | // to make it obvious if sourcemaps were generated and mapped properly +3 | if (true) code(); +4 | function code() { +5 | // hello world +6 | throw new + ^ +error: Hello World`, + ) || void console.error(stderr), + ); + expect(stderr).toInclude("entry.ts:6:19"); + }, + }, + }); }); diff --git a/test/bundler/expectBundled.ts b/test/bundler/expectBundled.ts index a28dbcc3f9eec..f405addbb39a1 100644 --- a/test/bundler/expectBundled.ts +++ b/test/bundler/expectBundled.ts @@ -307,6 +307,10 @@ export interface BundlerTestRunOptions { runtime?: "bun" | "node"; setCwd?: boolean; + /** Expect a certain non-zero exit code */ + exitCode?: number; + /** Run a function with stdout and stderr. Use expect to assert exact outputs */ + validate?: (ctx: { stdout: string; stderr: string }) => void; } /** given when you do itBundled('id', (this object) => BundlerTestInput) */ @@ -1259,13 +1263,15 @@ for (const [key, blob] of build.outputs) { throw new Error(prefix + "run.file is required when there is more than one entrypoint."); } - const { success, stdout, stderr } = Bun.spawnSync({ - cmd: [ - ...(compile ? [] : [(run.runtime ?? "bun") === "bun" ? bunExe() : "node"]), - ...(run.bunArgs ?? []), - file, - ...(run.args ?? []), - ] as [string, ...string[]], + const args = [ + ...(compile ? [] : [(run.runtime ?? "bun") === "bun" ? bunExe() : "node"]), + ...(run.bunArgs ?? []), + file, + ...(run.args ?? 
[]), + ] as [string, ...string[]]; + + const { success, stdout, stderr, exitCode, signalCode } = Bun.spawnSync({ + cmd: args, env: { ...bunEnv, FORCE_COLOR: "0", @@ -1275,6 +1281,10 @@ for (const [key, blob] of build.outputs) { cwd: run.setCwd ? root : undefined, }); + if (signalCode === "SIGTRAP") { + throw new Error(prefix + "Runtime failed\n" + stdout!.toUnixString() + "\n" + stderr!.toUnixString()); + } + if (run.error) { if (success) { throw new Error( @@ -1326,7 +1336,15 @@ for (const [key, blob] of build.outputs) { } } } else if (!success) { - throw new Error(prefix + "Runtime failed\n" + stdout!.toUnixString() + "\n" + stderr!.toUnixString()); + if (run.exitCode) { + expect([exitCode, signalCode]).toEqual([run.exitCode, undefined]); + } else { + throw new Error(prefix + "Runtime failed\n" + stdout!.toUnixString() + "\n" + stderr!.toUnixString()); + } + } + + if (run.validate) { + run.validate({ stderr: stderr.toUnixString(), stdout: stdout.toUnixString() }); } if (run.stdout !== undefined) { diff --git a/test/cli/hot/hot.test.ts b/test/cli/hot/hot.test.ts index 682f6ec49c0e0..c699638fb33ec 100644 --- a/test/cli/hot/hot.test.ts +++ b/test/cli/hot/hot.test.ts @@ -1,6 +1,6 @@ import { spawn } from "bun"; -import { beforeAll, beforeEach, expect, it } from "bun:test"; -import { bunExe, bunEnv, tmpdirSync } from "harness"; +import { beforeEach, expect, it } from "bun:test"; +import { bunExe, bunEnv, tmpdirSync, isDebug, isWindows } from "harness"; import { cpSync, readFileSync, renameSync, rmSync, unlinkSync, writeFileSync, copyFileSync } from "fs"; import { join } from "path"; @@ -319,3 +319,252 @@ it("should hot reload when a file is renamed() into place", async () => { runner?.kill?.(9); } }); + +const comment_spam = ("//" + "B".repeat(2000) + "\n").repeat(1000); +it( + "should work with sourcemap generation", + async () => { + writeFileSync( + hotRunnerRoot, + `// source content +${comment_spam} +throw new Error('0');`, + ); + await using runner = spawn({ + 
cmd: [bunExe(), "--smol", "--hot", "run", hotRunnerRoot], + env: bunEnv, + cwd, + stdout: "ignore", + stderr: "pipe", + stdin: "ignore", + }); + let reloadCounter = 0; + function onReload() { + writeFileSync( + hotRunnerRoot, + `// source content +${comment_spam} +${" ".repeat(reloadCounter * 2)}throw new Error(${reloadCounter});`, + ); + } + let str = ""; + outer: for await (const chunk of runner.stderr) { + str += new TextDecoder().decode(chunk); + var any = false; + if (!/error: .*[0-9]\n.*?\n/g.test(str)) continue; + + let it = str.split("\n"); + let line; + while ((line = it.shift())) { + if (!line.includes("error")) continue; + str = ""; + + if (reloadCounter === 50) { + runner.kill(); + break; + } + + if (line.includes(`error: ${reloadCounter - 1}`)) { + onReload(); // re-save file to prevent deadlock + continue outer; + } + expect(line).toContain(`error: ${reloadCounter}`); + reloadCounter++; + + let next = it.shift()!; + if (!next) throw new Error(line); + const match = next.match(/\s*at.*?:1003:(\d+)$/); + if (!match) throw new Error("invalid string: " + next); + const col = match[1]; + expect(Number(col)).toBe(1 + "throw ".length + (reloadCounter - 1) * 2); + any = true; + } + + if (any) await onReload(); + } + await runner.exited; + expect(reloadCounter).toBe(50); + }, + isDebug ? 
Infinity : 10_000, +); + +it("should work with sourcemap loading", async () => { + let bundleIn = join(cwd, "bundle_in.ts"); + rmSync(hotRunnerRoot); + writeFileSync( + bundleIn, + `// source content +// +// +throw new Error('0');`, + ); + await using bundler = spawn({ + cmd: [bunExe(), "build", "--watch", bundleIn, "--target=bun", "--sourcemap", "--outfile", hotRunnerRoot], + env: bunEnv, + cwd, + stdout: "pipe", + stderr: "pipe", + stdin: "ignore", + }); + await using runner = spawn({ + cmd: [bunExe(), "--hot", "run", hotRunnerRoot], + env: bunEnv, + cwd, + stdout: "ignore", + stderr: "pipe", + stdin: "ignore", + }); + let reloadCounter = 0; + function onReload() { + writeFileSync( + bundleIn, + `// source content +// etc etc +// etc etc +${" ".repeat(reloadCounter * 2)}throw new Error(${reloadCounter});`, + ); + } + let str = ""; + outer: for await (const chunk of runner.stderr) { + str += new TextDecoder().decode(chunk); + var any = false; + if (!/error: .*[0-9]\n.*?\n/g.test(str)) continue; + + let it = str.split("\n"); + let line; + while ((line = it.shift())) { + if (!line.includes("error")) continue; + str = ""; + + if (reloadCounter === 50) { + runner.kill(); + break; + } + + if (line.includes(`error: ${reloadCounter - 1}`)) { + onReload(); // re-save file to prevent deadlock + continue outer; + } + expect(line).toContain(`error: ${reloadCounter}`); + reloadCounter++; + + let next = it.shift()!; + expect(next).toInclude("bundle_in.ts"); + const col = next.match(/\s*at.*?:4:(\d+)$/)![1]; + expect(Number(col)).toBe(1 + "throw ".length + (reloadCounter - 1) * 2); + any = true; + } + + if (any) await onReload(); + } + expect(reloadCounter).toBe(50); + bundler.kill(); +}); + +const long_comment = "BBBB".repeat(100000); + +it( + "should work with sourcemap loading with large files", + async () => { + let bundleIn = join(cwd, "bundle_in.ts"); + rmSync(hotRunnerRoot); + writeFileSync( + bundleIn, + `// ${long_comment} +// +console.error("RSS: %s", 
process.memoryUsage().rss); +throw new Error('0');`, + ); + await using bundler = spawn({ + cmd: [ + // + bunExe(), + "build", + "--watch", + bundleIn, + "--target=bun", + "--sourcemap", + "--outfile", + hotRunnerRoot, + ], + env: bunEnv, + cwd, + stdout: "ignore", + stderr: "ignore", + stdin: "ignore", + }); + await using runner = spawn({ + cmd: [ + // + bunExe(), + "--hot", + "run", + hotRunnerRoot, + ], + env: bunEnv, + cwd, + stdout: "inherit", + stderr: "pipe", + stdin: "ignore", + }); + let reloadCounter = 0; + function onReload() { + writeFileSync( + bundleIn, + `// ${long_comment} +console.error("RSS: %s", process.memoryUsage().rss); +// +${" ".repeat(reloadCounter * 2)}throw new Error(${reloadCounter});`, + ); + } + let str = ""; + let sampleMemory10: number | undefined; + let sampleMemory100: number | undefined; + outer: for await (const chunk of runner.stderr) { + str += new TextDecoder().decode(chunk); + var any = false; + if (!/error: .*[0-9]\n.*?\n/g.test(str)) continue; + + let it = str.split("\n"); + let line; + while ((line = it.shift())) { + if (!line.includes("error:")) continue; + let rssMatch = str.match(/RSS: (\d+(\.\d+)?)\n/); + let rss; + if (rssMatch) rss = Number(rssMatch[1]); + str = ""; + + if (reloadCounter == 10) { + sampleMemory10 = rss; + } + + if (reloadCounter >= 50) { + sampleMemory100 = rss; + runner.kill(); + break; + } + + if (line.includes(`error: ${reloadCounter - 1}`)) { + onReload(); // re-save file to prevent deadlock + continue outer; + } + expect(line).toContain(`error: ${reloadCounter}`); + + reloadCounter++; + let next = it.shift()!; + expect(next).toInclude("bundle_in.ts"); + const col = next.match(/\s*at.*?:4:(\d+)$/)![1]; + expect(Number(col)).toBe(1 + "throw ".length + (reloadCounter - 1) * 2); + any = true; + } + + if (any) await onReload(); + } + expect(reloadCounter).toBe(50); + bundler.kill(); + await runner.exited; + // TODO: bun has a memory leak when --hot is used on very large files + // console.log({ 
sampleMemory10, sampleMemory100 }); + }, + isDebug ? Infinity : 20_000, +); diff --git a/test/harness.ts b/test/harness.ts index bcca18648b5d3..6ca1dac819fe1 100644 --- a/test/harness.ts +++ b/test/harness.ts @@ -13,6 +13,7 @@ export const isLinux = process.platform === "linux"; export const isPosix = isMacOS || isLinux; export const isWindows = process.platform === "win32"; export const isIntelMacOS = isMacOS && process.arch === "x64"; +export const isDebug = Bun.version.includes("debug"); export const bunEnv: NodeJS.ProcessEnv = { ...process.env,