Skip to content

Commit

Permalink
20% faster TextDecoder on small inputs
Browse files Browse the repository at this point in the history
  • Loading branch information
Jarred-Sumner committed Nov 8, 2022
1 parent 6c6e680 commit 1604666
Show file tree
Hide file tree
Showing 6 changed files with 3,969 additions and 4,448 deletions.
8,303 changes: 3,877 additions & 4,426 deletions src/bun.js/bindings/ZigGeneratedClasses.cpp

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions src/bun.js/bindings/generated_classes.zig
Expand Up @@ -1814,6 +1814,8 @@ pub const JSTextDecoder = struct {
@compileLog("TextDecoder.finalize is not a finalizer");
}

if (@TypeOf(TextDecoder.decodeWithoutTypeChecks) != fn (*TextDecoder, *JSC.JSGlobalObject, *JSC.JSUint8Array) callconv(.C) JSC.JSValue)
@compileLog("Expected TextDecoder.decodeWithoutTypeChecks to be a DOMJIT function");
if (@TypeOf(TextDecoder.decode) != CallbackType)
@compileLog("Expected TextDecoder.decode to be a callback");
if (@TypeOf(TextDecoder.getEncoding) != GetterType)
Expand All @@ -1825,6 +1827,7 @@ pub const JSTextDecoder = struct {
if (!JSC.is_bindgen) {
@export(TextDecoder.constructor, .{ .name = "TextDecoderClass__construct" });
@export(TextDecoder.decode, .{ .name = "TextDecoderPrototype__decode" });
@export(TextDecoder.decodeWithoutTypeChecks, .{ .name = "TextDecoderPrototype__decodeWithoutTypeChecks" });
@export(TextDecoder.finalize, .{ .name = "TextDecoderClass__finalize" });
@export(TextDecoder.getEncoding, .{ .name = "TextDecoderPrototype__getEncoding" });
@export(TextDecoder.getFatal, .{ .name = "TextDecoderPrototype__getFatal" });
Expand Down
4 changes: 3 additions & 1 deletion src/bun.js/scripts/generate-classes.ts
Expand Up @@ -110,7 +110,9 @@ function DOMJITFunctionDeclaration(jsClassName, fnName, { args, returns }) {
)}Wrapper,
${jsClassName}::info(),
JSC::DOMJIT::Effect::forReadWrite(JSC::DOMJIT::HeapRange::top(), JSC::DOMJIT::HeapRange::top()),
${DOMJITType("JSValue")}, ${args.map(DOMJITType).join(", ")});
${returns === "JSString" ? "JSC::SpecString" : DOMJITType("JSValue")}, ${args
.map(DOMJITType)
.join(", ")});
`.trim();
}

Expand Down
5 changes: 5 additions & 0 deletions src/bun.js/webcore/encoding.classes.ts
Expand Up @@ -19,6 +19,11 @@ export default [
decode: {
fn: "decode",
length: 1,

DOMJIT: {
returns: "JSString",
args: ["JSUint8Array"],
},
},
},
}),
Expand Down
65 changes: 65 additions & 0 deletions src/bun.js/webcore/encoding.zig
Expand Up @@ -651,6 +651,71 @@ pub const TextDecoder = struct {
}
}

/// Fast-path variant of `decode` that receives an already-typed
/// `JSUint8Array` and dispatches on `this.encoding`.
///
/// Returns the decoded string as a `JSValue`. On failure an exception is
/// thrown on `globalThis` and `JSValue.zero` is returned.
///
/// NOTE(review): presumably only invoked through the generated DOMJIT
/// wrapper, so argument type checks have already happened — confirm against
/// the generated bindings.
pub fn decodeWithoutTypeChecks(this: *TextDecoder, globalThis: *JSC.JSGlobalObject, uint8array: *JSC.JSUint8Array) callconv(.C) JSValue {
    const buffer_slice = uint8array.slice();
    switch (this.encoding) {
        EncodingLabel.@"latin1" => {
            return ZigString.init(buffer_slice).toValueGC(globalThis);
        },
        EncodingLabel.@"UTF-8" => {
            // `fail_if_invalid` is a comptime parameter of `toUTF16Alloc`, so the
            // fatal and non-fatal modes need two distinct call sites.
            if (this.fatal) {
                if (strings.toUTF16Alloc(default_allocator, buffer_slice, true)) |result_| {
                    if (result_) |result| {
                        return ZigString.toExternalU16(result.ptr, result.len, globalThis);
                    }
                } else |err| {
                    switch (err) {
                        error.InvalidByteSequence => {
                            globalThis.throw("Invalid byte sequence", .{});
                            return JSValue.zero;
                        },
                        error.OutOfMemory => {
                            globalThis.throw("Out of memory", .{});
                            return JSValue.zero;
                        },
                        else => {
                            globalThis.throw("Unknown error", .{});
                            return JSValue.zero;
                        },
                    }
                }
            } else {
                if (strings.toUTF16Alloc(default_allocator, buffer_slice, false)) |result_| {
                    if (result_) |result| {
                        return ZigString.toExternalU16(result.ptr, result.len, globalThis);
                    }
                } else |err| {
                    switch (err) {
                        error.OutOfMemory => {
                            globalThis.throw("Out of memory", .{});
                            return JSValue.zero;
                        },
                        else => {
                            globalThis.throw("Unknown error", .{});
                            return JSValue.zero;
                        },
                    }
                }
            }

            // Reached when `toUTF16Alloc` returned null: the original bytes are
            // handed to the string constructor unconverted.
            // NOTE(review): presumably null means no UTF-16 conversion was
            // needed (e.g. all-ASCII input) — confirm against `toUTF16Alloc`.
            // Experiment: using mimalloc directly is slightly slower
            return ZigString.init(buffer_slice).toValueGC(globalThis);
        },

        EncodingLabel.@"UTF-16LE" => {
            // `std.mem.bytesAsSlice` divides the byte length with `@divExact`,
            // so an odd-length buffer would be illegal behaviour. Only whole
            // 2-byte code units are reinterpreted; a dangling final byte is
            // dropped.
            // NOTE(review): the WHATWG Encoding spec treats a dangling byte as
            // an error (U+FFFD, or a TypeError when `fatal`); handle that here
            // if spec-exact behaviour is required.
            const whole_units_len = buffer_slice.len - (buffer_slice.len % @sizeOf(u16));
            const whole_units = buffer_slice[0..whole_units_len];

            // The aligned path needs `u16` alignment (2 bytes), not the
            // alignment of a pointer type, which is what `@alignOf([*]u16)`
            // evaluates to.
            if (std.mem.isAligned(@ptrToInt(whole_units.ptr), @alignOf(u16))) {
                return this.decodeUTF16WithAlignment([]u16, @alignCast(2, std.mem.bytesAsSlice(u16, whole_units)), globalThis);
            }

            return this.decodeUTF16WithAlignment([]align(1) u16, std.mem.bytesAsSlice(u16, whole_units), globalThis);
        },
        else => {
            globalThis.throwInvalidArguments("TextDecoder.decode set to unsupported encoding", .{});
            return JSValue.zero;
        },
    }
}

pub fn constructor(
globalThis: *JSC.JSGlobalObject,
callframe: *JSC.CallFrame,
Expand Down
37 changes: 16 additions & 21 deletions src/string_immutable.zig
Expand Up @@ -942,16 +942,13 @@ pub fn toUTF16Alloc(allocator: std.mem.Allocator, bytes: []const u8, comptime fa
var remaining = chunk;

{
var sequence: [4]u8 = undefined;

if (remaining.len >= 4) {
sequence = remaining[0..4].*;
} else {
sequence[0] = remaining[0];
sequence[1] = if (remaining.len > 1) remaining[1] else 0;
sequence[2] = if (remaining.len > 2) remaining[2] else 0;
sequence[3] = 0;
}
const sequence: [4]u8 = switch (remaining.len) {
0 => unreachable,
1 => [_]u8{ remaining[0], 0, 0, 0 },
2 => [_]u8{ remaining[0], remaining[1], 0, 0 },
3 => [_]u8{ remaining[0], remaining[1], remaining[2], 0 },
else => remaining[0..4].*,
};

const replacement = strings.convertUTF8BytesIntoUTF16(&sequence);
if (comptime fail_if_invalid) {
Expand Down Expand Up @@ -980,16 +977,13 @@ pub fn toUTF16Alloc(allocator: std.mem.Allocator, bytes: []const u8, comptime fa
const last = remaining[0..j];
remaining = remaining[j..];

var sequence: [4]u8 = undefined;

if (remaining.len >= 4) {
sequence = remaining[0..4].*;
} else {
sequence[0] = remaining[0];
sequence[1] = if (remaining.len > 1) remaining[1] else 0;
sequence[2] = if (remaining.len > 2) remaining[2] else 0;
sequence[3] = 0;
}
const sequence: [4]u8 = switch (remaining.len) {
0 => unreachable,
1 => [_]u8{ remaining[0], 0, 0, 0 },
2 => [_]u8{ remaining[0], remaining[1], 0, 0 },
3 => [_]u8{ remaining[0], remaining[1], remaining[2], 0 },
else => remaining[0..4].*,
};

const replacement = strings.convertUTF8BytesIntoUTF16(&sequence);
if (comptime fail_if_invalid) {
Expand All @@ -1016,7 +1010,8 @@ pub fn toUTF16Alloc(allocator: std.mem.Allocator, bytes: []const u8, comptime fa
}

if (remaining.len > 0) {
try output.ensureUnusedCapacity(remaining.len);
try output.ensureTotalCapacityPrecise(output.items.len + remaining.len);

output.items.len += remaining.len;
strings.copyU8IntoU16(output.items[output.items.len - remaining.len ..], remaining);
}
Expand Down

0 comments on commit 1604666

Please sign in to comment.