Skip to content

Commit

Permalink
impl #1
Browse files Browse the repository at this point in the history
  • Loading branch information
Jarred-Sumner committed Jun 4, 2022
1 parent af6859a commit 9f640ff
Show file tree
Hide file tree
Showing 4 changed files with 403 additions and 0 deletions.
86 changes: 86 additions & 0 deletions bench/snippets/escapeHTML.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import { group } from "mitata";
import { bench, run } from "mitata";

// Keep a module-level reference to Bun's native escapeHTML; the benchmark
// callbacks further down call it through this binding.
var bunEscapeHTML = Bun.escapeHTML;

// Matches the first character that has to be replaced by an HTML entity.
const matchHtmlRegExp = /["'&<>]/;

/**
 * Replaces the characters `"`, `&`, `'`, `<` and `>` with their HTML entities.
 *
 * The argument is coerced to a string first. When it contains none of the
 * five characters, the coerced string is returned without further work.
 *
 * @param {string} string Text that will later be inserted into HTML
 * @return {string} The text with the five special characters escaped
 * @public
 */
function reactEscapeHtml(string) {
  const text = "" + string;
  const firstHit = matchHtmlRegExp.exec(text);

  if (firstHit === null) {
    return text;
  }

  const end = text.length;
  let out = "";
  // Everything before this offset has already been appended to `out`.
  let copiedUpTo = 0;

  // Nothing before the first regex hit needs escaping, so scanning starts there.
  for (let pos = firstHit.index; pos < end; pos++) {
    const code = text.charCodeAt(pos);
    let entity;

    if (code === 34) {
      entity = "&quot;"; // "
    } else if (code === 38) {
      entity = "&amp;"; // &
    } else if (code === 39) {
      entity = "&#x27;"; // ' -- modified from escape-html; used to be '&#39'
    } else if (code === 60) {
      entity = "&lt;"; // <
    } else if (code === 62) {
      entity = "&gt;"; // >
    } else {
      continue;
    }

    // Flush the untouched run that precedes this special character.
    if (copiedUpTo !== pos) {
      out += text.substring(copiedUpTo, pos);
    }

    out += entity;
    copiedUpTo = pos + 1;
  }

  // Append whatever plain text follows the last escaped character.
  if (copiedUpTo !== end) {
    return out + text.substring(copiedUpTo, end);
  }
  return out;
}

// Benchmark inputs: the same script tag surrounded by filler text, at
// different sizes and positions, plus one input with nothing to escape.
const filler = "lalala";
const payload = "<script>alert(1)</script>";

const short = filler + payload + filler;
const long = short.repeat(9000);
const middle = filler.repeat(2000) + payload + filler.repeat(2000);
const nothing = filler.repeat(9999);

// Register one group per input, in the order long, short, middle, nothing.
// Each group compares the JavaScript implementation against Bun's.
for (const [label, input] of Object.entries({ long, short, middle, nothing })) {
  group(`${label} (${input.length})`, () => {
    bench("react's escapeHTML", () => reactEscapeHtml(input));
    bench("bun's escapeHTML", () => bunEscapeHTML(input));
  });
}

await run();
54 changes: 54 additions & 0 deletions integration/bunjs-only-snippets/escapeHTML.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import { describe, it, expect } from "bun:test";
import { gcTick } from "./gc";

describe("Bun.escapeHTML", () => {
  it("works", () => {
    // Each entry is [input, expected output]; entries are checked in order.
    const cases = [
      ["<script>alert(1)</script>", "&lt;script&gt;alert(1)&lt;/script&gt;"],

      // The five characters that get replaced by an entity.
      ["<", "&lt;"],
      [">", "&gt;"],
      ["&", "&amp;"],
      ["'", "&#x27;"],
      ['"', "&quot;"],

      // Characters that are expected to come back unchanged.
      ["\n", "\n"],
      ["\r", "\r"],
      ["\t", "\t"],
      ["\f", "\f"],
      ["\v", "\v"],
      ["\b", "\b"],
      ["\u00A0", "\u00A0"],

      // The matrix of cases we need to test for:
      // 1. Works with short strings
      // 2. Works with long strings
      // 3. Works with latin1 strings
      // 4. Works with utf16 strings
      // 5. Works when the text to escape is somewhere in the middle
      // 6. Works when the text to escape is in the beginning
      // 7. Works when the text to escape is in the end
      // 8. Returns the same string when there's no need to escape
      [
        "lalala" + "<script>alert(1)</script>" + "lalala",
        "lalala&lt;script&gt;alert(1)&lt;/script&gt;lalala",
      ],
      [
        "<script>alert(1)</script>" + "lalala",
        "&lt;script&gt;alert(1)&lt;/script&gt;lalala",
      ],
      [
        "lalala" + "<script>alert(1)</script>",
        "lalala" + "&lt;script&gt;alert(1)&lt;/script&gt;",
      ],

      // The same three placements, repeated to produce long inputs.
      [
        ("lalala" + "<script>alert(1)</script>" + "lalala").repeat(900),
        "lalala&lt;script&gt;alert(1)&lt;/script&gt;lalala".repeat(900),
      ],
      [
        ("<script>alert(1)</script>" + "lalala").repeat(900),
        "&lt;script&gt;alert(1)&lt;/script&gt;lalala".repeat(900),
      ],
      [
        ("lalala" + "<script>alert(1)</script>").repeat(900),
        ("lalala" + "&lt;script&gt;alert(1)&lt;/script&gt;").repeat(900),
      ],
    ];

    for (const [input, expected] of cases) {
      expect(Bun.escapeHTML(input)).toBe(expected);
    }
  });
});
39 changes: 39 additions & 0 deletions src/javascript/jsc/api/bun.zig
Original file line number Diff line number Diff line change
Expand Up @@ -1150,6 +1150,9 @@ pub const Class = NewClass(
.inflateSync = .{
.rfn = JSC.wrapWithHasContainer(JSZlib, "inflateSync", false, false, true),
},
.escapeHTML = .{
.rfn = Bun.escapeHTML,
},
},
.{
.main = .{
Expand Down Expand Up @@ -1612,6 +1615,42 @@ pub fn serve(
unreachable;
}

/// Native implementation backing `Bun.escapeHTML`.
///
/// Only the first argument is read. With no arguments an empty string is
/// returned. For non-16-bit input the string's bytes are passed to
/// `strings.escapeHTMLForLatin1Input`; if that call fails, an
/// "Out of memory" error is reported through `exception` and `null` is returned.
///
/// NOTE(review): when the input string is 16-bit it is returned as-is, i.e.
/// no escaping is performed on that path — confirm whether that is intended.
pub fn escapeHTML(
    _: void,
    ctx: js.JSContextRef,
    _: js.JSObjectRef,
    _: js.JSObjectRef,
    arguments: []const js.JSValueRef,
    exception: js.ExceptionRef,
) js.JSValueRef {
    if (arguments.len < 1) {
        return ZigString.init("").toValue(ctx).asObjectRef();
    }

    const input_value = arguments[0].?.value();
    const zig_str = input_value.getZigString(ctx);

    // 16-bit input: hand the original value back untouched.
    if (zig_str.is16Bit()) {
        return input_value.asObjectRef();
    }

    const latin1 = zig_str.slice();
    const escaped = strings.escapeHTMLForLatin1Input(ctx.bunVM().allocator, latin1) catch {
        JSC.JSError(undefined, "Out of memory", .{}, ctx, exception);
        return null;
    };

    // Same pointer and length as the input means the helper returned the
    // input slice itself, so the original JS value can be reused.
    const unchanged = escaped.ptr == latin1.ptr and escaped.len == latin1.len;
    if (unchanged) {
        return input_value.asObjectRef();
    }

    // single character escaped strings are statically allocated
    if (latin1.len == 1) {
        return ZigString.init(escaped).toValue(ctx).asObjectRef();
    }

    // Longer results go through toExternalValue instead.
    // NOTE(review): presumably this transfers ownership of the allocated
    // buffer to the JS string — confirm against ZigString.
    return ZigString.init(escaped).toExternalValue(ctx).asObjectRef();
}

pub fn allocUnsafe(
_: void,
ctx: js.JSContextRef,
Expand Down
Loading

0 comments on commit 9f640ff

Please sign in to comment.