Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

js: fix libyaml bindings #7918

Merged
merged 1 commit into from
May 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
48 changes: 48 additions & 0 deletions js/engine/Main.ml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,52 @@ open Js_of_ocaml
(see companion setter in Semgrep_js_shared.ml) *)
external get_jsoo_mount_point : unit -> 'any list = "get_jsoo_mount_point"

(* Semgrep uses ocaml-yaml, which binds the libyaml C library through
ocaml-ctypes. This is problematic for WebAssembly because the target does not
match the architecture of the build machine (e.g. most computers these days
have a 64-bit address space, but WASM is currently 32-bit). Cross-compilation
ended up being too much work, so instead we exploit JavaScript's mutability to
correct some memory offsets for WASM.

[override_yaml_ctypes_field_offset field offset] replaces the offset recorded
for [field] with [offset]. The primitive itself is implemented on the
JavaScript side (js/engine/core.js). *)
external override_yaml_ctypes_field_offset :
('a, 'b) Yaml_bindings.T.field -> int -> unit
= "override_yaml_ctypes_field_offset"

(* Overrides the offsets of the libyaml struct fields listed below so that they
match the WASM build instead of the build machine. Each literal is the new
offset of the named field inside its struct (presumably in bytes).
NOTE(review): the constants were reportedly obtained by printing the offset of
every struct field from the WASM-compiled libyaml; they have to be re-derived
if the libyaml struct definitions ever change. *)
let fix_libyaml_field_offsets_for_wasm () =
(* mark *)
override_yaml_ctypes_field_offset Yaml_types.M.Mark.line 4;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How did you end up with all those magic constants below?
Did you disassemble the resulting WASM?
If a bus hit you tomorrow, how could someone else update this code if, say, the libyaml C library changes a little bit?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, when I was investigating this I created a macro that I used to print the offset of every struct field. I'll restructure and add more comments to make it clearer what's going on here.

override_yaml_ctypes_field_offset Yaml_types.M.Mark.column 8;

(* event *)
(* start_mark and end_mark are two distinct fields of the event struct and each
needs its own override. Overriding start_mark twice would leave end_mark at
its build-machine offset and move start_mark to the wrong place.
NOTE(review): 32 and 44 are 12 apart, which fits a mark made of three 4-byte
members (offsets 0, 4, 8) - confirm against yaml.h when updating. *)
override_yaml_ctypes_field_offset Yaml_types.M.Event.data 4;
override_yaml_ctypes_field_offset Yaml_types.M.Event.start_mark 32;
override_yaml_ctypes_field_offset Yaml_types.M.Event.end_mark 44;
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

argh, typo


(* NOTE(review): in every group below, consecutive fields are 4 apart, which is
consistent with 32-bit pointers, ints and size_t - confirm against yaml.h
before changing any value. *)

(* version_directive *)
override_yaml_ctypes_field_offset Yaml_types.M.Version_directive.minor 4;

(* event_sequence_start *)
override_yaml_ctypes_field_offset Yaml_types.M.Event.Sequence_start.tag 4;
override_yaml_ctypes_field_offset Yaml_types.M.Event.Sequence_start.implicit 8;
override_yaml_ctypes_field_offset Yaml_types.M.Event.Sequence_start.style 12;

(* event_scalar *)
override_yaml_ctypes_field_offset Yaml_types.M.Event.Scalar.tag 4;
override_yaml_ctypes_field_offset Yaml_types.M.Event.Scalar.value 8;
override_yaml_ctypes_field_offset Yaml_types.M.Event.Scalar.length 12;
override_yaml_ctypes_field_offset Yaml_types.M.Event.Scalar.plain_implicit 16;
override_yaml_ctypes_field_offset Yaml_types.M.Event.Scalar.quoted_implicit 20;
override_yaml_ctypes_field_offset Yaml_types.M.Event.Scalar.style 24;

(* event_mapping_start *)
override_yaml_ctypes_field_offset Yaml_types.M.Event.Mapping_start.tag 4;
override_yaml_ctypes_field_offset Yaml_types.M.Event.Mapping_start.implicit 8;
override_yaml_ctypes_field_offset Yaml_types.M.Event.Mapping_start.style 12;

(* event_document_start *)
(* tag_directives starts at 4 and implicit at 12, so tag_directives presumably
spans 8 (a pair of 32-bit pointers) - TODO confirm. *)
override_yaml_ctypes_field_offset
Yaml_types.M.Event.Document_start.tag_directives 4;
override_yaml_ctypes_field_offset Yaml_types.M.Event.Document_start.implicit
12

(* Shorthand for a JavaScript boolean value as seen from OCaml. *)
type jbool = bool Js.t
(* Shorthand for a JavaScript string value as seen from OCaml. *)
type jstring = Js.js_string Js.t

Expand All @@ -17,6 +63,8 @@ let _ =
from the web on demand when one select a language in the playground.
old: Parsing_init.init ();
*)
fix_libyaml_field_offsets_for_wasm ();

Js.export_all
(object%js
(*
Expand Down
5 changes: 5 additions & 0 deletions js/engine/core.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,8 @@ function unix_environment() {
function get_jsoo_mount_point() {
return jsoo_mount_point;
}

// Overwrites, in place, the offset stored in a ctypes field descriptor.
// NOTE(review): slot 2 is presumably where js_of_ocaml stores the offset
// member of the ctypes field record - confirm against the ctypes field type.
//Provides: override_yaml_ctypes_field_offset
function override_yaml_ctypes_field_offset(field, newOffset) {
  var OFFSET_SLOT = 2;
  field[OFFSET_SLOT] = newOffset;
}
39 changes: 39 additions & 0 deletions js/examples/yaml.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
<html>
  <body>
    <pre id="result">Loading...</pre>

    <!-- The engine bundle is CommonJS: give it a `module` object to fill in. -->
    <script>
      window.module = { exports: {} };
    </script>
    <script src="../engine/dist/index.cjs"></script>
    <script>
      window.EngineFactory = module.exports.EngineFactory;
    </script>
    <script>
      // Single rule matching the mapping entry `foo: <anything>`.
      const rules = JSON.stringify({
        rules: [
          {
            id: "test",
            languages: ["yaml"],
            message: "test",
            pattern: "foo: $Y",
            severity: "ERROR",
          },
        ],
      });

      (async () => {
        const output = document.getElementById("result");
        try {
          const engine = await EngineFactory();

          engine.writeFile("/static/rules.json", rules);
          engine.writeFile("/static/test.py", `foo: bar`);

          const findings = JSON.parse(
            engine.execute("yaml", "/static/rules.json", "/static/test.py")
          );
          // textContent, not innerHTML: the JSON is plain text and must not be
          // interpreted as markup.
          output.textContent = JSON.stringify(findings, null, 2);
        } catch (err) {
          // Surface failures instead of leaving "Loading..." on screen forever.
          output.textContent = String(err);
        }
      })();
    </script>
  </body>
</html>
2 changes: 1 addition & 1 deletion js/languages/shared/Makefile.include
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ COMMON_TS_EXPORTED_FUNCTIONS = _malloc,_free,_ts_parser_new,_ts_parser_set_langu

# Default emcc flags. EXPORTED_RUNTIME_METHODS must be given only once; the
# list includes UTF8ToString so that JS glue code can decode C strings.
EMCC_DEFAULTS = \
  -sALLOW_MEMORY_GROWTH=1 \
  -sEXPORTED_RUNTIME_METHODS=UTF8ToString,AsciiToString,stringToAscii,stringToUTF8,getValue,setValue \
  -sMODULARIZE

SEMGREP_TS_LANG ?= $(SEMGREP_LANG)
Expand Down
1 change: 0 additions & 1 deletion js/libpcre/runtime.js
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ function pcre_ocaml_init() {
// Returns the PCRE version string as a JavaScript string.
function pcre_version_stub() {
  // pcre_version() hands back a pointer to storage owned by the library, so
  // it must not be released with free().
  var versionPtr = libpcre._pcre_version();
  return libpcre.UTF8ToString(versionPtr);
}

Expand Down
6 changes: 0 additions & 6 deletions js/libyaml/ctypes.js
Original file line number Diff line number Diff line change
Expand Up @@ -389,12 +389,6 @@ function ctypes_use(x) {
caml_failwith("ctypes: ctypes_use not implemented");
}

// Placeholder for the ctypes_string_of_cstring primitive: it does no
// conversion and unconditionally fails through caml_failwith.
//Provides: ctypes_string_of_cstring
//Requires: caml_failwith
function ctypes_string_of_cstring(x) {
caml_failwith("ctypes: ctypes_string_of_cstring not implemented");
}

//Provides: ctypes_cstring_of_string
//Requires: caml_failwith
function ctypes_cstring_of_string(x) {
Expand Down
25 changes: 17 additions & 8 deletions js/libyaml/runtime.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,14 @@ function ctypes_write(primType, v, buffer) {
}
}

// Decodes the C string referenced by `ptr` and returns it as an OCaml string.
// The address is read from slot 2 of the pointer value.
//Provides: ctypes_string_of_cstring
//Requires: libyaml, caml_string_of_jsstring
function ctypes_string_of_cstring(ptr) {
  var address = ptr[2];
  var decoded = libyaml.UTF8ToString(address);
  return caml_string_of_jsstring(decoded);
}

//Provides: ctypes_read
//Requires: libyaml
//Requires: libyaml, UInt32
function ctypes_read(primType, buffer) {
switch (primType) {
case 0:
Expand All @@ -41,21 +47,24 @@ function ctypes_read(primType, buffer) {
case 13: // Ctypes_Size_t
return libyaml.getValue(buffer[2], "i32");
case 20: // Ctypes_Uint32_t
return libyaml.getValue(buffer[2], "i32") >>> 0;
return new UInt32(libyaml.getValue(buffer[2], "i32"));
default:
throw new Error(`how to read prim ${primType}`);
throw new Error(`Don't know how to read prim ${primType}`);
}
}

// Reads the 32-bit value stored at the address held in slot 2 of `ptr`,
// i.e. dereferences the pointer rather than returning its own address.
//Provides: ctypes_read_pointer
//Requires: libyaml
function ctypes_read_pointer(ptr) {
  return libyaml.getValue(ptr[2], "i32");
}

// Returns libyaml's version string converted to an OCaml string. The raw
// JavaScript string produced by UTF8ToString is passed through
// caml_string_of_jsstring before being returned.
//Provides: yaml_stub_1_yaml_get_version_string const
//Requires: libyaml, caml_string_of_jsstring
function yaml_stub_1_yaml_get_version_string() {
  return caml_string_of_jsstring(
    libyaml.UTF8ToString(libyaml._yaml_get_version_string())
  );
}

//Provides: yaml_stub_2_yaml_get_version
Expand Down Expand Up @@ -87,8 +96,8 @@ function yaml_stub_5_yaml_parser_delete(parser_ptr) {
//Requires: libyaml
// Forwards to libyaml's yaml_parser_set_input_string with exactly three
// arguments: the parser address, the input buffer address (both read from
// slot 2 of their pointer values) and the input size.
function yaml_stub_6_yaml_parser_set_input_string(parser_ptr, input_ptr, size) {
  libyaml._yaml_parser_set_input_string(
    parser_ptr[2],
    input_ptr[2],
    size.value
  );
}
Expand Down
4 changes: 3 additions & 1 deletion js/libyaml/runtime.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@ const EXPECTED_LIBYAML_VERSION = [0, 1, 7];
const EXPECTED_LIBYAML_VERSION_STRING = "0.1.7";
const SIZEOF_YAML_PARSER_T = 248;
const SIZEOF_YAML_EVENT_T = 56;
// Declared exactly once: a second `const` with the same name is a SyntaxError.
const TEST_YAML_STRING = "foo: bar";
// Presumably the event type codes expected, in order, for TEST_YAML_STRING.
const EXPECTED_EVENT_TYPE_STREAM = [1, 3, 9, 6, 6, 10, 4, 2];

globalThis.caml_string_of_jsstring = (x) => x;

describe("libyaml", () => {
const libyamlPromise = LibYamlFactory();

Expand Down
3 changes: 2 additions & 1 deletion src/parsing/yaml_to_generic.ml
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,8 @@ let mk_bracket
_;
} ) v env =
(* The end index needs to be adjusted by one because the token is off *)
let e_index' = e_index - 1 in
(* TODO: figure out why we get an off-by-one with jsoo *)
let e_index' = if !Common.jsoo then e_index else e_index - 1 in
let e_line, e_column =
match env.charpos_to_pos with
| None -> (e_line, e_column)
Expand Down