diff --git a/crates/cli-support/src/js/js2rust.rs b/crates/cli-support/src/js/js2rust.rs index 1edf41a17aa..4ecf0ed1504 100644 --- a/crates/cli-support/src/js/js2rust.rs +++ b/crates/cli-support/src/js/js2rust.rs @@ -392,7 +392,11 @@ impl<'a, 'b> Js2Rust<'a, 'b> { if arg.is_ref_anyref() { self.js_arguments.push((name.clone(), "any".to_string())); self.cx.expose_borrowed_objects(); - self.finally("stack.pop();"); + self.cx.expose_global_stack_pointer(); + // the "stack-ful" nature means that we're always popping from the + // stack, and make sure that we actually clear our reference to + // allow stale values to get GC'd + self.finally("heap[stack_pointer++] = undefined;"); self.rust_arguments .push(format!("addBorrowedObject({})", name)); return Ok(self); diff --git a/crates/cli-support/src/js/mod.rs b/crates/cli-support/src/js/mod.rs index 99698f40203..9dc6c3ee1a4 100644 --- a/crates/cli-support/src/js/mod.rs +++ b/crates/cli-support/src/js/mod.rs @@ -114,7 +114,9 @@ enum Import<'a> { }, } -const INITIAL_SLAB_VALUES: &[&str] = &["undefined", "null", "true", "false"]; +const INITIAL_HEAP_VALUES: &[&str] = &["undefined", "null", "true", "false"]; +// Must be kept in sync with `src/lib.rs` of the `wasm-bindgen` crate +const INITIAL_HEAP_OFFSET: usize = 32; impl<'a> Context<'a> { fn export(&mut self, name: &str, contents: &str, comments: Option) { @@ -168,44 +170,20 @@ impl<'a> Context<'a> { self.write_classes()?; self.bind("__wbindgen_object_clone_ref", &|me| { - me.expose_add_heap_object(); me.expose_get_object(); - let bump_cnt = if me.config.debug { - String::from( - " - if (typeof(val) === 'number') throw new Error('corrupt slab'); - val.cnt += 1; - ", - ) - } else { - String::from("val.cnt += 1;") - }; - Ok(format!( + me.expose_add_heap_object(); + Ok(String::from( + " + function(idx) { + return addHeapObject(getObject(idx)); + } " - function(idx) {{ - // If this object is on the stack promote it to the heap. 
- if ((idx & 1) === 1) return addHeapObject(getObject(idx)); - - // Otherwise if the object is on the heap just bump the - // refcount and move on - const val = slab[idx >> 1]; - {} - return idx; - }} - ", - bump_cnt )) })?; self.bind("__wbindgen_object_drop_ref", &|me| { me.expose_drop_ref(); - Ok(String::from( - " - function(i) { - dropRef(i); - } - ", - )) + Ok(String::from("function(i) { dropObject(i); }")) })?; self.bind("__wbindgen_string_new", &|me| { @@ -222,13 +200,7 @@ impl<'a> Context<'a> { self.bind("__wbindgen_number_new", &|me| { me.expose_add_heap_object(); - Ok(String::from( - " - function(i) { - return addHeapObject(i); - } - ", - )) + Ok(String::from("function(i) { return addHeapObject(i); }")) })?; self.bind("__wbindgen_number_get", &|me| { @@ -370,7 +342,7 @@ impl<'a> Context<'a> { " function(i) { const obj = getObject(i).original; - dropRef(i); + dropObject(i); if (obj.cnt-- == 1) { obj.a = 0; return 1; @@ -383,7 +355,7 @@ impl<'a> Context<'a> { self.bind("__wbindgen_cb_forget", &|me| { me.expose_drop_ref(); - Ok("dropRef".to_string()) + Ok("dropObject".to_string()) })?; self.bind("__wbindgen_json_parse", &|me| { @@ -427,14 +399,7 @@ impl<'a> Context<'a> { self.bind("__wbindgen_memory", &|me| { me.expose_add_heap_object(); let mem = me.memory(); - Ok(format!( - " - function() {{ - return addHeapObject({}); - }} - ", - mem - )) + Ok(format!("function() {{ return addHeapObject({}); }}", mem)) })?; self.bind("__wbindgen_module", &|me| { @@ -916,149 +881,54 @@ impl<'a> Context<'a> { if !self.exposed_globals.insert("drop_ref") { return; } - self.expose_global_slab(); - self.expose_global_slab_next(); - let validate_owned = if self.config.debug { - String::from( - " - if ((idx & 1) === 1) throw new Error('cannot drop ref of stack objects'); - ", - ) - } else { - String::new() - }; - let dec_ref = if self.config.debug { - String::from( - " - if (typeof(obj) === 'number') throw new Error('corrupt slab'); - obj.cnt -= 1; - if (obj.cnt > 0) return; - ", 
- ) - } else { - String::from( - " - obj.cnt -= 1; - if (obj.cnt > 0) return; - ", - ) - }; + self.expose_global_heap(); + self.expose_global_heap_next(); + + // Note that here we check if `idx` shouldn't actually be dropped. This + // is due to the fact that `JsValue::null()` and friends can be passed + // by value to JS where we'll automatically call this method. Those + // constants, however, cannot be dropped. See #1054 for removing this + // branch. + // + // Otherwise the free operation here is pretty simple, just appending to + // the linked list of heap slots that are free. self.global(&format!( " - function dropRef(idx) {{ - {} - idx = idx >> 1; + function dropObject(idx) {{ if (idx < {}) return; - let obj = slab[idx]; - {} - // If we hit 0 then free up our space in the slab - slab[idx] = slab_next; - slab_next = idx; + heap[idx] = heap_next; + heap_next = idx; }} ", - validate_owned, - INITIAL_SLAB_VALUES.len(), - dec_ref + INITIAL_HEAP_OFFSET + INITIAL_HEAP_VALUES.len(), )); } - fn expose_global_stack(&mut self) { - if !self.exposed_globals.insert("stack") { + fn expose_global_heap(&mut self) { + if !self.exposed_globals.insert("heap") { return; } - self.global(&format!( - " - const stack = []; - " - )); - if self.config.debug { - self.export( - "assertStackEmpty", - " - function() { - if (stack.length === 0) return; - throw new Error('stack is not currently empty'); - } - ", - None, - ); - } + self.global(&format!("const heap = new Array({});", INITIAL_HEAP_OFFSET)); + self.global(&format!("heap.push({});", INITIAL_HEAP_VALUES.join(", "))); } - fn expose_global_slab(&mut self) { - if !self.exposed_globals.insert("slab") { + fn expose_global_heap_next(&mut self) { + if !self.exposed_globals.insert("heap_next") { return; } - let initial_values = INITIAL_SLAB_VALUES - .iter() - .map(|s| format!("{{ obj: {} }}", s)) - .collect::>(); - self.global(&format!("const slab = [{}];", initial_values.join(", "))); - if self.config.debug { - self.export( - 
"assertSlabEmpty", - &format!( - " - function() {{ - for (let i = {}; i < slab.length; i++) {{ - if (typeof(slab[i]) === 'number') continue; - throw new Error('slab is not currently empty'); - }} - }} - ", - initial_values.len() - ), - None, - ); - } - } - - fn expose_global_slab_next(&mut self) { - if !self.exposed_globals.insert("slab_next") { - return; - } - self.expose_global_slab(); - self.global( - " - let slab_next = slab.length; - ", - ); + self.expose_global_heap(); + self.global("let heap_next = heap.length;"); } fn expose_get_object(&mut self) { if !self.exposed_globals.insert("get_object") { return; } - self.expose_global_stack(); - self.expose_global_slab(); + self.expose_global_heap(); - let get_obj = if self.config.debug { - String::from( - " - if (typeof(val) === 'number') throw new Error('corrupt slab'); - return val.obj; - ", - ) - } else { - String::from( - " - return val.obj; - ", - ) - }; - self.global(&format!( - " - function getObject(idx) {{ - if ((idx & 1) === 1) {{ - return stack[idx >> 1]; - }} else {{ - const val = slab[idx >> 1]; - {} - }} - }} - ", - get_obj - )); + // Accessing a heap object is just a simple index operation due to how + // the stack/heap are laid out. + self.global("function getObject(idx) { return heap[idx]; }"); } fn expose_assert_num(&mut self) { @@ -1510,18 +1380,32 @@ impl<'a> Context<'a> { ); } + fn expose_global_stack_pointer(&mut self) { + if !self.exposed_globals.insert("stack_pointer") { + return; + } + self.global(&format!("let stack_pointer = {};", INITIAL_HEAP_OFFSET)); + } + fn expose_borrowed_objects(&mut self) { if !self.exposed_globals.insert("borrowed_objects") { return; } - self.expose_global_stack(); + self.expose_global_heap(); + self.expose_global_stack_pointer(); + // Our `stack_pointer` points to where we should start writing stack + // objects, and the `stack_pointer` is incremented in a `finally` block + // after executing this. Once we've reserved stack space we write the + // value. 
Eventually underflow will throw an exception, but JS sort of + // just handles it today... self.global( " function addBorrowedObject(obj) { - stack.push(obj); - return ((stack.length - 1) << 1) | 1; + if (stack_pointer == 1) throw new Error('out of js stack'); + heap[--stack_pointer] = obj; + return stack_pointer; } - ", + " ); } @@ -1535,7 +1419,7 @@ impl<'a> Context<'a> { " function takeObject(idx) { const ret = getObject(idx); - dropRef(idx); + dropObject(idx); return ret; } ", @@ -1546,34 +1430,34 @@ impl<'a> Context<'a> { if !self.exposed_globals.insert("add_heap_object") { return; } - self.expose_global_slab(); - self.expose_global_slab_next(); - let set_slab_next = if self.config.debug { + self.expose_global_heap(); + self.expose_global_heap_next(); + let set_heap_next = if self.config.debug { String::from( " - if (typeof(next) !== 'number') throw new Error('corrupt slab'); - slab_next = next; + if (typeof(heap_next) !== 'number') throw new Error('corrupt heap'); ", ) } else { - String::from( - " - slab_next = next; - ", - ) + String::new() }; + + // Allocating a slot on the heap first goes through the linked list + // (starting at `heap_next`). Once that linked list is exhausted we'll + // be pointing beyond the end of the array, at which point we'll reserve + // one more slot and use that. 
self.global(&format!( " function addHeapObject(obj) {{ - if (slab_next === slab.length) slab.push(slab.length + 1); - const idx = slab_next; - const next = slab[idx]; + if (heap_next === heap.length) heap.push(heap.length + 1); + const idx = heap_next; + heap_next = heap[idx]; {} - slab[idx] = {{ obj, cnt: 1 }}; - return idx << 1; + heap[idx] = obj; + return idx; }} ", - set_slab_next + set_heap_next )); } diff --git a/guide/src/contributing/design/js-objects-in-rust.md b/guide/src/contributing/design/js-objects-in-rust.md index 7729c066d6e..7408e62c301 100644 --- a/guide/src/contributing/design/js-objects-in-rust.md +++ b/guide/src/contributing/design/js-objects-in-rust.md @@ -5,18 +5,21 @@ around JS objects in wasm, but that's not allowed today! While indeed true, that's where the polyfill comes in. The question here is how we shoehorn JS objects into a `u32` for wasm to use. -The current strategy for this approach is to maintain two module-local variables -in the generated `foo.js` file: a stack and a heap. +The current strategy for this approach is to maintain a module-local variable +in the generated `foo.js` file: a `heap`. -### Temporary JS objects on the stack +### Temporary JS objects on the "stack" -The stack in `foo.js` is, well, a stack. JS objects are pushed on the top of the -stack, and their index in the stack is the identifier that's passed to wasm. JS -objects are then only removed from the top of the stack as well. This data -structure is mainly useful for efficiently passing a JS object into wasm without -a sort of "heap allocation". The downside of this, however, is that it only -works for when wasm doesn't hold onto a JS object (aka it only gets a -"reference" in Rust parlance). +The first slots in the `heap` in `foo.js` are considered a stack. This stack, +like typical program execution stacks, grows down. JS objects are pushed on the +bottom of the stack, and their index in the stack is the identifier that's passed +to wasm. 
A stack pointer is maintained to figure out where the next item is +pushed. + +JS objects are then only removed from the bottom of the stack as well. Removal +is simply storing `undefined` then incrementing the stack pointer. Because of the "stack-y" +nature of this scheme it only works when wasm doesn't hold onto a JS object +(aka it only gets a "reference" in Rust parlance). Let's take a look at an example. @@ -47,11 +50,14 @@ and what we actually generate looks something like: // foo.js import * as wasm from './foo_bg'; -const stack = []; +const heap = new Array(32); +heap.push(undefined, null, true, false); +let stack_pointer = 32; function addBorrowedObject(obj) { - stack.push(obj); - return stack.length - 1; + stack_pointer -= 1; + heap[stack_pointer] = obj; + return stack_pointer; } export function foo(arg0) { @@ -59,7 +65,7 @@ export function foo(arg0) { try { wasm.foo(idx0); } finally { - stack.pop(); + heap[stack_pointer++] = undefined; } } ``` @@ -68,13 +74,13 @@ Here we can see a few notable points of action: * The wasm file was renamed to `foo_bg.wasm`, and we can see how the JS module generated here is importing from the wasm file. -* Next we can see our `stack` module variable which is used to push/pop items - from the stack. +* Next we can see our `heap` module variable which is used to store all JS values + reference-able from wasm. * Our exported function `foo`, takes an arbitrary argument, `arg0`, which is converted to an index with the `addBorrowedObject` object function. The index is then passed to wasm so wasm can operate with it. * Finally, we have a `finally` which frees the stack slot as it's no longer - used, issuing a `pop` for what was pushed at the start of the function. + used, popping the value that was pushed at the start of the function. It's also helpful to dig into the Rust side of things to see what's going on there! 
Let's take a look at the code that `#[wasm_bindgen]` generates in Rust: @@ -104,12 +110,13 @@ And as with the JS, the notable points here are: in a `JsValue`. There's some trickery here that's not worth going into just yet, but we'll see in a bit what's happening under the hood. -### Long-lived JS objects in a slab +### Long-lived JS objects The above strategy is useful when JS objects are only temporarily used in Rust, for example only during one function call. Sometimes, though, objects may have a dynamic lifetime or otherwise need to be stored on Rust's heap. To cope with -this there's a second half of management of JS objects, a slab. +this there's a second half of management of JS objects, naturally corresponding +to the other side of the JS `heap` array. JS Objects passed to wasm that are not references are assumed to have a dynamic lifetime inside of the wasm module. As a result the strict push/pop of the stack @@ -135,16 +142,16 @@ different. Let's see the generated JS's slab in action: ```js import * as wasm from './foo_bg'; // imports from wasm file -const slab = []; -let slab_next = 0; +const heap = new Array(32); +heap.push(undefined, null, true, false); +let heap_next = 36; function addHeapObject(obj) { - if (slab_next === slab.length) - slab.push(slab.length + 1); - const idx = slab_next; - const next = slab[idx]; - slab_next = next; - slab[idx] = { obj, cnt: 1 }; + if (heap_next === heap.length) + heap.push(heap.length + 1); + const idx = heap_next; + heap_next = heap[idx]; + heap[idx] = obj; return idx; } @@ -154,24 +161,17 @@ export function foo(arg0) { } export function __wbindgen_object_drop_ref(idx) { - let obj = slab[idx]; - obj.cnt -= 1; - if (obj.cnt > 0) - return; - // If we hit 0 then free up our space in the slab - slab[idx] = slab_next; - slab_next = idx; + heap[idx ] = heap_next; + heap_next = idx; } ``` Unlike before we're now calling `addHeapObject` on the argument to `foo` rather -than `addBorrowedObject`. 
This function will use `heap` and `heap_next` as a slab allocator to acquire a slot to store the object, placing a structure there -once it's found. - -Note here that a reference count is used in addition to storing the object. -That's so we can create multiple references to the JS object in Rust without -using `Rc`, but it's overall not too important to worry about here. +once it's found. Note that this goes in the right half of the array, unlike +the stack, which resides in the left half. This discipline mirrors the stack/heap +in normal programs, roughly. Another curious aspect of this generated module is the `__wbindgen_object_drop_ref` function. This is one that's actually imported from @@ -229,10 +229,9 @@ If you'll recall as well, when we took `&JsValue` above we generated a wrapper of `ManuallyDrop` around the local binding, and that's because we wanted to avoid invoking this destructor when the object comes from the stack. -### Indexing both a slab and the stack +### Working with `heap` in reality -You might be thinking at this point that this system may not work! There's -indexes into both the slab and the stack mixed up, but how do we differentiate? -It turns out that the examples above have been simplified a bit, but otherwise -the lowest bit is currently used as an indicator of whether you're a slab or a -stack index. +The above explanations are pretty close to what happens today, but in reality +there are a few differences, especially around handling constant values like +`undefined`, `null`, etc. Be sure to check out the actual generated JS and the +generation code for the full details! 
diff --git a/src/lib.rs b/src/lib.rs index 72e89142d0b..f8a0f12ff03 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -67,11 +67,12 @@ pub struct JsValue { _marker: marker::PhantomData<*mut u8>, // not at all threadsafe } -const JSIDX_UNDEFINED: u32 = 0; -const JSIDX_NULL: u32 = 2; -const JSIDX_TRUE: u32 = 4; -const JSIDX_FALSE: u32 = 6; -const JSIDX_RESERVED: u32 = 8; +const JSIDX_OFFSET: u32 = 32; // keep in sync with js/mod.rs +const JSIDX_UNDEFINED: u32 = JSIDX_OFFSET + 0; +const JSIDX_NULL: u32 = JSIDX_OFFSET + 1; +const JSIDX_TRUE: u32 = JSIDX_OFFSET + 2; +const JSIDX_FALSE: u32 = JSIDX_OFFSET + 3; +const JSIDX_RESERVED: u32 = JSIDX_OFFSET + 4; impl JsValue { /// The `null` JS value constant. @@ -533,13 +534,12 @@ impl Drop for JsValue { #[inline] fn drop(&mut self) { unsafe { - // The first bit indicates whether this is a stack value or not. - // Stack values should never be dropped (they're always in - // `ManuallyDrop`) - debug_assert!(self.idx & 1 == 0); + // We definitely should never drop anything in the stack area + debug_assert!(self.idx >= JSIDX_OFFSET); - // We don't want to drop the first few elements as they're all - // reserved, but everything else is safe to drop. + // Otherwise if we're not dropping one of our reserved values, + // actually call the intrinsic. See #1054 for eventually removing + // this branch. if self.idx >= JSIDX_RESERVED { __wbindgen_object_drop_ref(self.idx); }