Generated Pread code is large/slow #53

jrmuizel · 2018-05-02T03:01:40Z

The following rust code:

#[derive(Pread, Pwrite, IOread, IOwrite)]
struct Data {
    id: u32,
    x: u32,
    y: u32,
    z1: u32,
}

#[inline(never)]
fn make(input: &[u8]) -> Result<Data, scroll::Error> {
    let data = input.pread_with::<Data>(0, LE);
    data
}

fn main (){
    let bytes = Vec::new();
    make(&bytes).unwrap();
}

compiles to:

pread_example::make:
 push    rbp
 mov     rbp, rsp
 mov     r8d, 1
 test    rdx, rdx
 je      LBB19_4
 cmp     rdx, 4
 jae     LBB19_5
 mov     esi, 4
 xor     eax, eax
LBB19_3:
 xor     ecx, ecx
 jmp     LBB19_9
LBB19_4:
 xor     esi, esi
 mov     eax, 1
 xor     ecx, ecx
 jmp     LBB19_8
LBB19_5:
 cmp     rdx, 4
 jne     LBB19_11
 xor     ecx, ecx
 mov     esi, 4
LBB19_7:
 mov     eax, 1
LBB19_8:
LBB19_9:
LBB19_10:
 or      rax, rcx
 mov     dword, ptr, [rdi], r8d
 mov     dword, ptr, [rdi, +, 4], r9d
 mov     qword, ptr, [rdi, +, 8], rax
 mov     qword, ptr, [rdi, +, 16], rsi
 mov     qword, ptr, [rdi, +, 24], rdx
 pop     rbp
 ret
LBB19_11:
 lea     r9, [rdx, -, 4]
 cmp     r9, 4
 jae     LBB19_13
LBB19_12:
 mov     esi, 4
 xor     eax, eax
 xor     ecx, ecx
 mov     rdx, r9
 jmp     LBB19_9
LBB19_13:
 cmp     rdx, 9
 jb      LBB19_18
 lea     r9, [rdx, -, 8]
 cmp     r9, 4
 jb      LBB19_12
 cmp     rdx, 13
 jb      LBB19_19
 add     rdx, -12
 xor     eax, eax
 cmp     rdx, 4
 jae     LBB19_20
 mov     esi, 4
 jmp     LBB19_3
LBB19_18:
 xor     ecx, ecx
 mov     esi, 8
 jmp     LBB19_7
LBB19_19:
 xor     ecx, ecx
 mov     esi, 12
 jmp     LBB19_7
LBB19_20:
 mov     r9d, dword, ptr, [rsi]
 mov     eax, dword, ptr, [rsi, +, 4]
 mov     ecx, dword, ptr, [rsi, +, 8]
 shl     rcx, 32
 mov     esi, dword, ptr, [rsi, +, 12]
 xor     r8d, r8d
 jmp     LBB19_10

It would be nice if make() compiled to a single length check and a single memcpy for all of the fields. We did some work on improving bincode's deserialization code for WebRender and I have a rough idea for how to fix this.

cc: @mystor, @gankro, @mstange

regexident · 2018-08-08T12:05:52Z

[…] I have a rough idea for how to fix this.

@jrmuizel Any chance for a brief write-up of this so others could take it up?

jrmuizel · 2018-08-10T21:20:30Z

Sorry for taking so long to respond.

So there are some different approaches to this. The simplest is to add unsafe trait with a method like size() -> Option. This would return Some(size) for structs that have a fixed size serialization. The generated deserialization code can then look something like:

if Some(size) = self.size() {
  // length check
  unsafe {
    field1 = unchecked_deserialize();
    field2 = unchecked_deserialize();
  }
  ...
} else {
 ... current deserialization code
}

If we want to handle structs that have different serialized sizes efficiently than a more complicated strategy is needed like: https://github.com/jrmuizel/fast-serialization

Let me know if you want more details or have other questions.

m4b · 2021-12-14T05:29:13Z

not sure what's actionable here; please reopen if have other ideas or what specifically could be done scroll side, or whether issue needs to be filed upstream

m4b transferred this issue from m4b/scroll_derive Sep 6, 2019

m4b closed this as completed Dec 14, 2021

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Generated Pread code is large/slow #53

Generated Pread code is large/slow #53

jrmuizel commented May 2, 2018

regexident commented Aug 8, 2018 •

edited

jrmuizel commented Aug 10, 2018

m4b commented Dec 14, 2021

Generated Pread code is large/slow #53

Generated Pread code is large/slow #53

Comments

jrmuizel commented May 2, 2018

regexident commented Aug 8, 2018 • edited

jrmuizel commented Aug 10, 2018

m4b commented Dec 14, 2021

regexident commented Aug 8, 2018 •

edited