optimize from_page_table_indices #456

Freax13 · 2024-02-09T06:53:04Z

The new code behaves exactly the same as the old code, but LLVM can optimize it much better. In particular, the bounds checks previously implied through `set_bits` and `VirtAddr::new` can now be elided. This also increases the chances of these functions being inlined.

The generated assembly for the old implementation looks like this:

; Compiler output for the OLD implementation (Intel operand order: dst, src).
; In:   four 16-bit values in di, si, dx, cx -- called in0..in3 below
;       (presumably the four page-table indices, highest level first;
;       confirm against the Rust signature).
; Out:  rax = in0 << 39 | in1 << 30 | in2 << 21 | in3 << 12, sign-extended
;       from bit 47, low 12 bits cleared.
; Fail: branches to .LBB48_8 (core::panicking::panic) if any input is >= 512.
x86_64::structures::paging::page::Page::from_page_table_indices:
 push    rax                     ; reserve one 8-byte stack slot (written at .LBB48_9)
 movzx   edi, di                 ; zero-extend in0
 cmp     edi, 512
 jae     .LBB48_8                ; in0 >= 512 (unsigned) -> panic
 movzx   esi, si                 ; zero-extend in1
 cmp     esi, 512
 jae     .LBB48_8                ; in1 >= 512 -> panic
 movzx   edx, dx                 ; zero-extend in2
 cmp     edx, 512
 jae     .LBB48_8                ; in2 >= 512 -> panic
 movzx   eax, cx                 ; zero-extend in3 into rax
 cmp     eax, 512
 jae     .LBB48_8                ; in3 >= 512 -> panic
 mov     rcx, rdi                ; work on a copy; edi is tested again below
 shl     rcx, 39                 ; rcx = in0 << 39
 shl     rsi, 30                 ; rsi = in1 << 30
 or      rsi, rcx                ; rsi = in0 << 39 | in1 << 30
 shl     rdx, 21                 ; rdx = in2 << 21
 shl     rax, 12                 ; rax = in3 << 12
 or      rax, rdx
 or      rax, rsi                ; rax = all four fields combined (bits 12..47)
 shr     edi, 8                  ; edi = in0 >> 8; in0 < 512 here, so this is 0 or 1
                                 ; and equals bit 47 of rax
 je      .LBB48_7                ; bit 47 clear
 cmp     edi, 1
 jne     .LBB48_9                ; only taken if edi > 1, which the range check on
                                 ; in0 above already rules out
 shl     rax, 16
 sar     rax, 16                 ; sign-extend bit 47 into bits 48..63
.LBB48_7:
 shl     rax, 16
 sar     rax, 16                 ; sign-extend from bit 47 (no further change when the
                                 ; pair above already ran)
 and     rax, -4096              ; clear the low 12 bits (already zero here: every
                                 ; field was shifted left by at least 12)
 pop     rcx                     ; release the stack slot; popped value is unused
 ret
; Panic path for an out-of-range input. The mangled callee name decodes to
; core::panicking::panic. Nothing follows the call, so it is presumably
; non-returning.
.LBB48_8:
 lea     rdi, [rip, +, .L__unnamed_46]
 lea     rdx, [rip, +, .L__unnamed_15]
 mov     esi, 33                 ; presumably the panic message length -- TODO confirm
 call    qword, ptr, [rip, +, _ZN4core9panicking5panic17h87fd92496103e3b8E@GOTPCREL]
; Failure path reached from the `jne` above. The mangled callee name decodes
; to core::result::unwrap_failed.
.LBB48_9:
 mov     qword, ptr, [rsp], rax  ; spill the combined value into the reserved slot
 lea     rdi, [rip, +, .L__unnamed_9]
 lea     rcx, [rip, +, .L__unnamed_10]
 lea     r8, [rip, +, .L__unnamed_11]
 mov     rdx, rsp                ; pass the address of the spilled value
 mov     esi, 74                 ; presumably the message length -- TODO confirm
 call    qword, ptr, [rip, +, _ZN4core6result13unwrap_failed17hc28f4ee1a6255957E@GOTPCREL]

The generated assembly for the new implementation looks like this:

; Compiler output for the NEW implementation (Intel operand order: dst, src).
; In:   four values in rdi/di, si, dx, cx -- called in0..in3 below
;       (presumably the four page-table indices, highest level first;
;       confirm against the Rust signature).
; Out:  rax = the four fields packed at bit offsets 39, 30, 21 and 12, with
;       bit 47 sign-extended into bits 48..63.
; This listing has no compares, branches, calls or stack use.
; Each field is shifted 16 bits further left than its final position
; (55 = 39 + 16, 46 = 30 + 16, 37 = 21 + 16, 28 = 12 + 16), so the single
; arithmetic right shift at the end both positions and sign-extends.
x86_64::structures::paging::page::Page::from_page_table_indices:
 shl     rdi, 55                 ; in0 to bits 55..63; anything above its low 9 bits
                                 ; is shifted out, so no zero-extension is needed
 movzx   esi, si                 ; zero-extend in1
 shl     rsi, 46                 ; in1 to bit 46 and up
 or      rsi, rdi                ; rsi = in0 and in1 fields
 movzx   edx, dx                 ; zero-extend in2
 shl     rdx, 37                 ; in2 to bit 37 and up
 movzx   eax, cx                 ; zero-extend in3 into rax
 shl     rax, 28                 ; in3 to bit 28 and up
 or      rax, rdx
 or      rax, rsi                ; rax = all four fields, 16 bits too high
 sar     rax, 16                 ; arithmetic shift: fields drop to 39/30/21/12 and the
                                 ; top bit is copied into bits 48..63; the low 12 bits
                                 ; end up zero because the lowest field started at bit 28
 ret

The new code behaves exactly the same as the old code, but can be much better optimized by LLVM. In particular the bounds checks previously implied through `set_bits` and `VirtAddr::new` can now be elided. This also increases the chances of these functions being inlined.

The generated assembly for the old implementation looks like this:

```asm
x86_64::structures::paging::page::Page::from_page_table_indices:
 push    rax
 movzx   edi, di
 cmp     edi, 512
 jae     .LBB48_8
 movzx   esi, si
 cmp     esi, 512
 jae     .LBB48_8
 movzx   edx, dx
 cmp     edx, 512
 jae     .LBB48_8
 movzx   eax, cx
 cmp     eax, 512
 jae     .LBB48_8
 mov     rcx, rdi
 shl     rcx, 39
 shl     rsi, 30
 or      rsi, rcx
 shl     rdx, 21
 shl     rax, 12
 or      rax, rdx
 or      rax, rsi
 shr     edi, 8
 je      .LBB48_7
 cmp     edi, 1
 jne     .LBB48_9
 shl     rax, 16
 sar     rax, 16
.LBB48_7:
 shl     rax, 16
 sar     rax, 16
 and     rax, -4096
 pop     rcx
 ret
.LBB48_8:
 lea     rdi, [rip, +, .L__unnamed_46]
 lea     rdx, [rip, +, .L__unnamed_15]
 mov     esi, 33
 call    qword, ptr, [rip, +, _ZN4core9panicking5panic17h87fd92496103e3b8E@GOTPCREL]
.LBB48_9:
 mov     qword, ptr, [rsp], rax
 lea     rdi, [rip, +, .L__unnamed_9]
 lea     rcx, [rip, +, .L__unnamed_10]
 lea     r8, [rip, +, .L__unnamed_11]
 mov     rdx, rsp
 mov     esi, 74
 call    qword, ptr, [rip, +, _ZN4core6result13unwrap_failed17hc28f4ee1a6255957E@GOTPCREL]
```

The generated assembly for the new implementation looks like this:

```asm
x86_64::structures::paging::page::Page::from_page_table_indices:
 shl     rdi, 55
 movzx   esi, si
 shl     rsi, 46
 or      rsi, rdi
 movzx   edx, dx
 shl     rdx, 37
 movzx   eax, cx
 shl     rax, 28
 or      rax, rdx
 or      rax, rsi
 sar     rax, 16
 ret
```

josephlr

Nice Fix!

josephlr approved these changes Feb 10, 2024

View reviewed changes

josephlr merged commit f1cb4a0 into rust-osdev:master Feb 10, 2024
12 checks passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

optimize from_page_table_indices #456

optimize from_page_table_indices #456

Freax13 commented Feb 9, 2024

josephlr left a comment

optimize from_page_table_indices #456

optimize from_page_table_indices #456

Conversation

Freax13 commented Feb 9, 2024

josephlr left a comment

Choose a reason for hiding this comment