Skip to content

Missed optimization: Vec::splice() is not zero-cost #64984

@Vlad-Shcherbina

Description

@Vlad-Shcherbina

To reproduce, compile the following two functions with optimizations enabled (for example, `-C opt-level=3` on rustc 1.38.0):

/// Replaces the contents of `xs` with the single element `1` in two steps:
/// clear the vector, then push the new value.
pub fn one(xs: &mut Vec<i32>) {
    // Drop every existing element.
    xs.clear();
    // Append the one replacement element.
    xs.push(1);
}

/// Replaces the contents of `xs` with the single element `1` using a single
/// `splice` call.
pub fn two(xs: &mut Vec<i32>) {
    // `..` selects the whole vector for removal; `Some(1)` is the replacement
    // sequence (one item). The value returned by `splice` is discarded here.
    xs.splice(.., Some(1));
}

Godbolt

Since they are obviously equivalent, they should compile to comparable code.
Instead, the splice version performs a lot of unnecessary work.

# example::one: optimized code for `one` (clear the vector, then push 1).
# On entry rdi holds `xs`. Field roles are inferred from how this listing
# uses them: [xs] = buffer pointer, [xs + 8] = capacity, [xs + 16] = length.
example::one:
        push    rbx                             # rbx keeps xs live across the call below
        mov     rbx, rdi                        # rbx = xs
        mov     qword ptr [rdi + 16], 0         # length = 0 (the clear)
        cmp     qword ptr [rdi + 8], 0          # capacity == 0?
        je      .LBB0_2                         # yes: a buffer must be allocated first
        mov     rax, qword ptr [rbx]            # rax = buffer pointer
        mov     dword ptr [rax], 1              # buffer[0] = 1
        add     qword ptr [rbx + 16], 1         # length += 1
        pop     rbx
        ret
.LBB0_2:
        # Capacity is zero: call the allocator with arguments (4, 4),
        # presumably size and alignment for a single i32.
        mov     edi, 4
        mov     esi, 4
        call    qword ptr [rip + __rust_alloc@GOTPCREL]
        test    rax, rax                        # allocator returned null?
        je      .LBB0_5                         # yes: report the allocation failure
        mov     qword ptr [rbx], rax            # buffer pointer = new allocation
        mov     qword ptr [rbx + 8], 1          # capacity = 1
        mov     dword ptr [rax], 1              # buffer[0] = 1
        add     qword ptr [rbx + 16], 1         # length += 1
        pop     rbx
        ret
.LBB0_5:
        # Allocation failed: pass the same (4, 4) pair to the error handler.
        mov     edi, 4
        mov     esi, 4
        call    qword ptr [rip + alloc::alloc::handle_alloc_error@GOTPCREL]
        ud2                                     # traps if the call above ever returns

# example::two: optimized code for `two` (xs.splice(.., Some(1))).
# On entry rdi holds `xs`. Field roles are inferred from how this listing
# uses them: [xs] = buffer pointer, [xs + 8] = capacity, [xs + 16] = length.
#
# Stack locals set up at entry (presumably the drain/splice iterator state):
#   [rsp +  0]  old length
#   [rsp +  8]  0 (the only store to this slot in the listing)
#   [rsp + 16]  cursor into the old contents (starts at the buffer start)
#   [rsp + 24]  end of the old contents
#   [rsp + 32]  xs
#   [rsp + 40]  replacement item: low dword is a tag compared against 1,
#               high dword is the value that gets stored into the buffer
example::two:
        push    r15                             # save the callee-saved registers used below
        push    r14
        push    r13
        push    r12
        push    rbx
        sub     rsp, 48                         # reserve the 48 bytes of locals described above
        mov     r15, rdi                        # r15 = xs
        mov     rax, qword ptr [rdi]            # rax = buffer pointer
        mov     rcx, qword ptr [rdi + 16]       # rcx = old length
        mov     qword ptr [rdi + 16], 0         # length = 0
        lea     rbx, [rax + 4*rcx]              # rbx = buffer + 4 * old length (end of old contents)
        mov     qword ptr [rsp], rcx
        mov     qword ptr [rsp + 8], 0
        mov     qword ptr [rsp + 16], rax
        mov     qword ptr [rsp + 24], rbx
        mov     qword ptr [rsp + 32], rdi
        movabs  rdx, 4294967297                 # 0x0000000100000001: tag = 1, value = 1
        mov     qword ptr [rsp + 40], rdx
        test    rcx, rcx                        # old length == 0?
        je      .LBB1_1
        # Non-empty vector: advance the cursor over the old contents.
        # rbx - rax is 4 * old length, so the rounding below changes nothing
        # and r12 ends up equal to rbx.
        mov     rcx, rbx
        sub     rcx, rax                        # rcx = byte size of the old contents
        add     rcx, -4
        and     rcx, -4
        lea     r12, [rcx + rax]
        add     r12, 4                          # r12 = rax + ((size - 4) & -4) + 4
        mov     qword ptr [rsp + 16], r12       # cursor = r12
        mov     rcx, qword ptr [r15 + 8]        # rcx = capacity
        test    rcx, rcx
        je      .LBB1_5                         # capacity == 0: allocate
.LBB1_4:
        # A buffer already exists (rax still holds its pointer).
        xor     ecx, ecx                        # rcx = 0: index where the item will be written
        mov     rdx, qword ptr [rsp + 40]       # rdx = replacement item (tag + value)
        mov     qword ptr [rsp + 40], 0         # zero the slot (tag becomes 0)
        cmp     edx, 1                          # tag == 1?
        je      .LBB1_8                         # yes: store the value
        jmp     .LBB1_10
.LBB1_1:
        # Old length was 0, so the start and end of the old contents coincide.
        mov     r12, rax                        # r12 = buffer start (== rbx here)
        mov     rcx, qword ptr [r15 + 8]        # rcx = capacity
        test    rcx, rcx
        jne     .LBB1_4                         # capacity != 0: reuse the buffer
.LBB1_5:
        # Capacity is zero. rcx is 0 on both paths into this block, so the
        # doubling below yields 0 and cmovne never fires: r13 = 1, r14 = 4.
        add     rcx, rcx                        # rcx = 2 * capacity; sets ZF for cmovne
        mov     r13d, 1                         # r13 = 1 (mov leaves flags untouched)
        cmovne  r13, rcx                        # r13 = 2 * capacity if that is nonzero
        lea     r14, [4*r13]                    # r14 = 4 * r13 bytes
        mov     esi, 4                          # second argument = 4 (presumably alignment)
        mov     rdi, r14                        # first argument = r14 (presumably size)
        call    qword ptr [rip + __rust_alloc@GOTPCREL]
        test    rax, rax                        # allocator returned null?
        je      .LBB1_17                        # yes: report the allocation failure
        mov     qword ptr [r15], rax            # buffer pointer = new allocation
        mov     qword ptr [r15 + 8], r13        # capacity = r13
        mov     rcx, qword ptr [r15 + 16]       # rcx = current length, used as the write index
        mov     rdx, qword ptr [rsp + 40]       # rdx = replacement item (tag + value)
        mov     qword ptr [rsp + 40], 0         # zero the slot (tag becomes 0)
        cmp     edx, 1                          # tag == 1?
        jne     .LBB1_10
.LBB1_8:
        # Store loop. The slot at [rsp + 40] is zeroed immediately before
        # every entry to this block, so the reload below reads 0 and the
        # back-edge is not taken again.
        shr     rdx, 32                         # edx = value (high dword of the item)
        mov     dword ptr [rax + 4*rcx], edx    # buffer[rcx] = value
        add     rcx, 1                          # advance the write index
        mov     rdx, qword ptr [rsp + 40]       # reload the item slot
        mov     qword ptr [rsp + 40], 0
        cmp     edx, 1                          # tag == 1 again?
        je      .LBB1_8
        mov     r12, qword ptr [rsp + 16]       # reload cursor
        mov     rbx, qword ptr [rsp + 24]       # reload end of old contents
.LBB1_10:
        mov     qword ptr [r15 + 16], rcx       # length = number of items written
        cmp     r12, rbx                        # cursor already at the end?
        je      .LBB1_12
        # Same "advance cursor to the end" arithmetic as at function entry.
        sub     rbx, r12
        add     rbx, -4
        and     rbx, -4
        lea     rax, [rbx + r12]
        add     rax, 4                          # rax = r12 + ((rbx - r12 - 4) & -4) + 4
        mov     qword ptr [rsp + 16], rax       # cursor = rax
.LBB1_12:
        # Tail move, skipped when [rsp + 8] is zero. The only store to that
        # slot in this listing writes 0 (at function entry).
        mov     r15, qword ptr [rsp + 8]        # r15 = value of the [rsp + 8] slot
        test    r15, r15
        je      .LBB1_16                        # zero: nothing to move
        mov     rax, qword ptr [rsp]            # rax = old length saved at entry
        mov     r14, qword ptr [rsp + 32]       # r14 = xs
        mov     rbx, qword ptr [r14 + 16]       # rbx = current length
        cmp     rax, rbx
        je      .LBB1_15                        # source and destination index match: no move
        mov     rcx, qword ptr [r14]            # rcx = buffer pointer
        lea     rsi, [rcx + 4*rax]              # src   = buffer + 4 * saved old length
        lea     rdi, [rcx + 4*rbx]              # dst   = buffer + 4 * current length
        lea     rdx, [4*r15]                    # count = 4 * r15 bytes
        call    qword ptr [rip + memmove@GOTPCREL]
.LBB1_15:
        add     rbx, r15
        mov     qword ptr [r14 + 16], rbx       # length = current length + r15
.LBB1_16:
        add     rsp, 48                         # release locals, restore saved registers
        pop     rbx
        pop     r12
        pop     r13
        pop     r14
        pop     r15
        ret
.LBB1_17:
        # Allocation failed: pass the same (r14, 4) pair to the error handler.
        mov     esi, 4
        mov     rdi, r14
        call    qword ptr [rip + alloc::alloc::handle_alloc_error@GOTPCREL]
        ud2                                     # traps if the call above ever returns

Metadata

Metadata

Assignees

No one assigned

    Labels

    - A-collections — Area: `std::collections`
    - C-bug — Category: This is a bug.
    - I-slow — Issue: Problems and improvements with respect to performance of generated code.
    - T-compiler — Relevant to the compiler team, which will review and decide on the PR/issue.
    - T-libs-api — Relevant to the library API team, which will review and decide on the PR/issue.

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions