Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

panic_bounds_check with for on an interval closed on the right #75024

Open
leonardo-m opened this issue Aug 1, 2020 · 0 comments
Open

panic_bounds_check with for on an interval closed on the right #75024

leonardo-m opened this issue Aug 1, 2020 · 0 comments
Labels
C-bug Category: This is a bug. I-slow Issue: Problems and improvements with respect to performance of generated code.

Comments

@leonardo-m
Copy link

This bug report is similar to others but in my opinion sufficiently different. This code:

/// Reproducer (inclusive-range variant).
///
/// Builds a table `ways` of `N + 1` `u32` entries with `ways[0] = 1`, then for
/// every `j` in `1 .. N` adds `ways[i - j]` into `ways[i]` for each `i` in
/// `j ..= N`, and finally returns `ways[N]`.
pub fn e76() -> u32 {
    const N: usize = 100;
    // N + 1 entries, so the valid indices are 0 ..= N.
    let mut ways = [0_u32; N + 1];
    ways[0] = 1;
    for j in 1 .. N {
        // Inclusive upper bound: `i` takes the values j, j + 1, ..., N.
        for i in j ..= N {
            // `i` starts at `j`, so `i - j` does not underflow, and both
            // `i` and `i - j` are at most N.
            ways[i] += ways[i - j];
        }
    }
    ways[N]
}

Compiled with rustc 1.47.0-nightly cfc572cae 2020-07-30, the generated assembly contains a panic_bounds_check:

# Compiler output for the `..=` variant of e76 (x86-64, Intel syntax).
# The array is addressed as [rsp + 4*index + 4]: element 0 is at [rsp + 4]
# and element 100 at [rsp + 404].
example::e76:
        sub     rsp, 408
        # Zero-fill the bytes from rsp + 8 up to rsp + 408 with 32-byte stores
        # (the first two stores overlap).
        vxorps  xmm0, xmm0, xmm0
        vmovups ymmword ptr [rsp + 376], ymm0
        vmovups ymmword ptr [rsp + 360], ymm0
        vmovups ymmword ptr [rsp + 328], ymm0
        vmovups ymmword ptr [rsp + 296], ymm0
        vmovups ymmword ptr [rsp + 264], ymm0
        vmovups ymmword ptr [rsp + 232], ymm0
        vmovups ymmword ptr [rsp + 200], ymm0
        vmovups ymmword ptr [rsp + 168], ymm0
        vmovups ymmword ptr [rsp + 136], ymm0
        vmovups ymmword ptr [rsp + 104], ymm0
        vmovups ymmword ptr [rsp + 72], ymm0
        vmovups ymmword ptr [rsp + 40], ymm0
        vmovups ymmword ptr [rsp + 8], ymm0
        # Element 0 = 1.
        mov     dword ptr [rsp + 4], 1
        # rcx = outer counter, starting at 1; r8 = 100, used by the cmovae clamps below.
        mov     ecx, 1
        mov     r8d, 100
.LBB0_1:
        # rdx = rcx + 1; rax = inner counter, starting at rcx.
        lea     rdx, [rcx + 1]
        mov     rax, rcx
.LBB0_2:
        # rdi = rax - rcx; branch to the panic path if it is above 100 (unsigned).
        # This is the bounds check the report is about.
        mov     rdi, rax
        sub     rdi, rcx
        cmp     rdi, 100
        ja      .LBB0_11
        # rsi = rax + 1, replaced by 100 when rax >= 100.
        lea     rsi, [rax + 1]
        cmp     rax, 100
        cmovae  rsi, r8
        # element[rax] += element[rax - rcx]
        mov     edi, dword ptr [rsp + 4*rdi + 4]
        add     dword ptr [rsp + 4*rax + 4], edi
        cmp     rsi, 100
        ja      .LBB0_4
        # Loop again while the previous counter value was <= 99
        # (the mov does not change the flags set by the cmp).
        cmp     rax, 99
        mov     rax, rsi
        jbe     .LBB0_2
.LBB0_4:
        # Finish once rdx (= rcx + 1) equals 100.
        cmp     rdx, 100
        je      .LBB0_5
        # Otherwise advance rcx by 2 and run a second copy of the inner loop
        # with rdx as the outer value (looks like the outer loop unrolled by two).
        add     rcx, 2
        mov     rsi, rdx
.LBB0_8:
        # Same shape as .LBB0_2, with rsi as inner counter and rdx as outer value,
        # including the same bounds check.
        mov     rdi, rsi
        sub     rdi, rdx
        cmp     rdi, 100
        ja      .LBB0_11
        lea     rax, [rsi + 1]
        cmp     rsi, 100
        cmovae  rax, r8
        mov     edi, dword ptr [rsp + 4*rdi + 4]
        add     dword ptr [rsp + 4*rsi + 4], edi
        cmp     rax, 100
        ja      .LBB0_1
        cmp     rsi, 99
        mov     rsi, rax
        jbe     .LBB0_8
        jmp     .LBB0_1
.LBB0_5:
        # Return element 100 ([rsp + 4*100 + 4]).
        mov     eax, dword ptr [rsp + 404]
        add     rsp, 408
        vzeroupper
        ret
.LBB0_11:
        # Panic path. rdi still holds the rejected index; esi = 101 matches the
        # array length N + 1 (presumably the length argument - not shown here).
        lea     rdx, [rip + .L__unnamed_1]
        mov     esi, 101
        vzeroupper
        call    qword ptr [rip + core::panicking::panic_bounds_check@GOTPCREL]
        ud2

While the same code with ".. N + 1" instead of "..= N":

/// Comparison case (half-open-range variant).
///
/// Builds a table `ways` of `N + 1` `u32` entries with `ways[0] = 1`, then for
/// every `j` in `1 .. N` adds `ways[i - j]` into `ways[i]` for each `i` in
/// `j .. N + 1`, and finally returns `ways[N]`.
pub fn e76() -> u32 {
    const N: usize = 100;
    // N + 1 entries, so the valid indices are 0 ..= N.
    let mut ways = [0_u32; N + 1];
    ways[0] = 1;
    for j in 1 .. N {
        // Exclusive upper bound N + 1: `i` takes the values j, j + 1, ..., N.
        for i in j .. N + 1 {
            // `i` starts at `j`, so `i - j` does not underflow, and both
            // `i` and `i - j` are at most N.
            ways[i] += ways[i - j];
        }
    }
    ways[N]
}

Contains no panic_bounds_check (and gets vectorized):

# Compiler output for the `.. N + 1` variant of e76 (x86-64, Intel syntax).
# The array is addressed as [rsp + 4*index]: element 0 is at [rsp] and
# element 100 at [rsp + 400]. This listing contains no call instruction,
# so there is no panic_bounds_check path.
example::e76:
        # Save the registers this function uses and restores on exit.
        push    rbp
        push    r15
        push    r14
        push    r13
        push    r12
        push    rbx
        sub     rsp, 404
        # Zero-fill the bytes from rsp + 4 up to rsp + 404 with 32-byte stores
        # (the first two stores overlap).
        vpxor   xmm0, xmm0, xmm0
        vmovdqu ymmword ptr [rsp + 372], ymm0
        vmovdqu ymmword ptr [rsp + 356], ymm0
        vmovdqu ymmword ptr [rsp + 324], ymm0
        vmovdqu ymmword ptr [rsp + 292], ymm0
        vmovdqu ymmword ptr [rsp + 260], ymm0
        vmovdqu ymmword ptr [rsp + 228], ymm0
        vmovdqu ymmword ptr [rsp + 196], ymm0
        vmovdqu ymmword ptr [rsp + 164], ymm0
        vmovdqu ymmword ptr [rsp + 132], ymm0
        vmovdqu ymmword ptr [rsp + 100], ymm0
        vmovdqu ymmword ptr [rsp + 68], ymm0
        lea     rax, [rsp + 4]
        vmovdqu ymmword ptr [rsp + 36], ymm0
        vmovdqu ymmword ptr [rsp + 4], ymm0
        # Element 0 = 1.
        mov     dword ptr [rsp], 1
        # Set up the outer-loop state: r9 = 1 is the outer counter and r15 = 0
        # counts completed outer iterations; rcx, r8, r10, r11 and rax are
        # per-iteration addressing/count values updated in .LBB0_1.
        lea     rcx, [rsp + 228]
        mov     r9d, 1
        mov     r8d, 4
        mov     r10d, 92
        mov     r11, -4
        xor     r15d, r15d
        jmp     .LBB0_2
.LBB0_1:
        # Outer-loop latch: step the per-iteration values, exit when r9 == 100.
        inc     r15
        add     rcx, 4
        add     r8, 4
        dec     r10
        add     r11, -4
        add     rax, -4
        cmp     r9, 100
        je      .LBB0_18
.LBB0_2:
        # rsi = current outer value; r13 = 100 - r15; r9 advances to the next value.
        mov     rsi, r9
        mov     r13d, 100
        sub     r13, r15
        inc     r9
        # Fewer than 8 elements: go straight to the scalar loops.
        cmp     r13, 8
        jb      .LBB0_13
        # Address comparison that also falls back to the scalar loops
        # (looks like a runtime overlap check for the vector path - TODO confirm).
        lea     rdx, [rsp + 4*r13]
        lea     rdi, [rsp + 4*r15]
        add     rdi, 4
        cmp     rdi, rdx
        jb      .LBB0_13
        # rdx = number of 8-element vector steps; r14 = that count modulo 8,
        # handled one step at a time in .LBB0_10.
        mov     edi, 92
        sub     rdi, r15
        mov     rdx, rdi
        shr     rdx, 3
        inc     rdx
        mov     r14d, edx
        and     r14d, 7
        cmp     rdi, 56
        jae     .LBB0_6
        xor     ebx, ebx
        jmp     .LBB0_8
.LBB0_6:
        sub     rdx, r14
        xor     ebx, ebx
.LBB0_7:
        # Main vector loop: eight 256-bit vpaddd per iteration
        # (64 dword elements; rbx advances by 64, rdx counts down by 8).
        vmovdqu ymm0, ymmword ptr [rcx + 4*rbx - 224]
        vmovdqu ymm1, ymmword ptr [rcx + 4*rbx - 192]
        vmovdqu ymm2, ymmword ptr [rcx + 4*rbx - 160]
        vmovdqu ymm3, ymmword ptr [rcx + 4*rbx - 128]
        vpaddd  ymm0, ymm0, ymmword ptr [rsp + 4*rbx]
        vmovdqu ymmword ptr [rcx + 4*rbx - 224], ymm0
        vpaddd  ymm0, ymm1, ymmword ptr [rsp + 4*rbx + 32]
        vmovdqu ymmword ptr [rcx + 4*rbx - 192], ymm0
        vpaddd  ymm0, ymm2, ymmword ptr [rsp + 4*rbx + 64]
        vmovdqu ymmword ptr [rcx + 4*rbx - 160], ymm0
        vpaddd  ymm0, ymm3, ymmword ptr [rsp + 4*rbx + 96]
        vmovdqu ymmword ptr [rcx + 4*rbx - 128], ymm0
        vmovdqu ymm0, ymmword ptr [rcx + 4*rbx - 96]
        vpaddd  ymm0, ymm0, ymmword ptr [rsp + 4*rbx + 128]
        vmovdqu ymmword ptr [rcx + 4*rbx - 96], ymm0
        vmovdqu ymm0, ymmword ptr [rcx + 4*rbx - 64]
        vpaddd  ymm0, ymm0, ymmword ptr [rsp + 4*rbx + 160]
        vmovdqu ymmword ptr [rcx + 4*rbx - 64], ymm0
        vmovdqu ymm0, ymmword ptr [rcx + 4*rbx - 32]
        vpaddd  ymm0, ymm0, ymmword ptr [rsp + 4*rbx + 192]
        vmovdqu ymmword ptr [rcx + 4*rbx - 32], ymm0
        vmovdqu ymm0, ymmword ptr [rcx + 4*rbx]
        vpaddd  ymm0, ymm0, ymmword ptr [rsp + 4*rbx + 224]
        vmovdqu ymmword ptr [rcx + 4*rbx], ymm0
        add     rbx, 64
        add     rdx, -8
        jne     .LBB0_7
.LBB0_8:
        # r12 = r13 rounded down to a multiple of 8 (elements covered by vector code).
        mov     r12, r13
        and     r12, -8
        # Skip the single-vector loop when no leftover vector steps remain.
        test    r14, r14
        je      .LBB0_11
        # r14 = leftover vector steps * 32 bytes, used as the byte limit below.
        mov     edx, r10d
        shr     dl, 3
        inc     dl
        movzx   r14d, dl
        and     r14d, 7
        shl     r14, 5
        lea     rbx, [rsp + 4*rbx]
        lea     rdi, [rbx + r8]
        xor     edx, edx
.LBB0_10:
        # Leftover vector loop: one 256-bit vpaddd (8 dword elements) per iteration.
        vmovdqu ymm0, ymmword ptr [rdi + rdx]
        vpaddd  ymm0, ymm0, ymmword ptr [rbx + rdx]
        vmovdqu ymmword ptr [rdi + rdx], ymm0
        add     rdx, 32
        cmp     r14, rdx
        jne     .LBB0_10
.LBB0_11:
        # If the vector code covered every element, move on to the next outer
        # iteration; otherwise continue with the scalar loops from rsi + r12.
        cmp     r13, r12
        je      .LBB0_1
        add     rsi, r12
.LBB0_13:
        # Scalar path. rdi = (1 - rsi) & 3 single steps are done in .LBB0_15
        # before the 4-way unrolled loop in .LBB0_17.
        mov     edi, 1
        sub     edi, esi
        mov     edx, 100
        sub     rdx, rsi
        and     rdi, 3
        je      .LBB0_16
        lea     rbx, [rsp + r11]
.LBB0_15:
        # One element per iteration: element[rsi] += dword at [rbx + 4*rsi].
        mov     ebp, dword ptr [rbx + 4*rsi]
        add     dword ptr [rsp + 4*rsi], ebp
        inc     rsi
        dec     rdi
        jne     .LBB0_15
.LBB0_16:
        cmp     rdx, 3
        jb      .LBB0_1
.LBB0_17:
        # Scalar loop unrolled by 4; runs until rsi reaches 101.
        mov     edx, dword ptr [rax + 4*rsi - 8]
        add     dword ptr [rsp + 4*rsi], edx
        mov     edx, dword ptr [rax + 4*rsi - 4]
        add     dword ptr [rsp + 4*rsi + 4], edx
        mov     edx, dword ptr [rax + 4*rsi]
        add     dword ptr [rsp + 4*rsi + 8], edx
        mov     edx, dword ptr [rax + 4*rsi + 4]
        add     dword ptr [rsp + 4*rsi + 12], edx
        add     rsi, 4
        cmp     rsi, 101
        jne     .LBB0_17
        jmp     .LBB0_1
.LBB0_18:
        # Return element 100 ([rsp + 4*100]) and restore the saved registers.
        mov     eax, dword ptr [rsp + 400]
        add     rsp, 404
        pop     rbx
        pop     r12
        pop     r13
        pop     r14
        pop     r15
        pop     rbp
        vzeroupper
        ret
@leonardo-m leonardo-m added the C-bug Category: This is a bug. label Aug 1, 2020
@ChrisDenton ChrisDenton added the needs-triage-legacy Old issue that were never triaged. Remove this label once the issue has been sufficiently triaged. label Jul 16, 2023
@saethlin saethlin added I-slow Issue: Problems and improvements with respect to performance of generated code. and removed needs-triage-legacy Old issue that were never triaged. Remove this label once the issue has been sufficiently triaged. labels Jul 16, 2023
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
C-bug Category: This is a bug. I-slow Issue: Problems and improvements with respect to performance of generated code.
Projects
None yet
Development

No branches or pull requests

3 participants