Skip to content

[SLP] Missed vectorization - failed to move load with loop-invariant address #47233

@davidbolvansky

Description

@davidbolvansky
Bugzilla Link 47889
Version trunk
OS Linux
CC @alexey-bataev,@anton-afanasyev,@fhahn,@RKSimon,@sjoerdmeijer,@rotateright,@vporpo

Extended Description

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>

#define N 16

/* State used by the reproducer: the value to store and where to store it. */
struct osmesa_context {
unsigned int clearpixel; /* value that run() copies into every element it writes */
void *buffer; /* destination; run() casts it to unsigned int * */
};

/* Global pointer through which run() reaches both fields above. */
struct osmesa_context *osmesa;

/*
 * Reproducer: writes osmesa->clearpixel into the first N (16) unsigned ints
 * of osmesa->buffer. The parameter p is unused. Always returns 0.
 */
int run (void *p)
{
unsigned int i, n, *ptr;

n = N;

/* The destination pointer is read once, before the loop. */
ptr = (unsigned int *) osmesa->buffer;
for (i = 0; i < n; i++) {
/*
 * The load address (&osmesa->clearpixel) is the same on every iteration,
 * but the load is written inside the loop. The store goes through an
 * unsigned int pointer, the same type as clearpixel, so the compiler
 * cannot simply assume the store never overwrites clearpixel.
 */
*ptr++ = osmesa->clearpixel;
}

return 0;
}

ICC and GCC can vectorize this loop; Clang (trunk) currently does not.

ICC generates:
# ICC output for run() (Intel syntax, x86-64).
# Register roles until the paths join at ..B1.5:
#   rax = osmesa (address of the struct; clearpixel is at offset 0)
#   rdx = osmesa->buffer (loaded from offset 8 of the struct)
#   rcx = signed byte distance between the two, used for the overlap check
run:
mov rax, QWORD PTR osmesa[rip] #​20.26
mov rdx, QWORD PTR [8+rax] #​20.26
# Runtime overlap check: rcx = buffer - osmesa.
mov rcx, rdx #​22.14
sub rcx, rax #​22.14
# buffer starts at least 16 bytes after the struct address -> vector path.
cmp rcx, 16 #​21.3
jge ..B1.3 # Prob 50% #​21.3
# Otherwise rcx = osmesa - buffer; if the struct is less than 64 bytes
# (the 16 * 4 bytes written) past the buffer start, use the scalar path.
neg rcx #​22.6
cmp rcx, 64 #​21.3
jl ..B1.4 # Prob 50% #​21.3
# Vector path: load clearpixel once, copy it into all four dword lanes of
# xmm1, then cover the 64 bytes with four unaligned 16-byte stores.
..B1.3: # Preds ..B1.1 ..B1.2
movd xmm0, DWORD PTR [rax] #​22.14
pshufd xmm1, xmm0, 0 #​22.14
movdqu XMMWORD PTR [rdx], xmm1 #​22.6
movdqu XMMWORD PTR [16+rdx], xmm1 #​22.6
movdqu XMMWORD PTR [32+rdx], xmm1 #​22.6
movdqu XMMWORD PTR [48+rdx], xmm1 #​22.6
jmp ..B1.5 # Prob 100% #​22.6
# Scalar fallback: the loop fully unrolled into 16 load/store pairs.
# clearpixel is reloaded from [rax] before every store, so a store that
# overwrites it is observed by the following iterations.
..B1.4: # Preds ..B1.2
mov ecx, DWORD PTR [rax] #​22.14
mov DWORD PTR [rdx], ecx #​22.6
mov esi, DWORD PTR [rax] #​22.14
mov DWORD PTR [4+rdx], esi #​22.6
mov edi, DWORD PTR [rax] #​22.14
mov DWORD PTR [8+rdx], edi #​22.6
mov r8d, DWORD PTR [rax] #​22.14
mov DWORD PTR [12+rdx], r8d #​22.6
mov r9d, DWORD PTR [rax] #​22.14
mov DWORD PTR [16+rdx], r9d #​22.6
mov r10d, DWORD PTR [rax] #​22.14
mov DWORD PTR [20+rdx], r10d #​22.6
mov r11d, DWORD PTR [rax] #​22.14
mov DWORD PTR [24+rdx], r11d #​22.6
mov ecx, DWORD PTR [rax] #​22.14
mov DWORD PTR [28+rdx], ecx #​22.6
mov esi, DWORD PTR [rax] #​22.14
mov DWORD PTR [32+rdx], esi #​22.6
mov edi, DWORD PTR [rax] #​22.14
mov DWORD PTR [36+rdx], edi #​22.6
mov r8d, DWORD PTR [rax] #​22.14
mov DWORD PTR [40+rdx], r8d #​22.6
mov r9d, DWORD PTR [rax] #​22.14
mov DWORD PTR [44+rdx], r9d #​22.6
mov r10d, DWORD PTR [rax] #​22.14
mov DWORD PTR [48+rdx], r10d #​22.6
mov r11d, DWORD PTR [rax] #​22.14
mov DWORD PTR [52+rdx], r11d #​22.6
mov ecx, DWORD PTR [rax] #​22.14
mov DWORD PTR [56+rdx], ecx #​22.6
# Last load overwrites rax itself; the struct address is not needed afterwards.
mov eax, DWORD PTR [rax] #​22.14
mov DWORD PTR [60+rdx], eax #​22.6
# Common exit: return 0.
..B1.5: # Preds ..B1.4 ..B1.3
xor eax, eax #​25.10
ret #​25.10

Codegen: https://godbolt.org/z/5f6YnY

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions