[vectorization] gcc generate better code for a loop body with control flow #64292

zhongsir1 · 2023-08-01T06:26:39Z

/*
 * Reproducer: a loop whose body performs one of two different stores per
 * element, selected by the sign of c[i], with the branches written as gotos.
 * a, b, c, d, e, LEN_1D and real_t are declared outside this snippet.
 */
void s1161(void)
{
        /* The bound is LEN_1D-1, so the loop stops one short of LEN_1D. */
        for (int i = 0; i < LEN_1D-1; ++i) {
            /* Negative c[i] skips the a[] update and goes to the b[] update. */
            if (c[i] < (real_t)0.) {
                goto L20;
            }
            /* c[i] not negative: write a[i], then skip the b[] update. */
            a[i] = c[i] + d[i] * e[i];
            goto L10;
L20:
            /* c[i] negative: write b[i]; a[i] is only read on this path. */
            b[i] = a[i] + d[i] * d[i];
L10:
            /* Common join point of both paths (empty statement). */
            ;
        }
}

gcc: generates a vectorized SVE loop

// Predicated SVE loop body emitted by gcc for s1161 (AArch64, GNU syntax).
// x0 is the element index, p7 the predicate of lanes still inside the loop.
// Array names in the notes below are matched against the C source by how
// each value is used; the base registers x5-x9 are set outside this listing.
.L2:
  lsl x1, x0, 3                              // x1 = index * 8, byte offset of an 8-byte element
  ld1d z31.d, p7/z, [x9, x0, lsl 3]          // value tested against 0 below: corresponds to c[i]
  ld1d z29.d, p7/z, [x8, x0, lsl 3]          // multiplicand used by both fmla: corresponds to d[i]
  add x2, x7, x1                             // x2 = address loaded as a[i] and later stored to
  fcmlt p6.d, p7/z, z31.d, #0.0              // p6 = active lanes where c[i] < 0
  ld1d z30.d, p7/z, [x2]                     // addend of the d*d product: corresponds to a[i]
  not p6.b, p7/z, p6.b                       // p6 = active lanes where !(c[i] < 0)
  add x4, x5, x1                             // x4 = address of the second multiplicand (e[i])
  add x1, x6, x1                             // x1 = address stored to on the negative path (b[i])
  ld1d z28.d, p7/z, [x4]                     // corresponds to e[i]
  fcmlt p7.d, p7/z, z31.d, #0.0              // same compare again; p7 now holds the c[i] < 0 lanes
  fmla z31.d, p6/m, z29.d, z28.d             // on p6 lanes: z31 = c[i] + d[i] * e[i]
  fmla z30.d, p7/m, z29.d, z29.d             // on p7 lanes: z30 = a[i] + d[i] * d[i]
  st1d z31.d, p6, [x2]                       // a[i] = ... only where c[i] is not negative
  st1d z30.d, p7, [x1]                       // b[i] = ... only where c[i] is negative
  add x0, x0, x10                            // advance the index by x10 (presumably lanes per vector; set outside this listing)
  whilelo p7.d, w0, w3                       // rebuild the loop predicate: lanes whose index is below w3
  b.any .L2                                  // iterate while at least one lane is still active

llvm: fails to vectorize the loop and emits scalar code

// Scalar loop emitted by llvm for s1161 (AArch64, GNU syntax); one element
// per iteration. x8 is the running byte offset and x9 the offset at which the
// loop ends. Array names in the notes are matched against the C source by how
// each value is used; the base registers x10-x14 are set outside this listing.
// This block is the path taken when the tested value is negative.
.LBB0_1: // in Loop: Header=BB0_2 Depth=1
  ldr d0, [x11, x8]                          // addend of the d*d product: corresponds to a[i]
  mov x15, x13                               // copy of the store base; x15 is not read in the shown code
  ldr d1, [x12, x8]                          // corresponds to d[i]
  fmadd d0, d1, d1, d0                       // d0 = d[i] * d[i] + a[i]
  str d0, [x13, x8]                          // corresponds to the b[i] store
  add x8, x8, #8                             // step to the next 8-byte element
  cmp x8, x9
  b.eq .LBB0_4                               // leave the loop at the end offset (.LBB0_4 is not shown)
.LBB0_2: // =>This Inner Loop Header: Depth=1
  ldr d0, [x10, x8]                          // value tested against 0: corresponds to c[i]
  fcmp d0, #0.0
  b.mi .LBB0_1                               // negative: take the b[] update above
// %bb.3: // in Loop: Header=BB0_2 Depth=1
  ldr d1, [x12, x8]                          // corresponds to d[i]
  mov x15, x11                               // copy of the store base; x15 is not read in the shown code
  ldr d2, [x14, x8]                          // second multiplicand: corresponds to e[i]
  fmadd d0, d1, d2, d0                       // d0 = d[i] * e[i] + c[i]
  str d0, [x11, x8]                          // corresponds to the a[i] store
  add x8, x8, #8                             // step to the next 8-byte element
  cmp x8, x9
  b.ne .LBB0_2                               // continue until the end offset is reached

The text was updated successfully, but these errors were encountered:

vfdff · 2023-08-03T11:08:43Z

slightly simplified test: https://godbolt.org/z/cTs784zG7

/*
 * Simplified reproducer: the same two-way conditional store written as a
 * plain if/else, with the multiplications removed.
 * a, b, c, d, LEN_1D and real_t are declared outside this snippet.
 */
void s1161(void) {
  /* The bound is LEN_1D-1, so the loop stops one short of LEN_1D. */
  for (int i = 0; i < LEN_1D-1; ++i) {
    /* Negative c[i] writes b[i]; otherwise a[i] is written. */
    if (c[i] < (real_t)0)
      b[i] = a[i] + d[i];
    else
      a[i] = c[i] + d[i];
  }
}

gcc:

; Predicated SVE loop emitted by gcc for the simplified s1161.
; x0 is the element index, p0 the predicate of lanes still inside the loop.
; Both sums are computed for every lane; only the stores are predicated.
.L2:
        lsl     x1, x0, 3                    ; byte offset = i * 8 (8-byte elements)
        ld1d    z0.d, p0/z, [x6, x0, lsl 3]  ; d[i]
        ld1d    z2.d, p0/z, [x7, x0, lsl 3]  ; c[i]
        add     x2, x5, x1                   ; &a[i]
        fcmlt   p2.d, p0/z, z2.d, #0.0       ; if (c[i] < (real_t)0.)
        ld1d    z1.d, p0/z, [x2]             ; a[i]
        add     x1, x4, x1                   ; &b[i]
        fadd    z1.d, z0.d, z1.d             ; b[i] = a[i] + d[i];
        fcmge   p0.d, p0/z, z2.d, #0.0       ; if (c[i] >= (real_t)0.)
        fadd    z0.d, z2.d, z0.d             ; a[i] = c[i] + d[i];
        st1d    z1.d, p2, [x1]               ; store b[i] only on lanes where c[i] < 0
        st1d    z0.d, p0, [x2]               ; store a[i] only on lanes where c[i] >= 0
        incd    x0                           ; i += number of 64-bit lanes per vector
        whilelo p0.d, w0, w3                 ; rebuild the loop predicate: lanes with index below w3
        b.any   .L2                          ; iterate while at least one lane is still active

llvm: the loop vectorizer reports "LV: Can't vectorize due to memory conflicts"; with -mllvm -debug-only=loop-accesses, the loop access analysis prints:

LAA: We can't vectorize because we can't find the array bounds

vfdff · 2023-08-15T02:38:52Z

Adding the option -mllvm -simplifycfg-sink-common=false lets LLVM vectorize the loop: https://godbolt.org/z/hhcjxGEG7
- [LAA] Analyze pointers forked by a select, D108699
- [LAA] Init analyze pointers forked by a phi, D102266, which expects the following PHI node form:
```
 if.then:      %gep.1 = getelementptr inbounds double, ptr %B, i64 %iv
 if.else:      %gep.2 = getelementptr inbounds double, ptr %C, i64 %iv
 loop.latch:%gep.2.sink = phi ptr [ %gep.2, %if.else ], [ %gep.1, %if.then ]
```

vfdff · 2023-08-19T16:26:17Z

s1161_ptr is a simpler case (https://godbolt.org/z/xaqacGo81),
which may require an extension for PHI nodes similar to D114480.

github-actions bot added the new issue label Aug 1, 2023

EugeneZelenko added vectorization and removed new issue labels Aug 1, 2023

vfdff mentioned this issue Aug 22, 2023

[vectorization] support forked pointer from loop body with control flow #64888

Closed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[vectorization] gcc generate better code for a loop body with control flow #64292

[vectorization] gcc generate better code for a loop body with control flow #64292

zhongsir1 commented Aug 1, 2023 •

edited

vfdff commented Aug 3, 2023 •

edited

vfdff commented Aug 15, 2023 •

edited

vfdff commented Aug 19, 2023 •

edited

[vectorization] gcc generate better code for a loop body with control flow #64292

[vectorization] gcc generate better code for a loop body with control flow #64292

Comments

zhongsir1 commented Aug 1, 2023 • edited

vfdff commented Aug 3, 2023 • edited

vfdff commented Aug 15, 2023 • edited

vfdff commented Aug 19, 2023 • edited

zhongsir1 commented Aug 1, 2023 •

edited

vfdff commented Aug 3, 2023 •

edited

vfdff commented Aug 15, 2023 •

edited

vfdff commented Aug 19, 2023 •

edited