-
Notifications
You must be signed in to change notification settings - Fork 15.2k
Description
| Bugzilla Link | 30729 |
| Version | trunk |
| OS | Linux |
| CC | @weiguozhi,@hfinkel,@lei137 |
Extended Description
In f(), LLVM hoists the loads of the constants k1 and k2 out of the loop. In both g() and h(), the loads stay inside the loop, resulting in a longer loop body. Both g() and h() have an if-statement enclosing the loads, so always loading the constants is not necessarily better, since the if-condition may never be true. However, h() adds __builtin_expect to hint to the compiler that the loads are very likely to be executed. In that case, I'd expect the compiler to move the loads out of the loop body.
#include <altivec.h>
#include <stdint.h>
// Reproducer type: two vector members plus three const member functions
// that run the same add/xor loop and differ only in how the loads of
// k1 and k2 are guarded.
struct S {
// Vector operands read with vec_ld inside each loop; the member functions
// are const, so they only read these.
vector int k1;
vector int k2;
// Loads k1 and k2 unconditionally on every iteration.
vector int f(uint64_t n, vector int x) const;
// Loads k1 and k2 only when the current base-16 digit of n is nonzero.
vector int g(uint64_t n, vector int x) const;
// Same as g(), with the condition marked likely via __builtin_expect.
vector int h(uint64_t n, vector int x) const;
};
// Baseline case: the loads are unconditional inside the loop.
// Iterates once per base-16 digit of n (n is divided by 16 until it is 0);
// each iteration adds k1 to x and then XORs the result with k2.
// Returns the accumulated vector; returns x unchanged when n == 0.
// Per the report, LLVM hoists both vec_ld loads out of this loop.
vector int S::f(uint64_t n, vector int x) const {
const uint64_t base = 16;
for ( ; n ; n /= base) {
// k1 and k2 are loop-invariant: nothing in the loop writes to them.
x += vec_ld(0, &k1);
x ^= vec_ld(0, &k2);
}
return x;
}
// Guarded case: same loop as f(), but the add/xor (and therefore the loads)
// only run when the current low base-16 digit of n is nonzero.
// Returns the accumulated vector; returns x unchanged when n == 0.
// Per the report, the loads stay inside the loop here, since the guarded
// block is not guaranteed to execute.
vector int S::g(uint64_t n, vector int x) const {
const uint64_t base = 16;
for ( ; n ; n /= base) {
// d is the lowest base-16 digit of the remaining value of n.
const uint64_t d = n % base;
if (d != 0) {
x += vec_ld(0, &k1);
x ^= vec_ld(0, &k2);
}
}
return x;
}
// Hinted case: identical to g() except that the guard is wrapped in
// __builtin_expect(..., 1), telling the compiler the branch is expected
// to be taken.
// Returns the accumulated vector; returns x unchanged when n == 0.
// Per the report, the loads are still not hoisted despite the hint; this
// is the behavior the bug report questions.
vector int S::h(uint64_t n, vector int x) const {
const uint64_t base = 16;
for ( ; n ; n /= base) {
// d is the lowest base-16 digit of the remaining value of n.
const uint64_t d = n % base;
// The hint does not change the result, only the expected branch direction.
if (__builtin_expect(d != 0, 1)) {
x += vec_ld(0, &k1);
x ^= vec_ld(0, &k2);
}
}
return x;
}
.text
.abiversion 2
.file "constants.cc"
# Compiler output for S::f. Both lvx loads appear in the preheader block
# (BB#1), ahead of the loop header .LBB0_2, so the loop body contains no
# memory loads.
.globl _ZNK1S1fEyDv4_i
.p2align 4
.type _ZNK1S1fEyDv4_i,@function
_ZNK1S1fEyDv4_i: # @_ZNK1S1fEyDv4_i
.Lfunc_begin0:
BB#0:
# Skip the loop entirely when r4 is zero.
cmpldi 4, 0
beq 0, .LBB0_4
BB#1:
# Hoisted loads: executed once, before entering the loop.
li 5, 16
lvx 3, 0, 3
xxlor 34, 34, 34
lvx 4, 3, 5
.p2align 4
.LBB0_2: # =>This Inner Loop Header: Depth=1
# Loop body: add, update the counter in r4 (record form sets CR0), xor, branch.
vadduwm 2, 3, 2
rldicl. 4, 4, 60, 4
xxlxor 34, 34, 36
bne 0, .LBB0_2
BB#3:
# kill: %V2<def> %V2<kill> %VSH2<kill>
blr
.LBB0_4:
xxlor 34, 34, 34
# kill: %V2 %V2 %VSH2
blr
.long 0
.quad 0
.Lfunc_end0:
.size _ZNK1S1fEyDv4_i, .Lfunc_end0-.Lfunc_begin0
# Compiler output for S::g. Only the address computation (addi) is done
# ahead of the loop; both lvx loads remain in BB#3, which is inside the loop
# and is skipped by the branch to .LBB1_4.
.globl _ZNK1S1gEyDv4_i
.p2align 4
.type _ZNK1S1gEyDv4_i,@function
_ZNK1S1gEyDv4_i: # @_ZNK1S1gEyDv4_i
.Lfunc_begin1:
BB#0:
# Skip the loop entirely when r4 is zero.
cmpldi 4, 0
beq 0, .LBB1_6
BB#1:
xxlor 34, 34, 34
# r5 = r3 + 16, the address used by the second load below.
addi 5, 3, 16
.p2align 5
.LBB1_2: # =>This Inner Loop Header: Depth=1
# Test the guard; branch around the loads when it is false.
rldicl. 6, 4, 0, 60
beq 0, .LBB1_4
BB#3: # in Loop: Header=BB1_2 Depth=1
# Loads are repeated on every iteration that takes this path.
lvx 3, 0, 3
lvx 4, 0, 5
vadduwm 2, 3, 2
xxlxor 34, 34, 36
.LBB1_4: # in Loop: Header=BB1_2 Depth=1
# Update the counter in r4 and loop while it is nonzero.
rldicl. 4, 4, 60, 4
bne 0, .LBB1_2
BB#5:
# kill: %V2<def> %V2<kill> %VSH2<kill>
blr
.LBB1_6:
xxlor 34, 34, 34
# kill: %V2 %V2 %VSH2
blr
.long 0
.quad 0
.Lfunc_end1:
.size _ZNK1S1gEyDv4_i, .Lfunc_end1-.Lfunc_begin1
# Compiler output for S::h. The instruction sequence matches the one emitted
# for S::g: the __builtin_expect hint did not move the lvx loads out of the
# loop; they remain in BB#3.
.globl _ZNK1S1hEyDv4_i
.p2align 4
.type _ZNK1S1hEyDv4_i,@function
_ZNK1S1hEyDv4_i: # @_ZNK1S1hEyDv4_i
.Lfunc_begin2:
BB#0:
# Skip the loop entirely when r4 is zero.
cmpldi 4, 0
beq 0, .LBB2_6
BB#1:
xxlor 34, 34, 34
# r5 = r3 + 16, the address used by the second load below.
addi 5, 3, 16
.p2align 5
.LBB2_2: # =>This Inner Loop Header: Depth=1
# Test the guard; branch around the loads when it is false.
rldicl. 6, 4, 0, 60
beq 0, .LBB2_4
BB#3: # in Loop: Header=BB2_2 Depth=1
# Loads are still inside the loop, on the path hinted as likely.
lvx 3, 0, 3
lvx 4, 0, 5
vadduwm 2, 3, 2
xxlxor 34, 34, 36
.LBB2_4: # in Loop: Header=BB2_2 Depth=1
# Update the counter in r4 and loop while it is nonzero.
rldicl. 4, 4, 60, 4
bne 0, .LBB2_2
BB#5:
# kill: %V2<def> %V2<kill> %VSH2<kill>
blr
.LBB2_6:
xxlor 34, 34, 34
# kill: %V2 %V2 %VSH2
blr
.long 0
.quad 0
.Lfunc_end2:
.size _ZNK1S1hEyDv4_i, .Lfunc_end2-.Lfunc_begin2