-
Notifications
You must be signed in to change notification settings - Fork 15.3k
Closed
Labels
Description
Extends https://reviews.llvm.org/D132520 which did the same for i8/i16 cases.
Related to Issue #57810: on some x64 targets without BMI, we'd be better off performing CTTZ (with zero handling) as a BTS + (REP) BSFq. Atom/Silvermont in particular would benefit from avoiding an i64 immediate, as would any target with a reasonably fast BTS.
// Reproducer 1: trailing-zero count of a 32-bit value, yielding 32 when x is 0.
// The ternary keeps the zero input away from __builtin_ctz (GCC documents the
// builtin's result as undefined for 0) and supplies the bit width instead.
auto ctz32(uint32_t x) {
return x ? __builtin_ctz(x) : 32;
}
auto ctz32_64(uint64_t x) {
return __builtin_ctzll(x | (1ULL << 32));
}

Current Codegen:
# Current output for ctz32: the zero input is handled with a compare-and-branch
# rather than a single straight-line sequence.
ctz32(unsigned int): # @ctz32(unsigned int)
# ZF is set when x == 0; that case jumps to the block returning the constant 32.
testl %edi, %edi
je .LBB0_2
# Non-zero path: scan %edi for its lowest set bit and return the index in %eax.
rep bsfl %edi, %eax
retq
.LBB0_2: # %cond.end
# Zero path: return 32.
movl $32, %eax
retq
ctz32_64(unsigned long): # @ctz32_64(unsigned long)
movabsq $4294967296, %rax # imm = 0x100000000
orq %rdi, %rax
rep bsfq %rax, %rax
retq

Could be:
# Proposed output for ctz32_64: set bit 32 directly in the argument register with
# BTS instead of materialising the 0x100000000 immediate (movabsq) and ORing it in.
ctz32_64(unsigned long): # @ctz32_64(unsigned long)
# With bit 32 forced on, the scanned value is never zero and the result is at most 32.
btsq $32, %rdi
rep bsfq %rdi, %rax
retq