Skip to content

Commit

Permalink
[PowerPC] Exploit the rlwinm instructions for "and" with constant
Browse files Browse the repository at this point in the history
Currently, PowerPC uses several instructions to materialize the constant and then "and" it with the operand, as in the following case:

define i32 @test1(i32 %a) {
  %and = and i32 %a, -2
  ret i32 %and
}

However, we can exploit the rotate-and-mask instructions to do this instead.
               MB  ME
+----------------------+
|xxxxxxxxxxx00011111000|
+----------------------+
 0         32         64
Note that we can only do this if MB is at least 32 and MB <= ME, because
RLWINM replaces the content of [0 - 32) with [32 - 64) even when we do not rotate it.

Differential Revision: https://reviews.llvm.org/D71829
  • Loading branch information
QingShan Zhang committed Dec 30, 2019
1 parent 266cd77 commit 874a800
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 8 deletions.
24 changes: 24 additions & 0 deletions llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
Expand Up @@ -82,6 +82,30 @@ static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
return false;
}

/// Test whether the 64-bit value \p Val is a single run of one bits, either
/// contiguous or wrapping around the ends of the value (ones at both the top
/// and the bottom with one block of zeros in between).
///
/// \param Val the mask to inspect.
/// \param MB  set on success to the leading-zero-count position of the first
///            bit of the run.
/// \param ME  set on success to the leading-zero-count position of the last
///            bit of the run.
/// \returns true when a run was recognised; false for zero or for any other
///          bit pattern, in which case \p MB and \p ME are not written.
static inline bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME) {
  // An empty mask contains no run at all.
  if (Val == 0)
    return false;

  // Case 1: the ones form one contiguous block.
  if (isShiftedMask_64(Val)) {
    // (Val - 1) ^ Val keeps the lowest set bit and everything below it, so
    // its leading-zero count is the position of the last one in the run.
    const uint64_t LowestOneAndBelow = (Val - 1) ^ Val;
    // The first set bit marks the start of the run.
    MB = countLeadingZeros(Val);
    ME = countLeadingZeros(LowestOneAndBelow);
    return true;
  }

  // Case 2: the zeros form one contiguous block, i.e. the run of ones wraps.
  // Work on the complement so the block of zeros becomes a shifted mask.
  const uint64_t Inverted = ~Val;
  if (!isShiftedMask_64(Inverted))
    return false; // neither pattern matched: no run present

  // The run ends on the bit just before the first zero of the original...
  ME = countLeadingZeros(Inverted) - 1;
  // ...and restarts on the bit just after the last zero of the original.
  MB = countLeadingZeros((Inverted - 1) ^ Inverted) + 1;
  return true;
}

} // end namespace llvm

// Generated files will use "namespace PPC". To avoid symbol clash,
Expand Down
20 changes: 20 additions & 0 deletions llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
Expand Up @@ -4456,6 +4456,26 @@ bool PPCDAGToDAGISel::tryAndWithMask(SDNode *N) {
CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
return true;
}

// It is not 16-bit imm that means we need two instructions at least if
// using "and" instruction. Try to exploit it with rotate mask instructions.
if (isRunOfOnes64(Imm64, MB, ME)) {
if (MB >= 32 && MB <= ME) {
// MB ME
// +----------------------+
// |xxxxxxxxxxx00011111000|
// +----------------------+
// 0 32 64
// We can only do it if the MB is larger than 32 and MB <= ME
// as RLWINM will replace the content of [0 - 32) with [32 - 64) even
// we didn't rotate it.
SDValue Ops[] = { Val, getI64Imm(0, dl), getI64Imm(MB - 32, dl),
getI64Imm(ME - 32, dl) };
CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops);
return true;
}
// TODO - handle it with rldicl + rldicl
}
}

return false;
Expand Down
5 changes: 1 addition & 4 deletions llvm/test/CodeGen/PowerPC/and-mask.ll
Expand Up @@ -5,10 +5,7 @@
define i32 @test1(i32 %a) {
; CHECK-LABEL: test1:
; CHECK: # %bb.0:
; CHECK-NEXT: lis 4, 32767
; CHECK-NEXT: ori 4, 4, 65535
; CHECK-NEXT: sldi 4, 4, 1
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: rlwinm 3, 3, 0, 0, 30
; CHECK-NEXT: blr
%and = and i32 %a, -2
ret i32 %and
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/PowerPC/opt-cmp-inst-cr0-live.ll
Expand Up @@ -7,12 +7,12 @@ define signext i32 @fn1(i32 %baz) {
%2 = zext i32 %1 to i64
%3 = shl i64 %2, 48
%4 = ashr exact i64 %3, 48
; CHECK: ANDI8o killed {{[^,]+}}, 65520, implicit-def dead $cr0
; CHECK: RLWINM8 killed {{[^,]+}}, 0, 16, 27
; CHECK: CMPLDI
; CHECK: BCC

; CHECK: ANDI8o {{[^,]+}}, 65520, implicit-def $cr0
; CHECK: COPY $cr0
; CHECK: COPY killed $cr0
; CHECK: BCC
%5 = icmp eq i64 %4, 0
br i1 %5, label %foo, label %bar
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/PowerPC/popcnt-zext.ll
Expand Up @@ -299,7 +299,7 @@ define i64 @popa_i16_i64(i16 %x) {
; FAST: # %bb.0:
; FAST-NEXT: clrldi 3, 3, 48
; FAST-NEXT: popcntd 3, 3
; FAST-NEXT: andi. 3, 3, 16
; FAST-NEXT: rlwinm 3, 3, 0, 27, 27
; FAST-NEXT: blr
;
; SLOW-LABEL: popa_i16_i64:
Expand All @@ -325,7 +325,7 @@ define i64 @popa_i16_i64(i16 %x) {
; SLOW-NEXT: ori 4, 4, 257
; SLOW-NEXT: mullw 3, 3, 4
; SLOW-NEXT: srwi 3, 3, 24
; SLOW-NEXT: andi. 3, 3, 16
; SLOW-NEXT: rlwinm 3, 3, 0, 27, 27
; SLOW-NEXT: blr
%pop = call i16 @llvm.ctpop.i16(i16 %x)
%z = zext i16 %pop to i64 ; SimplifyDemandedBits may turn zext (or sext) into aext
Expand Down

0 comments on commit 874a800

Please sign in to comment.