Skip to content

Commit

Permalink
[PowerPC] Exploit the rldicl + rldicl when and with mask
Browse files Browse the repository at this point in the history
When we AND a value with a constant like 0xFFFFFFC00000, we currently use
several instructions to materialize this 48-bit constant, followed by a final
"and". However, we could do the same thing with just two rotate instructions.

       MB          ME               MB+63-ME
+----------------------+     +----------------------+
|0000001111111111111000| ->  |0000000001111111111111|
+----------------------+     +----------------------+
 0                    63      0                    63
First rotate left by ME + 1 bits, then mask the result with (MB + 63 - ME, 63),
and finally rotate back. Note that the computed amounts must be taken modulo 64
to handle the wrapping case.

Reviewed by: ChenZheng, Nemanjai

Differential Revision: https://reviews.llvm.org/D71831
  • Loading branch information
QingShan Zhang committed Apr 17, 2020
1 parent 5034df8 commit 4bd186c
Show file tree
Hide file tree
Showing 6 changed files with 83 additions and 45 deletions.
57 changes: 56 additions & 1 deletion llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
Expand Up @@ -351,6 +351,7 @@ namespace {
bool tryAsSingleRLWINM(SDNode *N);
bool tryAsSingleRLWINM8(SDNode *N);
bool tryAsSingleRLWIMI(SDNode *N);
bool tryAsPairOfRLDICL(SDNode *N);

void PeepholePPC64();
void PeepholePPC64ZExt();
Expand Down Expand Up @@ -4439,6 +4440,60 @@ bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) {
return false;
}

/// Try to select an ISD::AND whose second operand is a 64-bit immediate as a
/// pair of RLDICL instructions.
///
/// The transformation applies when the mask, once its leading zeros have been
/// filled in with ones, is a single (possibly wrapping) run of ones. The first
/// RLDICL rotates the leading ones down to the low end and clears the zeros
/// that sat in the middle of the mask; the second RLDICL rotates the value
/// back and clears the leading bits that were zero in the original mask.
///
/// \param N the ISD::AND node to select.
/// \returns true if \p N was replaced by the RLDICL pair, false if the node
/// was left untouched.
bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");

  // Bail out when the right-hand side is not a 64-bit immediate, or when it
  // fits in 16 bits: the .td patterns already cover that case with "andi.".
  uint64_t Mask;
  if (!isInt64Immediate(N->getOperand(1).getNode(), Mask) || isUInt<16>(Mask))
    return false;

  // Fill the leading zeros of the mask with ones so that a mask such as
  //   |0001111100000011111111|
  // becomes the wrapped run of ones
  //   |1111111100000011111111|.
  const unsigned LeadingZeros = countLeadingZeros(Mask);
  if (LeadingZeros != 0)
    Mask |= maskLeadingOnes<uint64_t>(LeadingZeros);

  unsigned MB, ME;
  if (!isRunOfOnes64(Mask, MB, ME))
    return false;

  //          ME     MB                  MB-ME+63
  // +----------------------+     +----------------------+
  // |1111111100000011111111| ->  |0000001111111111111111|
  // +----------------------+     +----------------------+
  //  0                    63      0                    63
  // The filled-in mask has ME + 1 ones on the left, followed by
  // (MB - ME + 63) & 63 zeros before the trailing ones start.
  const unsigned LeftOnes = ME + 1;
  const unsigned MiddleZeros = (MB - ME + 63) & 63;

  SDLoc Loc(N);

  // First RLDICL: rotate left by LeftOnes, turning the leading ones into
  // trailing ones, and clear on the left the bits the mask already zeroes.
  SDValue RotateInAmt = getI64Imm(LeftOnes, Loc);
  SDValue ClearMiddle = getI64Imm(MiddleZeros, Loc);
  SDValue Rotated(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64,
                                         N->getOperand(0), RotateInAmt,
                                         ClearMiddle),
                  0);

  //                 MB-ME+63               ME     MB
  // +----------------------+     +----------------------+
  // |0000001111111111111111| ->  |0001111100000011111111|
  // +----------------------+     +----------------------+
  //  0                    63      0                    63
  // Second RLDICL: rotate by 64 - LeftOnes to undo the first rotation, then
  // clear on the left the ones that were added to the mask above.
  SDValue RotateBackAmt = getI64Imm(64 - LeftOnes, Loc);
  SDValue ClearLeading = getI64Imm(LeadingZeros, Loc);
  SDValue Ops[] = {Rotated, RotateBackAmt, ClearLeading};
  CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
  return true;
}

bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) {
assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
unsigned Imm;
Expand Down Expand Up @@ -4766,7 +4821,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
case ISD::AND:
// If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDICL(N) ||
tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N))
tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) || tryAsPairOfRLDICL(N))
return;

// Other cases are autogenerated.
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/PowerPC/2016-04-17-combine.ll
Expand Up @@ -7,8 +7,8 @@ target triple = "powerpc64le-unknown-linux-gnu"
%typ = type { i32, i32 }

; On release builds, it doesn't crash, spewing nonsense instead.
; To make sure it works, check that and is still alive.
; CHECK: and
; To make sure it works, check that rldicl is still alive.
; CHECK: rldicl
; Also, in release, it emits a COPY from a 32-bit register to
; a 64-bit register, which happens to be emitted as cror [!]
; by the confused CodeGen. Just to be sure, check there isn't one.
Expand Down
18 changes: 7 additions & 11 deletions llvm/test/CodeGen/PowerPC/Frames-dyn-alloca.ll
Expand Up @@ -43,15 +43,13 @@ define i32* @f1(i32 %n) nounwind {
; PPC64-LINUX-LABEL: f1
; PPC64-LINUX: std 31, -8(1)
; PPC64-LINUX-NEXT: stdu 1, -64(1)
; PPC64-LINUX-NEXT: lis 4, 32767
; PPC64-LINUX-NEXT: rldic 3, 3, 2, 30
; PPC64-LINUX-NEXT: ori 4, 4, 65535
; PPC64-LINUX-NEXT: addi 3, 3, 15
; PPC64-LINUX-NEXT: sldi 4, 4, 4
; PPC64-LINUX-NEXT: mr 31, 1
; PPC64-LINUX-NEXT: and 3, 3, 4
; PPC64-LINUX-NEXT: neg 3, 3
; PPC64-LINUX-NEXT: addi 3, 3, 15
; PPC64-LINUX-NEXT: rldicl 3, 3, 60, 4
; PPC64-LINUX-NEXT: addi 4, 31, 64
; PPC64-LINUX-NEXT: rldicl 3, 3, 4, 29
; PPC64-LINUX-NEXT: neg 3, 3
; PPC64-LINUX-NEXT: stdux 4, 1, 3

; The linkage area is always put on the top of the stack.
Expand Down Expand Up @@ -82,14 +80,12 @@ define i32* @f1(i32 %n) nounwind {
; PPC64-AIX-LABEL: f1
; PPC64-AIX: std 31, -8(1)
; PPC64-AIX-NEXT: stdu 1, -64(1)
; PPC64-AIX-NEXT: lis 4, 32767
; PPC64-AIX-NEXT: rldic 3, 3, 2, 30
; PPC64-AIX-NEXT: ori 4, 4, 65535
; PPC64-AIX-NEXT: addi 3, 3, 15
; PPC64-AIX-NEXT: sldi 4, 4, 4
; PPC64-AIX-NEXT: mr 31, 1
; PPC64-AIX-NEXT: and 3, 3, 4
; PPC64-AIX-NEXT: addi 3, 3, 15
; PPC64-AIX-NEXT: addi 4, 31, 64
; PPC64-AIX-NEXT: rldicl 3, 3, 60, 4
; PPC64-AIX-NEXT: rldicl 3, 3, 4, 29
; PPC64-AIX-NEXT: neg 3, 3
; PPC64-AIX-NEXT: stdux 4, 1, 3

Expand Down
29 changes: 10 additions & 19 deletions llvm/test/CodeGen/PowerPC/and-mask.ll
Expand Up @@ -15,8 +15,8 @@ define i32 @test1(i32 %a) {
define i64 @test2(i64 %a) {
; CHECK-LABEL: test2:
; CHECK: # %bb.0:
; CHECK-NEXT: li 4, -7
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: rldicl 3, 3, 61, 2
; CHECK-NEXT: rotldi 3, 3, 3
; CHECK-NEXT: blr
%and = and i64 %a, -7
ret i64 %and
Expand All @@ -26,10 +26,8 @@ define i64 @test2(i64 %a) {
define i64 @test3(i64 %a) {
; CHECK-LABEL: test3:
; CHECK: # %bb.0:
; CHECK-NEXT: lis 4, 1023
; CHECK-NEXT: ori 4, 4, 65535
; CHECK-NEXT: sldi 4, 4, 22
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: rldicl 3, 3, 42, 22
; CHECK-NEXT: rldicl 3, 3, 22, 16
; CHECK-NEXT: blr
%and = and i64 %a, 281474972516352
ret i64 %and
Expand All @@ -39,10 +37,8 @@ define i64 @test3(i64 %a) {
define i64 @test4(i64 %a) {
; CHECK-LABEL: test4:
; CHECK: # %bb.0:
; CHECK-NEXT: li 4, 12
; CHECK-NEXT: sldi 4, 4, 32
; CHECK-NEXT: ori 4, 4, 255
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: rldicl 3, 3, 30, 26
; CHECK-NEXT: rldicl 3, 3, 34, 28
; CHECK-NEXT: blr
%and = and i64 %a, 51539607807
ret i64 %and
Expand All @@ -52,10 +48,8 @@ define i64 @test4(i64 %a) {
define i64 @test5(i64 %a) {
; CHECK-LABEL: test5:
; CHECK: # %bb.0:
; CHECK-NEXT: li 4, 0
; CHECK-NEXT: oris 4, 4, 65472
; CHECK-NEXT: ori 4, 4, 65535
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: rldicl 3, 3, 42, 6
; CHECK-NEXT: rldicl 3, 3, 22, 32
; CHECK-NEXT: blr
%and = and i64 %a, 4290838527
ret i64 %and
Expand All @@ -77,11 +71,8 @@ define i64 @test6(i64 %a) {
define i64 @test7(i64 %a) {
; CHECK-LABEL: test7:
; CHECK: # %bb.0:
; CHECK-NEXT: li 4, -32767
; CHECK-NEXT: sldi 4, 4, 32
; CHECK-NEXT: oris 4, 4, 65024
; CHECK-NEXT: rldicr 4, 4, 17, 63
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: rldicl 3, 3, 22, 25
; CHECK-NEXT: rldicl 3, 3, 42, 14
; CHECK-NEXT: blr
%and = and i64 %a, 1121501860462591
ret i64 %and
Expand Down
16 changes: 6 additions & 10 deletions llvm/test/CodeGen/PowerPC/cmpb.ll
Expand Up @@ -123,11 +123,9 @@ entry:
ret i32 %or55

; CHECK-LABEL: @test32p1
; CHECK: li [[REG1:[0-9]+]], 0
; CHECK: cmpb [[REG4:[0-9]+]], 4, 3
; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65287
; CHECK: ori [[REG3:[0-9]+]], [[REG2]], 65535
; CHECK: and 3, [[REG4]], [[REG3]]
; CHECK: cmpb [[REG1:[0-9]+]], 4, 3
; CHECK: rldicl [[REG2:[0-9]+]], [[REG1]], 40, 5
; CHECK: rldicl 3, [[REG2]], 24, 32
; CHECK: blr
}

Expand All @@ -147,11 +145,9 @@ entry:
ret i32 %or37

; CHECK-LABEL: @test32p2
; CHECK: li [[REG1:[0-9]+]], 0
; CHECK: cmpb [[REG4:[0-9]+]], 4, 3
; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65280
; CHECK: ori [[REG3:[0-9]+]], [[REG2]], 65535
; CHECK: and 3, [[REG4]], [[REG3]]
; CHECK: cmpb [[REG1:[0-9]+]], 4, 3
; CHECK: rldicl [[REG2:[0-9]+]], [[REG1]], 40, 8
; CHECK: rldicl 3, [[REG2]], 24, 32
; CHECK: blr
}

Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/PowerPC/setcc-logic.ll
Expand Up @@ -481,9 +481,9 @@ define <4 x i1> @and_eq_vec(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32>
define i1 @or_icmps_const_1bit_diff(i64 %x) {
; CHECK-LABEL: or_icmps_const_1bit_diff:
; CHECK: # %bb.0:
; CHECK-NEXT: li 4, -5
; CHECK-NEXT: addi 3, 3, -13
; CHECK-NEXT: and 3, 3, 4
; CHECK-NEXT: rldicl 3, 3, 61, 1
; CHECK-NEXT: rotldi 3, 3, 3
; CHECK-NEXT: cntlzd 3, 3
; CHECK-NEXT: rldicl 3, 3, 58, 63
; CHECK-NEXT: blr
Expand Down

0 comments on commit 4bd186c

Please sign in to comment.