Skip to content

Commit

Permalink
[PowerPC] Extend folding RLWINM + RLWINM to post-RA.
Browse files Browse the repository at this point in the history
Summary: This patch depends on D89846. We already have patterns to fold two RLWINMs in ppc-mi-peephole, but some RLWINMs are generated only after RA (for example, in rGc4690b007743). If an RLWINM generated after RA is followed by another RLWINM, we want to perform the same optimization after RA, too.

Reviewed By: shchenz, steven.zhang

Differential Revision: https://reviews.llvm.org/D89855
  • Loading branch information
EsmeYi committed Nov 3, 2020
1 parent 529ba61 commit 119ab21
Show file tree
Hide file tree
Showing 7 changed files with 174 additions and 26 deletions.
74 changes: 55 additions & 19 deletions llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
Expand Up @@ -3190,18 +3190,55 @@ bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,
return false;
}

bool PPCInstrInfo::combineRLWINM(MachineInstr &MI,
MachineInstr **ToErase) const {
// This function tries to combine two RLWINMs. We perform this optimization
// not only in SSA form but also after RA, since some RLWINMs are generated
// after RA.
bool PPCInstrInfo::simplifyRotateAndMaskInstr(MachineInstr &MI,
MachineInstr *&ToErase) const {
bool Is64Bit = false;
switch (MI.getOpcode()) {
case PPC::RLWINM:
case PPC::RLWINM_rec:
break;
case PPC::RLWINM8:
case PPC::RLWINM8_rec:
Is64Bit = true;
break;
default:
return false;
}
MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();
unsigned FoldingReg = MI.getOperand(1).getReg();
if (!Register::isVirtualRegister(FoldingReg))
Register FoldingReg = MI.getOperand(1).getReg();
MachineInstr *SrcMI = nullptr;
bool NoUse = false;
if (MRI->isSSA()) {
if (!Register::isVirtualRegister(FoldingReg))
return false;
SrcMI = MRI->getVRegDef(FoldingReg);
} else {
bool OtherIntermediateUse = false;
SrcMI = getDefMIPostRA(FoldingReg, MI, OtherIntermediateUse);
NoUse = !OtherIntermediateUse && MI.getOperand(1).isKill();
}
if (!SrcMI)
return false;
MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg);
if (SrcMI->getOpcode() != PPC::RLWINM &&
SrcMI->getOpcode() != PPC::RLWINM_rec &&
SrcMI->getOpcode() != PPC::RLWINM8 &&
SrcMI->getOpcode() != PPC::RLWINM8_rec)
// TODO: The pairs RLWINM8(RLWINM) and RLWINM(RLWINM8) never occur before RA,
// only after RA. We can fold RLWINM8(RLWINM) -> RLWINM8, and
// RLWINM(RLWINM8) -> RLWINM.
switch (SrcMI->getOpcode()) {
case PPC::RLWINM:
case PPC::RLWINM_rec:
if (Is64Bit)
return false;
break;
case PPC::RLWINM8:
case PPC::RLWINM8_rec:
if (!Is64Bit)
return false;
break;
default:
return false;
}
assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() &&
MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() &&
SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) &&
Expand Down Expand Up @@ -3256,8 +3293,6 @@ bool PPCInstrInfo::combineRLWINM(MachineInstr &MI,

// If final mask is 0, MI result should be 0 too.
if (FinalMask.isNullValue()) {
bool Is64Bit =
(MI.getOpcode() == PPC::RLWINM8 || MI.getOpcode() == PPC::RLWINM8_rec);
Simplified = true;
LLVM_DEBUG(dbgs() << "Replace Instr: ");
LLVM_DEBUG(MI.dump());
Expand Down Expand Up @@ -3315,14 +3350,15 @@ bool PPCInstrInfo::combineRLWINM(MachineInstr &MI,
LLVM_DEBUG(dbgs() << "To: ");
LLVM_DEBUG(MI.dump());
}
if (Simplified & MRI->use_nodbg_empty(FoldingReg) &&
!SrcMI->hasImplicitDef()) {
// If FoldingReg has no non-debug use and it has no implicit def (it
// is not RLWINMO or RLWINM8o), it's safe to delete its def SrcMI.
// Otherwise keep it.
*ToErase = SrcMI;
LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
LLVM_DEBUG(SrcMI->dump());
if (Simplified && !SrcMI->hasImplicitDef()) {
// If SrcMI has no implicit def, and FoldingReg either has no non-debug use
// (in SSA) or is killed by MI with no other intermediate use (after RA),
// it's safe to delete SrcMI. Otherwise keep it.
if ((!MRI->isSSA() && NoUse) ||
(MRI->isSSA() && MRI->use_nodbg_empty(FoldingReg))) {
ToErase = SrcMI;
LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
LLVM_DEBUG(SrcMI->dump());
}
}
return Simplified;
}
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/PowerPC/PPCInstrInfo.h
Expand Up @@ -564,7 +564,8 @@ class PPCInstrInfo : public PPCGenInstrInfo {
bool convertToImmediateForm(MachineInstr &MI,
MachineInstr **KilledDef = nullptr) const;
bool foldFrameOffset(MachineInstr &MI) const;
bool combineRLWINM(MachineInstr &MI, MachineInstr **ToErase = nullptr) const;
bool simplifyRotateAndMaskInstr(MachineInstr &MI,
MachineInstr *&ToErase) const;
bool isADDIInstrEligibleForFolding(MachineInstr &ADDIMI, int64_t &Imm) const;
bool isADDInstrEligibleForFolding(MachineInstr &ADDMI) const;
bool isImmInstrEligibleForFolding(MachineInstr &MI, unsigned &BaseReg,
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
Expand Up @@ -848,7 +848,7 @@ bool PPCMIPeephole::simplifyCode(void) {
case PPC::RLWINM_rec:
case PPC::RLWINM8:
case PPC::RLWINM8_rec: {
Simplified = TII->combineRLWINM(MI, &ToErase);
Simplified = TII->simplifyRotateAndMaskInstr(MI, ToErase);
if (Simplified)
++NumRotatesCollapsed;
break;
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
Expand Up @@ -37,6 +37,8 @@ STATISTIC(NumberOfSelfCopies,
"Number of self copy instructions eliminated");
STATISTIC(NumFrameOffFoldInPreEmit,
"Number of folding frame offset by using r+r in pre-emit peephole");
STATISTIC(NumRotateInstrFoldInPreEmit,
"Number of folding Rotate instructions in pre-emit peephole");

static cl::opt<bool>
EnablePCRelLinkerOpt("ppc-pcrel-linker-opt", cl::Hidden, cl::init(true),
Expand Down Expand Up @@ -413,6 +415,13 @@ static bool hasPCRelativeForm(MachineInstr &Use) {
LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: ");
LLVM_DEBUG(MI.dump());
}
MachineInstr *ToErase = nullptr;
if (TII->simplifyRotateAndMaskInstr(MI, ToErase)) {
Changed = true;
NumRotateInstrFoldInPreEmit++;
if (ToErase)
InstrsToErase.push_back(ToErase);
}
}

// Eliminate conditional branch based on a constant CR bit by
Expand Down
104 changes: 104 additions & 0 deletions llvm/test/CodeGen/PowerPC/fold-rlwinm-after-ra.mir
@@ -0,0 +1,104 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -stop-after \
# RUN: ppc-pre-emit-peephole %s -o - | FileCheck %s

---
name: testFoldRLWINM
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $r3
; CHECK-LABEL: name: testFoldRLWINM
; CHECK: liveins: $r3
; CHECK: renamable $r3 = RLWINM killed renamable $r3, 14, 0, 12, implicit-def $x3
; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
$r3 = RLWINM killed $r3, 27, 5, 31
dead renamable $r3 = RLWINM killed renamable $r3, 19, 0, 12, implicit-def $x3
BLR8 implicit $lr8, implicit $rm, implicit killed $x3
...
---
name: testFoldRLWINMSrcFullMask
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $r3
; CHECK-LABEL: name: testFoldRLWINMSrcFullMask
; CHECK: liveins: $r3
; CHECK: renamable $r3 = RLWINM killed renamable $r3, 14, 0, 12, implicit-def $x3
; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
$r3 = RLWINM killed $r3, 27, 0, 31
dead renamable $r3 = RLWINM killed renamable $r3, 19, 0, 12, implicit-def $x3
BLR8 implicit $lr8, implicit $rm, implicit killed $x3
...
---
name: testFoldRLWINMSrcWrapped
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $r3
; CHECK-LABEL: name: testFoldRLWINMSrcWrapped
; CHECK: liveins: $r3
; CHECK: renamable $r3 = RLWINM killed renamable $r3, 14, 11, 12, implicit-def $x3
; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
$r3 = RLWINM killed $r3, 27, 30, 10
dead renamable $r3 = RLWINM killed renamable $r3, 19, 0, 12, implicit-def $x3
BLR8 implicit $lr8, implicit $rm, implicit killed $x3
...
---
name: testFoldRLWINMToZero
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $r3
; CHECK-LABEL: name: testFoldRLWINMToZero
; CHECK: liveins: $r3
; CHECK: renamable $r3 = LI 0, implicit-def $x3
; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
$r3 = RLWINM killed $r3, 27, 5, 10
dead renamable $r3 = RLWINM killed renamable $r3, 8, 5, 10, implicit-def $x3
BLR8 implicit $lr8, implicit $rm, implicit killed $x3
...
---
name: testFoldRLWINM_recToZero
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $r3
; CHECK-LABEL: name: testFoldRLWINM_recToZero
; CHECK: liveins: $r3
; CHECK: dead renamable $r3 = ANDI_rec killed renamable $r3, 0, implicit-def $cr0
; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
$r3 = RLWINM killed $r3, 27, 5, 10
dead renamable $r3 = RLWINM_rec killed renamable $r3, 8, 5, 10, implicit-def $cr0
BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
...
---
name: testFoldRLWINMoToZeroSrcCanNotBeDeleted
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $r3
; CHECK-LABEL: name: testFoldRLWINMoToZeroSrcCanNotBeDeleted
; CHECK: liveins: $r3
; CHECK: $r3 = RLWINM_rec $r3, 27, 5, 10, implicit-def dead $cr0
; CHECK: dead renamable $r3 = ANDI_rec killed renamable $r3, 0, implicit-def $cr0
; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
$r3 = RLWINM_rec $r3, 27, 5, 10, implicit-def $cr0
dead renamable $r3 = RLWINM_rec killed renamable $r3, 8, 5, 10, implicit-def $cr0
BLR8 implicit $lr8, implicit $rm, implicit killed $cr0
...
---
name: testFoldRLWINMInvalidMask
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $r3
; CHECK-LABEL: name: testFoldRLWINMInvalidMask
; CHECK: liveins: $r3
; CHECK: $r3 = RLWINM killed $r3, 20, 5, 31
; CHECK: renamable $r3 = RLWINM killed renamable $r3, 19, 10, 20, implicit-def $x3
; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
$r3 = RLWINM killed $r3, 20, 5, 31
dead renamable $r3 = RLWINM killed renamable $r3, 19, 10, 20, implicit-def $x3
BLR8 implicit $lr8, implicit $rm, implicit killed $x3
...
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/PowerPC/sms-phi-5.ll
Expand Up @@ -14,9 +14,8 @@ define void @phi5() unnamed_addr {
; CHECK-NEXT: # %bb.2:
; CHECK-NEXT: lhz 3, 0(3)
; CHECK-NEXT: slwi 3, 3, 15
; CHECK-NEXT: clrlwi 3, 3, 31
; CHECK-NEXT: rlwinm 4, 3, 31, 17, 31
; CHECK-NEXT: or 3, 3, 4
; CHECK-NEXT: li 4, 0
; CHECK-NEXT: ori 3, 4, 0
; CHECK-NEXT: rlwimi 3, 3, 15, 0, 16
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: blr
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/PowerPC/vsx_builtins.ll
Expand Up @@ -131,8 +131,7 @@ define i32 @xvtdivdp_shift(<2 x double> %a, <2 x double> %b) {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvtdivdp cr0, v2, v3
; CHECK-NEXT: mfocrf r3, 128
; CHECK-NEXT: srwi r3, r3, 28
; CHECK-NEXT: rlwinm r3, r3, 28, 31, 31
; CHECK-NEXT: li r3, 0
; CHECK-NEXT: blr
entry:
%0 = tail call i32 @llvm.ppc.vsx.xvtdivdp(<2 x double> %a, <2 x double> %b)
Expand Down

1 comment on commit 119ab21

@lei137
Copy link
Contributor

@lei137 lei137 commented on 119ab21 Nov 3, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This commit caused stage2 LIT failures on the bots ppc64le-lld-multistage and ppc64le-clang-multistage-test:

********************
Failed Tests (8):
  Clang :: CodeGen/builtins-systemz-zvector.c
  LLVM :: CodeGen/SystemZ/splitMove_undefReg_mverifier_2.ll
  LLVM :: CodeGen/SystemZ/vec-cmp-cmp-logic-select.ll
  LLVM :: CodeGen/SystemZ/vec-cmpsel.ll
  LLVM :: CodeGen/SystemZ/vec-combine-02.ll
  LLVM :: CodeGen/SystemZ/vec-move-17.ll
  LLVM :: CodeGen/SystemZ/vec-perm-06.ll
  LLVM :: CodeGen/SystemZ/vec-zext.ll
Testing Time: 148.90s
  Unsupported      :   729
  Passed           : 68607
  Expectedly Failed:   179
  Failed           :     8

Please sign in to comment.