Skip to content

Commit

Permalink
[PowerPC] Optimize compare by using record form in post-RA.
Browse files Browse the repository at this point in the history
Summary: We currently optimize comparisons only while the function is in SSA form, so we miss optimization opportunities where the input of the comparison is lowered from a COPY in post-RA.
I.e., ExpandPostRA::LowerCopy is called after PPCInstrInfo::optimizeCompareInstr.
This patch optimizes the comparison in post-RA; only comparisons against zero are handled.
D131374 converts the comparison and its user to a compare against zero with the appropriate predicate on the branch, which creates additional opportunities for this patch.

Reviewed By: shchenz, lkail

Differential Revision: https://reviews.llvm.org/D131873
  • Loading branch information
EsmeYi committed Oct 31, 2022
1 parent cb33ef7 commit d1115c2
Show file tree
Hide file tree
Showing 5 changed files with 242 additions and 9 deletions.
86 changes: 84 additions & 2 deletions llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
Expand Up @@ -2768,6 +2768,85 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
return true;
}

/// Try to make a post-RA compare against zero redundant by switching the
/// instruction that defines its source register to the record form, so that
/// the defining instruction itself produces CR0.
///
/// \param CmpMI the candidate compare instruction.
/// \returns true if the defining instruction was rewritten to its record
/// form. CmpMI is not erased here; the debug output labels it as dead, so the
/// caller is expected to remove it.
bool PPCInstrInfo::optimizeCmpPostRA(MachineInstr &CmpMI) const {
  // Nothing to do while the function is still in SSA form.
  const MachineRegisterInfo &RegInfo =
      CmpMI.getParent()->getParent()->getRegInfo();
  if (RegInfo.isSSA())
    return false;

  Register SrcReg, SrcReg2;
  int64_t CmpMask, CmpValue;
  if (!analyzeCompare(CmpMI, SrcReg, SrcReg2, CmpMask, CmpValue))
    return false;

  // Only a compare of a single register against the immediate 0 is handled.
  if (CmpValue != 0)
    return false;
  if (CmpMask == 0)
    return false;
  if (SrcReg2)
    return false;

  switch (CmpMI.getOpcode()) {
  case PPC::CMPLWI:
  case PPC::CMPLDI:
    // The record forms set the condition register based on a signed
    // comparison with zero (see comments in optimizeCompareInstr). The
    // equality checks are not available in post-RA, so unsigned compares are
    // rejected outright.
    return false;
  case PPC::CMPWI:
    // The record forms are always based on a 64-bit comparison on PPC64
    // (similarly, a 32-bit comparison on PPC32), while CMPWI is a 32-bit
    // comparison. Without the equality checks in post-RA this case has to be
    // given up on PPC64.
    if (Subtarget.isPPC64())
      return false;
    break;
  default:
    break;
  }

  // A compare carrying an implicit def cannot be deleted.
  if (CmpMI.hasImplicitDef())
    return false;

  // Find the instruction that defines the compared register.
  bool HasOtherUseOfSrc = false;
  MachineInstr *DefMI = getDefMIPostRA(SrcReg, CmpMI, HasOtherUseOfSrc);
  if (!DefMI)
    return false;
  if (!DefMI->definesRegister(SrcReg))
    return false;

  // Only a compare that writes CR0 is a candidate.
  MachineOperand CROperand = CmpMI.getOperand(0);
  Register CRReg = CROperand.getReg();
  if (CRReg != PPC::CR0)
    return false;

  // Make sure there is no def/use of CRReg between DefMI and CmpMI. The three
  // tests stay in this order because the forwarding query fills in
  // UseOfCRSeen through its reference parameter.
  bool UseOfCRSeen = false;
  bool CRKilled = false;
  if (!isRegElgibleForForwarding(CROperand, *DefMI, CmpMI, false, CRKilled,
                                 UseOfCRSeen))
    return false;
  if (DefMI->definesRegister(CRReg))
    return false;
  if (UseOfCRSeen)
    return false;

  // The defining instruction must have a record-form twin.
  int RecordOpc = PPC::getRecordFormOpcode(DefMI->getOpcode());
  if (RecordOpc == -1)
    return false;

  LLVM_DEBUG(dbgs() << "Replace Instr: ");
  LLVM_DEBUG(DefMI->dump());

  // Switch to the record form and attach the implicit def of CRReg to it.
  DefMI->setDesc(get(RecordOpc));
  MachineInstrBuilder(*DefMI->getParent()->getParent(), DefMI)
      .addReg(CRReg, RegState::ImplicitDefine);
  DefMI->clearRegisterDeads(CRReg);

  // Fix up killed/dead flag for SrcReg after transformation.
  const bool NeedsFlagFixup =
      HasOtherUseOfSrc || CmpMI.getOperand(1).isKill();
  if (NeedsFlagFixup)
    fixupIsDeadOrKill(DefMI, &CmpMI, SrcReg);

  assert(DefMI->definesRegister(PPC::CR0) &&
         "Record-form instruction does not define cr0?");

  LLVM_DEBUG(dbgs() << "with: ");
  LLVM_DEBUG(DefMI->dump());
  LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
  LLVM_DEBUG(CmpMI.dump());
  return true;
}

bool PPCInstrInfo::getMemOperandsWithOffsetWidth(
const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
Expand Down Expand Up @@ -4427,7 +4506,7 @@ bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI,
bool PPCInstrInfo::isRegElgibleForForwarding(
const MachineOperand &RegMO, const MachineInstr &DefMI,
const MachineInstr &MI, bool KillDefMI,
bool &IsFwdFeederRegKilled) const {
bool &IsFwdFeederRegKilled, bool &SeenIntermediateUse) const {
// x = addi y, imm
// ...
// z = lfdx 0, x -> z = lfd imm(y)
Expand All @@ -4449,6 +4528,8 @@ bool PPCInstrInfo::isRegElgibleForForwarding(
return false;
else if (It->killsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
IsFwdFeederRegKilled = true;
if (It->readsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
SeenIntermediateUse = true;
// Made it to DefMI without encountering a clobber.
if ((&*It) == &DefMI)
break;
Expand Down Expand Up @@ -4888,9 +4969,10 @@ bool PPCInstrInfo::transformToImmFormFedByAdd(
return false;

bool IsFwdFeederRegKilled = false;
bool SeenIntermediateUse = false;
// Check if the RegMO can be forwarded to MI.
if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI,
IsFwdFeederRegKilled))
IsFwdFeederRegKilled, SeenIntermediateUse))
return false;

// Get killed info in case fixup needed after transformation.
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Target/PowerPC/PPCInstrInfo.h
Expand Up @@ -251,7 +251,8 @@ class PPCInstrInfo : public PPCGenInstrInfo {
bool isRegElgibleForForwarding(const MachineOperand &RegMO,
const MachineInstr &DefMI,
const MachineInstr &MI, bool KillDefMI,
bool &IsFwdFeederRegKilled) const;
bool &IsFwdFeederRegKilled,
bool &SeenIntermediateUse) const;
unsigned getSpillTarget() const;
const unsigned *getStoreOpcodesForSpillArray() const;
const unsigned *getLoadOpcodesForSpillArray() const;
Expand Down Expand Up @@ -644,6 +645,8 @@ class PPCInstrInfo : public PPCGenInstrInfo {
int64_t &Offset, unsigned &Width,
const TargetRegisterInfo *TRI) const;

bool optimizeCmpPostRA(MachineInstr &MI) const;

/// Get the base operand and byte offset of an instruction that reads/writes
/// memory.
bool getMemOperandsWithOffsetWidth(
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
Expand Up @@ -38,6 +38,8 @@ STATISTIC(NumberOfSelfCopies,
"Number of self copy instructions eliminated");
STATISTIC(NumFrameOffFoldInPreEmit,
"Number of folding frame offset by using r+r in pre-emit peephole");
STATISTIC(NumCmpsInPreEmit,
"Number of compares eliminated in pre-emit peephole");

static cl::opt<bool>
EnablePCRelLinkerOpt("ppc-pcrel-linker-opt", cl::Hidden, cl::init(true),
Expand Down Expand Up @@ -508,6 +510,13 @@ static bool hasPCRelativeForm(MachineInstr &Use) {
LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: ");
LLVM_DEBUG(MI.dump());
}
if (TII->optimizeCmpPostRA(MI)) {
Changed = true;
NumCmpsInPreEmit++;
LLVM_DEBUG(dbgs() << "Optimize compare by using record form: ");
LLVM_DEBUG(MI.dump());
InstrsToErase.push_back(&MI);
}
}

// Eliminate conditional branch based on a constant CR bit by
Expand Down
142 changes: 142 additions & 0 deletions llvm/test/CodeGen/PowerPC/opt-cmp-rec-postra.mir
@@ -0,0 +1,142 @@
# RUN: llc -mtriple=powerpc64le-linux-gnu -stop-after ppc-pre-emit-peephole %s -o - -verify-machineinstrs | FileCheck %s

---
name: test1
# The cmp instr is optimized with the record form.
tracksRegLiveness: true
body: |
bb.0.entry:
successors: %bb.1(0x30000000), %bb.2(0x50000000)
liveins: $x3, $x4
renamable $x3 = OR8 killed renamable $x3, killed renamable $x4
renamable $cr0 = CMPDI renamable $x3, 0, implicit killed $x3
; CHECK-LABEL: name: test1
; CHECK: renamable $x3 = OR8_rec renamable $x3, killed renamable $x4, implicit-def $cr0
; CHECK-NOT: CMPDI
BCC 68, killed renamable $cr0, %bb.2
bb.1:
$x3 = LI8 102
BLR8 implicit $lr8, implicit $rm, implicit $x3
bb.2:
$x3 = LI8 116
BLR8 implicit $lr8, implicit $rm, implicit $x3
...

---
name: test2
# The imm of the comparison instr isn't 0.
tracksRegLiveness: true
body: |
bb.0.entry:
successors: %bb.1(0x30000000), %bb.2(0x50000000)
liveins: $x3, $x4
renamable $x3 = OR8 killed renamable $x3, killed renamable $x4
renamable $cr0 = CMPDI renamable $x3, 2, implicit killed $x3
; CHECK-LABEL: name: test2
; CHECK: CMPDI
BCC 68, killed renamable $cr0, %bb.2
bb.1:
$x3 = LI8 102
BLR8 implicit $lr8, implicit $rm, implicit $x3
bb.2:
$x3 = LI8 116
BLR8 implicit $lr8, implicit $rm, implicit $x3
...

---
name: test3
# The comparison instr has an implicit def.
tracksRegLiveness: true
body: |
bb.0.entry:
successors: %bb.1(0x30000000), %bb.2(0x50000000)
liveins: $x3, $x4
renamable $x3 = OR8 killed renamable $x3, killed renamable $x4
renamable $cr0 = CMPDI renamable $x3, 0, implicit-def $x3
; CHECK-LABEL: name: test3
; CHECK: CMPDI
BCC 68, killed renamable $cr0, %bb.2
bb.1:
$x3 = LI8 102
BLR8 implicit $lr8, implicit $rm, implicit $x3
bb.2:
$x3 = LI8 116
BLR8 implicit $lr8, implicit $rm, implicit $x3
...

---
name: test4
# There is another use of cr0 between the OR8 instr and the CMPDI instr.
tracksRegLiveness: true
body: |
bb.0.entry:
successors: %bb.1(0x30000000), %bb.2(0x50000000)
liveins: $x3, $x4, $cr0
renamable $x3 = OR8 killed renamable $x3, killed renamable $x4
renamable $cr1 = MCRF killed $cr0, implicit $x3
renamable $cr0 = CMPDI renamable $x3, 0, implicit killed $x3, implicit $cr1
; CHECK-LABEL: name: test4
; CHECK: CMPDI
BCC 68, killed renamable $cr0, %bb.2
bb.1:
$x3 = LI8 102
BLR8 implicit $lr8, implicit $rm, implicit $x3
bb.2:
$x3 = LI8 116
BLR8 implicit $lr8, implicit $rm, implicit $x3
...

---
name: test5
# There is another def of cr0 between the OR8 instr and the CMPDI instr.
tracksRegLiveness: true
body: |
bb.0.entry:
successors: %bb.1(0x30000000), %bb.2(0x50000000)
liveins: $x3, $x4
renamable $x3 = OR8 killed renamable $x3, renamable $x4
renamable $cr1 = CMPD renamable $x3, renamable $x4, implicit-def $cr0
renamable $cr0 = CMPDI renamable $x3, 0, implicit killed $x3, implicit $cr1
; CHECK-LABEL: name: test5
; CHECK: CMPDI
BCC 68, killed renamable $cr0, %bb.2
bb.1:
$x3 = LI8 102
BLR8 implicit $lr8, implicit $rm, implicit $x3
bb.2:
$x3 = LI8 116
BLR8 implicit $lr8, implicit $rm, implicit $x3
...

---
name: test6
# The CR register defined by the comparison isn't CR0.
tracksRegLiveness: true
body: |
bb.0.entry:
successors: %bb.1(0x30000000), %bb.2(0x50000000)
liveins: $x3, $x4
renamable $x3 = OR8 killed renamable $x3, killed renamable $x4
renamable $cr1 = CMPDI renamable $x3, 0, implicit killed $x3
; CHECK-LABEL: name: test6
; CHECK: CMPDI
BCC 68, killed renamable $cr1, %bb.2
bb.1:
$x3 = LI8 102
BLR8 implicit $lr8, implicit $rm, implicit $x3
bb.2:
$x3 = LI8 116
BLR8 implicit $lr8, implicit $rm, implicit $x3
...
9 changes: 3 additions & 6 deletions llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll
Expand Up @@ -2946,10 +2946,9 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
; LE-P10-O0-NEXT: std r0, 16(r1)
; LE-P10-O0-NEXT: hashst r0, -8(r1)
; LE-P10-O0-NEXT: stdu r1, -64(r1)
; LE-P10-O0-NEXT: mr r4, r3
; LE-P10-O0-NEXT: mr. r4, r3
; LE-P10-O0-NEXT: std r4, 40(r1) # 8-byte Folded Spill
; LE-P10-O0-NEXT: li r3, 0
; LE-P10-O0-NEXT: cmpdi r4, 0
; LE-P10-O0-NEXT: stw r3, 48(r1) # 4-byte Folded Spill
; LE-P10-O0-NEXT: beq cr0, .LBB2_2
; LE-P10-O0-NEXT: # %bb.1: # %if.end
Expand Down Expand Up @@ -2979,10 +2978,9 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
; LE-P9-O0-NEXT: std r0, 16(r1)
; LE-P9-O0-NEXT: hashst r0, -8(r1)
; LE-P9-O0-NEXT: stdu r1, -128(r1)
; LE-P9-O0-NEXT: mr r4, r3
; LE-P9-O0-NEXT: mr. r4, r3
; LE-P9-O0-NEXT: std r4, 104(r1) # 8-byte Folded Spill
; LE-P9-O0-NEXT: li r3, 0
; LE-P9-O0-NEXT: cmpdi r4, 0
; LE-P9-O0-NEXT: stw r3, 112(r1) # 4-byte Folded Spill
; LE-P9-O0-NEXT: beq cr0, .LBB2_2
; LE-P9-O0-NEXT: # %bb.1: # %if.end
Expand Down Expand Up @@ -3012,10 +3010,9 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
; LE-P8-O0-NEXT: std r0, 16(r1)
; LE-P8-O0-NEXT: hashst r0, -8(r1)
; LE-P8-O0-NEXT: stdu r1, -128(r1)
; LE-P8-O0-NEXT: mr r4, r3
; LE-P8-O0-NEXT: mr. r4, r3
; LE-P8-O0-NEXT: std r4, 104(r1) # 8-byte Folded Spill
; LE-P8-O0-NEXT: li r3, 0
; LE-P8-O0-NEXT: cmpdi r4, 0
; LE-P8-O0-NEXT: stw r3, 112(r1) # 4-byte Folded Spill
; LE-P8-O0-NEXT: beq cr0, .LBB2_2
; LE-P8-O0-NEXT: # %bb.1: # %if.end
Expand Down

0 comments on commit d1115c2

Please sign in to comment.