Skip to content

Commit

Permalink
[PowerPC] Add peephole to remove redundant accumulator prime/unprime …
Browse files Browse the repository at this point in the history
…instructions

In some situations, the compiler may insert an accumulator prime instruction and
an accumulator unprime instruction with no use of that accumulator between the two.
That's for example the case when we store an accumulator after assembling it or
restoring it. This patch adds a peephole to remove these prime and unprime instructions.

Differential Revision: https://reviews.llvm.org/D91386
  • Loading branch information
Baptiste Saleil committed Nov 18, 2020
1 parent 7bf89c2 commit 18db29e
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 34 deletions.
59 changes: 59 additions & 0 deletions llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
Expand Up @@ -349,6 +349,64 @@ static bool hasPCRelativeForm(MachineInstr &Use) {
return MadeChange;
}

// This function removes redundant pairs of accumulator prime/unprime
// instructions. In some situations, it's possible the compiler inserts an
// accumulator prime instruction followed by an unprime instruction (e.g.
// when we store an accumulator after restoring it from a spill). If the
// accumulator is not used between the two, they can be removed. This
// function removes these redundant pairs from basic blocks.
// The algorithm is quite straightforward - every time we encounter a prime
// instruction, the primed register is added to a candidate set. Any use
// other than a prime or an unprime removes the candidate from the set, and
// any unprime of a current candidate marks both the prime and the unprime
// for removal. This way we ensure we only remove prime/unprime *pairs* with
// no intervening uses.
bool removeAccPrimeUnprime(MachineBasicBlock &MBB) {
  // One slot per accumulator, indexed by the register's offset from ACC0.
  // A non-null slot holds the most recent XXMTACC seen for that accumulator
  // that has not been invalidated by another accumulator use.
  SmallVector<MachineInstr *, 8> Candidates(
      PPC::UACCRCRegClass.getNumRegs(), nullptr);
  // Instructions to delete once the scan is finished. Deletion is deferred
  // so the block is not modified while it is being iterated.
  DenseSet<MachineInstr *> InstrsToErase;

  for (MachineInstr &BBI : MBB.instrs()) {
    switch (BBI.getOpcode()) {
    case PPC::XXMTACC: {
      // A prime becomes the current candidate for its accumulator,
      // replacing any earlier candidate for the same register.
      Register Primed = BBI.getOperand(0).getReg();
      assert(PPC::ACCRCRegClass.contains(Primed) &&
             "Unexpected register for XXMTACC");
      Candidates[Primed - PPC::ACC0] = &BBI;
      break;
    }
    case PPC::XXMFACC: {
      // An unprime of an accumulator that has a live candidate: mark both
      // this instruction and the recorded prime for removal. The slot is
      // left populated, so a later unprime of the same accumulator with no
      // use in between is marked as well.
      Register Unprimed = BBI.getOperand(0).getReg();
      assert(PPC::ACCRCRegClass.contains(Unprimed) &&
             "Unexpected register for XXMFACC");
      if (MachineInstr *Prime = Candidates[Unprimed - PPC::ACC0]) {
        InstrsToErase.insert(&BBI);
        InstrsToErase.insert(Prime);
      }
      break;
    }
    default:
      // Any other instruction: every accumulator register appearing among
      // its operands stops being a candidate.
      for (MachineOperand &Operand : BBI.operands()) {
        if (Operand.isReg()) {
          Register Touched = Operand.getReg();
          if (PPC::ACCRCRegClass.contains(Touched))
            Candidates[Touched - PPC::ACC0] = nullptr;
        }
      }
      break;
    }
  }

  // Report a change iff at least one instruction was marked; the statistic
  // counts every erased instruction.
  const bool Changed = !InstrsToErase.empty();
  NumRemovedInPreEmit += InstrsToErase.size();
  for (MachineInstr *Dead : InstrsToErase)
    Dead->eraseFromParent();
  return Changed;
}

bool runOnMachineFunction(MachineFunction &MF) override {
if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) {
// Remove UNENCODED_NOP even when this pass is disabled.
Expand All @@ -370,6 +428,7 @@ static bool hasPCRelativeForm(MachineInstr &Use) {
for (MachineBasicBlock &MBB : MF) {
Changed |= removeRedundantLIs(MBB, TRI);
Changed |= addLinkerOpt(MBB, TRI);
Changed |= removeAccPrimeUnprime(MBB);
for (MachineInstr &MI : MBB) {
unsigned Opc = MI.getOpcode();
if (Opc == PPC::UNENCODED_NOP) {
Expand Down
12 changes: 0 additions & 12 deletions llvm/test/CodeGen/PowerPC/mma-acc-memops.ll
Expand Up @@ -16,8 +16,6 @@ define void @testLdSt(i64 %SrcIdx, i64 %DstIdx) {
; LE-PAIRED-NEXT: plxv vs0, f@PCREL+112(0), 1
; LE-PAIRED-NEXT: plxv vs3, f@PCREL+64(0), 1
; LE-PAIRED-NEXT: plxv vs2, f@PCREL+80(0), 1
; LE-PAIRED-NEXT: xxmtacc acc0
; LE-PAIRED-NEXT: xxmfacc acc0
; LE-PAIRED-NEXT: pstxv vs0, f@PCREL+176(0), 1
; LE-PAIRED-NEXT: pstxv vs1, f@PCREL+160(0), 1
; LE-PAIRED-NEXT: pstxv vs2, f@PCREL+144(0), 1
Expand All @@ -32,8 +30,6 @@ define void @testLdSt(i64 %SrcIdx, i64 %DstIdx) {
; BE-PAIRED-NEXT: lxv vs0, 64(r3)
; BE-PAIRED-NEXT: lxv vs3, 112(r3)
; BE-PAIRED-NEXT: lxv vs2, 96(r3)
; BE-PAIRED-NEXT: xxmtacc acc0
; BE-PAIRED-NEXT: xxmfacc acc0
; BE-PAIRED-NEXT: stxv vs1, 144(r3)
; BE-PAIRED-NEXT: stxv vs0, 128(r3)
; BE-PAIRED-NEXT: stxv vs3, 176(r3)
Expand All @@ -58,8 +54,6 @@ define void @testXLdSt(i64 %SrcIdx, i64 %DstIdx) {
; LE-PAIRED-NEXT: lxvx vs3, r5, r3
; LE-PAIRED-NEXT: lxv vs2, 16(r6)
; LE-PAIRED-NEXT: sldi r3, r4, 6
; LE-PAIRED-NEXT: xxmtacc acc0
; LE-PAIRED-NEXT: xxmfacc acc0
; LE-PAIRED-NEXT: stxvx vs3, r5, r3
; LE-PAIRED-NEXT: add r3, r5, r3
; LE-PAIRED-NEXT: stxv vs0, 48(r3)
Expand All @@ -78,8 +72,6 @@ define void @testXLdSt(i64 %SrcIdx, i64 %DstIdx) {
; BE-PAIRED-NEXT: lxv vs1, 16(r6)
; BE-PAIRED-NEXT: lxv vs3, 48(r6)
; BE-PAIRED-NEXT: lxv vs2, 32(r6)
; BE-PAIRED-NEXT: xxmtacc acc0
; BE-PAIRED-NEXT: xxmfacc acc0
; BE-PAIRED-NEXT: stxvx vs0, r5, r3
; BE-PAIRED-NEXT: add r3, r5, r3
; BE-PAIRED-NEXT: stxv vs1, 16(r3)
Expand All @@ -101,8 +93,6 @@ define void @testUnalignedLdSt() {
; LE-PAIRED-NEXT: plxv vs0, f@PCREL+59(0), 1
; LE-PAIRED-NEXT: plxv vs3, f@PCREL+11(0), 1
; LE-PAIRED-NEXT: plxv vs2, f@PCREL+27(0), 1
; LE-PAIRED-NEXT: xxmtacc acc0
; LE-PAIRED-NEXT: xxmfacc acc0
; LE-PAIRED-NEXT: pstxv vs0, f@PCREL+67(0), 1
; LE-PAIRED-NEXT: pstxv vs1, f@PCREL+51(0), 1
; LE-PAIRED-NEXT: pstxv vs2, f@PCREL+35(0), 1
Expand All @@ -122,8 +112,6 @@ define void @testUnalignedLdSt() {
; BE-PAIRED-NEXT: li r4, 59
; BE-PAIRED-NEXT: lxvx vs3, r3, r4
; BE-PAIRED-NEXT: li r4, 35
; BE-PAIRED-NEXT: xxmtacc acc0
; BE-PAIRED-NEXT: xxmfacc acc0
; BE-PAIRED-NEXT: stxvx vs1, r3, r4
; BE-PAIRED-NEXT: li r4, 19
; BE-PAIRED-NEXT: stxvx vs0, r3, r4
Expand Down
22 changes: 0 additions & 22 deletions llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
Expand Up @@ -16,8 +16,6 @@ define void @ass_acc(<512 x i1>* %ptr, <16 x i8> %vc) {
; CHECK-NEXT: xxlor vs1, v3, v3
; CHECK-NEXT: xxlor vs2, v2, v2
; CHECK-NEXT: xxlor vs3, v3, v3
; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: xxmfacc acc0
; CHECK-NEXT: stxv vs0, 48(r3)
; CHECK-NEXT: stxv vs1, 32(r3)
; CHECK-NEXT: stxv vs2, 16(r3)
Expand All @@ -31,8 +29,6 @@ define void @ass_acc(<512 x i1>* %ptr, <16 x i8> %vc) {
; CHECK-BE-NEXT: xxlor vs1, v3, v3
; CHECK-BE-NEXT: xxlor vs2, v2, v2
; CHECK-BE-NEXT: xxlor vs3, v3, v3
; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: xxmfacc acc0
; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: stxv vs3, 48(r3)
Expand Down Expand Up @@ -77,8 +73,6 @@ define void @int_xxmtacc(<512 x i1>* %ptr, <16 x i8> %vc) {
; CHECK-NEXT: xxlor vs2, v2, v2
; CHECK-NEXT: xxlor vs3, v3, v3
; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: xxmfacc acc0
; CHECK-NEXT: stxv vs0, 48(r3)
; CHECK-NEXT: stxv vs1, 32(r3)
; CHECK-NEXT: stxv vs2, 16(r3)
Expand All @@ -93,8 +87,6 @@ define void @int_xxmtacc(<512 x i1>* %ptr, <16 x i8> %vc) {
; CHECK-BE-NEXT: xxlor vs2, v2, v2
; CHECK-BE-NEXT: xxlor vs3, v3, v3
; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: xxmfacc acc0
; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: stxv vs3, 48(r3)
Expand All @@ -119,9 +111,6 @@ define void @int_xxmfacc(<512 x i1>* %ptr, <16 x i8> %vc) {
; CHECK-NEXT: xxlor vs1, v3, v3
; CHECK-NEXT: xxlor vs2, v2, v2
; CHECK-NEXT: xxlor vs3, v3, v3
; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: xxmfacc acc0
; CHECK-NEXT: xxmfacc acc0
; CHECK-NEXT: stxv vs0, 48(r3)
; CHECK-NEXT: stxv vs1, 32(r3)
; CHECK-NEXT: stxv vs2, 16(r3)
Expand All @@ -135,9 +124,6 @@ define void @int_xxmfacc(<512 x i1>* %ptr, <16 x i8> %vc) {
; CHECK-BE-NEXT: xxlor vs1, v3, v3
; CHECK-BE-NEXT: xxlor vs2, v2, v2
; CHECK-BE-NEXT: xxlor vs3, v3, v3
; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: xxmfacc acc0
; CHECK-BE-NEXT: xxmfacc acc0
; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: stxv vs3, 48(r3)
Expand Down Expand Up @@ -262,8 +248,6 @@ define void @testBranch(<512 x i1>* %ptr, <16 x i8> %vc, i32 %val) {
; CHECK-NEXT: xvi4ger8pp acc0, v2, v2
; CHECK-NEXT: .LBB7_3: # %if.end
; CHECK-NEXT: xxmfacc acc0
; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: xxmfacc acc0
; CHECK-NEXT: stxv vs0, 48(r3)
; CHECK-NEXT: stxv vs1, 32(r3)
; CHECK-NEXT: stxv vs2, 16(r3)
Expand All @@ -286,8 +270,6 @@ define void @testBranch(<512 x i1>* %ptr, <16 x i8> %vc, i32 %val) {
; CHECK-BE-NEXT: xvi4ger8pp acc0, v2, v2
; CHECK-BE-NEXT: .LBB7_3: # %if.end
; CHECK-BE-NEXT: xxmfacc acc0
; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: xxmfacc acc0
; CHECK-BE-NEXT: stxv vs1, 16(r3)
; CHECK-BE-NEXT: stxv vs0, 0(r3)
; CHECK-BE-NEXT: stxv vs3, 48(r3)
Expand Down Expand Up @@ -637,8 +619,6 @@ define void @testRedundantPrimeUnprime(<512 x i1>* %dst, <16 x i8> %vc) nounwind
; CHECK-NEXT: lxvp vsp0, r1(r3)
; CHECK-NEXT: li r3, 32
; CHECK-NEXT: lxvp vsp2, r1(r3)
; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: xxmfacc acc0
; CHECK-NEXT: stxv vs0, 112(r30)
; CHECK-NEXT: stxv vs1, 96(r30)
; CHECK-NEXT: stxv vs2, 80(r30)
Expand Down Expand Up @@ -675,8 +655,6 @@ define void @testRedundantPrimeUnprime(<512 x i1>* %dst, <16 x i8> %vc) nounwind
; CHECK-BE-NEXT: lxvp vsp0, r1(r3)
; CHECK-BE-NEXT: li r3, 144
; CHECK-BE-NEXT: lxvp vsp2, r1(r3)
; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: xxmfacc acc0
; CHECK-BE-NEXT: stxv vs3, 112(r30)
; CHECK-BE-NEXT: stxv vs2, 96(r30)
; CHECK-BE-NEXT: stxv vs1, 80(r30)
Expand Down

0 comments on commit 18db29e

Please sign in to comment.