diff --git a/llvm/include/llvm/MCA/CustomBehaviour.h b/llvm/include/llvm/MCA/CustomBehaviour.h index 0ce3993be95ba..8ad674c4ecf13 100644 --- a/llvm/include/llvm/MCA/CustomBehaviour.h +++ b/llvm/include/llvm/MCA/CustomBehaviour.h @@ -49,8 +49,7 @@ class InstrPostProcess { /// object after it has been lowered from the MCInst. /// This is generally a less disruptive alternative to modifying the /// scheduling model. - virtual void postProcessInstruction(std::unique_ptr<Instruction> &Inst, - const MCInst &MCI) {} + virtual void postProcessInstruction(Instruction &Inst, const MCInst &MCI) {} // The resetState() method gets invoked at the beginning of each code region // so that targets that override this function can clear any state that they diff --git a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp index b8f43c4550b7e..afaa19013bfc2 100644 --- a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp +++ b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp @@ -21,8 +21,8 @@ namespace llvm::mca { -void AMDGPUInstrPostProcess::postProcessInstruction( - std::unique_ptr<Instruction> &Inst, const MCInst &MCI) { +void AMDGPUInstrPostProcess::postProcessInstruction(Instruction &Inst, + const MCInst &MCI) { switch (MCI.getOpcode()) { case AMDGPU::S_WAITCNT: case AMDGPU::S_WAITCNT_soft: @@ -44,7 +44,7 @@ void AMDGPUInstrPostProcess::postProcessInstruction( // s_waitcnt instructions encode important information as immediate operands // which are lost during the MCInst -> mca::Instruction lowering. 
-void AMDGPUInstrPostProcess::processWaitCnt(std::unique_ptr<Instruction> &Inst, +void AMDGPUInstrPostProcess::processWaitCnt(Instruction &Inst, const MCInst &MCI) { for (int Idx = 0, N = MCI.size(); Idx < N; Idx++) { MCAOperand Op; @@ -55,7 +55,7 @@ void AMDGPUInstrPostProcess::processWaitCnt(std::unique_ptr<Instruction> &Inst, Op = MCAOperand::createImm(MCOp.getImm()); } Op.setIndex(Idx); - Inst->addOperand(Op); + Inst.addOperand(Op); } } diff --git a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h index 85b9c188b5d1a..cbc7427ce6cdf 100644 --- a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h +++ b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h @@ -26,7 +26,7 @@ namespace llvm { namespace mca { class AMDGPUInstrPostProcess : public InstrPostProcess { - void processWaitCnt(std::unique_ptr<Instruction> &Inst, const MCInst &MCI); + void processWaitCnt(Instruction &Inst, const MCInst &MCI); public: AMDGPUInstrPostProcess(const MCSubtargetInfo &STI, const MCInstrInfo &MCII) @@ -34,8 +34,7 @@ class AMDGPUInstrPostProcess : public InstrPostProcess { ~AMDGPUInstrPostProcess() = default; - void postProcessInstruction(std::unique_ptr<Instruction> &Inst, - const MCInst &MCI) override; + void postProcessInstruction(Instruction &Inst, const MCInst &MCI) override; }; struct WaitCntInfo { diff --git a/llvm/lib/Target/X86/MCA/X86CustomBehaviour.cpp b/llvm/lib/Target/X86/MCA/X86CustomBehaviour.cpp index e2a1bbf383b3c..a69a781bf070b 100644 --- a/llvm/lib/Target/X86/MCA/X86CustomBehaviour.cpp +++ b/llvm/lib/Target/X86/MCA/X86CustomBehaviour.cpp @@ -20,24 +20,22 @@ namespace llvm { namespace mca { -void X86InstrPostProcess::setMemBarriers(std::unique_ptr<Instruction> &Inst, - const MCInst &MCI) { +void X86InstrPostProcess::setMemBarriers(Instruction &Inst, const MCInst &MCI) { switch (MCI.getOpcode()) { case X86::MFENCE: - Inst->setLoadBarrier(true); - Inst->setStoreBarrier(true); + Inst.setLoadBarrier(true); + Inst.setStoreBarrier(true); break; case X86::LFENCE: - 
Inst->setLoadBarrier(true); + Inst.setLoadBarrier(true); break; case X86::SFENCE: - Inst->setStoreBarrier(true); + Inst.setStoreBarrier(true); break; } } -void X86InstrPostProcess::useStackEngine(std::unique_ptr<Instruction> &Inst, - const MCInst &MCI) { +void X86InstrPostProcess::useStackEngine(Instruction &Inst, const MCInst &MCI) { // TODO(boomanaiden154): We currently do not handle PUSHF/POPF because we // have not done the necessary benchmarking to see if they are also // optimized by the stack engine. @@ -46,18 +44,18 @@ void X86InstrPostProcess::useStackEngine(std::unique_ptr<Instruction> &Inst, // delay subsequent rsp using non-stack instructions. if (X86::isPOP(MCI.getOpcode()) || X86::isPUSH(MCI.getOpcode())) { auto *StackRegisterDef = - llvm::find_if(Inst->getDefs(), [](const WriteState &State) { + llvm::find_if(Inst.getDefs(), [](const WriteState &State) { return State.getRegisterID() == X86::RSP; }); assert( - StackRegisterDef != Inst->getDefs().end() && + StackRegisterDef != Inst.getDefs().end() && "Expected push instruction to implicitly use stack pointer register."); - Inst->getDefs().erase(StackRegisterDef); + Inst.getDefs().erase(StackRegisterDef); } } -void X86InstrPostProcess::postProcessInstruction( - std::unique_ptr<Instruction> &Inst, const MCInst &MCI) { +void X86InstrPostProcess::postProcessInstruction(Instruction &Inst, + const MCInst &MCI) { // Set IsALoadBarrier and IsAStoreBarrier flags. setMemBarriers(Inst, MCI); useStackEngine(Inst, MCI); diff --git a/llvm/lib/Target/X86/MCA/X86CustomBehaviour.h b/llvm/lib/Target/X86/MCA/X86CustomBehaviour.h index c5459e42dfc9f..d6197f3344bbb 100644 --- a/llvm/lib/Target/X86/MCA/X86CustomBehaviour.h +++ b/llvm/lib/Target/X86/MCA/X86CustomBehaviour.h @@ -26,12 +26,12 @@ namespace mca { class X86InstrPostProcess : public InstrPostProcess { /// Called within X86InstrPostProcess to specify certain instructions /// as load and store barriers. 
- void setMemBarriers(std::unique_ptr<Instruction> &Inst, const MCInst &MCI); + void setMemBarriers(Instruction &Inst, const MCInst &MCI); /// Called within X86InstrPostPorcess to remove some rsp read operands /// on stack instructions to better simulate the stack engine. We currently /// do not model features of the stack engine like sync uops. - void useStackEngine(std::unique_ptr<Instruction> &Inst, const MCInst &MCI); + void useStackEngine(Instruction &Inst, const MCInst &MCI); public: X86InstrPostProcess(const MCSubtargetInfo &STI, const MCInstrInfo &MCII) @@ -39,8 +39,7 @@ class X86InstrPostProcess : public InstrPostProcess { ~X86InstrPostProcess() = default; - void postProcessInstruction(std::unique_ptr<Instruction> &Inst, - const MCInst &MCI) override; + void postProcessInstruction(Instruction &Inst, const MCInst &MCI) override; }; } // namespace mca diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp index a4194da4a7b63..a64539c09b81e 100644 --- a/llvm/tools/llvm-mca/llvm-mca.cpp +++ b/llvm/tools/llvm-mca/llvm-mca.cpp @@ -668,7 +668,7 @@ int main(int argc, char **argv) { return 1; } - IPP->postProcessInstruction(Inst.get(), MCI); + IPP->postProcessInstruction(*Inst.get(), MCI); InstToInstruments.insert({&MCI, Instruments}); LoweredSequence.emplace_back(std::move(Inst.get())); } diff --git a/llvm/unittests/tools/llvm-mca/X86/TestIncrementalMCA.cpp b/llvm/unittests/tools/llvm-mca/X86/TestIncrementalMCA.cpp index 17809e7beda95..1b0073e026bae 100644 --- a/llvm/unittests/tools/llvm-mca/X86/TestIncrementalMCA.cpp +++ b/llvm/unittests/tools/llvm-mca/X86/TestIncrementalMCA.cpp @@ -130,6 +130,10 @@ TEST_F(X86TestBase, TestInstructionRecycling) { mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM, /*CallLatency=*/100); IB.setInstRecycleCallback(GetRecycledInst); + // Setup a generic IPP that does not do anything (as it is not target + // specific) for testing purposes. 
+ auto IPP = std::make_unique<mca::InstrPostProcess>(*STI, *MCII); + const SmallVector<mca::Instrument *> Instruments; // Tile size = 7 for (unsigned i = 0U, E = MCIs.size(); i < E;) { @@ -147,8 +151,10 @@ TEST_F(X86TestBase, TestInstructionRecycling) { }); ASSERT_FALSE(bool(RemainingE)); ASSERT_TRUE(RecycledInst); + IPP->postProcessInstruction(*RecycledInst, MCIs[i]); ISM.addRecycledInst(RecycledInst); } else { + IPP->postProcessInstruction(*InstOrErr.get(), MCIs[i]); ISM.addInst(std::move(InstOrErr.get())); } }