diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 29d22f27a2d8e..d07909251dcfb 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -437,9 +437,6 @@ void GCNHazardRecognizer::RecedeCycle() { enum HazardFnResult { HazardFound, HazardExpired, NoHazardFound }; -using IsExpiredFn = function_ref; -using GetNumWaitStatesFn = function_ref; - // Search for a hazard in a block and its predecessors. template static bool @@ -546,11 +543,14 @@ hasHazard(StateT InitialState, // Returns a minimum wait states since \p I walking all predecessors. // Only scans until \p IsExpired does not return true. // Can only be run in a hazard recognizer mode. -static int getWaitStatesSince( - GCNHazardRecognizer::IsHazardFn IsHazard, const MachineBasicBlock *MBB, - MachineBasicBlock::const_reverse_instr_iterator I, int WaitStates, - IsExpiredFn IsExpired, DenseSet &Visited, - GetNumWaitStatesFn GetNumWaitStates = SIInstrInfo::getNumWaitStates) { +static int +getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard, + const MachineBasicBlock *MBB, + MachineBasicBlock::const_reverse_instr_iterator I, + int WaitStates, GCNHazardRecognizer::IsExpiredFn IsExpired, + DenseSet &Visited, + GCNHazardRecognizer::GetNumWaitStatesFn GetNumWaitStates = + SIInstrInfo::getNumWaitStates) { for (auto E = MBB->instr_rend(); I != E; ++I) { // Don't add WaitStates for parent BUNDLE instructions. if (I->isBundle()) @@ -582,20 +582,26 @@ static int getWaitStatesSince( return MinWaitStates; } -static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard, - const MachineInstr *MI, IsExpiredFn IsExpired) { +static int +getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard, + const MachineInstr *MI, + GCNHazardRecognizer::IsExpiredFn IsExpired, + GCNHazardRecognizer::GetNumWaitStatesFn GetNumWaitStates = + SIInstrInfo::getNumWaitStates) { DenseSet Visited; return getWaitStatesSince(IsHazard, MI->getParent(), std::next(MI->getReverseIterator()), 0, IsExpired, - Visited, SIInstrInfo::getNumWaitStates); + Visited, GetNumWaitStates); } -int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) { +int GCNHazardRecognizer::getWaitStatesSince( + IsHazardFn IsHazard, int Limit, GetNumWaitStatesFn GetNumWaitStates) { if (IsHazardRecognizerMode) { auto IsExpiredFn = [Limit](const MachineInstr &, int WaitStates) { return WaitStates >= Limit; }; - return ::getWaitStatesSince(IsHazard, CurrCycleInstr, IsExpiredFn); + return ::getWaitStatesSince(IsHazard, CurrCycleInstr, IsExpiredFn, + GetNumWaitStates); } int WaitStates = 0; @@ -607,7 +613,7 @@ int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) { if (MI->isInlineAsm()) continue; } - ++WaitStates; + WaitStates += MI ? GetNumWaitStates(*MI) : 1; if (WaitStates >= Limit) break; @@ -1243,6 +1249,20 @@ int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) { getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn, ReadM0WaitStates); } +// emit V_NOP instructions. \p WaitStatesNeeded is the number of V_NOPs we need +// to insert, negative means not needed. +bool GCNHazardRecognizer::emitVNops(MachineInstr *MI, int WaitStatesNeeded) { + if (WaitStatesNeeded <= 0) + return false; + + const SIInstrInfo *TII = ST.getInstrInfo(); + for (int I = 0; I < WaitStatesNeeded; ++I) + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + TII->get(AMDGPU::V_NOP_e32)); + + return true; +} + void GCNHazardRecognizer::fixHazards(MachineInstr *MI) { fixVMEMtoScalarWriteHazards(MI); fixVcmpxPermlaneHazards(MI); @@ -1257,7 +1277,7 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) { fixVALUTransUseHazard(MI); fixVALUTransCoexecutionHazards(MI); fixWMMAHazards(MI); // fall-through if co-execution is enabled. - fixWMMACoexecutionHazards(MI); + emitVNops(MI, checkWMMACoexecutionHazards(MI)); fixShift64HighRegBug(MI); fixVALUMaskWriteHazard(MI); fixRequiredExportPriority(MI); @@ -2045,13 +2065,13 @@ static bool IsWMMAHazardInstInCategory(const MachineInstr &MI, return false; } -bool GCNHazardRecognizer::fixWMMACoexecutionHazards(MachineInstr *MI) { +int GCNHazardRecognizer::checkWMMACoexecutionHazards(MachineInstr *MI) { if (!AMDGPU::isGFX1250(ST)) - return false; + return 0; const SIInstrInfo *TII = ST.getInstrInfo(); if (!TII->isXDLWMMA(*MI) && !isCoexecutableVALUInst(*MI)) - return false; + return 0; const SIRegisterInfo *TRI = ST.getRegisterInfo(); @@ -2129,9 +2149,6 @@ bool GCNHazardRecognizer::fixWMMACoexecutionHazards(MachineInstr *MI) { }; int Limit = 0; - auto IsExpiredFn = [&Limit](const MachineInstr &, int WaitStates) { - return WaitStates >= Limit; - }; auto GetWaitStatesFn = [](const MachineInstr &I) { return SIInstrInfo::isVALU(I) ? 1 : 0; @@ -2141,38 +2158,26 @@ bool GCNHazardRecognizer::fixWMMACoexecutionHazards(MachineInstr *MI) { if (TII->isXDLWMMA(*MI)) { for (Category = 0; WaitStatesNeeded < 0 && Category < 4; Category++) { Limit = WMMAWaitStates[Category]; // for IsExpiredFn. - DenseSet Visited; - // '::getWaitStatesSince' returns the number of VALUs in between if hazard + // 'getWaitStatesSince' returns the number of VALUs in between if hazard // exists, and INT_MAX if there is no hazard. As a result, a negative // WaitStatesNeeded here means no hazard, and we will continue to search // for other categories. WaitStatesNeeded = - Limit - ::getWaitStatesSince(IsWMMAHazardFn, MI->getParent(), - std::next(MI->getReverseIterator()), 0, - IsExpiredFn, Visited, GetWaitStatesFn); + Limit - getWaitStatesSince(IsWMMAHazardFn, Limit, GetWaitStatesFn); } } else { // Must be a co-executable VALU. for (Category = 0; WaitStatesNeeded < 0 && Category < 4; Category++) { Limit = VALUWaitStates[Category]; // for IsExpiredFn. - DenseSet Visited; - // '::getWaitStatesSince' returns the number of VALUs in between if hazard + // 'getWaitStatesSince' returns the number of VALUs in between if hazard // exists, and INT_MAX if there is no hazard. As a result, a negative // WaitStatesNeeded here means no hazard, and we will continue to search // for other categories. WaitStatesNeeded = - Limit - ::getWaitStatesSince(IsVALUHazardFn, MI->getParent(), - std::next(MI->getReverseIterator()), 0, - IsExpiredFn, Visited, GetWaitStatesFn); + Limit - getWaitStatesSince(IsVALUHazardFn, Limit, GetWaitStatesFn); } } - // WaitStatesNeeded now is the number of V_NOPs we need to insert, negative - // means not needed. - for (int i = 0; i < WaitStatesNeeded; i++) - BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), - TII->get(AMDGPU::V_NOP_e32)); - - return true; + return WaitStatesNeeded; } bool GCNHazardRecognizer::fixShift64HighRegBug(MachineInstr *MI) { diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h index 67beffadc0913..e5df71ca3788c 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h @@ -13,6 +13,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPUHAZARDRECOGNIZERS_H #define LLVM_LIB_TARGET_AMDGPUHAZARDRECOGNIZERS_H +#include "SIInstrInfo.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" @@ -25,13 +26,14 @@ class MachineFunction; class MachineInstr; class MachineOperand; class MachineRegisterInfo; -class SIInstrInfo; class SIRegisterInfo; class GCNSubtarget; class GCNHazardRecognizer final : public ScheduleHazardRecognizer { public: typedef function_ref IsHazardFn; + typedef function_ref IsExpiredFn; + typedef function_ref GetNumWaitStatesFn; private: // Distinguish if we are called from scheduler or hazard recognizer @@ -74,7 +76,9 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer { // used on a newly inserted instruction before returning from PreEmitNoops. void runOnInstruction(MachineInstr *MI); - int getWaitStatesSince(IsHazardFn IsHazard, int Limit); + int getWaitStatesSince( + IsHazardFn IsHazard, int Limit, + GetNumWaitStatesFn GetNumWaitStates = SIInstrInfo::getNumWaitStates); int getWaitStatesSinceDef(unsigned Reg, IsHazardFn IsHazardDef, int Limit); int getWaitStatesSinceSetReg(IsHazardFn IsHazard, int Limit); @@ -94,6 +98,9 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer { int checkReadM0Hazards(MachineInstr *SMovRel); int checkNSAtoVMEMHazard(MachineInstr *MI); int checkFPAtomicToDenormModeHazard(MachineInstr *MI); + // emit V_NOP instructions. \p WaitStatesNeeded is the number of V_NOPs we + // need to insert, negative means not needed. + bool emitVNops(MachineInstr *MI, int WaitStatesNeeded); void fixHazards(MachineInstr *MI); bool fixVcmpxPermlaneHazards(MachineInstr *MI); bool fixVMEMtoScalarWriteHazards(MachineInstr *MI); @@ -106,7 +113,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer { bool fixVALUTransUseHazard(MachineInstr *MI); bool fixVALUTransCoexecutionHazards(MachineInstr *MI); bool fixWMMAHazards(MachineInstr *MI); - bool fixWMMACoexecutionHazards(MachineInstr *MI); + int checkWMMACoexecutionHazards(MachineInstr *MI); bool fixShift64HighRegBug(MachineInstr *MI); bool fixVALUMaskWriteHazard(MachineInstr *MI); bool fixRequiredExportPriority(MachineInstr *MI);