Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 43 additions & 38 deletions llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -437,9 +437,6 @@ void GCNHazardRecognizer::RecedeCycle() {

enum HazardFnResult { HazardFound, HazardExpired, NoHazardFound };

using IsExpiredFn = function_ref<bool(const MachineInstr &, int WaitStates)>;
using GetNumWaitStatesFn = function_ref<unsigned int(const MachineInstr &)>;

// Search for a hazard in a block and its predecessors.
template <typename StateT>
static bool
Expand Down Expand Up @@ -546,11 +543,14 @@ hasHazard(StateT InitialState,
// Returns a minimum wait states since \p I walking all predecessors.
// Only scans until \p IsExpired does not return true.
// Can only be run in a hazard recognizer mode.
static int getWaitStatesSince(
GCNHazardRecognizer::IsHazardFn IsHazard, const MachineBasicBlock *MBB,
MachineBasicBlock::const_reverse_instr_iterator I, int WaitStates,
IsExpiredFn IsExpired, DenseSet<const MachineBasicBlock *> &Visited,
GetNumWaitStatesFn GetNumWaitStates = SIInstrInfo::getNumWaitStates) {
static int
getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
const MachineBasicBlock *MBB,
MachineBasicBlock::const_reverse_instr_iterator I,
int WaitStates, GCNHazardRecognizer::IsExpiredFn IsExpired,
DenseSet<const MachineBasicBlock *> &Visited,
GCNHazardRecognizer::GetNumWaitStatesFn GetNumWaitStates =
SIInstrInfo::getNumWaitStates) {
for (auto E = MBB->instr_rend(); I != E; ++I) {
// Don't add WaitStates for parent BUNDLE instructions.
if (I->isBundle())
Expand Down Expand Up @@ -582,20 +582,26 @@ static int getWaitStatesSince(
return MinWaitStates;
}

static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
const MachineInstr *MI, IsExpiredFn IsExpired) {
static int
getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
const MachineInstr *MI,
GCNHazardRecognizer::IsExpiredFn IsExpired,
GCNHazardRecognizer::GetNumWaitStatesFn GetNumWaitStates =
SIInstrInfo::getNumWaitStates) {
DenseSet<const MachineBasicBlock *> Visited;
return getWaitStatesSince(IsHazard, MI->getParent(),
std::next(MI->getReverseIterator()), 0, IsExpired,
Visited, SIInstrInfo::getNumWaitStates);
Visited, GetNumWaitStates);
}

int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) {
int GCNHazardRecognizer::getWaitStatesSince(
IsHazardFn IsHazard, int Limit, GetNumWaitStatesFn GetNumWaitStates) {
if (IsHazardRecognizerMode) {
auto IsExpiredFn = [Limit](const MachineInstr &, int WaitStates) {
return WaitStates >= Limit;
};
return ::getWaitStatesSince(IsHazard, CurrCycleInstr, IsExpiredFn);
return ::getWaitStatesSince(IsHazard, CurrCycleInstr, IsExpiredFn,
GetNumWaitStates);
}

int WaitStates = 0;
Expand All @@ -607,7 +613,7 @@ int GCNHazardRecognizer::getWaitStatesSince(IsHazardFn IsHazard, int Limit) {
if (MI->isInlineAsm())
continue;
}
++WaitStates;
WaitStates += MI ? GetNumWaitStates(*MI) : 1;

if (WaitStates >= Limit)
break;
Expand Down Expand Up @@ -1243,6 +1249,20 @@ int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn, ReadM0WaitStates);
}

// emit V_NOP instructions. \p WaitStatesNeeded is the number of V_NOPs we need
// to insert, negative means not needed.
bool GCNHazardRecognizer::emitVNops(MachineInstr *MI, int WaitStatesNeeded) {
if (WaitStatesNeeded <= 0)
return false;

const SIInstrInfo *TII = ST.getInstrInfo();
for (int I = 0; I < WaitStatesNeeded; ++I)
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
TII->get(AMDGPU::V_NOP_e32));

return true;
}

void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
fixVMEMtoScalarWriteHazards(MI);
fixVcmpxPermlaneHazards(MI);
Expand All @@ -1257,7 +1277,7 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
fixVALUTransUseHazard(MI);
fixVALUTransCoexecutionHazards(MI);
fixWMMAHazards(MI); // fall-through if co-execution is enabled.
fixWMMACoexecutionHazards(MI);
emitVNops(MI, checkWMMACoexecutionHazards(MI));
fixShift64HighRegBug(MI);
fixVALUMaskWriteHazard(MI);
fixRequiredExportPriority(MI);
Expand Down Expand Up @@ -2045,13 +2065,13 @@ static bool IsWMMAHazardInstInCategory(const MachineInstr &MI,
return false;
}

bool GCNHazardRecognizer::fixWMMACoexecutionHazards(MachineInstr *MI) {
int GCNHazardRecognizer::checkWMMACoexecutionHazards(MachineInstr *MI) {
if (!AMDGPU::isGFX1250(ST))
return false;
return 0;

const SIInstrInfo *TII = ST.getInstrInfo();
if (!TII->isXDLWMMA(*MI) && !isCoexecutableVALUInst(*MI))
return false;
return 0;

const SIRegisterInfo *TRI = ST.getRegisterInfo();

Expand Down Expand Up @@ -2129,9 +2149,6 @@ bool GCNHazardRecognizer::fixWMMACoexecutionHazards(MachineInstr *MI) {
};

int Limit = 0;
auto IsExpiredFn = [&Limit](const MachineInstr &, int WaitStates) {
return WaitStates >= Limit;
};

auto GetWaitStatesFn = [](const MachineInstr &I) {
return SIInstrInfo::isVALU(I) ? 1 : 0;
Expand All @@ -2141,38 +2158,26 @@ bool GCNHazardRecognizer::fixWMMACoexecutionHazards(MachineInstr *MI) {
if (TII->isXDLWMMA(*MI)) {
for (Category = 0; WaitStatesNeeded < 0 && Category < 4; Category++) {
Limit = WMMAWaitStates[Category]; // for IsExpiredFn.
DenseSet<const MachineBasicBlock *> Visited;
// '::getWaitStatesSince' returns the number of VALUs in between if hazard
// 'getWaitStatesSince' returns the number of VALUs in between if hazard
// exists, and INT_MAX if there is no hazard. As a result, a negative
// WaitStatesNeeded here means no hazard, and we will continue to search
// for other categories.
WaitStatesNeeded =
Limit - ::getWaitStatesSince(IsWMMAHazardFn, MI->getParent(),
std::next(MI->getReverseIterator()), 0,
IsExpiredFn, Visited, GetWaitStatesFn);
Limit - getWaitStatesSince(IsWMMAHazardFn, Limit, GetWaitStatesFn);
}
} else { // Must be a co-executable VALU.
for (Category = 0; WaitStatesNeeded < 0 && Category < 4; Category++) {
Limit = VALUWaitStates[Category]; // for IsExpiredFn.
DenseSet<const MachineBasicBlock *> Visited;
// '::getWaitStatesSince' returns the number of VALUs in between if hazard
// 'getWaitStatesSince' returns the number of VALUs in between if hazard
// exists, and INT_MAX if there is no hazard. As a result, a negative
// WaitStatesNeeded here means no hazard, and we will continue to search
// for other categories.
WaitStatesNeeded =
Limit - ::getWaitStatesSince(IsVALUHazardFn, MI->getParent(),
std::next(MI->getReverseIterator()), 0,
IsExpiredFn, Visited, GetWaitStatesFn);
Limit - getWaitStatesSince(IsVALUHazardFn, Limit, GetWaitStatesFn);
}
}

// WaitStatesNeeded now is the number of V_NOPs we need to insert, negative
// means not needed.
for (int i = 0; i < WaitStatesNeeded; i++)
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
TII->get(AMDGPU::V_NOP_e32));

return true;
return WaitStatesNeeded;
}

bool GCNHazardRecognizer::fixShift64HighRegBug(MachineInstr *MI) {
Expand Down
13 changes: 10 additions & 3 deletions llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPUHAZARDRECOGNIZERS_H
#define LLVM_LIB_TARGET_AMDGPUHAZARDRECOGNIZERS_H

#include "SIInstrInfo.h"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't see why you would need to move the include here

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I need SIInstrInfo::getNumWaitStates as a default value in getWaitStatesSince declaration.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could also just not use the defaulted argument and be explicit everywhere

#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
Expand All @@ -25,13 +26,14 @@ class MachineFunction;
class MachineInstr;
class MachineOperand;
class MachineRegisterInfo;
class SIInstrInfo;
class SIRegisterInfo;
class GCNSubtarget;

class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
public:
typedef function_ref<bool(const MachineInstr &)> IsHazardFn;
typedef function_ref<bool(const MachineInstr &, int WaitStates)> IsExpiredFn;
typedef function_ref<unsigned int(const MachineInstr &)> GetNumWaitStatesFn;

private:
// Distinguish if we are called from scheduler or hazard recognizer
Expand Down Expand Up @@ -74,7 +76,9 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
// used on a newly inserted instruction before returning from PreEmitNoops.
void runOnInstruction(MachineInstr *MI);

int getWaitStatesSince(IsHazardFn IsHazard, int Limit);
int getWaitStatesSince(
IsHazardFn IsHazard, int Limit,
GetNumWaitStatesFn GetNumWaitStates = SIInstrInfo::getNumWaitStates);
int getWaitStatesSinceDef(unsigned Reg, IsHazardFn IsHazardDef, int Limit);
int getWaitStatesSinceSetReg(IsHazardFn IsHazard, int Limit);

Expand All @@ -94,6 +98,9 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
int checkReadM0Hazards(MachineInstr *SMovRel);
int checkNSAtoVMEMHazard(MachineInstr *MI);
int checkFPAtomicToDenormModeHazard(MachineInstr *MI);
// emit V_NOP instructions. \p WaitStatesNeeded is the number of V_NOPs we
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// emit V_NOP instructions. \p WaitStatesNeeded is the number of V_NOPs we
// Emit V_NOP instructions. \p WaitStatesNeeded is the number of V_NOPs we

// need to insert, negative means not needed.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems silly to use int here. Zero already means "not needed" so you don't need negative values.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree, but all other check* functions return int as well.

bool emitVNops(MachineInstr *MI, int WaitStatesNeeded);
void fixHazards(MachineInstr *MI);
bool fixVcmpxPermlaneHazards(MachineInstr *MI);
bool fixVMEMtoScalarWriteHazards(MachineInstr *MI);
Expand All @@ -106,7 +113,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
bool fixVALUTransUseHazard(MachineInstr *MI);
bool fixVALUTransCoexecutionHazards(MachineInstr *MI);
bool fixWMMAHazards(MachineInstr *MI);
bool fixWMMACoexecutionHazards(MachineInstr *MI);
int checkWMMACoexecutionHazards(MachineInstr *MI);
bool fixShift64HighRegBug(MachineInstr *MI);
bool fixVALUMaskWriteHazard(MachineInstr *MI);
bool fixRequiredExportPriority(MachineInstr *MI);
Expand Down