Skip to content

Commit

Permalink
AMDGPU/SI: Enable the post-ra scheduler
Browse files Browse the repository at this point in the history
Summary:
This includes a hazard recognizer implementation to replace some of
the hazard handling we had during frame index elimination.

Reviewers: arsenm

Subscribers: qcolombet, arsenm, llvm-commits

Differential Revision: http://reviews.llvm.org/D18602

llvm-svn: 268143
  • Loading branch information
tstellarAMD committed Apr 30, 2016
1 parent 52c68bb commit cb6ba62
Show file tree
Hide file tree
Showing 35 changed files with 426 additions and 117 deletions.
11 changes: 11 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Expand Up @@ -384,6 +384,17 @@ void GCNPassConfig::addPreSched2() {
}

void GCNPassConfig::addPreEmitPass() {

// The hazard recognizer that runs as part of the post-ra scheduler is not
// guaranteed to handle all hazards correctly. This is because if there are
// multiple scheduling regions in a basic block, the regions are scheduled
// bottom up, so when we begin to schedule a region we don't know what
// instructions were emitted directly before it.
//
// Here we add a stand-alone hazard recognizer pass which can handle all
// cases.
addPass(&PostRAHazardRecognizerID);

addPass(createSIInsertWaitsPass(), false);
addPass(createSIShrinkInstructionsPass());
addPass(createSILowerControlFlowPass(), false);
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/CMakeLists.txt
Expand Up @@ -47,6 +47,7 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUInstrInfo.cpp
AMDGPUPromoteAlloca.cpp
AMDGPURegisterInfo.cpp
GCNHazardRecognizer.cpp
R600ClauseMergePass.cpp
R600ControlFlowFinalizer.cpp
R600EmitClauseMarkers.cpp
Expand Down
182 changes: 182 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -0,0 +1,182 @@
//===-- GCNHazardRecognizer.cpp - GCN Hazard Recognizer Impls -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements hazard recognizers for scheduling on GCN processors.
//
//===----------------------------------------------------------------------===//

#include "GCNHazardRecognizer.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

//===----------------------------------------------------------------------===//
// Hazard Recognizer Implementation
//===----------------------------------------------------------------------===//

/// Create a hazard recognizer for \p MF. No instruction is pending in the
/// current cycle and the history of emitted instructions starts out empty.
GCNHazardRecognizer::GCNHazardRecognizer(const MachineFunction &MF)
    : CurrCycleInstr(nullptr), EmittedInstrs(), MF(MF) {
  // Five matches the largest wait-state requirement checked in this file
  // (a VMEM instruction reading an SGPR written by a VALU instruction).
  MaxLookAhead = 5;
}

/// Record the instruction wrapped by \p SU as the one issued in the current
/// cycle. It is moved into EmittedInstrs by the next AdvanceCycle() call.
void GCNHazardRecognizer::EmitInstruction(SUnit *SU) {
  CurrCycleInstr = SU->getInstr();
}

/// Record \p MI as the instruction issued in the current cycle. It is moved
/// into EmittedInstrs by the next AdvanceCycle() call.
void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) {
  CurrCycleInstr = MI;
}

/// Report whether issuing the instruction of \p SU now would hit a hazard.
///
/// Only SMRD and VMEM instructions are checked.
///
/// \param SU     Scheduling unit whose instruction is about to be issued.
/// \param Stalls Not used by this implementation.
/// \returns NoopHazard if at least one wait state is still required,
///          NoHazard otherwise.
ScheduleHazardRecognizer::HazardType
GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
  MachineInstr *Instr = SU->getInstr();
  const auto *InstrInfo =
      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());

  // The SMRD check runs first; the VMEM check is only consulted when the
  // SMRD check did not already report a hazard.
  bool NeedsNoop = InstrInfo->isSMRD(*Instr) && checkSMRDHazards(Instr) > 0;
  if (!NeedsNoop)
    NeedsNoop = InstrInfo->isVMEM(*Instr) && checkVMEMHazards(Instr) > 0;

  return NeedsNoop ? NoopHazard : NoHazard;
}

/// \returns the number of noops that must precede the instruction wrapped by
/// \p SU; forwards to the MachineInstr overload.
unsigned GCNHazardRecognizer::PreEmitNoops(SUnit *SU) {
  MachineInstr *Instr = SU->getInstr();
  return PreEmitNoops(Instr);
}

/// Compute how many noops must be emitted before \p MI.
///
/// SMRD instructions are checked with checkSMRDHazards(), VMEM instructions
/// with checkVMEMHazards(); every other instruction needs no noops.
///
/// \returns the number of wait states still outstanding, never negative.
unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
  const auto *InstrInfo =
      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());

  int WaitStates = 0;
  if (InstrInfo->isSMRD(*MI))
    WaitStates = checkSMRDHazards(MI);
  else if (InstrInfo->isVMEM(*MI))
    WaitStates = checkVMEMHazards(MI);

  // The hazard checks may report a negative value when more wait states
  // than required have already passed; clamp that to zero.
  return std::max(0, WaitStates);
}

void GCNHazardRecognizer::EmitNoop() {
EmittedInstrs.push_front(nullptr);
}

void GCNHazardRecognizer::AdvanceCycle() {

// When the scheduler detects a stall, it will call AdvanceCycle() without
// emitting any instructions.
if (!CurrCycleInstr)
return;

const SIInstrInfo *TII =
static_cast<const SIInstrInfo*>(MF.getSubtarget().getInstrInfo());
unsigned NumWaitStates = TII->getNumWaitStates(*CurrCycleInstr);

// Keep track of emitted instructions
EmittedInstrs.push_front(CurrCycleInstr);

// Add a nullptr for each additional wait state after the first. Make sure
// not to add more than getMaxLookAhead() items to the list, since we
// truncate the list to that size right after this loop.
for (unsigned i = 1, e = std::min(NumWaitStates, getMaxLookAhead());
i < e; ++i) {
EmittedInstrs.push_front(nullptr);
}

// getMaxLookahead() is the largest number of wait states we will ever need
// to insert, so there is no point in keeping track of more than that many
// wait states.
EmittedInstrs.resize(getMaxLookAhead());

CurrCycleInstr = nullptr;
}

/// Bottom-up scheduling hook. This recognizer only supports top-down
/// scheduling, so reaching this function is treated as a programming error.
void GCNHazardRecognizer::RecedeCycle() {
llvm_unreachable("hazard recognizer does not support bottom-up scheduling.");
}

//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//

int GCNHazardRecognizer::getWaitStatesSinceDef(unsigned Reg,
std::function<bool(MachineInstr*)> IsHazardDef ) {
const TargetRegisterInfo *TRI =
MF.getSubtarget<AMDGPUSubtarget>().getRegisterInfo();

int WaitStates = -1;
for (MachineInstr *MI : EmittedInstrs) {
++WaitStates;
if (!MI || !IsHazardDef(MI))
continue;
if (MI->modifiesRegister(Reg, TRI))
return WaitStates;
}
return std::numeric_limits<int>::max();
}

//===----------------------------------------------------------------------===//
// No-op Hazard Detection
//===----------------------------------------------------------------------===//

/// Compute how many wait states are still required before \p SMRD can be
/// issued.
///
/// \returns the number of outstanding wait states; 0 when no hazard applies.
int GCNHazardRecognizer::checkSMRDHazards(MachineInstr *SMRD) {
  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();

  // This SMRD hazard only affects SI.
  if (ST.getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS)
    return 0;

  const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());

  // A read of an SGPR by SMRD instruction requires 4 wait states when the
  // SGPR was written by a VALU instruction.
  const int RequiredWaitStates = 4;
  auto WrittenByVALU = [TII] (MachineInstr *Def) { return TII->isVALU(*Def); };

  int MostNeeded = 0;
  for (const MachineOperand &Operand : SMRD->uses()) {
    if (Operand.isReg()) {
      const int Elapsed =
          getWaitStatesSinceDef(Operand.getReg(), WrittenByVALU);
      // The operand with the largest outstanding requirement decides the
      // result.
      MostNeeded = std::max(MostNeeded, RequiredWaitStates - Elapsed);
    }
  }
  return MostNeeded;
}

/// Compute how many wait states are still required before \p VMEM can be
/// issued.
///
/// \returns the number of outstanding wait states; 0 when no hazard applies.
int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();

  // Generations before VOLCANIC_ISLANDS are not checked for this hazard.
  if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return 0;

  const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  // A read of an SGPR by a VMEM instruction requires 5 wait states when the
  // SGPR was written by a VALU Instruction.
  const int RequiredWaitStates = 5;
  auto WrittenByVALU = [TII] (MachineInstr *Def) { return TII->isVALU(*Def); };

  int MostNeeded = 0;
  for (const MachineOperand &Operand : VMEM->uses()) {
    // Only register operands that are not VGPRs take part in this hazard.
    const bool IsNonVGPRReg =
        Operand.isReg() && !TRI.isVGPR(MF.getRegInfo(), Operand.getReg());
    if (!IsNonVGPRReg)
      continue;

    const int Elapsed = getWaitStatesSinceDef(Operand.getReg(), WrittenByVALU);
    MostNeeded = std::max(MostNeeded, RequiredWaitStates - Elapsed);
  }
  return MostNeeded;
}
59 changes: 59 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -0,0 +1,59 @@
//===-- GCNHazardRecognizer.h - GCN Hazard Recognizers ----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines hazard recognizers for scheduling on GCN processors.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPUHAZARDRECOGNIZERS_H
#define LLVM_LIB_TARGET_AMDGPUHAZARDRECOGNIZERS_H

#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include <functional>
#include <list>

namespace llvm {

class MachineFunction;
class MachineInstr;
class ScheduleDAG;
class SIInstrInfo;

/// Hazard recognizer for GCN processors.
///
/// It keeps a short history of the instructions that were emitted and
/// reports how many wait states (noops) are needed before an SMRD or VMEM
/// instruction can be issued. Only top-down scheduling is supported;
/// RecedeCycle() must never be called.
class GCNHazardRecognizer final : public ScheduleHazardRecognizer {

  // This variable stores the instruction that has been emitted this cycle.
  // It will be added to EmittedInstrs when AdvanceCycle() is called.
  MachineInstr *CurrCycleInstr;

  // History of emitted instructions, most recent first. A nullptr entry
  // stands for a wait state that is not occupied by an instruction (an
  // emitted noop or an additional wait state of a preceding instruction).
  std::list<MachineInstr*> EmittedInstrs;

  const MachineFunction &MF;

  /// \returns the number of wait states since \p Reg was last modified by an
  /// instruction for which \p IsHazardDef returns true. By default every
  /// defining instruction is considered.
  int getWaitStatesSinceDef(unsigned Reg,
                            std::function<bool(MachineInstr*)> IsHazardDef =
                                [](MachineInstr*) {return true;});

  /// \returns the number of wait states still required before \p SMRD.
  int checkSMRDHazards(MachineInstr *SMRD);
  /// \returns the number of wait states still required before \p VMEM.
  int checkVMEMHazards(MachineInstr* VMEM);
public:
  // Marked explicit so that a MachineFunction is never implicitly converted
  // into a hazard recognizer.
  explicit GCNHazardRecognizer(const MachineFunction &MF);
  // We can only issue one instruction per cycle.
  bool atIssueLimit() const override { return true; }
  void EmitInstruction(SUnit *SU) override;
  void EmitInstruction(MachineInstr *MI) override;
  HazardType getHazardType(SUnit *SU, int Stalls) override;
  void EmitNoop() override;
  unsigned PreEmitNoops(SUnit *SU) override;
  unsigned PreEmitNoops(MachineInstr *) override;
  void AdvanceCycle() override;
  void RecedeCycle() override;
};

} // end namespace llvm

#endif //LLVM_LIB_TARGET_AMDGPUHAZARDRECOGNIZERS_H
38 changes: 36 additions & 2 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Expand Up @@ -15,11 +15,13 @@

#include "SIInstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "GCNHazardRecognizer.h"
#include "SIDefines.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/IR/Function.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/MC/MCInstrDesc.h"
Expand Down Expand Up @@ -816,6 +818,20 @@ void SIInstrInfo::insertWaitStates(MachineBasicBlock &MBB,
}
}

/// Insert a noop before \p MI in \p MBB by requesting a single wait state
/// from insertWaitStates().
void SIInstrInfo::insertNoop(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MI) const {
  const int NumWaitStates = 1;
  insertWaitStates(MBB, MI, NumWaitStates);
}

/// Return the number of wait states that result from executing \p MI.
///
/// S_NOP yields its immediate operand plus one; every other instruction
/// counts as a single wait state.
unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) const {
  if (MI.getOpcode() == AMDGPU::S_NOP)
    return MI.getOperand(0).getImm() + 1;

  // FIXME: Do wait states equal cycles?
  return 1;
}

bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
MachineBasicBlock &MBB = *MI->getParent();
DebugLoc DL = MBB.findDebugLoc(MI);
Expand Down Expand Up @@ -1188,8 +1204,11 @@ bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr *MIa,

if (getMemOpBaseRegImmOfs(MIa, BaseReg0, Offset0, &RI) &&
getMemOpBaseRegImmOfs(MIb, BaseReg1, Offset1, &RI)) {
assert(MIa->hasOneMemOperand() && MIb->hasOneMemOperand() &&
"read2 / write2 not expected here yet");

if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand()) {
// FIXME: Handle ds_read2 / ds_write2.
return false;
}
unsigned Width0 = (*MIa->memoperands_begin())->getSize();
unsigned Width1 = (*MIb->memoperands_begin())->getSize();
if (BaseReg0 == BaseReg1 &&
Expand Down Expand Up @@ -2964,3 +2983,18 @@ SIInstrInfo::getSerializableTargetIndices() const {
{AMDGPU::TI_SCRATCH_RSRC_DWORD3, "amdgpu-scratch-rsrc-dword3"}};
return makeArrayRef(TargetIndices);
}

/// Hazard recognizer factory used by the post-RA scheduler
/// (SchedulePostRAList.cpp). The post-RA version of misched uses
/// CreateTargetMIHazardRecognizer instead.
///
/// \p II is not used; the recognizer is built for the function of \p DAG.
ScheduleHazardRecognizer *SIInstrInfo::CreateTargetPostRAHazardRecognizer(
    const InstrItineraryData *II, const ScheduleDAG *DAG) const {
  const auto &Fn = DAG->MF;
  return new GCNHazardRecognizer(Fn);
}

/// Hazard recognizer factory used at -O0 by the PostRAHazardRecognizer pass.
///
/// \returns a newly allocated GCNHazardRecognizer for \p MF.
ScheduleHazardRecognizer *SIInstrInfo::CreateTargetPostRAHazardRecognizer(
    const MachineFunction &MF) const {
  ScheduleHazardRecognizer *Recognizer = new GCNHazardRecognizer(MF);
  return Recognizer;
}
21 changes: 21 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.h
Expand Up @@ -169,6 +169,14 @@ class SIInstrInfo final : public AMDGPUInstrInfo {
return get(Opcode).TSFlags & SIInstrFlags::VALU;
}

/// \returns true if \p MI is a MUBUF, MTBUF or MIMG instruction.
static bool isVMEM(const MachineInstr &MI) {
  if (isMUBUF(MI) || isMTBUF(MI))
    return true;
  return isMIMG(MI);
}

/// \returns true if \p Opcode is a MUBUF, MTBUF or MIMG opcode.
bool isVMEM(uint16_t Opcode) const {
  if (isMUBUF(Opcode) || isMTBUF(Opcode))
    return true;
  return isMIMG(Opcode);
}

static bool isSOP1(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::SOP1;
}
Expand Down Expand Up @@ -440,6 +448,12 @@ class SIInstrInfo final : public AMDGPUInstrInfo {
void insertWaitStates(MachineBasicBlock &MBB,MachineBasicBlock::iterator MI,
int Count) const;

void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const;

/// \brief Return the number of wait states that result from executing this
/// instruction.
unsigned getNumWaitStates(const MachineInstr &MI) const;

/// \brief Returns the operand named \p Op. If \p MI does not have an
/// operand named \c Op, this function returns nullptr.
LLVM_READONLY
Expand Down Expand Up @@ -472,6 +486,13 @@ class SIInstrInfo final : public AMDGPUInstrInfo {
ArrayRef<std::pair<int, const char *>>
getSerializableTargetIndices() const override;

ScheduleHazardRecognizer *
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const override;

ScheduleHazardRecognizer *
CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;

};

namespace AMDGPU {
Expand Down

0 comments on commit cb6ba62

Please sign in to comment.