Skip to content

Commit

Permalink
[AMDGPU] Add a new pass to insert waitcnts. Leave under an option for…
Browse files Browse the repository at this point in the history
… testing.

Based on comments in https://reviews.llvm.org/D31161.

llvm-svn: 300023
  • Loading branch information
kanarayan committed Apr 12, 2017
1 parent 04300b0 commit acb089e
Show file tree
Hide file tree
Showing 7 changed files with 1,881 additions and 6 deletions.
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ FunctionPass *createSIFixControlFlowLiveIntervalsPass();
FunctionPass *createSIFixSGPRCopiesPass();
FunctionPass *createSIDebuggerInsertNopsPass();
FunctionPass *createSIInsertWaitsPass();
FunctionPass *createSIInsertWaitcntsPass();
FunctionPass *createAMDGPUCodeGenPreparePass(const GCNTargetMachine *TM = nullptr);

ModulePass *createAMDGPUAnnotateKernelFeaturesPass(const TargetMachine *TM = nullptr);
Expand Down Expand Up @@ -125,6 +126,9 @@ extern char &SIDebuggerInsertNopsID;
void initializeSIInsertWaitsPass(PassRegistry&);
extern char &SIInsertWaitsID;

void initializeSIInsertWaitcntsPass(PassRegistry&);
extern char &SIInsertWaitcntsID;

void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&);
extern char &AMDGPUUnifyDivergentExitNodesID;

Expand Down
12 changes: 11 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,12 @@ static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
cl::desc("Enable AMDGPU Alias Analysis"),
cl::init(true));

// Option to enable new waitcnt insertion pass.
static cl::opt<bool> EnableSIInsertWaitcntsPass(
"enable-si-insert-waitcnts",
cl::desc("Use new waitcnt insertion pass"),
cl::init(false));

extern "C" void LLVMInitializeAMDGPUTarget() {
// Register the target
RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
Expand All @@ -134,6 +140,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
initializeAMDGPUUnifyMetadataPass(*PR);
initializeSIAnnotateControlFlowPass(*PR);
initializeSIInsertWaitsPass(*PR);
initializeSIInsertWaitcntsPass(*PR);
initializeSIWholeQuadModePass(*PR);
initializeSILowerControlFlowPass(*PR);
initializeSIInsertSkipsPass(*PR);
Expand Down Expand Up @@ -810,7 +817,10 @@ void GCNPassConfig::addPreEmitPass() {
// cases.
addPass(&PostRAHazardRecognizerID);

addPass(createSIInsertWaitsPass());
if (EnableSIInsertWaitcntsPass)
addPass(createSIInsertWaitcntsPass());
else
addPass(createSIInsertWaitsPass());
addPass(createSIShrinkInstructionsPass());
addPass(&SIInsertSkipsPassID);
addPass(createSIDebuggerInsertNopsPass());
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ add_llvm_target(AMDGPUCodeGen
SIFrameLowering.cpp
SIInsertSkips.cpp
SIInsertWaits.cpp
SIInsertWaitcnts.cpp
SIInstrInfo.cpp
SIISelLowering.cpp
SILoadStoreOptimizer.cpp
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/DSInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,8 @@ class DS_1A1D_PERMUTE <string opName, SDPatternOperator node = null_frag>
[(set i32:$vdst,
(node (DS1Addr1Offset i32:$addr, i16:$offset), i32:$data0))] > {

let LGKM_CNT = 0;

let mayLoad = 0;
let mayStore = 0;
let isConvergent = 1;
Expand Down
Loading

0 comments on commit acb089e

Please sign in to comment.