Skip to content

Commit

Permalink
[AMDGPU] New clang option for emitting a waitcnt instruction after ea…
Browse files Browse the repository at this point in the history
…ch memory instruction (#79236)

This patch introduces a new command-line option for clang, namely,
-mamdgpu-precise-memory-op (exposed as the precise-memory target feature in the backend). When this option is specified, a waitcnt
instruction is generated after each memory load/store instruction. The
counter values are always 0, but which counters are involved depends on
the memory instruction.

---------

Co-authored-by: Jun Wang <jun.wang7@amd.com>
  • Loading branch information
jwanggit86 and Jun Wang committed Apr 10, 2024
1 parent f27f369 commit 86842e1
Show file tree
Hide file tree
Showing 7 changed files with 1,686 additions and 0 deletions.
3 changes: 3 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -4912,6 +4912,9 @@ defm tgsplit : SimpleMFlag<"tgsplit", "Enable", "Disable",
defm wavefrontsize64 : SimpleMFlag<"wavefrontsize64",
"Specify wavefront size 64", "Specify wavefront size 32",
" mode (AMDGPU only)">;
// Driver flag pair -mamdgpu-precise-memory-op / -mno-amdgpu-precise-memory-op.
// The AMDGPU toolchain turns the positive form into the "+precise-memory"
// target feature; when neither form is given the feature is not added.
defm amdgpu_precise_memory_op
: SimpleMFlag<"amdgpu-precise-memory-op", "Enable", "Disable",
" precise memory mode (AMDGPU only)">;

defm unsafe_fp_atomics : BoolMOption<"unsafe-fp-atomics",
TargetOpts<"AllowAMDGPUUnsafeFPAtomics">, DefaultFalse,
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/Driver/ToolChains/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,10 @@ void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
options::OPT_mno_wavefrontsize64, false))
Features.push_back("+wavefrontsize64");

if (Args.hasFlag(options::OPT_mamdgpu_precise_memory_op,
options::OPT_mno_amdgpu_precise_memory_op, false))
Features.push_back("+precise-memory");

handleTargetFeaturesGroup(D, Triple, Args, Features,
options::OPT_m_amdgpu_Features_Group);
}
Expand Down
6 changes: 6 additions & 0 deletions clang/test/Driver/amdgpu-features.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,9 @@

// RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mno-cumode %s 2>&1 | FileCheck --check-prefix=NO-CUMODE %s
// NO-CUMODE: "-target-feature" "-cumode"

// RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mamdgpu-precise-memory-op %s 2>&1 | FileCheck --check-prefix=PREC-MEM %s
// PREC-MEM: "-target-feature" "+precise-memory"

// RUN: %clang -### -target amdgcn -mcpu=gfx1010 -mno-amdgpu-precise-memory-op %s 2>&1 | FileCheck --check-prefix=NO-PREC-MEM %s
// NO-PREC-MEM-NOT: {{".*precise-memory"}}
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,10 @@ def FeatureCuMode : SubtargetFeature<"cumode",
"Enable CU wavefront execution mode"
>;

// Subtarget feature "precise-memory": sets GCNSubtarget::EnablePreciseMemory
// to true. SIInsertWaitcnts checks this flag and, for every instruction that
// may load or store, requests an all-zero wait right after it.
def FeaturePreciseMemory
: SubtargetFeature<"precise-memory", "EnablePreciseMemory",
"true", "Enable precise memory mode">;

def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
"SGPRInitBug",
"true",
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool EnableTgSplit = false;
bool EnableCuMode = false;
bool TrapHandler = false;
bool EnablePreciseMemory = false;

// Used as options.
bool EnableLoadStoreOpt = false;
Expand Down Expand Up @@ -599,6 +600,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return EnableCuMode;
}

/// \returns true if the "precise-memory" subtarget feature is enabled.
bool isPreciseMemoryEnabled() const {
  return EnablePreciseMemory;
}

bool hasFlatAddressSpace() const {
return FlatAddressSpace;
}
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2305,6 +2305,14 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
}
#endif

if (ST->isPreciseMemoryEnabled() && Inst.mayLoadOrStore()) {
AMDGPU::Waitcnt Wait = WCG->getAllZeroWaitcnt(
Inst.mayStore() && !SIInstrInfo::isAtomicRet(Inst));
ScoreBrackets.simplifyWaitcnt(Wait);
Modified |= generateWaitcnt(Wait, std::next(Inst.getIterator()), Block,
ScoreBrackets, /*OldWaitcntInstr=*/nullptr);
}

LLVM_DEBUG({
Inst.print(dbgs());
ScoreBrackets.dump();
Expand Down

0 comments on commit 86842e1

Please sign in to comment.