-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[AMDGPU] Remove named-barrier LDS lowering logic from amdgpu-lower-module-lds #166731
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] Remove named-barrier LDS lowering logic from amdgpu-lower-module-lds #166731
Conversation
|
@llvm/pr-subscribers-backend-amdgpu Author: Chaitanya (skc7) ChangesThis PR removes the named-barrier LDS lowering from This PR is 3rd one in the stack. Full diff: https://github.com/llvm/llvm-project/pull/166731.diff 1 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index a4ef524c43466..3c0328e93ffbd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -922,126 +922,6 @@ class AMDGPULowerModuleLDS {
return KernelToCreatedDynamicLDS;
}
- static GlobalVariable *uniquifyGVPerKernel(Module &M, GlobalVariable *GV,
- Function *KF) {
- bool NeedsReplacement = false;
- for (Use &U : GV->uses()) {
- if (auto *I = dyn_cast<Instruction>(U.getUser())) {
- Function *F = I->getFunction();
- if (isKernelLDS(F) && F != KF) {
- NeedsReplacement = true;
- break;
- }
- }
- }
- if (!NeedsReplacement)
- return GV;
- // Create a new GV used only by this kernel and its function
- GlobalVariable *NewGV = new GlobalVariable(
- M, GV->getValueType(), GV->isConstant(), GV->getLinkage(),
- GV->getInitializer(), GV->getName() + "." + KF->getName(), nullptr,
- GV->getThreadLocalMode(), GV->getType()->getAddressSpace());
- NewGV->copyAttributesFrom(GV);
- for (Use &U : make_early_inc_range(GV->uses())) {
- if (auto *I = dyn_cast<Instruction>(U.getUser())) {
- Function *F = I->getFunction();
- if (!isKernelLDS(F) || F == KF) {
- U.getUser()->replaceUsesOfWith(GV, NewGV);
- }
- }
- }
- return NewGV;
- }
-
- bool lowerSpecialLDSVariables(
- Module &M, LDSUsesInfoTy &LDSUsesInfo,
- VariableFunctionMap &LDSToKernelsThatNeedToAccessItIndirectly) {
- bool Changed = false;
- const DataLayout &DL = M.getDataLayout();
- // The 1st round: give module-absolute assignments
- int NumAbsolutes = 0;
- std::vector<GlobalVariable *> OrderedGVs;
- for (auto &K : LDSToKernelsThatNeedToAccessItIndirectly) {
- GlobalVariable *GV = K.first;
- if (!isNamedBarrier(*GV))
- continue;
- // give a module-absolute assignment if it is indirectly accessed by
- // multiple kernels. This is not precise, but we don't want to duplicate
- // a function when it is called by multiple kernels.
- if (LDSToKernelsThatNeedToAccessItIndirectly[GV].size() > 1) {
- OrderedGVs.push_back(GV);
- } else {
- // leave it to the 2nd round, which will give a kernel-relative
- // assignment if it is only indirectly accessed by one kernel
- LDSUsesInfo.direct_access[*K.second.begin()].insert(GV);
- }
- LDSToKernelsThatNeedToAccessItIndirectly.erase(GV);
- }
- OrderedGVs = sortByName(std::move(OrderedGVs));
- for (GlobalVariable *GV : OrderedGVs) {
- unsigned BarrierScope = llvm::AMDGPU::Barrier::BARRIER_SCOPE_WORKGROUP;
- unsigned BarId = NumAbsolutes + 1;
- unsigned BarCnt = DL.getTypeAllocSize(GV->getValueType()) / 16;
- NumAbsolutes += BarCnt;
-
- // 4 bits for alignment, 5 bits for the barrier num,
- // 3 bits for the barrier scope
- unsigned Offset = 0x802000u | BarrierScope << 9 | BarId << 4;
- recordLDSAbsoluteAddress(&M, GV, Offset);
- }
- OrderedGVs.clear();
-
- // The 2nd round: give a kernel-relative assignment for GV that
- // either only indirectly accessed by single kernel or only directly
- // accessed by multiple kernels.
- std::vector<Function *> OrderedKernels;
- for (auto &K : LDSUsesInfo.direct_access) {
- Function *F = K.first;
- assert(isKernelLDS(F));
- OrderedKernels.push_back(F);
- }
- OrderedKernels = sortByName(std::move(OrderedKernels));
-
- llvm::DenseMap<Function *, uint32_t> Kernel2BarId;
- for (Function *F : OrderedKernels) {
- for (GlobalVariable *GV : LDSUsesInfo.direct_access[F]) {
- if (!isNamedBarrier(*GV))
- continue;
-
- LDSUsesInfo.direct_access[F].erase(GV);
- if (GV->isAbsoluteSymbolRef()) {
- // already assigned
- continue;
- }
- OrderedGVs.push_back(GV);
- }
- OrderedGVs = sortByName(std::move(OrderedGVs));
- for (GlobalVariable *GV : OrderedGVs) {
- // GV could also be used directly by other kernels. If so, we need to
- // create a new GV used only by this kernel and its function.
- auto NewGV = uniquifyGVPerKernel(M, GV, F);
- Changed |= (NewGV != GV);
- unsigned BarrierScope = llvm::AMDGPU::Barrier::BARRIER_SCOPE_WORKGROUP;
- unsigned BarId = Kernel2BarId[F];
- BarId += NumAbsolutes + 1;
- unsigned BarCnt = DL.getTypeAllocSize(GV->getValueType()) / 16;
- Kernel2BarId[F] += BarCnt;
- unsigned Offset = 0x802000u | BarrierScope << 9 | BarId << 4;
- recordLDSAbsoluteAddress(&M, NewGV, Offset);
- }
- OrderedGVs.clear();
- }
- // Also erase those special LDS variables from indirect_access.
- for (auto &K : LDSUsesInfo.indirect_access) {
- assert(isKernelLDS(K.first));
- for (GlobalVariable *GV : K.second) {
- if (isNamedBarrier(*GV))
- K.second.erase(GV);
- }
- }
- return Changed;
- }
-
bool runOnModule(Module &M) {
CallGraph CG = CallGraph(M);
bool Changed = superAlignLDSGlobals(M);
@@ -1064,12 +944,6 @@ class AMDGPULowerModuleLDS {
}
}
- if (LDSUsesInfo.HasSpecialGVs) {
- // Special LDS variables need special address assignment
- Changed |= lowerSpecialLDSVariables(
- M, LDSUsesInfo, LDSToKernelsThatNeedToAccessItIndirectly);
- }
-
// Partition variables accessed indirectly into the different strategies
DenseSet<GlobalVariable *> ModuleScopeVariables;
DenseSet<GlobalVariable *> TableLookupVariables;
|
cff633c to
4a810e6
Compare
0a2e9ee to
16a0334
Compare
|
This PR is closed and all commits here C-P'd to #165692 |
This PR removes the named-barrier LDS lowering from
amdgpu-lower-module-ldspass, since it is now handled byamdgpu-lower-exec-syncpassThis PR is 3rd one in the stack.
PR1 : #165692
PR2 : #165746
-> PR3 : #166731