Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[AMDGPU] Promote generic pointer kernel arguments into global
The new pass walks the kernel's pointer arguments and then the loads from them. If a loaded value is a pointer and the memory it was loaded from is not modified in the kernel before the load, the loaded pointer is promoted to the global address space. The pass then continues recursively from the loaded pointer. Differential Revision: https://reviews.llvm.org/D111464
- Loading branch information
Showing
6 changed files
with
559 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
195 changes: 195 additions & 0 deletions
195
llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,195 @@ | ||
//===-- AMDGPUPromoteKernelArguments.cpp ----------------------------------===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// | ||
/// \file This pass recursively promotes generic pointer arguments of a kernel | ||
/// into the global address space. | ||
/// | ||
/// The pass walks the kernel's pointer arguments and the loads from them. If | ||
/// a loaded value is a pointer and the memory it was loaded from is not | ||
/// modified in the kernel before the load, the loaded pointer is promoted to | ||
/// the global address space. The pass then continues recursively. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "AMDGPU.h" | ||
#include "llvm/ADT/SmallVector.h" | ||
#include "llvm/Analysis/MemorySSA.h" | ||
#include "llvm/IR/IRBuilder.h" | ||
#include "llvm/InitializePasses.h" | ||
|
||
#define DEBUG_TYPE "amdgpu-promote-kernel-arguments" | ||
|
||
using namespace llvm; | ||
|
||
namespace { | ||
|
||
class AMDGPUPromoteKernelArguments : public FunctionPass { | ||
MemorySSA *MSSA; | ||
|
||
Instruction *ArgCastInsertPt; | ||
|
||
SmallVector<Value *> Ptrs; | ||
|
||
void enqueueUsers(Value *Ptr); | ||
|
||
bool promotePointer(Value *Ptr); | ||
|
||
public: | ||
static char ID; | ||
|
||
AMDGPUPromoteKernelArguments() : FunctionPass(ID) {} | ||
|
||
bool run(Function &F, MemorySSA &MSSA); | ||
|
||
bool runOnFunction(Function &F) override; | ||
|
||
void getAnalysisUsage(AnalysisUsage &AU) const override { | ||
AU.addRequired<MemorySSAWrapperPass>(); | ||
AU.setPreservesAll(); | ||
} | ||
}; | ||
|
||
} // end anonymous namespace | ||
|
||
/// Collect pointer loads based on \p Ptr that are candidates for promotion.
///
/// The users of \p Ptr are walked transitively through GEPs, address space
/// casts and bitcasts. A load is pushed onto the Ptrs worklist when it yields
/// a flat, global or constant pointer, its address strips back to \p Ptr, and
/// MemorySSA reports the live-on-entry definition as its clobbering access.
void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) {
  SmallVector<User *> Worklist(Ptr->users());

  while (!Worklist.empty()) {
    auto *Inst = dyn_cast<Instruction>(Worklist.pop_back_val());
    if (!Inst)
      continue;

    const unsigned Opcode = Inst->getOpcode();

    if (Opcode == Instruction::Load) {
      auto *Load = cast<LoadInst>(Inst);

      // Only loads that produce a pointer are of interest.
      auto *LoadedTy = dyn_cast<PointerType>(Load->getType());
      if (!LoadedTy)
        continue;

      const unsigned AS = LoadedTy->getAddressSpace();
      const bool IsCandidateAS = AS == AMDGPUAS::FLAT_ADDRESS ||
                                 AS == AMDGPUAS::GLOBAL_ADDRESS ||
                                 AS == AMDGPUAS::CONSTANT_ADDRESS;
      if (!IsCandidateAS)
        continue;

      // The load must read through Ptr itself, not through some other base.
      if (Load->getPointerOperand()->stripInBoundsOffsets() != Ptr)
        continue;

      const MemoryAccess *Clobber =
          MSSA->getWalker()->getClobberingMemoryAccess(Load);
      // TODO: This load probably can be promoted to constant address space.
      if (MSSA->isLiveOnEntryDef(Clobber))
        Ptrs.push_back(Load);
      continue;
    }

    const bool IsPointerForwarding = Opcode == Instruction::GetElementPtr ||
                                     Opcode == Instruction::AddrSpaceCast ||
                                     Opcode == Instruction::BitCast;
    // Look through address computations and casts that are still based on Ptr.
    if (IsPointerForwarding &&
        Inst->getOperand(0)->stripInBoundsOffsets() == Ptr)
      Worklist.append(Inst->user_begin(), Inst->user_end());
  }
}
|
||
/// Promote \p Ptr to the global address space if it is a flat pointer.
///
/// The users of \p Ptr are enqueued first, whatever its address space. For a
/// flat pointer a cast to global and a cast back to flat are then inserted,
/// and every other use of \p Ptr is redirected to the cast-back value.
/// \returns true if casts were inserted, false otherwise.
bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) {
  enqueueUsers(Ptr);

  auto *FlatTy = cast<PointerType>(Ptr->getType());
  if (FlatTy->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)
    return false;

  // Arguments are cast at the shared insertion point; any other value is cast
  // immediately after the instruction that defines it.
  Instruction *InsertBefore = ArgCastInsertPt;
  if (!isa<Argument>(Ptr))
    InsertBefore = &*std::next(cast<Instruction>(Ptr)->getIterator());
  IRBuilder<> Builder(InsertBefore);

  // Cast pointer to global address space and back to flat and let
  // Infer Address Spaces pass to do all necessary rewriting.
  PointerType *GlobalTy =
      PointerType::getWithSamePointeeType(FlatTy, AMDGPUAS::GLOBAL_ADDRESS);
  Value *ToGlobal = Builder.CreateAddrSpaceCast(
      Ptr, GlobalTy, Twine(Ptr->getName(), ".global"));
  Value *ToFlat = Builder.CreateAddrSpaceCast(
      ToGlobal, FlatTy, Twine(Ptr->getName(), ".flat"));

  // The first cast must keep using the original pointer; everything else is
  // rewritten to use the round-tripped value.
  Ptr->replaceUsesWithIf(
      ToFlat, [ToGlobal](Use &U) { return U.getUser() != ToGlobal; });

  return true;
}
|
||
/// Return the first insertion point of \p BB that follows its leading run of
/// static allocas.
static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
  BasicBlock::iterator It = BB.getFirstInsertionPt();
  BasicBlock::iterator End = BB.end();

  while (It != End) {
    auto *Alloca = dyn_cast<AllocaInst>(&*It);

    // Stop at the first instruction that is not a static alloca. A dynamic
    // alloca may depend on values derived from the kernel arguments, so the
    // insertion point has to precede it.
    if (!Alloca || !Alloca->isStaticAlloca())
      break;

    ++It;
  }

  return It;
}
|
||
/// Promote pointers reachable from the arguments of kernel \p F.
///
/// Nothing is done for skipped functions, functions that do not use the
/// AMDGPU_KERNEL calling convention, or functions without arguments.
/// Otherwise every used flat, global or constant pointer argument seeds the
/// worklist, which is then drained through promotePointer().
/// \returns true if the IR was modified.
bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA) {
  if (skipFunction(F))
    return false;

  if (F.getCallingConv() != CallingConv::AMDGPU_KERNEL || F.arg_empty())
    return false;

  ArgCastInsertPt = &*getInsertPt(*F.begin());
  this->MSSA = &MSSA;

  for (Argument &Arg : F.args()) {
    if (Arg.use_empty())
      continue;

    auto *ArgPtrTy = dyn_cast<PointerType>(Arg.getType());
    if (!ArgPtrTy)
      continue;

    switch (ArgPtrTy->getAddressSpace()) {
    case AMDGPUAS::FLAT_ADDRESS:
    case AMDGPUAS::GLOBAL_ADDRESS:
    case AMDGPUAS::CONSTANT_ADDRESS:
      Ptrs.push_back(&Arg);
      break;
    default:
      break;
    }
  }

  // promotePointer() may push further pointers onto Ptrs while it runs.
  bool Changed = false;
  while (!Ptrs.empty())
    Changed |= promotePointer(Ptrs.pop_back_val());

  return Changed;
}
|
||
/// Legacy pass manager entry point: fetch MemorySSA for \p F and forward to
/// run().
bool AMDGPUPromoteKernelArguments::runOnFunction(Function &F) {
  return run(F, getAnalysis<MemorySSAWrapperPass>().getMSSA());
}
|
||
// Legacy pass registration; MemorySSAWrapperPass is declared as a dependency | ||
// because getAnalysisUsage() requires it. | ||
INITIALIZE_PASS_BEGIN(AMDGPUPromoteKernelArguments, DEBUG_TYPE, | ||
"AMDGPU Promote Kernel Arguments", false, false) | ||
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) | ||
INITIALIZE_PASS_END(AMDGPUPromoteKernelArguments, DEBUG_TYPE, | ||
"AMDGPU Promote Kernel Arguments", false, false) | ||
|
||
// Definition of the static pass ID that is handed to the FunctionPass base. | ||
char AMDGPUPromoteKernelArguments::ID = 0; | ||
|
||
/// Create a new heap-allocated instance of the legacy pass and return it to
/// the caller.
FunctionPass *llvm::createAMDGPUPromoteKernelArgumentsPass() {
  FunctionPass *Pass = new AMDGPUPromoteKernelArguments();
  return Pass;
}
|
||
/// New pass manager entry point.
///
/// Runs the shared implementation with the MemorySSA result for \p F. When
/// nothing changed, all analyses are preserved; otherwise only the CFG
/// analyses and MemorySSA are reported as preserved.
PreservedAnalyses
AMDGPUPromoteKernelArgumentsPass::run(Function &F,
                                      FunctionAnalysisManager &AM) {
  MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();

  const bool Changed = AMDGPUPromoteKernelArguments().run(F, MSSA);
  if (!Changed)
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<MemorySSAAnalysis>();
  return PA;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.