Skip to content

Commit

Permalink
[AMDGPU] Internalize non-kernel symbols
Browse files Browse the repository at this point in the history
Since we have no call support and rely on late linking, we need to produce
code only for symbols that are actually used. This reduces compilation time,
the size of the final executable, and the size of any intermediate dumps.

Run the Internalize pass early in the opt pipeline, followed by the global
DCE pass. To enable it, the RT can pass the -amdgpu-internalize-symbols option.

Differential Revision: https://reviews.llvm.org/D29214

llvm-svn: 293549
  • Loading branch information
rampitec committed Jan 30, 2017
1 parent e518e0b commit a3b7279
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 2 deletions.
35 changes: 33 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Expand Up @@ -84,6 +84,13 @@ static cl::opt<bool> ScalarizeGlobal(
cl::init(false),
cl::Hidden);

// Hidden command-line flag, off by default. When set, adjustPassManager may
// schedule the internalize pass (followed by global DCE) for amdgcn targets.
static cl::opt<bool> InternalizeSymbols(
    "amdgpu-internalize-symbols", cl::Hidden, cl::init(false),
    cl::desc("Enable elimination of non-kernel functions and unused globals"));

extern "C" void LLVMInitializeAMDGPUTarget() {
// Register the target
RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
Expand Down Expand Up @@ -207,11 +214,35 @@ StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
}

/// Hooks AMDGPU-specific passes into the early module-optimizer extension
/// point of \p Builder.
///
/// The metadata-unification pass is always added. When the
/// -amdgpu-internalize-symbols option is set, optimization is enabled, and the
/// target architecture is amdgcn, an internalize pass followed by global DCE
/// is added as well so that code is kept only for symbols that are reachable
/// from entry points.
///
/// \param Builder pass manager builder that receives the extension callback.
void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
  // Evaluate the gating condition once, up front, so the callback below only
  // needs a plain bool.
  bool Internalize = InternalizeSymbols &&
                     (getOptLevel() > CodeGenOpt::None) &&
                     (getTargetTriple().getArch() == Triple::amdgcn);
  Builder.addExtension(
      PassManagerBuilder::EP_ModuleOptimizerEarly,
      // Capture the flag by value rather than by reference: the callback is
      // handed to the builder and may be invoked after this function returns.
      [Internalize](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
        PM.add(createAMDGPUUnifyMetadataPass());
        if (Internalize) {
          // The predicate tells the internalize pass which globals must be
          // preserved (true) and which may be internalized (false).
          PM.add(createInternalizePass([=](const GlobalValue &GV) -> bool {
            if (const Function *F = dyn_cast<Function>(&GV)) {
              // Declarations have no body in this module; leave them alone.
              if (F->isDeclaration())
                return true;
              // Only functions using an entry-point calling convention are
              // preserved; every other defined function may be internalized.
              switch (F->getCallingConv()) {
              default:
                return false;
              case CallingConv::AMDGPU_VS:
              case CallingConv::AMDGPU_GS:
              case CallingConv::AMDGPU_PS:
              case CallingConv::AMDGPU_CS:
              case CallingConv::AMDGPU_KERNEL:
              case CallingConv::SPIR_KERNEL:
                return true;
              }
            }
            // Non-function globals are preserved only while something still
            // uses them.
            return !GV.use_empty();
          }));
          // Remove whatever became unreachable after internalization.
          PM.add(createGlobalDCEPass());
        }
      });
}

//===----------------------------------------------------------------------===//
Expand Down
35 changes: 35 additions & 0 deletions llvm/test/CodeGen/AMDGPU/internalize.ll
@@ -0,0 +1,35 @@
; RUN: opt -O1 -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-internalize-symbols < %s | FileCheck %s
; CHECK-NOT: unused
; CHECK-NOT: foo_used
; CHECK: gvar_used
; CHECK: main_kernel

; Two i32 globals in address space 1. Within this file only @gvar_used is
; referenced (as an argument in @main_kernel); @gvar_unused has no users.
@gvar_unused = addrspace(1) global i32 undef, align 4
@gvar_used = addrspace(1) global i32 undef, align 4

; Function Attrs: alwaysinline nounwind
; Non-kernel helper that stores the constant 1 through %out. Nothing in this
; file calls it.
define void @foo_unused(i32 addrspace(1)* %out) local_unnamed_addr #1 {
entry:
store i32 1, i32 addrspace(1)* %out
ret void
}

; Function Attrs: alwaysinline nounwind
; Non-kernel helper that stores %tid through %out. Its only caller in this
; file is @main_kernel, and attribute group #1 marks it alwaysinline.
define void @foo_used(i32 addrspace(1)* %out, i32 %tid) local_unnamed_addr #1 {
entry:
store i32 %tid, i32 addrspace(1)* %out
ret void
}

; The only function in this file with the amdgpu_kernel calling convention.
; It reads the x work-item id and passes it, together with @gvar_used, to
; @foo_used.
define amdgpu_kernel void @main_kernel() {
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
tail call void @foo_used(i32 addrspace(1)* @gvar_used, i32 %tid) nounwind
ret void
}

; Intrinsic used by @main_kernel to obtain the x work-item id.
declare i32 @llvm.amdgcn.workitem.id.x() #0

; #0 is attached to the intrinsic declaration above.
attributes #0 = { nounwind readnone }

; #1 is shared by the two non-kernel helpers @foo_unused and @foo_used.
attributes #1 = { alwaysinline nounwind }

0 comments on commit a3b7279

Please sign in to comment.