Skip to content

Commit

Permalink
[HIP] [AlwaysInliner] Disable AlwaysInliner to eliminate undefined sy…
Browse files Browse the repository at this point in the history
…mbols

By default clang emits complete contructors as alias of base constructors if they are the same.
The backend is supposed to emit symbols for the alias, otherwise it causes undefined symbols.
@yaxunl observed that this issue is related to the llvm options `-amdgpu-early-inline-all=true`
and `-amdgpu-function-calls=false`. This issue is resolved by only inlining global values
with internal linkage. The `getCalleeFunction()` in AMDGPUResourceUsageAnalysis also had
to be extended to support aliases to functions. inline-calls.ll was corrected appropriately.

Reviewed By: yaxunl, #amdgpu

Differential Revision: https://reviews.llvm.org/D109707
  • Loading branch information
gandhi56 committed Oct 15, 2021
1 parent 4e572db commit 03375a3
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 11 deletions.
6 changes: 3 additions & 3 deletions clang/lib/Driver/ToolChains/Clang.cpp
Expand Up @@ -5089,9 +5089,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
}

// Enable -mconstructor-aliases except on darwin, where we have to work around
// a linker bug (see <rdar://problem/7651567>), and CUDA/AMDGPU device code,
// where aliases aren't supported.
if (!RawTriple.isOSDarwin() && !RawTriple.isNVPTX() && !RawTriple.isAMDGPU())
// a linker bug (see <rdar://problem/7651567>), and CUDA device code, where
// aliases aren't supported.
if (!RawTriple.isOSDarwin() && !RawTriple.isNVPTX())
CmdArgs.push_back("-mconstructor-aliases");

// Darwin's kernel doesn't support guard variables; just die if we
Expand Down
17 changes: 17 additions & 0 deletions clang/test/CodeGenCUDA/amdgpu-alias-undef-symbols.cu
@@ -0,0 +1,17 @@
// REQUIRES: amdgpu-registered-target, clang-driver

// RUN: %clang --offload-arch=gfx906 --cuda-device-only -nogpulib -nogpuinc -x hip -emit-llvm -S -o - %s \
// RUN: -fgpu-rdc -O3 -mllvm -amdgpu-early-inline-all=true -mllvm -amdgpu-function-calls=false | \
// RUN: FileCheck %s

#include "Inputs/cuda.h"

// CHECK: %struct.B = type { i8 }
struct B {

// CHECK: @_ZN1BC1Ei = hidden unnamed_addr alias void (%struct.B*, i32), void (%struct.B*, i32)* @_ZN1BC2Ei
__device__ B(int x);
};

__device__ B::B(int x) {
}
5 changes: 5 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
Expand Up @@ -15,6 +15,7 @@
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
Expand Down Expand Up @@ -90,9 +91,13 @@ static bool alwaysInlineImpl(Module &M, bool GlobalOpt) {

SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
SmallPtrSet<Function *, 8> FuncsToNoInline;
Triple TT(M.getTargetTriple());

for (GlobalAlias &A : M.aliases()) {
if (Function* F = dyn_cast<Function>(A.getAliasee())) {
if (TT.getArch() == Triple::amdgcn &&
A.getLinkage() != GlobalValue::InternalLinkage)
continue;
A.replaceAllUsesWith(F);
AliasesToRemove.push_back(&A);
}
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
Expand Up @@ -29,6 +29,8 @@
#include "SIMachineFunctionInfo.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;
Expand Down Expand Up @@ -61,7 +63,8 @@ static const Function *getCalleeFunction(const MachineOperand &Op) {
assert(Op.getImm() == 0);
return nullptr;
}

if (auto *GA = dyn_cast<GlobalAlias>(Op.getGlobal()))
return cast<Function>(GA->getOperand(0));
return cast<Function>(Op.getGlobal());
}

Expand Down
15 changes: 8 additions & 7 deletions llvm/test/CodeGen/AMDGPU/inline-calls.ll
@@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple amdgcn-unknown-linux-gnu -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple amdgcn-unknown-linux-gnu -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple r600-unknown-linux-gnu -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s --check-prefix=R600

; ALL-NOT: {{^}}func:
define internal i32 @func(i32 %a) {
Expand All @@ -9,7 +9,7 @@ entry:
ret i32 %tmp0
}

; ALL: {{^}}kernel:
; CHECK: {{^}}kernel:
; GCN-NOT: s_swappc_b64
define amdgpu_kernel void @kernel(i32 addrspace(1)* %out) {
entry:
Expand All @@ -18,12 +18,13 @@ entry:
ret void
}

; CHECK-NOT: func_alias
; ALL-NOT: func_alias
; CHECK: func_alias
; R600-NOT: func_alias
@func_alias = alias i32 (i32), i32 (i32)* @func

; ALL: {{^}}kernel3:
; CHECK-NOT: {{^}}kernel3:
; GCN-NOT: s_swappc_b64
; R600: {{^}}kernel3:
define amdgpu_kernel void @kernel3(i32 addrspace(1)* %out) {
entry:
%tmp0 = call i32 @func_alias(i32 1)
Expand Down

0 comments on commit 03375a3

Please sign in to comment.