Skip to content

Commit

Permalink
[CUDA] Give templated device functions internal linkage, templated kernels external linkage.
Browse files Browse the repository at this point in the history

Summary:
This lets LLVM perform inter-procedural optimization (IPO) over these
functions.  In particular, it allows LLVM to emit ld.global.nc for loads
through __restrict pointers in kernels when the memory they point to is
never written to.

Reviewers: rsmith

Subscribers: cfe-commits, tra

Differential Revision: http://reviews.llvm.org/D21337

llvm-svn: 274261
  • Loading branch information
Justin Lebar committed Jun 30, 2016
1 parent cc4bb63 commit 27ee130
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 5 deletions.
15 changes: 12 additions & 3 deletions clang/lib/CodeGen/CodeGenModule.cpp
Expand Up @@ -2671,9 +2671,18 @@ llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageForDeclarator(
// explicit instantiations can occur in multiple translation units
// and must all be equivalent. However, we are not allowed to
// throw away these explicit instantiations.
if (Linkage == GVA_StrongODR)
return !Context.getLangOpts().AppleKext ? llvm::Function::WeakODRLinkage
: llvm::Function::ExternalLinkage;
//
// We don't currently support CUDA device code spread out across multiple TUs,
// so when compiling for the device, give CUDA templates external linkage if
// they are kernels and internal linkage otherwise. This lets LLVM perform
// aggressive inter-procedural optimizations.
if (Linkage == GVA_StrongODR) {
if (Context.getLangOpts().AppleKext)
return llvm::Function::ExternalLinkage;
if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice)
return D->hasAttr<CUDAGlobalAttr>() ? llvm::Function::ExternalLinkage
: llvm::Function::InternalLinkage;
return llvm::Function::WeakODRLinkage;
}

// C++ doesn't have tentative definitions and thus cannot have common
// linkage.
Expand Down
4 changes: 2 additions & 2 deletions clang/test/CodeGenCUDA/ptx-kernels.cu
Expand Up @@ -19,11 +19,11 @@ __global__ void global_function() {

// Make sure host-instantiated kernels are preserved on device side.
template <typename T> __global__ void templated_kernel(T param) {}
// CHECK-DAG: define weak_odr void @_Z16templated_kernelIiEvT_(
// CHECK-DAG: define void @_Z16templated_kernelIiEvT_(

namespace {
__global__ void anonymous_ns_kernel() {}
// CHECK-DAG: define weak_odr void @_ZN12_GLOBAL__N_119anonymous_ns_kernelEv(
// CHECK-DAG: define void @_ZN12_GLOBAL__N_119anonymous_ns_kernelEv(
}

void host_function() {
Expand Down

0 comments on commit 27ee130

Please sign in to comment.