Skip to content

Commit

Permalink
[CUDA] Add implicit __attribute__((used)) to all __global__ functions.
Browse files Browse the repository at this point in the history
This allows emitting kernels that were instantiated from the host code
and which would never be explicitly referenced otherwise.

Differential Revision: http://reviews.llvm.org/D11666

llvm-svn: 244501
  • Loading branch information
Artem-B committed Aug 10, 2015
1 parent a01ff22 commit b7e4aab
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 0 deletions.
4 changes: 4 additions & 0 deletions clang/lib/Sema/SemaDeclAttr.cpp
Expand Up @@ -3350,6 +3350,10 @@ static void handleGlobalAttr(Sema &S, Decl *D, const AttributeList &Attr) {
D->addAttr(::new (S.Context)
CUDAGlobalAttr(Attr.getRange(), S.Context,
Attr.getAttributeSpellingListIndex()));

// Add implicit __attribute__((used)) so we don't eliminate kernels
// because there is nothing referencing them on device side.
D->addAttr(UsedAttr::CreateImplicit(S.Context));
}

static void handleGNUInlineAttr(Sema &S, Decl *D, const AttributeList &Attr) {
Expand Down
15 changes: 15 additions & 0 deletions clang/test/CodeGenCUDA/ptx-kernels.cu
@@ -1,7 +1,16 @@
// Make sure that __global__ functions are emitted along with correct
// annotations and are added to @llvm.used to prevent their elimination.
// REQUIRES: nvptx-registered-target
//
// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - | FileCheck %s

#include "Inputs/cuda.h"

// Make sure that all __global__ functions are added to @llvm.used
// CHECK: @llvm.used = appending global
// CHECK-SAME: @global_function
// CHECK-SAME: @_Z16templated_kernelIiEvT_

// CHECK-LABEL: define void @device_function
// Device-only helper with C linkage, so it appears in the emitted IR under the
// unmangled name @device_function.
extern "C"
__device__ void device_function() {}
Expand All @@ -13,4 +22,10 @@ __global__ void global_function() {
device_function();
}

// Make sure host-instantiated kernels are preserved on device side.
// Kernel template; the launch in host_function instantiates it with T = int.
template <typename T> __global__ void templated_kernel(T param) {}
// CHECK-LABEL: define linkonce_odr void @_Z16templated_kernelIiEvT_
// Host-side launch that instantiates templated_kernel; the test only emits
// LLVM IR, so the 0,0 launch configuration is never executed.
void host_function() { templated_kernel<<<0,0>>>(0); }

// CHECK: !{{[0-9]+}} = !{void ()* @global_function, !"kernel", i32 1}
// CHECK: !{{[0-9]+}} = !{void (i32)* @_Z16templated_kernelIiEvT_, !"kernel", i32 1}

0 comments on commit b7e4aab

Please sign in to comment.