Skip to content

Commit

Permalink
[CUDA] Add implicit __attribute__((used)) to all __global__ functions.
Browse files Browse the repository at this point in the history
This ensures that kernels instantiated from host code are still emitted,
even though nothing else on the device side would ever explicitly
reference them.

Differential Revision: http://reviews.llvm.org/D11666

llvm-svn: 248293
  • Loading branch information
Artem-B committed Sep 22, 2015
1 parent 64f67be commit c3fa25d
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 0 deletions.
4 changes: 4 additions & 0 deletions clang/lib/Sema/SemaDeclAttr.cpp
Expand Up @@ -3350,6 +3350,10 @@ static void handleGlobalAttr(Sema &S, Decl *D, const AttributeList &Attr) {
D->addAttr(::new (S.Context)
CUDAGlobalAttr(Attr.getRange(), S.Context,
Attr.getAttributeSpellingListIndex()));

// Add an implicit __attribute__((used)) so that kernels are not eliminated
// merely because nothing references them on the device side.
D->addAttr(UsedAttr::CreateImplicit(S.Context));
}

static void handleGNUInlineAttr(Sema &S, Decl *D, const AttributeList &Attr) {
Expand Down
15 changes: 15 additions & 0 deletions clang/test/CodeGenCUDA/ptx-kernels.cu
@@ -1,7 +1,16 @@
// Make sure that __global__ functions are emitted along with correct
// annotations and are added to @llvm.used to prevent their elimination.
// REQUIRES: nvptx-registered-target
//
// RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - | FileCheck %s

#include "Inputs/cuda.h"

// Make sure that all __global__ functions are added to @llvm.used
// CHECK: @llvm.used = appending global
// CHECK-SAME: @global_function
// CHECK-SAME: @_Z16templated_kernelIiEvT_

// CHECK-LABEL: define void @device_function
// Empty device-side helper with C linkage (unmangled symbol
// @device_function); it is called from global_function.
extern "C"
__device__ void device_function() {}
Expand All @@ -13,4 +22,10 @@ __global__ void global_function() {
device_function();
}

// Make sure host-instantiated kernels are preserved on the device side:
// this kernel template is instantiated in this test only by the launch in
// host_function, so no device-side code refers to the instantiation.
template <typename T> __global__ void templated_kernel(T param) {}
// CHECK-LABEL: define linkonce_odr void @_Z16templated_kernelIiEvT_
// Host function whose kernel launch instantiates templated_kernel<int>
// (the int argument 0 deduces T = int, matching the mangled name checked
// in this test).
void host_function() { templated_kernel<<<0,0>>>(0); }

// CHECK: !{{[0-9]+}} = !{void ()* @global_function, !"kernel", i32 1}
// CHECK: !{{[0-9]+}} = !{void (i32)* @_Z16templated_kernelIiEvT_, !"kernel", i32 1}

0 comments on commit c3fa25d

Please sign in to comment.