From c3fa25def7618c052f68539aa0737bd4a5d70039 Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Tue, 22 Sep 2015 17:22:51 +0000 Subject: [PATCH] [CUDA] Add implicit __attribute__((used)) to all __global__ functions. This makes sure that we emit kernels that were instantiated from the host code and which would never be explicitly referenced by anything else on device side. Differential Revision: http://reviews.llvm.org/D11666 llvm-svn: 248293 --- clang/lib/Sema/SemaDeclAttr.cpp | 4 ++++ clang/test/CodeGenCUDA/ptx-kernels.cu | 15 +++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 947103780c24a..ddf189ae12c38 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -3350,6 +3350,10 @@ static void handleGlobalAttr(Sema &S, Decl *D, const AttributeList &Attr) { D->addAttr(::new (S.Context) CUDAGlobalAttr(Attr.getRange(), S.Context, Attr.getAttributeSpellingListIndex())); + + // Add implicit attribute((used)) so we don't eliminate kernels + // because there is nothing referencing them on device side. + D->addAttr(UsedAttr::CreateImplicit(S.Context)); } static void handleGNUInlineAttr(Sema &S, Decl *D, const AttributeList &Attr) { diff --git a/clang/test/CodeGenCUDA/ptx-kernels.cu b/clang/test/CodeGenCUDA/ptx-kernels.cu index 658b3488fc18d..3a8722a342d10 100644 --- a/clang/test/CodeGenCUDA/ptx-kernels.cu +++ b/clang/test/CodeGenCUDA/ptx-kernels.cu @@ -1,7 +1,16 @@ +// Make sure that __global__ functions are emitted along with correct +// annotations and are added to @llvm.used to prevent their elimination. 
+// REQUIRES: nvptx-registered-target +// // RUN: %clang_cc1 %s -triple nvptx-unknown-unknown -fcuda-is-device -emit-llvm -o - | FileCheck %s #include "Inputs/cuda.h" +// Make sure that all __global__ functions are added to @llvm.used +// CHECK: @llvm.used = appending global +// CHECK-SAME: @global_function +// CHECK-SAME: @_Z16templated_kernelIiEvT_ + // CHECK-LABEL: define void @device_function extern "C" __device__ void device_function() {} @@ -13,4 +22,10 @@ __global__ void global_function() { device_function(); } +// Make sure host-instantiated kernels are preserved on device side. +template <typename T> __global__ void templated_kernel(T param) {} +// CHECK-LABEL: define linkonce_odr void @_Z16templated_kernelIiEvT_ +void host_function() { templated_kernel<<<0,0>>>(0); } + // CHECK: !{{[0-9]+}} = !{void ()* @global_function, !"kernel", i32 1} +// CHECK: !{{[0-9]+}} = !{void (i32)* @_Z16templated_kernelIiEvT_, !"kernel", i32 1}