Skip to content

Commit

Permalink
[OpenMP] Add kernel string attribute to kernel function
Browse files Browse the repository at this point in the history
This patch adds a function attribute to the kernel function generated in
OpenMP offloading. We already create a `nvvm.annotations` metadata node
indicating the kernels present in the program. However, this created
some indirection when trying to identify if a specific function was an
entry. We add a single function attribute for each function now to
simplify this.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D118708
  • Loading branch information
jhuber6 committed Feb 1, 2022
1 parent 3e95180 commit 53d5757
Show file tree
Hide file tree
Showing 18 changed files with 900 additions and 895 deletions.
13 changes: 9 additions & 4 deletions clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
Expand Up @@ -1125,20 +1125,25 @@ void CGOpenMPRuntimeGPU::createOffloadEntry(llvm::Constant *ID,
llvm::GlobalValue::LinkageTypes) {
// TODO: Add support for global variables on the device after declare target
// support.
if (!isa<llvm::Function>(Addr))
llvm::Function *Fn = dyn_cast<llvm::Function>(Addr);
if (!Fn)
return;

llvm::Module &M = CGM.getModule();
llvm::LLVMContext &Ctx = CGM.getLLVMContext();

// Get "nvvm.annotations" metadata node
// Get "nvvm.annotations" metadata node.
llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");

llvm::Metadata *MDVals[] = {
llvm::ConstantAsMetadata::get(Addr), llvm::MDString::get(Ctx, "kernel"),
llvm::ConstantAsMetadata::get(Fn), llvm::MDString::get(Ctx, "kernel"),
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1))};
// Append metadata to nvvm.annotations
// Append metadata to nvvm.annotations.
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));

// Add a function attribute for the kernel.
Fn->addFnAttr(llvm::Attribute::get(Ctx, "kernel"));
}

void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction(
Expand Down
8 changes: 4 additions & 4 deletions clang/test/OpenMP/amdgcn-attributes.cpp
Expand Up @@ -32,10 +32,10 @@ int callable(int x) {
return x + 1;
}

// DEFAULT: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// CPU: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" }
// NOIEEE: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-ieee"="false" "frame-pointer"="none" "min-legal-vector-width"="0" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// UNSAFEATOMIC: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-unsafe-fp-atomics"="true" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// DEFAULT: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// CPU: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" }
// NOIEEE: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-ieee"="false" "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// UNSAFEATOMIC: attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-unsafe-fp-atomics"="true" "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }

// DEFAULT: attributes #1 = { convergent mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
// CPU: attributes #1 = { convergent mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" }
Expand Down
12 changes: 6 additions & 6 deletions clang/test/OpenMP/nvptx_data_sharing.cpp
Expand Up @@ -427,7 +427,7 @@ void test_ds(){
//
//
// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__
// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR0]] {
// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
Expand All @@ -441,7 +441,7 @@ void test_ds(){
//
//
// CHECK-LABEL: define {{[^@]+}}@__omp_outlined___wrapper
// CHECK-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
Expand All @@ -455,12 +455,12 @@ void test_ds(){
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0
// CHECK-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32**
// CHECK-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
// CHECK-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3:[0-9]+]]
// CHECK-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR4:[0-9]+]]
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__1
// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR0]] {
// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[B:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
Expand All @@ -482,7 +482,7 @@ void test_ds(){
//
//
// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
// CHECK-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] {
// CHECK-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
Expand All @@ -499,6 +499,6 @@ void test_ds(){
// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 1
// CHECK-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32**
// CHECK-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8
// CHECK-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]]) #[[ATTR3]]
// CHECK-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]]) #[[ATTR4]]
// CHECK-NEXT: ret void
//

0 comments on commit 53d5757

Please sign in to comment.