Skip to content

Commit

Permalink
[AMDGPU] Fix lowering enqueue kernel when kernel has no name
Browse files Browse the repository at this point in the history
Since enqueued kernels have internal linkage, their names may be dropped.
When that happens, give each one a unique name: __amdgpu_enqueued_kernel for the
first, and __amdgpu_enqueued_kernel.n for later ones, where n is a sequential number starting from 1.

Differential Revision: https://reviews.llvm.org/D44322

llvm-svn: 327291
  • Loading branch information
yxsamliu committed Mar 12, 2018
1 parent 0185281 commit a99e7d8
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 17 deletions.
24 changes: 16 additions & 8 deletions llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/User.h"
#include "llvm/Pass.h"
Expand Down Expand Up @@ -94,18 +95,25 @@ bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) {
bool Changed = false;
for (auto &F : M.functions()) {
if (F.hasFnAttribute("enqueued-block")) {
if (!F.hasName()) {
SmallString<64> Name;
Mangler::getNameWithPrefix(Name, "__amdgpu_enqueued_kernel",
M.getDataLayout());
F.setName(Name);
}
auto RuntimeHandle = (F.getName() + ".runtime_handle").str();
auto *GV = new GlobalVariable(
M, Type::getInt8Ty(C)->getPointerTo(AMDGPUAS::GLOBAL_ADDRESS),
/*IsConstant=*/true, GlobalValue::ExternalLinkage,
/*Initializer=*/nullptr, RuntimeHandle, /*InsertBefore=*/nullptr,
GlobalValue::NotThreadLocal, AMDGPUAS::GLOBAL_ADDRESS,
/*IsExternallyInitialized=*/true);
DEBUG(dbgs() << "runtime handle created: " << *GV << '\n');

for (auto U : F.users()) {
if (!isa<ConstantExpr>(&*U))
continue;
auto *BitCast = cast<ConstantExpr>(&*U);
auto RuntimeHandle = (F.getName() + "_runtime_handle").str();
auto *GV = new GlobalVariable(
M, Type::getInt8Ty(C)->getPointerTo(AMDGPUAS::GLOBAL_ADDRESS),
/*IsConstant=*/true, GlobalValue::ExternalLinkage,
/*Initializer=*/nullptr, RuntimeHandle, /*InsertBefore=*/nullptr,
GlobalValue::NotThreadLocal, AMDGPUAS::GLOBAL_ADDRESS,
/*IsExternallyInitialized=*/true);
DEBUG(dbgs() << "runtime handle created: " << *GV << '\n');
auto *NewPtr = ConstantExpr::getPointerCast(GV, BitCast->getType());
BitCast->replaceAllUsesWith(NewPtr);
F.addFnAttr("runtime-handle", RuntimeHandle);
Expand Down
56 changes: 47 additions & 9 deletions llvm/test/CodeGen/AMDGPU/enqueue-kernel.ll
Original file line number Diff line number Diff line change
@@ -1,25 +1,41 @@
; RUN: opt -data-layout=A5 -amdgpu-lower-enqueued-block -S < %s | FileCheck %s

; CHECK: @__test_block_invoke_kernel_runtime_handle = external addrspace(1) externally_initialized constant i8 addrspace(1)*
; CHECK: @__test_block_invoke_2_kernel_runtime_handle = external addrspace(1) externally_initialized constant i8 addrspace(1)*
; CHECK: @__test_block_invoke_kernel.runtime_handle = external addrspace(1) externally_initialized constant i8 addrspace(1)*
; CHECK: @__test_block_invoke_2_kernel.runtime_handle = external addrspace(1) externally_initialized constant i8 addrspace(1)*
; CHECK: @__amdgpu_enqueued_kernel.runtime_handle = external addrspace(1) externally_initialized constant i8 addrspace(1)*
; CHECK: @__amdgpu_enqueued_kernel.1.runtime_handle = external addrspace(1) externally_initialized constant i8 addrspace(1)*

%struct.ndrange_t = type { i32 }
%opencl.queue_t = type opaque

; CHECK: define amdgpu_kernel void @non_caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr !kernel_arg_addr_space
; CHECK-LABEL: define amdgpu_kernel void @non_caller
; CHECK-NOT: #{{[0-9]+}}
; Kernel whose body is only `ret void`: it makes no calls and references no
; enqueued block. The CHECK lines preceding this definition cover its signature.
define amdgpu_kernel void @non_caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
!kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
ret void
}

; CHECK: define amdgpu_kernel void @caller_indirect(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr #[[AT_CALLER:[0-9]+]]
; CHECK-LABEL: define amdgpu_kernel void @caller_indirect
; CHECK-SAME: #[[AT_CALLER:[0-9]+]]
; Kernel that does not call __enqueue_kernel_basic itself; it only forwards its
; four arguments to @caller. The CHECK lines preceding this definition capture
; its attribute group as AT_CALLER, which the checks for @caller reuse.
define amdgpu_kernel void @caller_indirect(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
!kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
call void @caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d)
ret void
}

; CHECK: define amdgpu_kernel void @caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr #[[AT_CALLER]]
; CHECK-LABEL: define amdgpu_kernel void @caller
; CHECK-SAME: #[[AT_CALLER]]
; CHECK-NOT: @__test_block_invoke_kernel
; CHECK-NOT: @__test_block_invoke_2_kernel
; CHECK-NOT: @__amdgpu_enqueued_kernel
; CHECK-NOT: @__amdgpu_enqueued_kernel.1
; CHECK-NOT: @0
; CHECK-NOT: @1
; CHECK: call i32 @__enqueue_kernel_basic({{.*}}@__test_block_invoke_kernel.runtime_handle
; CHECK: call i32 @__enqueue_kernel_basic({{.*}}@__test_block_invoke_kernel.runtime_handle
; CHECK: call i32 @__enqueue_kernel_basic({{.*}}@__amdgpu_enqueued_kernel.runtime_handle
; CHECK: call i32 @__enqueue_kernel_basic({{.*}}@__amdgpu_enqueued_kernel.1.runtime_handle
; CHECK: call i32 @__enqueue_kernel_basic({{.*}}@__test_block_invoke_2_kernel.runtime_handle
define amdgpu_kernel void @caller(i8 addrspace(1)* %a, i8 %b, i64 addrspace(1)* %c, i64 %d) local_unnamed_addr
!kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
entry:
Expand All @@ -41,6 +57,10 @@ entry:
i8* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i8 }>)* @__test_block_invoke_kernel to i8*), i8* nonnull %tmp4) #2
%tmp10 = call i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)* undef, i32 0, %struct.ndrange_t addrspace(5)* byval nonnull %tmp,
i8* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i8 }>)* @__test_block_invoke_kernel to i8*), i8* nonnull %tmp4) #2
%tmp11 = call i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)* undef, i32 0, %struct.ndrange_t addrspace(5)* byval nonnull %tmp,
i8* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i8 }>)* @0 to i8*), i8* nonnull %tmp4) #2
%tmp12 = call i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)* undef, i32 0, %struct.ndrange_t addrspace(5)* byval nonnull %tmp,
i8* bitcast (void (<{ i32, i32, i8 addrspace(1)*, i8 }>)* @1 to i8*), i8* nonnull %tmp4) #2
%block.size4 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 0
store i32 41, i32 addrspace(5)* %block.size4, align 8
%block.align5 = getelementptr inbounds <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, <{ i32, i32, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }> addrspace(5)* %block2, i32 0, i32 1
Expand All @@ -60,7 +80,8 @@ entry:
ret void
}

; CHECK: define dso_local amdgpu_kernel void @__test_block_invoke_kernel({{.*}}) #[[AT1:[0-9]+]]
; CHECK-LABEL: define dso_local amdgpu_kernel void @__test_block_invoke_kernel
; CHECK-SAME: #[[AT1:[0-9]+]]
define internal amdgpu_kernel void @__test_block_invoke_kernel(<{ i32, i32, i8 addrspace(1)*, i8 }> %arg) #0
!kernel_arg_addr_space !14 !kernel_arg_access_qual !15 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !17 {
entry:
Expand All @@ -72,7 +93,8 @@ entry:

declare i32 @__enqueue_kernel_basic(%opencl.queue_t addrspace(1)*, i32, %struct.ndrange_t addrspace(5)*, i8*, i8*) local_unnamed_addr

; CHECK: define dso_local amdgpu_kernel void @__test_block_invoke_2_kernel({{.*}}) #[[AT2:[0-9]+]]
; CHECK-LABEL: define dso_local amdgpu_kernel void @__test_block_invoke_2_kernel
; CHECK-SAME: #[[AT2:[0-9]+]]
define internal amdgpu_kernel void @__test_block_invoke_2_kernel(<{ i32, i32, i8 addrspace(1)*,
i64 addrspace(1)*, i64, i8 }> %arg) #0 !kernel_arg_addr_space !14 !kernel_arg_access_qual !15
!kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !17 {
Expand All @@ -86,9 +108,25 @@ entry:
ret void
}

; CHECK-LABEL: define dso_local amdgpu_kernel void @__amdgpu_enqueued_kernel
; CHECK-SAME: #[[AT3:[0-9]+]]
; First unnamed (numbered) internal kernel, tagged with attribute group #0.
; The CHECK-LABEL preceding this definition expects the output to define it
; under the name @__amdgpu_enqueued_kernel.
define internal amdgpu_kernel void @0(<{ i32, i32, i8 addrspace(1)*, i8 }> %arg) #0
!kernel_arg_addr_space !14 !kernel_arg_access_qual !15 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !17 {
ret void
}

; CHECK-LABEL: define dso_local amdgpu_kernel void @__amdgpu_enqueued_kernel.1
; CHECK-SAME: #[[AT4:[0-9]+]]
; Second unnamed (numbered) internal kernel, tagged with attribute group #0.
; The CHECK-LABEL preceding this definition expects the output to define it
; under the name @__amdgpu_enqueued_kernel.1.
define internal amdgpu_kernel void @1(<{ i32, i32, i8 addrspace(1)*, i8 }> %arg) #0
!kernel_arg_addr_space !14 !kernel_arg_access_qual !15 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !17 {
ret void
}

; CHECK: attributes #[[AT_CALLER]] = { "calls-enqueue-kernel" }
; CHECK: attributes #[[AT1]] = {{.*}}"runtime-handle"="__test_block_invoke_kernel_runtime_handle"
; CHECK: attributes #[[AT2]] = {{.*}}"runtime-handle"="__test_block_invoke_2_kernel_runtime_handle"
; CHECK: attributes #[[AT1]] = {{.*}}"runtime-handle"="__test_block_invoke_kernel.runtime_handle"
; CHECK: attributes #[[AT2]] = {{.*}}"runtime-handle"="__test_block_invoke_2_kernel.runtime_handle"
; CHECK: attributes #[[AT3]] = {{.*}}"runtime-handle"="__amdgpu_enqueued_kernel.runtime_handle"
; CHECK: attributes #[[AT4]] = {{.*}}"runtime-handle"="__amdgpu_enqueued_kernel.1.runtime_handle"

attributes #0 = { "enqueued-block" }

Expand Down

0 comments on commit a99e7d8

Please sign in to comment.