Skip to content

Commit

Permalink
Prefix the name of the calling host function in the name of callee GP…
Browse files Browse the repository at this point in the history
…U kernel

Summary:
Give each GPU kernel's name more context by prefixing it with the name of the host function that calls it. For example, the first kernel called by `gemm` is named `FUNC_gemm_KERNEL_0`.

Kernels currently follow the "kernel_#" (# = 0, 1, 2, 3, ...) naming scheme. This patch makes it easier to map each device callee back to its host caller, especially when Polly-ACC produces many kernels.

Reviewers: grosser, Meinersbur, bollu, philip.pfaffe, kbarton!

Reviewed By: grosser

Subscribers: nemanjai, pollydev

Tags: #polly

Differential Revision: https://reviews.llvm.org/D33985

llvm-svn: 307173
  • Loading branch information
singam-sanjay committed Jul 5, 2017
1 parent 7538b35 commit 79f13b9
Show file tree
Hide file tree
Showing 7 changed files with 23 additions and 15 deletions.
14 changes: 11 additions & 3 deletions polly/lib/CodeGen/PPCGCodeGeneration.cpp
Expand Up @@ -299,6 +299,9 @@ class GPUNodeBuilder : public IslNodeBuilder {
/// The maximal number of loops surrounding a parallel kernel.
unsigned DeepestParallel = 0;

/// Return the name to set for the ptx_kernel.
std::string getKernelFuncName(int Kernel_id);

private:
/// A vector of array base pointers for which a new ScopArrayInfo was created.
///
Expand Down Expand Up @@ -662,6 +665,11 @@ class GPUNodeBuilder : public IslNodeBuilder {
Value *Parameters);
};

/// Compose the identifier used for a generated GPU kernel.
///
/// The result has the form "FUNC_<host function name>_KERNEL_<Kernel_id>",
/// where the host function name is taken from the function of the SCoP `S`.
///
/// @param Kernel_id The numeric id of the kernel to name.
///
/// @returns The composed kernel name.
std::string GPUNodeBuilder::getKernelFuncName(int Kernel_id) {
  // Start with the fixed prefix, then append each component in order.
  std::string KernelName = "FUNC_";
  KernelName += S.getFunction().getName().str();
  KernelName += "_KERNEL_";
  KernelName += std::to_string(Kernel_id);
  return KernelName;
}

void GPUNodeBuilder::initializeAfterRTH() {
BasicBlock *NewBB = SplitBlock(Builder.GetInsertBlock(),
&*Builder.GetInsertPoint(), &DT, &LI);
Expand Down Expand Up @@ -1621,7 +1629,7 @@ void GPUNodeBuilder::createKernel(__isl_take isl_ast_node *KernelStmt) {
Builder.SetInsertPoint(&HostInsertPoint);
Value *Parameters = createLaunchParameters(Kernel, F, SubtreeValues);

std::string Name = "kernel_" + std::to_string(Kernel->id);
std::string Name = getKernelFuncName(Kernel->id);
Value *KernelString = Builder.CreateGlobalStringPtr(ASMString, Name);
Value *NameString = Builder.CreateGlobalStringPtr(Name, Name + "_name");
Value *GPUKernel = createCallGetKernel(KernelString, NameString);
Expand Down Expand Up @@ -1662,7 +1670,7 @@ Function *
GPUNodeBuilder::createKernelFunctionDecl(ppcg_kernel *Kernel,
SetVector<Value *> &SubtreeValues) {
std::vector<Type *> Args;
std::string Identifier = "kernel_" + std::to_string(Kernel->id);
std::string Identifier = getKernelFuncName(Kernel->id);

for (long i = 0; i < Prog->n_array; i++) {
if (!ppcg_kernel_requires_array_argument(Kernel, i))
Expand Down Expand Up @@ -1926,7 +1934,7 @@ void GPUNodeBuilder::createKernelVariables(ppcg_kernel *Kernel, Function *FN) {
void GPUNodeBuilder::createKernelFunction(
ppcg_kernel *Kernel, SetVector<Value *> &SubtreeValues,
SetVector<Function *> &SubtreeFunctions) {
std::string Identifier = "kernel_" + std::to_string(Kernel->id);
std::string Identifier = getKernelFuncName(Kernel->id);
GPUModule.reset(new Module(Identifier, Builder.getContext()));

switch (Arch) {
Expand Down
4 changes: 2 additions & 2 deletions polly/test/GPGPU/cuda-annotations.ll
Expand Up @@ -4,11 +4,11 @@

; REQUIRES: pollyacc

; KERNEL: define ptx_kernel void @kernel_0(i8 addrspace(1)* %MemRef_A, i64 %n) #0 {
; KERNEL: define ptx_kernel void @FUNC_foo_KERNEL_0(i8 addrspace(1)* %MemRef_A, i64 %n) #0 {

; KERNEL: !nvvm.annotations = !{!0}

; KERNEL: !0 = !{void (i8 addrspace(1)*, i64)* @kernel_0, !"maxntidx", i32 32, !"maxntidy", i32 1, !"maxntidz", i32 1}
; KERNEL: !0 = !{void (i8 addrspace(1)*, i64)* @FUNC_foo_KERNEL_0, !"maxntidx", i32 32, !"maxntidy", i32 1, !"maxntidz", i32 1}

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

Expand Down
2 changes: 1 addition & 1 deletion polly/test/GPGPU/cuda-managed-memory-simple.ll
Expand Up @@ -54,7 +54,7 @@
; CHECK-NEXT: %22 = getelementptr [4 x i8*], [4 x i8*]* %polly_launch_0_params, i64 0, i64 3
; CHECK-NEXT: %23 = bitcast i32* %polly_launch_0_param_size_1 to i8*
; CHECK-NEXT: store i8* %23, i8** %22
; CHECK-NEXT: %24 = call i8* @polly_getKernel(i8* getelementptr inbounds ([750 x i8], [750 x i8]* @kernel_0, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @kernel_0_name, i32 0, i32 0))
; CHECK-NEXT: %24 = call i8* @polly_getKernel(i8* getelementptr inbounds ([810 x i8], [810 x i8]* @FUNC_copy_KERNEL_0, i32 0, i32 0), i8* getelementptr inbounds ([19 x i8], [19 x i8]* @FUNC_copy_KERNEL_0_name, i32 0, i32 0))
; CHECK-NEXT: call void @polly_launchKernel(i8* %24, i32 2, i32 1, i32 32, i32 1, i32 1, i8* %polly_launch_0_params_i8ptr)
; CHECK-NEXT: call void @polly_freeKernel(i8* %24)
; CHECK-NEXT: call void @polly_synchronizeDevice()
Expand Down
2 changes: 1 addition & 1 deletion polly/test/GPGPU/host-control-flow.ll
Expand Up @@ -42,7 +42,7 @@
; IR-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar_next, 99
; IR-NEXT: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit

; KERNEL-IR: define ptx_kernel void @kernel_0(i8 addrspace(1)* %MemRef_A, i64 %c0)
; KERNEL-IR: define ptx_kernel void @FUNC_foo_KERNEL_0(i8 addrspace(1)* %MemRef_A, i64 %c0)
; KERNEL-IR-LABEL: entry:
; KERNEL-IR-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
; KERNEL-IR-NEXT: %b0 = zext i32 %0 to i64
Expand Down
2 changes: 1 addition & 1 deletion polly/test/GPGPU/invariant-load-hoisting.ll
Expand Up @@ -21,7 +21,7 @@
; HOST-IR: call void @polly_launchKernel(i8* %215, i32 %221, i32 1, i32 32, i32 1, i32 1, i8* %polly_launch_0_params_i8ptr)
; HOST-IR-NEXT: call void @polly_freeKernel(i8* %215)
;
; KERNEL-IR: define ptx_kernel void @kernel_0(i8 addrspace(1)* %MemRef_B, i8 addrspace(1)* %MemRef_A, i32 %n, i32 %tmp12) #0 {
; KERNEL-IR: define ptx_kernel void @FUNC_f_KERNEL_0(i8 addrspace(1)* %MemRef_B, i8 addrspace(1)* %MemRef_A, i32 %n, i32 %tmp12) #0 {
;
; Check that we generate correct GPU code in case of invariant load hoisting.
;
Expand Down
12 changes: 6 additions & 6 deletions polly/test/GPGPU/kernel-params-only-some-arrays.ll
Expand Up @@ -16,12 +16,12 @@
; B[i] += 42;
; }

; KERNEL: ; ModuleID = 'kernel_0'
; KERNEL-NEXT: source_filename = "kernel_0"
; KERNEL: ; ModuleID = 'FUNC_kernel_params_only_some_arrays_KERNEL_0'
; KERNEL-NEXT: source_filename = "FUNC_kernel_params_only_some_arrays_KERNEL_0"
; KERNEL-NEXT: target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda"

; KERNEL: define ptx_kernel void @kernel_0(i8 addrspace(1)* %MemRef_A)
; KERNEL: define ptx_kernel void @FUNC_kernel_params_only_some_arrays_KERNEL_0(i8 addrspace(1)* %MemRef_A)
; KERNEL-NEXT: entry:
; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
; KERNEL-NEXT: %b0 = zext i32 %0 to i64
Expand All @@ -31,12 +31,12 @@
; KERNEL: ret void
; KERNEL-NEXT: }

; KERNEL: ; ModuleID = 'kernel_1'
; KERNEL-NEXT: source_filename = "kernel_1"
; KERNEL: ; ModuleID = 'FUNC_kernel_params_only_some_arrays_KERNEL_1'
; KERNEL-NEXT: source_filename = "FUNC_kernel_params_only_some_arrays_KERNEL_1"
; KERNEL-NEXT: target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda"

; KERNEL: define ptx_kernel void @kernel_1(i8 addrspace(1)* %MemRef_B)
; KERNEL: define ptx_kernel void @FUNC_kernel_params_only_some_arrays_KERNEL_1(i8 addrspace(1)* %MemRef_B)
; KERNEL-NEXT: entry:
; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
; KERNEL-NEXT: %b0 = zext i32 %0 to i64
Expand Down
2 changes: 1 addition & 1 deletion polly/test/GPGPU/kernel-params-scop-parameter.ll
Expand Up @@ -9,7 +9,7 @@
; A[i] += 42;
; }

; KERNEL-IR: define ptx_kernel void @kernel_0(i8 addrspace(1)* %MemRef_A, i64 %n)
; KERNEL-IR: define ptx_kernel void @FUNC_kernel_params_scop_parameter_KERNEL_0(i8 addrspace(1)* %MemRef_A, i64 %n)

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

Expand Down

0 comments on commit 79f13b9

Please sign in to comment.