Skip to content

Commit

Permalink
[AMDGPU] Set default flat work group size to (1,256) for HIP
Browse files: browse the repository at this point in the history
Differential Revision: https://reviews.llvm.org/D67048

llvm-svn: 370808
  • Loading branch information
yxsamliu committed Sep 3, 2019
1 parent 0581a44 commit 1bea97c
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 5 deletions.
7 changes: 4 additions & 3 deletions clang/lib/CodeGen/TargetInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7915,8 +7915,9 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(

const bool IsOpenCLKernel = M.getLangOpts().OpenCL &&
FD->hasAttr<OpenCLKernelAttr>();
if ((IsOpenCLKernel ||
(M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>())) &&
const bool IsHIPKernel = M.getLangOpts().HIP &&
FD->hasAttr<CUDAGlobalAttr>();
if ((IsOpenCLKernel || IsHIPKernel) &&
(M.getTriple().getOS() == llvm::Triple::AMDHSA))
F->addFnAttr("amdgpu-implicitarg-num-bytes", "56");

Expand All @@ -7942,7 +7943,7 @@ void AMDGPUTargetCodeGenInfo::setTargetAttributes(
F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
} else
assert(Max == 0 && "Max must be zero");
} else if (IsOpenCLKernel) {
} else if (IsOpenCLKernel || IsHIPKernel) {
// By default, restrict the maximum size to 256.
F->addFnAttr("amdgpu-flat-work-group-size", "1,256");
}
Expand Down
5 changes: 3 additions & 2 deletions clang/test/CodeGenCUDA/kernel-amdgcn.cu
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck %s
#include "Inputs/cuda.h"

// CHECK: define amdgpu_kernel void @_ZN1A6kernelEv
Expand All @@ -25,7 +25,7 @@ struct Dummy {
EmptyKernelPtr Empty() { return EmptyKernel<void>; }
};

// CHECK: define amdgpu_kernel void @_Z15template_kernelI1AEvT_
// CHECK: define amdgpu_kernel void @_Z15template_kernelI1AEvT_{{.*}} #[[ATTR:[0-9][0-9]*]]
// Kernel template taking a single by-value argument; the body is empty.
template <typename T>
__global__ void template_kernel(T x) {
}

Expand All @@ -39,3 +39,4 @@ int main() {
launch((void*)D.Empty());
return 0;
}
// CHECK: attributes #[[ATTR]] = {{.*}}"amdgpu-flat-work-group-size"="1,256"

0 comments on commit 1bea97c

Please sign in to comment.