diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index e1211bb8949b6..85be8bdd00516 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7995,15 +7995,23 @@ enum SpecialRegisterAccessKind { Write, }; +// Generates the IR for __builtin_read_exec_*. +// Lowers the builtin to amdgcn_ballot intrinsic. static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, - llvm::Type *ValueType) { + llvm::Type *ValueType, bool isExecHi) { CodeGen::CGBuilderTy &Builder = CGF.Builder; CodeGen::CodeGenModule &CGM = CGF.CGM; - llvm::Type *ResultType = CGF.ConvertType(E->getType()); - Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType}); + Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType}); llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)}); + + if (isExecHi) { + Value *Rt2 = Builder.CreateLShr(Call, 32); + Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty); + return Rt2; + } + return Call; } @@ -17857,10 +17865,11 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1}); } case AMDGPU::BI__builtin_amdgcn_read_exec: + return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false); case AMDGPU::BI__builtin_amdgcn_read_exec_lo: - case AMDGPU::BI__builtin_amdgcn_read_exec_hi: { - return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty); - } + return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false); + case AMDGPU::BI__builtin_amdgcn_read_exec_hi: + return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true); case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray: case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h: case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l: diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl index a4d14cf1f6cf0..43553131f63c5 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl @@ -13,6 +13,8 @@ void test_ballot_wave32(global uint* out, int a, int b) *out = __builtin_amdgcn_ballot_w32(a == b); } +// CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) #[[$NOUNWIND_READONLY:[0-9]+]] + // CHECK-LABEL: @test_ballot_wave32_target_attr( // CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 %{{.+}}) __attribute__((target("wavefrontsize32"))) @@ -21,6 +23,28 @@ void test_ballot_wave32_target_attr(global uint* out, int a, int b) *out = __builtin_amdgcn_ballot_w32(a == b); } +// CHECK-LABEL: @test_read_exec( +// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true) +void test_read_exec(global uint* out) { + *out = __builtin_amdgcn_read_exec(); +} + +// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) #[[$NOUNWIND_READONLY:[0-9]+]] + +// CHECK-LABEL: @test_read_exec_lo( +// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true) +void test_read_exec_lo(global uint* out) { + *out = __builtin_amdgcn_read_exec_lo(); +} + +// CHECK-LABEL: @test_read_exec_hi( +// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true) +// CHECK: lshr i64 [[A:%.*]], 32 +// CHECK: trunc i64 [[B:%.*]] to i32 +void test_read_exec_hi(global uint* out) { + *out = __builtin_amdgcn_read_exec_hi(); +} + #if __AMDGCN_WAVEFRONT_SIZE != 32 #error Wrong wavesize detected #endif diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl index 563c9a2a240c1..53f34c6a44ae7 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-wave64.cl @@ -13,6 +13,8 @@ void test_ballot_wave64(global ulong* out, int a, int b) *out = __builtin_amdgcn_ballot_w64(a == b); } +// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) #[[$NOUNWIND_READONLY:[0-9]+]] + // CHECK-LABEL: @test_ballot_wave64_target_attr( // CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 %{{.+}}) __attribute__((target("wavefrontsize64"))) @@ -21,6 +23,27 @@ void test_ballot_wave64_target_attr(global ulong* out, int a, int b) *out = __builtin_amdgcn_ballot_w64(a == b); } +// CHECK-LABEL: @test_read_exec( +// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true) +void test_read_exec(global ulong* out) { + *out = __builtin_amdgcn_read_exec(); +} + +// CHECK-LABEL: @test_read_exec_lo( +// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true) +void test_read_exec_lo(global ulong* out) { + *out = __builtin_amdgcn_read_exec_lo(); +} + +// CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) #[[$NOUNWIND_READONLY:[0-9]+]] + +// CHECK-LABEL: @test_read_exec_hi( +// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true) +// CHECK: lshr i64 [[A:%.*]], 32 +void test_read_exec_hi(global ulong* out) { + *out = __builtin_amdgcn_read_exec_hi(); +} + #if __AMDGCN_WAVEFRONT_SIZE != 64 #error Wrong wavesize detected #endif diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index 8938642e3b19f..0bc9a54682d3e 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -526,7 +526,9 @@ void test_read_exec_lo(global uint* out) { // CHECK: declare i32 @llvm.amdgcn.ballot.i32(i1) #[[$NOUNWIND_READONLY:[0-9]+]] // CHECK-LABEL: @test_read_exec_hi( -// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true) +// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true) +// CHECK: lshr i64 [[A:%.*]], 32 +// CHECK: trunc i64 [[B:%.*]] to i32 void test_read_exec_hi(global uint* out) { *out = __builtin_amdgcn_read_exec_hi(); }