Skip to content

Commit

Permalink
[AMDGPU] Add InstCombine rule for ballot.i64 intrinsic in wave32 mode. (
Browse files Browse the repository at this point in the history
#71556)

Replace it with a call to the ballot.i32 intrinsic whose result is zero-extended to i64.
  • Loading branch information
vpykhtin committed Jan 17, 2024
1 parent 7f7bbb9 commit 57b50ef
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 9 deletions.
8 changes: 2 additions & 6 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-wave32.cl
Original file line number Diff line number Diff line change
Expand Up @@ -24,23 +24,19 @@ void test_ballot_wave32_target_attr(global uint* out, int a, int b)
}

// CHECK-LABEL: @test_read_exec(
// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
// Writes the value returned by __builtin_amdgcn_read_exec() through `out`.
// The body is kept to a single builtin call so the FileCheck patterns above
// can match the IR generated for that call.
void test_read_exec(global uint* out) {
*out = __builtin_amdgcn_read_exec();
}

// CHECK: declare i64 @llvm.amdgcn.ballot.i64(i1) #[[$NOUNWIND_READONLY:[0-9]+]]

// CHECK-LABEL: @test_read_exec_lo(
// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 true)
// Writes the value returned by __builtin_amdgcn_read_exec_lo() through `out`.
// The body is kept to a single builtin call so the FileCheck pattern above
// can match the IR generated for that call.
void test_read_exec_lo(global uint* out) {
*out = __builtin_amdgcn_read_exec_lo();
}

// CHECK-LABEL: @test_read_exec_hi(
// CHECK: call i64 @llvm.amdgcn.ballot.i64(i1 true)
// CHECK: lshr i64 [[A:%.*]], 32
// CHECK: trunc i64 [[B:%.*]] to i32
// CHECK: store i32 0, ptr addrspace(1) %out
// Writes the value returned by __builtin_amdgcn_read_exec_hi() through `out`.
// The body is kept to a single builtin call so the FileCheck patterns above
// can match the IR generated for that call.
void test_read_exec_hi(global uint* out) {
*out = __builtin_amdgcn_read_exec_hi();
}
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2382,7 +2382,7 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
auto CC = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
if ((CC == ISD::SETEQ || CC == ISD::SETNE) &&
isNullConstant(Cond->getOperand(1)) &&
// TODO: make condition below an assert after fixing ballot bitwidth.
// We may encounter ballot.i64 in wave32 mode on -O0.
VCMP.getValueType().getSizeInBits() == ST->getWavefrontSize()) {
// %VCMP = i(WaveSize) AMDGPUISD::SETCC ...
// %C = i1 ISD::SETCC %VCMP, 0, setne/seteq
Expand Down
13 changes: 13 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -990,6 +990,19 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
return IC.replaceInstUsesWith(II, Constant::getNullValue(II.getType()));
}
}
if (ST->isWave32() && II.getType()->getIntegerBitWidth() == 64) {
// %b64 = call i64 ballot.i64(...)
// =>
// %b32 = call i32 ballot.i32(...)
// %b64 = zext i32 %b32 to i64
Value *Call = IC.Builder.CreateZExt(
IC.Builder.CreateIntrinsic(Intrinsic::amdgcn_ballot,
{IC.Builder.getInt32Ty()},
{II.getArgOperand(0)}),
II.getType());
Call->takeName(&II);
return IC.replaceInstUsesWith(II, Call);
}
break;
}
case Intrinsic::amdgcn_wqm_vote: {
Expand Down
6 changes: 4 additions & 2 deletions llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2599,7 +2599,8 @@ declare i32 @llvm.amdgcn.ballot.i32(i1) nounwind readnone convergent

define i64 @ballot_nocombine_64(i1 %i) {
; CHECK-LABEL: @ballot_nocombine_64(
; CHECK-NEXT: [[B:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[I:%.*]])
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[I:%.*]])
; CHECK-NEXT: [[B:%.*]] = zext i32 [[TMP1]] to i64
; CHECK-NEXT: ret i64 [[B]]
;
%b = call i64 @llvm.amdgcn.ballot.i64(i1 %i)
Expand All @@ -2616,7 +2617,8 @@ define i64 @ballot_zero_64() {

define i64 @ballot_one_64() {
; CHECK-LABEL: @ballot_one_64(
; CHECK-NEXT: [[B:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 true)
; CHECK-NEXT: [[B:%.*]] = zext i32 [[TMP1]] to i64
; CHECK-NEXT: ret i64 [[B]]
;
%b = call i64 @llvm.amdgcn.ballot.i64(i1 1)
Expand Down

0 comments on commit 57b50ef

Please sign in to comment.