Skip to content

Commit

Permalink
AMDGPU: InstCombine amdgcn.sqrt.f16 to sqrt.f16
Browse files Browse the repository at this point in the history
There's nothing special about f16 sqrt handling.

https://reviews.llvm.org/D158090
  • Loading branch information
arsenm committed Aug 24, 2023
1 parent 8a62d6b commit 61c8af6
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 9 deletions.
2 changes: 1 addition & 1 deletion clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ void test_rcp_f16(global half* out, half a)
}

// CHECK-LABEL: @test_sqrt_f16
// CHECK: call half @llvm.amdgcn.sqrt.f16
// CHECK: call half @llvm.sqrt.f16
void test_sqrt_f16(global half* out, half a)
{
*out = __builtin_amdgcn_sqrth(a);
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,14 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
return IC.replaceInstUsesWith(II, QNaN);
}

// f16 amdgcn.sqrt is identical to regular sqrt.
if (IID == Intrinsic::amdgcn_sqrt && Src->getType()->isHalfTy()) {
Function *NewDecl = Intrinsic::getDeclaration(
II.getModule(), Intrinsic::sqrt, {II.getType()});
II.setCalledFunction(NewDecl);
return &II;
}

break;
}
case Intrinsic::amdgcn_log:
Expand Down
44 changes: 39 additions & 5 deletions llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -107,16 +107,15 @@ define double @test_constant_fold_sqrt_f64_undef() nounwind {

define half @test_constant_fold_sqrt_f16_0() nounwind {
; CHECK-LABEL: @test_constant_fold_sqrt_f16_0(
; CHECK-NEXT: [[VAL:%.*]] = call half @llvm.amdgcn.sqrt.f16(half 0xH0000) #[[ATTR15:[0-9]+]]
; CHECK-NEXT: ret half [[VAL]]
; CHECK-NEXT: ret half 0xH0000
;
%val = call half @llvm.amdgcn.sqrt.f16(half 0.0) nounwind readnone
ret half %val
}

define float @test_constant_fold_sqrt_f32_0() nounwind {
; CHECK-LABEL: @test_constant_fold_sqrt_f32_0(
; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 0.000000e+00) #[[ATTR15]]
; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 0.000000e+00) #[[ATTR15:[0-9]+]]
; CHECK-NEXT: ret float [[VAL]]
;
%val = call float @llvm.amdgcn.sqrt.f32(float 0.0) nounwind readnone
Expand All @@ -134,8 +133,7 @@ define double @test_constant_fold_sqrt_f64_0() nounwind {

define half @test_constant_fold_sqrt_f16_neg0() nounwind {
; CHECK-LABEL: @test_constant_fold_sqrt_f16_neg0(
; CHECK-NEXT: [[VAL:%.*]] = call half @llvm.amdgcn.sqrt.f16(half 0xH8000) #[[ATTR15]]
; CHECK-NEXT: ret half [[VAL]]
; CHECK-NEXT: ret half 0xH8000
;
%val = call half @llvm.amdgcn.sqrt.f16(half -0.0) nounwind readnone
ret half %val
Expand Down Expand Up @@ -186,6 +184,42 @@ define double @test_constant_fold_sqrt_neg1() nounwind {
ret double %val
}

; A non-constant f16 llvm.amdgcn.sqrt call is expected to be rewritten to the
; generic llvm.sqrt.f16 intrinsic.
define half @test_amdgcn_sqrt_f16(half %arg) {
; CHECK-LABEL: @test_amdgcn_sqrt_f16(
; CHECK-NEXT: [[VAL:%.*]] = call half @llvm.sqrt.f16(half [[ARG:%.*]])
; CHECK-NEXT: ret half [[VAL]]
;
%val = call half @llvm.amdgcn.sqrt.f16(half %arg)
ret half %val
}

; The nnan fast-math flag on the f16 llvm.amdgcn.sqrt call is expected to be
; carried over to the replacement llvm.sqrt.f16 call.
define half @test_amdgcn_sqrt_f16_flags(half %arg) {
; CHECK-LABEL: @test_amdgcn_sqrt_f16_flags(
; CHECK-NEXT: [[VAL:%.*]] = call nnan half @llvm.sqrt.f16(half [[ARG:%.*]])
; CHECK-NEXT: ret half [[VAL]]
;
%val = call nnan half @llvm.amdgcn.sqrt.f16(half %arg)
ret half %val
}

; Negative test: the f32 llvm.amdgcn.sqrt call is expected to be left
; unchanged (not rewritten to llvm.sqrt.f32).
define float @test_amdgcn_sqrt_f32(float %arg) {
; CHECK-LABEL: @test_amdgcn_sqrt_f32(
; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[ARG:%.*]])
; CHECK-NEXT: ret float [[VAL]]
;
%val = call float @llvm.amdgcn.sqrt.f32(float %arg)
ret float %val
}

; Negative test: the f64 llvm.amdgcn.sqrt call is expected to be left
; unchanged (not rewritten to llvm.sqrt.f64).
define double @test_amdgcn_sqrt_f64(double %arg) {
; CHECK-LABEL: @test_amdgcn_sqrt_f64(
; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.sqrt.f64(double [[ARG:%.*]])
; CHECK-NEXT: ret double [[VAL]]
;
%val = call double @llvm.amdgcn.sqrt.f64(double %arg)
ret double %val
}

; --------------------------------------------------------------------
; llvm.amdgcn.rsq
; --------------------------------------------------------------------
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/Transforms/InstCombine/AMDGPU/rcp-contract-rsq.ll
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ define half @amdgcn_rcp_amdgcn_sqrt_f16_contract(half %x) {
define half @amdgcn_rcp_amdgcn_sqrt_f16_missing_contract0(half %x) {
; CHECK-LABEL: define half @amdgcn_rcp_amdgcn_sqrt_f16_missing_contract0
; CHECK-SAME: (half [[X:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[SQRT:%.*]] = call half @llvm.amdgcn.sqrt.f16(half [[X]])
; CHECK-NEXT: [[SQRT:%.*]] = call half @llvm.sqrt.f16(half [[X]])
; CHECK-NEXT: [[RSQ:%.*]] = call contract half @llvm.amdgcn.rcp.f16(half [[SQRT]])
; CHECK-NEXT: ret half [[RSQ]]
;
Expand All @@ -112,7 +112,7 @@ define half @amdgcn_rcp_amdgcn_sqrt_f16_missing_contract0(half %x) {
define half @amdgcn_rcp_amdgcn_sqrt_f16_missing_contract1(half %x) {
; CHECK-LABEL: define half @amdgcn_rcp_amdgcn_sqrt_f16_missing_contract1
; CHECK-SAME: (half [[X:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[SQRT:%.*]] = call contract half @llvm.amdgcn.sqrt.f16(half [[X]])
; CHECK-NEXT: [[SQRT:%.*]] = call contract half @llvm.sqrt.f16(half [[X]])
; CHECK-NEXT: [[RSQ:%.*]] = call half @llvm.amdgcn.rcp.f16(half [[SQRT]])
; CHECK-NEXT: ret half [[RSQ]]
;
Expand All @@ -125,7 +125,7 @@ define half @amdgcn_rcp_amdgcn_sqrt_f16_missing_contract1(half %x) {
define half @amdgcn_rcp_amdgcn_sqrt_f16_contract_multi_use(half %x, ptr %ptr) {
; CHECK-LABEL: define half @amdgcn_rcp_amdgcn_sqrt_f16_contract_multi_use
; CHECK-SAME: (half [[X:%.*]], ptr [[PTR:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[SQRT:%.*]] = call contract half @llvm.amdgcn.sqrt.f16(half [[X]])
; CHECK-NEXT: [[SQRT:%.*]] = call contract half @llvm.sqrt.f16(half [[X]])
; CHECK-NEXT: store half [[SQRT]], ptr [[PTR]], align 2
; CHECK-NEXT: [[RSQ:%.*]] = call contract half @llvm.amdgcn.rcp.f16(half [[SQRT]])
; CHECK-NEXT: ret half [[RSQ]]
Expand Down

0 comments on commit 61c8af6

Please sign in to comment.