Skip to content

Commit

Permalink
[HLSL] move rcp to cgbuiltins (#88401)
Browse files Browse the repository at this point in the history
Removing the intrinsic because there are no opcodes for rcp in DXIL or
SPIR-V.
Moving the expansion into CGBuiltins means we don't have to re-implement
this feature for each backend.

fixes #87784

Co-authored-by: Farzon Lotfi <farzon@farzon.com>
  • Loading branch information
farzonl and Farzon Lotfi committed Apr 11, 2024
1 parent 9a36077 commit 4036a69
Show file tree
Hide file tree
Showing 5 changed files with 224 additions and 116 deletions.
13 changes: 10 additions & 3 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18303,9 +18303,16 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
Value *Op0 = EmitScalarExpr(E->getArg(0));
if (!E->getArg(0)->getType()->hasFloatingRepresentation())
llvm_unreachable("rcp operand must have a float representation");
return Builder.CreateIntrinsic(
/*ReturnType=*/Op0->getType(), Intrinsic::dx_rcp,
ArrayRef<Value *>{Op0}, nullptr, "dx.rcp");
llvm::Type *Ty = Op0->getType();
llvm::Type *EltTy = Ty->getScalarType();
Constant *One =
Ty->isVectorTy()
? ConstantVector::getSplat(
ElementCount::getFixed(
dyn_cast<FixedVectorType>(Ty)->getNumElements()),
ConstantFP::get(EltTy, 1.0))
: ConstantFP::get(EltTy, 1.0);
return Builder.CreateFDiv(One, Op0, "hlsl.rcp");
}
case Builtin::BI__builtin_hlsl_elementwise_rsqrt: {
Value *Op0 = EmitScalarExpr(E->getArg(0));
Expand Down
127 changes: 88 additions & 39 deletions clang/test/CodeGenHLSL/builtins/rcp.hlsl
Original file line number Diff line number Diff line change
@@ -1,53 +1,102 @@
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
// RUN: dxil-pc-shadermodel6.3-library %s -fnative-half-type \
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
// RUN: --check-prefixes=CHECK,NATIVE_HALF
// RUN: --check-prefixes=CHECK,DXIL_CHECK,DXIL_NATIVE_HALF,NATIVE_HALF
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
// RUN: dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF

// NATIVE_HALF: define noundef half @
// NATIVE_HALF: %dx.rcp = call half @llvm.dx.rcp.f16(
// NATIVE_HALF: ret half %dx.rcp
// NO_HALF: define noundef float @"?test_rcp_half@@YA$halff@$halff@@Z"(
// NO_HALF: %dx.rcp = call float @llvm.dx.rcp.f32(
// NO_HALF: ret float %dx.rcp
// RUN: -o - | FileCheck %s --check-prefixes=CHECK,DXIL_CHECK,NO_HALF,DXIL_NO_HALF
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
// RUN: spirv-unknown-vulkan-compute %s -fnative-half-type \
// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \
// RUN: --check-prefixes=CHECK,NATIVE_HALF,SPIR_NATIVE_HALF,SPIR_CHECK
// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \
// RUN: spirv-unknown-vulkan-compute %s -emit-llvm -disable-llvm-passes \
// RUN: -o - | FileCheck %s --check-prefixes=CHECK,NO_HALF,SPIR_NO_HALF,SPIR_CHECK

// DXIL_NATIVE_HALF: define noundef half @
// SPIR_NATIVE_HALF: define spir_func noundef half @
// NATIVE_HALF: %hlsl.rcp = fdiv half 0xH3C00, %{{.*}}
// NATIVE_HALF: ret half %hlsl.rcp
// DXIL_NO_HALF: define noundef float @
// SPIR_NO_HALF: define spir_func noundef float @
// NO_HALF: %hlsl.rcp = fdiv float 1.000000e+00, %{{.*}}
// NO_HALF: ret float %hlsl.rcp
half test_rcp_half(half p0) { return rcp(p0); }
// NATIVE_HALF: define noundef <2 x half> @
// NATIVE_HALF: %dx.rcp = call <2 x half> @llvm.dx.rcp.v2f16
// NATIVE_HALF: ret <2 x half> %dx.rcp
// NO_HALF: define noundef <2 x float> @
// NO_HALF: %dx.rcp = call <2 x float> @llvm.dx.rcp.v2f32(
// NO_HALF: ret <2 x float> %dx.rcp

// DXIL_NATIVE_HALF: define noundef <2 x half> @
// SPIR_NATIVE_HALF: define spir_func noundef <2 x half> @
// NATIVE_HALF: %hlsl.rcp = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, %{{.*}}
// NATIVE_HALF: ret <2 x half> %hlsl.rcp
// DXIL_NO_HALF: define noundef <2 x float> @
// SPIR_NO_HALF: define spir_func noundef <2 x float> @
// NO_HALF: %hlsl.rcp = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %{{.*}}
// NO_HALF: ret <2 x float> %hlsl.rcp
half2 test_rcp_half2(half2 p0) { return rcp(p0); }
// NATIVE_HALF: define noundef <3 x half> @
// NATIVE_HALF: %dx.rcp = call <3 x half> @llvm.dx.rcp.v3f16
// NATIVE_HALF: ret <3 x half> %dx.rcp
// NO_HALF: define noundef <3 x float> @
// NO_HALF: %dx.rcp = call <3 x float> @llvm.dx.rcp.v3f32(
// NO_HALF: ret <3 x float> %dx.rcp

// DXIL_NATIVE_HALF: define noundef <3 x half> @
// SPIR_NATIVE_HALF: define spir_func noundef <3 x half> @
// NATIVE_HALF: %hlsl.rcp = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, %{{.*}}
// NATIVE_HALF: ret <3 x half> %hlsl.rcp
// DXIL_NO_HALF: define noundef <3 x float> @
// SPIR_NO_HALF: define spir_func noundef <3 x float> @
// NO_HALF: %hlsl.rcp = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %{{.*}}
// NO_HALF: ret <3 x float> %hlsl.rcp
half3 test_rcp_half3(half3 p0) { return rcp(p0); }
// NATIVE_HALF: define noundef <4 x half> @
// NATIVE_HALF: %dx.rcp = call <4 x half> @llvm.dx.rcp.v4f16
// NATIVE_HALF: ret <4 x half> %dx.rcp
// NO_HALF: define noundef <4 x float> @
// NO_HALF: %dx.rcp = call <4 x float> @llvm.dx.rcp.v4f32(
// NO_HALF: ret <4 x float> %dx.rcp

// DXIL_NATIVE_HALF: define noundef <4 x half> @
// SPIR_NATIVE_HALF: define spir_func noundef <4 x half> @
// NATIVE_HALF: %hlsl.rcp = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, %{{.*}}
// NATIVE_HALF: ret <4 x half> %hlsl.rcp
// DXIL_NO_HALF: define noundef <4 x float> @
// SPIR_NO_HALF: define spir_func noundef <4 x float> @
// NO_HALF: %hlsl.rcp = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %{{.*}}
// NO_HALF: ret <4 x float> %hlsl.rcp
half4 test_rcp_half4(half4 p0) { return rcp(p0); }

// CHECK: define noundef float @
// CHECK: %dx.rcp = call float @llvm.dx.rcp.f32(
// CHECK: ret float %dx.rcp
// DXIL_CHECK: define noundef float @
// SPIR_CHECK: define spir_func noundef float @
// CHECK: %hlsl.rcp = fdiv float 1.000000e+00, %{{.*}}
// CHECK: ret float %hlsl.rcp
float test_rcp_float(float p0) { return rcp(p0); }
// CHECK: define noundef <2 x float> @
// CHECK: %dx.rcp = call <2 x float> @llvm.dx.rcp.v2f32
// CHECK: ret <2 x float> %dx.rcp

// DXIL_CHECK: define noundef <2 x float> @
// SPIR_CHECK: define spir_func noundef <2 x float> @
// CHECK: %hlsl.rcp = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %{{.*}}
// CHECK: ret <2 x float> %hlsl.rcp
float2 test_rcp_float2(float2 p0) { return rcp(p0); }
// CHECK: define noundef <3 x float> @
// CHECK: %dx.rcp = call <3 x float> @llvm.dx.rcp.v3f32
// CHECK: ret <3 x float> %dx.rcp

// DXIL_CHECK: define noundef <3 x float> @
// SPIR_CHECK: define spir_func noundef <3 x float> @
// CHECK: %hlsl.rcp = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %{{.*}}
// CHECK: ret <3 x float> %hlsl.rcp
float3 test_rcp_float3(float3 p0) { return rcp(p0); }
// CHECK: define noundef <4 x float> @
// CHECK: %dx.rcp = call <4 x float> @llvm.dx.rcp.v4f32
// CHECK: ret <4 x float> %dx.rcp

// DXIL_CHECK: define noundef <4 x float> @
// SPIR_CHECK: define spir_func noundef <4 x float> @
// CHECK: %hlsl.rcp = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %{{.*}}
// CHECK: ret <4 x float> %hlsl.rcp
float4 test_rcp_float4(float4 p0) { return rcp(p0); }

// DXIL_CHECK: define noundef double @
// SPIR_CHECK: define spir_func noundef double @
// CHECK: %hlsl.rcp = fdiv double 1.000000e+00, %{{.*}}
// CHECK: ret double %hlsl.rcp
double test_rcp_double(double p0) { return rcp(p0); }

// DXIL_CHECK: define noundef <2 x double> @
// SPIR_CHECK: define spir_func noundef <2 x double> @
// CHECK: %hlsl.rcp = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, %{{.*}}
// CHECK: ret <2 x double> %hlsl.rcp
double2 test_rcp_double2(double2 p0) { return rcp(p0); }

// DXIL_CHECK: define noundef <3 x double> @
// SPIR_CHECK: define spir_func noundef <3 x double> @
// CHECK: %hlsl.rcp = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %{{.*}}
// CHECK: ret <3 x double> %hlsl.rcp
double3 test_rcp_double3(double3 p0) { return rcp(p0); }

// DXIL_CHECK: define noundef <4 x double> @
// SPIR_CHECK: define spir_func noundef <4 x double> @
// CHECK: %hlsl.rcp = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %{{.*}}
// CHECK: ret <4 x double> %hlsl.rcp
double4 test_rcp_double4(double4 p0) { return rcp(p0); }
22 changes: 0 additions & 22 deletions llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ static bool isIntrinsicExpansion(Function &F) {
case Intrinsic::dx_clamp:
case Intrinsic::dx_uclamp:
case Intrinsic::dx_lerp:
case Intrinsic::dx_rcp:
case Intrinsic::dx_sdot:
case Intrinsic::dx_udot:
return true;
Expand Down Expand Up @@ -218,25 +217,6 @@ static bool expandPowIntrinsic(CallInst *Orig) {
return true;
}

/// Expands a call to the dx.rcp intrinsic into the division `1.0 / x`.
///
/// Builds a floating-point one of the operand's type, emits an fdiv named
/// "dx.rcp" immediately before \p Orig, redirects every use of \p Orig to the
/// new value and erases the original call.
///
/// \param Orig The intrinsic call to replace; it is erased before returning.
/// \returns true unconditionally.
static bool expandRcpIntrinsic(CallInst *Orig) {
  Value *X = Orig->getOperand(0);
  IRBuilder<> Builder(Orig->getParent());
  Builder.SetInsertPoint(Orig);
  // ConstantFP::get on a vector type yields a splat of the scalar constant, so
  // scalar and vector operands need no separate handling. This also avoids
  // dereferencing an unchecked dyn_cast<FixedVectorType> result.
  Constant *One = ConstantFP::get(X->getType(), 1.0);
  auto *Result = Builder.CreateFDiv(One, X, "dx.rcp");
  Orig->replaceAllUsesWith(Result);
  Orig->eraseFromParent();
  return true;
}

static Intrinsic::ID getMaxForClamp(Type *ElemTy,
Intrinsic::ID ClampIntrinsic) {
if (ClampIntrinsic == Intrinsic::dx_uclamp)
Expand Down Expand Up @@ -300,8 +280,6 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
return expandClampIntrinsic(Orig, F.getIntrinsicID());
case Intrinsic::dx_lerp:
return expandLerpIntrinsic(Orig);
case Intrinsic::dx_rcp:
return expandRcpIntrinsic(Orig);
case Intrinsic::dx_sdot:
case Intrinsic::dx_udot:
return expandIntegerDot(Orig, F.getIntrinsicID());
Expand Down
52 changes: 0 additions & 52 deletions llvm/test/CodeGen/DirectX/rcp.ll

This file was deleted.

126 changes: 126 additions & 0 deletions llvm/test/CodeGen/SPIRV/hlsl-intrinsics/rcp.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s

; Each function below divides a floating-point one by its parameter. The
; directives verify that this lowers to an OpFDiv whose result type, dividend
; (the constant one, or a composite of ones) and divisor (the parameter) are
; the ids captured earlier.
;
; FileCheck note: `[[#name:]]` captures a new value, `[[#name]]` requires the
; previously captured value. Only the left-hand side of a definition and the
; per-function parameter id are captures; everything else is a use.

; CHECK-DAG: %[[#float_64:]] = OpTypeFloat 64
; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32
; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16
; CHECK-DAG: %[[#vec2_float_16:]] = OpTypeVector %[[#float_16]] 2
; CHECK-DAG: %[[#vec2_float_32:]] = OpTypeVector %[[#float_32]] 2
; CHECK-DAG: %[[#vec2_float_64:]] = OpTypeVector %[[#float_64]] 2
; CHECK-DAG: %[[#vec3_float_16:]] = OpTypeVector %[[#float_16]] 3
; CHECK-DAG: %[[#vec3_float_32:]] = OpTypeVector %[[#float_32]] 3
; CHECK-DAG: %[[#vec3_float_64:]] = OpTypeVector %[[#float_64]] 3
; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4
; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4
; CHECK-DAG: %[[#vec4_float_64:]] = OpTypeVector %[[#float_64]] 4
; CHECK-DAG: %[[#const_f64_1:]] = OpConstant %[[#float_64]] 1
; CHECK-DAG: %[[#const_f32_1:]] = OpConstant %[[#float_32]] 1
; CHECK-DAG: %[[#const_f16_1:]] = OpConstant %[[#float_16]] 1

; CHECK-DAG: %[[#vec2_const_ones_f16:]] = OpConstantComposite %[[#vec2_float_16]] %[[#const_f16_1]] %[[#const_f16_1]]
; CHECK-DAG: %[[#vec3_const_ones_f16:]] = OpConstantComposite %[[#vec3_float_16]] %[[#const_f16_1]] %[[#const_f16_1]] %[[#const_f16_1]]
; CHECK-DAG: %[[#vec4_const_ones_f16:]] = OpConstantComposite %[[#vec4_float_16]] %[[#const_f16_1]] %[[#const_f16_1]] %[[#const_f16_1]] %[[#const_f16_1]]

; CHECK-DAG: %[[#vec2_const_ones_f32:]] = OpConstantComposite %[[#vec2_float_32]] %[[#const_f32_1]] %[[#const_f32_1]]
; CHECK-DAG: %[[#vec3_const_ones_f32:]] = OpConstantComposite %[[#vec3_float_32]] %[[#const_f32_1]] %[[#const_f32_1]] %[[#const_f32_1]]
; CHECK-DAG: %[[#vec4_const_ones_f32:]] = OpConstantComposite %[[#vec4_float_32]] %[[#const_f32_1]] %[[#const_f32_1]] %[[#const_f32_1]] %[[#const_f32_1]]

; CHECK-DAG: %[[#vec2_const_ones_f64:]] = OpConstantComposite %[[#vec2_float_64]] %[[#const_f64_1]] %[[#const_f64_1]]
; CHECK-DAG: %[[#vec3_const_ones_f64:]] = OpConstantComposite %[[#vec3_float_64]] %[[#const_f64_1]] %[[#const_f64_1]] %[[#const_f64_1]]
; CHECK-DAG: %[[#vec4_const_ones_f64:]] = OpConstantComposite %[[#vec4_float_64]] %[[#const_f64_1]] %[[#const_f64_1]] %[[#const_f64_1]] %[[#const_f64_1]]


define spir_func noundef half @test_rcp_half(half noundef %p0) #0 {
entry:
; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#float_16]]
; CHECK: OpFDiv %[[#float_16]] %[[#const_f16_1]] %[[#arg0]]
%hlsl.rcp = fdiv half 0xH3C00, %p0
ret half %hlsl.rcp
}

define spir_func noundef <2 x half> @test_rcp_half2(<2 x half> noundef %p0) #0 {
entry:
; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec2_float_16]]
; CHECK: OpFDiv %[[#vec2_float_16]] %[[#vec2_const_ones_f16]] %[[#arg0]]
%hlsl.rcp = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, %p0
ret <2 x half> %hlsl.rcp
}

define spir_func noundef <3 x half> @test_rcp_half3(<3 x half> noundef %p0) #0 {
entry:
; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec3_float_16]]
; CHECK: OpFDiv %[[#vec3_float_16]] %[[#vec3_const_ones_f16]] %[[#arg0]]
%hlsl.rcp = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, %p0
ret <3 x half> %hlsl.rcp
}

define spir_func noundef <4 x half> @test_rcp_half4(<4 x half> noundef %p0) #0 {
entry:
; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_16]]
; CHECK: OpFDiv %[[#vec4_float_16]] %[[#vec4_const_ones_f16]] %[[#arg0]]
%hlsl.rcp = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, %p0
ret <4 x half> %hlsl.rcp
}

define spir_func noundef float @test_rcp_float(float noundef %p0) #0 {
entry:
; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#float_32]]
; CHECK: OpFDiv %[[#float_32]] %[[#const_f32_1]] %[[#arg0]]
%hlsl.rcp = fdiv float 1.000000e+00, %p0
ret float %hlsl.rcp
}

define spir_func noundef <2 x float> @test_rcp_float2(<2 x float> noundef %p0) #0 {
entry:
; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec2_float_32]]
; CHECK: OpFDiv %[[#vec2_float_32]] %[[#vec2_const_ones_f32]] %[[#arg0]]
%hlsl.rcp = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %p0
ret <2 x float> %hlsl.rcp
}

define spir_func noundef <3 x float> @test_rcp_float3(<3 x float> noundef %p0) #0 {
entry:
; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec3_float_32]]
; CHECK: OpFDiv %[[#vec3_float_32]] %[[#vec3_const_ones_f32]] %[[#arg0]]
%hlsl.rcp = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %p0
ret <3 x float> %hlsl.rcp
}

define spir_func noundef <4 x float> @test_rcp_float4(<4 x float> noundef %p0) #0 {
entry:
; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_32]]
; CHECK: OpFDiv %[[#vec4_float_32]] %[[#vec4_const_ones_f32]] %[[#arg0]]
%hlsl.rcp = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %p0
ret <4 x float> %hlsl.rcp
}

define spir_func noundef double @test_rcp_double(double noundef %p0) #0 {
entry:
; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#float_64]]
; CHECK: OpFDiv %[[#float_64]] %[[#const_f64_1]] %[[#arg0]]
%hlsl.rcp = fdiv double 1.000000e+00, %p0
ret double %hlsl.rcp
}

define spir_func noundef <2 x double> @test_rcp_double2(<2 x double> noundef %p0) #0 {
entry:
; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec2_float_64]]
; CHECK: OpFDiv %[[#vec2_float_64]] %[[#vec2_const_ones_f64]] %[[#arg0]]
%hlsl.rcp = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, %p0
ret <2 x double> %hlsl.rcp
}

define spir_func noundef <3 x double> @test_rcp_double3(<3 x double> noundef %p0) #0 {
entry:
; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec3_float_64]]
; CHECK: OpFDiv %[[#vec3_float_64]] %[[#vec3_const_ones_f64]] %[[#arg0]]
%hlsl.rcp = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %p0
ret <3 x double> %hlsl.rcp
}

define spir_func noundef <4 x double> @test_rcp_double4(<4 x double> noundef %p0) #0 {
entry:
; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#vec4_float_64]]
; CHECK: OpFDiv %[[#vec4_float_64]] %[[#vec4_const_ones_f64]] %[[#arg0]]
%hlsl.rcp = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %p0
ret <4 x double> %hlsl.rcp
}

0 comments on commit 4036a69

Please sign in to comment.