Skip to content

Commit

Permalink
clang: Attach !fpmath metadata to __builtin_sqrt based on language flags
Browse files Browse the repository at this point in the history
OpenCL and HIP have -cl-fp32-correctly-rounded-divide-sqrt and
-fno-hip-correctly-rounded-divide-sqrt. The corresponding fpmath metadata
was only set on fdiv, and not sqrt. The backend is currently underutilizing
sqrt lowering options, and the responsibility is split between the libraries
and backend and this metadata is needed.

CUDA/NVCC has -prec-div and -prev-sqrt but clang doesn't appear to be
aiming for compatibility with those. Don't know if OpenMP has a similar
control.
  • Loading branch information
arsenm committed Jul 14, 2023
1 parent c4ccd6e commit bac2a07
Show file tree
Hide file tree
Showing 6 changed files with 98 additions and 24 deletions.
11 changes: 6 additions & 5 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2544,11 +2544,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_sqrtf:
case Builtin::BI__builtin_sqrtf16:
case Builtin::BI__builtin_sqrtl:
case Builtin::BI__builtin_sqrtf128:
return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E,
Intrinsic::sqrt,
Intrinsic::experimental_constrained_sqrt));

case Builtin::BI__builtin_sqrtf128: {
llvm::Value *Call = emitUnaryMaybeConstrainedFPBuiltin(
*this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt);
SetSqrtFPAccuracy(Call);
return RValue::get(Call);
}
case Builtin::BItrunc:
case Builtin::BItruncf:
case Builtin::BItruncl:
Expand Down
42 changes: 42 additions & 0 deletions clang/lib/CodeGen/CGExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5577,6 +5577,48 @@ void CodeGenFunction::SetFPAccuracy(llvm::Value *Val, float Accuracy) {
cast<llvm::Instruction>(Val)->setMetadata(llvm::LLVMContext::MD_fpmath, Node);
}

void CodeGenFunction::SetSqrtFPAccuracy(llvm::Value *Val) {
llvm::Type *EltTy = Val->getType()->getScalarType();
if (!EltTy->isFloatTy())
return;

if ((getLangOpts().OpenCL &&
!CGM.getCodeGenOpts().OpenCLCorrectlyRoundedDivSqrt) ||
(getLangOpts().HIP && getLangOpts().CUDAIsDevice &&
!CGM.getCodeGenOpts().HIPCorrectlyRoundedDivSqrt)) {
// OpenCL v1.1 s7.4: minimum accuracy of single precision / is 3ulp
//
// OpenCL v1.2 s5.6.4.2: The -cl-fp32-correctly-rounded-divide-sqrt
// build option allows an application to specify that single precision
// floating-point divide (x/y and 1/x) and sqrt used in the program
// source are correctly rounded.
//
// TODO: CUDA has a prec-sqrt flag
SetFPAccuracy(Val, 3.0f);
}
}

void CodeGenFunction::SetDivFPAccuracy(llvm::Value *Val) {
llvm::Type *EltTy = Val->getType()->getScalarType();
if (!EltTy->isFloatTy())
return;

if ((getLangOpts().OpenCL &&
!CGM.getCodeGenOpts().OpenCLCorrectlyRoundedDivSqrt) ||
(getLangOpts().HIP && getLangOpts().CUDAIsDevice &&
!CGM.getCodeGenOpts().HIPCorrectlyRoundedDivSqrt)) {
// OpenCL v1.1 s7.4: minimum accuracy of single precision / is 2.5ulp
//
// OpenCL v1.2 s5.6.4.2: The -cl-fp32-correctly-rounded-divide-sqrt
// build option allows an application to specify that single precision
// floating-point divide (x/y and 1/x) and sqrt used in the program
// source are correctly rounded.
//
// TODO: CUDA has a prec-div flag
SetFPAccuracy(Val, 2.5f);
}
}

namespace {
struct LValueOrRValue {
LValue LV;
Expand Down
16 changes: 1 addition & 15 deletions clang/lib/CodeGen/CGExprScalar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3478,21 +3478,7 @@ Value *ScalarExprEmitter::EmitDiv(const BinOpInfo &Ops) {
llvm::Value *Val;
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Ops.FPFeatures);
Val = Builder.CreateFDiv(Ops.LHS, Ops.RHS, "div");
if ((CGF.getLangOpts().OpenCL &&
!CGF.CGM.getCodeGenOpts().OpenCLCorrectlyRoundedDivSqrt) ||
(CGF.getLangOpts().HIP && CGF.getLangOpts().CUDAIsDevice &&
!CGF.CGM.getCodeGenOpts().HIPCorrectlyRoundedDivSqrt)) {
// OpenCL v1.1 s7.4: minimum accuracy of single precision / is 2.5ulp
// OpenCL v1.2 s5.6.4.2: The -cl-fp32-correctly-rounded-divide-sqrt
// build option allows an application to specify that single precision
// floating-point divide (x/y and 1/x) and sqrt used in the program
// source are correctly rounded.
llvm::Type *ValTy = Val->getType();
if (ValTy->isFloatTy() ||
(isa<llvm::VectorType>(ValTy) &&
cast<llvm::VectorType>(ValTy)->getElementType()->isFloatTy()))
CGF.SetFPAccuracy(Val, 2.5);
}
CGF.SetDivFPAccuracy(Val);
return Val;
}
else if (Ops.isFixedPointOp())
Expand Down
8 changes: 8 additions & 0 deletions clang/lib/CodeGen/CodeGenFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -4708,6 +4708,14 @@ class CodeGenFunction : public CodeGenTypeCache {
/// point operation, expressed as the maximum relative error in ulp.
void SetFPAccuracy(llvm::Value *Val, float Accuracy);

/// Set the minimum required accuracy of the given sqrt operation
/// based on CodeGenOpts.
void SetSqrtFPAccuracy(llvm::Value *Val);

/// Set the minimum required accuracy of the given sqrt operation based on
/// CodeGenOpts.
void SetDivFPAccuracy(llvm::Value *Val);

/// Set the codegen fast-math flags.
void SetFastMathFlags(FPOptions FPFeatures);

Expand Down
16 changes: 15 additions & 1 deletion clang/test/CodeGenCUDA/correctly-rounded-div.cu
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,18 @@ __device__ double dpscalardiv(double a, double b) {
return a / b;
}

// NCRDIV: ![[MD]] = !{float 2.500000e+00}
// COMMON-LABEL: @_Z12spscalarsqrt
// NCRDIV: call contract float @llvm.sqrt.f32(float %{{.+}}), !fpmath ![[MD:[0-9]+]]
// CRDIV: call contract float @llvm.sqrt.f32(float %{{.+}}){{$}}
__device__ float spscalarsqrt(float a) {
return __builtin_sqrtf(a);
}

// COMMON-LABEL: @_Z12dpscalarsqrt
// COMMON: call contract double @llvm.sqrt.f64(double %{{.+}}){{$}}
// COMMON-NOT: !fpmath
__device__ double dpscalarsqrt(double a) {
return __builtin_sqrt(a);
}

// NCRSQRT: ![[MD]] = !{float 2.500000e+00}
29 changes: 26 additions & 3 deletions clang/test/CodeGenOpenCL/fpmath.cl
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,26 @@ typedef __attribute__(( ext_vector_type(4) )) float float4;
float spscalardiv(float a, float b) {
// CHECK: @spscalardiv
// CHECK: fdiv{{.*}},
// NODIVOPT: !fpmath ![[MD:[0-9]+]]
// NODIVOPT: !fpmath ![[MD_FDIV:[0-9]+]]
// DIVOPT-NOT: !fpmath !{{[0-9]+}}
return a / b;
}

float4 spvectordiv(float4 a, float4 b) {
// CHECK: @spvectordiv
// CHECK: fdiv{{.*}},
// NODIVOPT: !fpmath ![[MD]]
// NODIVOPT: !fpmath ![[MD_FDIV]]
// DIVOPT-NOT: !fpmath !{{[0-9]+}}
return a / b;
}

float spscalarsqrt(float a) {
// CHECK-LABEL: @spscalarsqrt
// NODIVOPT: call float @llvm.sqrt.f32(float %{{.+}}), !fpmath ![[MD_SQRT:[0-9]+]]
// DIVOPT: call float @llvm.sqrt.f32(float %{{.+}}){{$}}
return __builtin_sqrtf(a);
}

#if __OPENCL_C_VERSION__ >=120
void printf(constant char* fmt, ...);

Expand All @@ -34,11 +41,27 @@ void testdbllit(long *val) {

#ifndef NOFP64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
typedef __attribute__(( ext_vector_type(4) )) double double4;

double dpscalardiv(double a, double b) {
// CHECK: @dpscalardiv
// CHECK-NOT: !fpmath
return a / b;
}

double4 dpvectordiv(double4 a, double4 b) {
// CHECK: @dpvectordiv
// CHECK-NOT: !fpmath
return a / b;
}

double dpscalarsqrt(double a) {
// CHECK-LABEL: @dpscalarsqrt
// CHECK: call double @llvm.sqrt.f64(double %{{.+}}){{$}}
return __builtin_sqrt(a);
}

#endif

// NODIVOPT: ![[MD]] = !{float 2.500000e+00}
// NODIVOPT: ![[MD_FDIV]] = !{float 2.500000e+00}
// NODIVOPT: ![[MD_SQRT]] = !{float 3.000000e+00}

0 comments on commit bac2a07

Please sign in to comment.