Skip to content

Commit

Permalink
AMDGPU: Replace sqrt OpenCL libcalls with llvm.sqrt (#74197)
Browse files Browse the repository at this point in the history
The library implementation is just a wrapper around a call to the
intrinsic, but loses metadata. Swap out the call site to the intrinsic
so that the lowering can see the !fpmath metadata and fast math flags.

Since d56e0d0, clang has been placing
!fpmath on OpenCL library sqrt calls. Also, don't bother emitting
native_sqrt anymore; it's just another wrapper around llvm.sqrt.
  • Loading branch information
arsenm committed Jan 9, 2024
1 parent 81df51f commit daecc30
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 70 deletions.
32 changes: 3 additions & 29 deletions llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,6 @@ class AMDGPULibCalls {
Constant *copr0, Constant *copr1);
bool evaluateCall(CallInst *aCI, const FuncInfo &FInfo);

// sqrt
bool fold_sqrt(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);

/// Insert a value to sincos function \p Fsincos. Returns (value of sin, value
/// of cos, sincos call).
std::tuple<Value *, Value *, Value *> insertSinCos(Value *Arg,
Expand Down Expand Up @@ -672,8 +669,6 @@ bool AMDGPULibCalls::fold(CallInst *CI) {

// Specialized optimizations for each function call.
//
// TODO: Handle other simple intrinsic wrappers. Sqrt.
//
// TODO: Handle native functions
switch (FInfo.getId()) {
case AMDGPULibFunc::EI_EXP:
Expand Down Expand Up @@ -794,7 +789,9 @@ bool AMDGPULibCalls::fold(CallInst *CI) {
case AMDGPULibFunc::EI_ROOTN:
return fold_rootn(FPOp, B, FInfo);
case AMDGPULibFunc::EI_SQRT:
return fold_sqrt(FPOp, B, FInfo);
// TODO: Allow with strictfp + constrained intrinsic
return tryReplaceLibcallWithSimpleIntrinsic(
B, CI, Intrinsic::sqrt, true, true, /*AllowStrictFP=*/false);
case AMDGPULibFunc::EI_COS:
case AMDGPULibFunc::EI_SIN:
return fold_sincos(FPOp, B, FInfo);
Expand Down Expand Up @@ -1273,29 +1270,6 @@ bool AMDGPULibCalls::tryReplaceLibcallWithSimpleIntrinsic(
return true;
}

// Fold sqrt(x) -> native_sqrt(x).
//
// The rewrite is attempted only when the call is marked as unsafe math, and
// only for scalar f32 calls that do not already carry the native prefix.
// Returns true if the call was replaced, false if it was left untouched.
bool AMDGPULibCalls::fold_sqrt(FPMathOperator *FPOp, IRBuilder<> &B,
                               const FuncInfo &FInfo) {
  if (!isUnsafeMath(FPOp))
    return false;

  // Bail out unless this is a scalar f32 call without the native prefix.
  const bool IsCandidate = getArgType(FInfo) == AMDGPULibFunc::F32 &&
                           getVecSize(FInfo) == 1 &&
                           FInfo.getPrefix() != AMDGPULibFunc::NATIVE;
  if (!IsCandidate)
    return false;

  // Look up the native counterpart; give up if it is not available.
  Module *ParentModule = B.GetInsertBlock()->getModule();
  FunctionCallee NativeSqrt = getNativeFunction(
      ParentModule, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo));
  if (!NativeSqrt)
    return false;

  Value *Src = FPOp->getOperand(0);
  LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "
                    << "sqrt(" << *Src << ")\n");

  // Emit the native call and substitute it for the original call.
  Value *Replacement = CreateCallEx(B, NativeSqrt, Src, "__sqrt");
  replaceCall(FPOp, Replacement);
  return true;
}

std::tuple<Value *, Value *, Value *>
AMDGPULibCalls::insertSinCos(Value *Arg, FastMathFlags FMF, IRBuilder<> &B,
FunctionCallee Fsincos) {
Expand Down

0 comments on commit daecc30

Please sign in to comment.