Skip to content

Commit

Permalink
AMDGPU: Fix sqrt fast math flags spreading to fdiv fast math flags
Browse files Browse the repository at this point in the history
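The old code worked around the lack of operator| on FastMathFlags by
merging the sqrt flags into DivFMF in place (DivFMF |= SqrtFMF), so the
later DivFMF.approxFunc() check also saw the sqrt flags. We have
operator| now, which revealed the bug.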
  • Loading branch information
arsenm committed Aug 30, 2023
1 parent 2263dfe commit 6012fed
Show file tree
Hide file tree
Showing 2 changed files with 319 additions and 302 deletions.
7 changes: 3 additions & 4 deletions llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -891,8 +891,8 @@ bool AMDGPUCodeGenPrepareImpl::canOptimizeWithRsq(const FPMathOperator *SqrtOp,
}

Value *AMDGPUCodeGenPrepareImpl::optimizeWithRsq(
IRBuilder<> &Builder, Value *Num, Value *Den, FastMathFlags DivFMF,
FastMathFlags SqrtFMF, const Instruction *CtxI) const {
IRBuilder<> &Builder, Value *Num, Value *Den, const FastMathFlags DivFMF,
const FastMathFlags SqrtFMF, const Instruction *CtxI) const {
// The rsqrt contraction increases accuracy from ~2ulp to ~1ulp.
assert(DivFMF.allowContract() && SqrtFMF.allowContract());

Expand All @@ -911,8 +911,7 @@ Value *AMDGPUCodeGenPrepareImpl::optimizeWithRsq(
if (CLHS->isExactlyValue(1.0) || (IsNegative = CLHS->isExactlyValue(-1.0))) {
// Add in the sqrt flags.
IRBuilder<>::FastMathFlagGuard Guard(Builder);
DivFMF |= SqrtFMF;
Builder.setFastMathFlags(DivFMF);
Builder.setFastMathFlags(DivFMF | SqrtFMF);

if ((DivFMF.approxFunc() && SqrtFMF.approxFunc()) || HasUnsafeFPMath ||
canIgnoreDenormalInput(Den, CtxI)) {
Expand Down

0 comments on commit 6012fed

Please sign in to comment.