Skip to content

Commit

Permalink
Reapply "InstCombine: Introduce SimplifyDemandedUseFPClass"
Browse files Browse the repository at this point in the history
This reverts commit 26bb22b.
  • Loading branch information
arsenm committed Oct 5, 2023
1 parent 32d16b6 commit 5a36904
Show file tree
Hide file tree
Showing 6 changed files with 246 additions and 129 deletions.
68 changes: 50 additions & 18 deletions clang/test/Headers/__clang_hip_math.hip
Original file line number Diff line number Diff line change
Expand Up @@ -231,26 +231,26 @@ extern "C" __device__ uint64_t test___make_mantissa(const char *p) {

// CHECK-LABEL: @test_abs(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[ABS_I:%.*]] = tail call noundef i32 @llvm.abs.i32(i32 [[X:%.*]], i1 true)
// CHECK-NEXT: ret i32 [[ABS_I]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef i32 @llvm.abs.i32(i32 [[X:%.*]], i1 true)
// CHECK-NEXT: ret i32 [[TMP0]]
//
extern "C" __device__ int test_abs(int x) {
return abs(x);
}

// CHECK-LABEL: @test_labs(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[ABS_I:%.*]] = tail call noundef i64 @llvm.abs.i64(i64 [[X:%.*]], i1 true)
// CHECK-NEXT: ret i64 [[ABS_I]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef i64 @llvm.abs.i64(i64 [[X:%.*]], i1 true)
// CHECK-NEXT: ret i64 [[TMP0]]
//
extern "C" __device__ long test_labs(long x) {
return labs(x);
}

// CHECK-LABEL: @test_llabs(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[ABS_I:%.*]] = tail call noundef i64 @llvm.abs.i64(i64 [[X:%.*]], i1 true)
// CHECK-NEXT: ret i64 [[ABS_I]]
// CHECK-NEXT: [[TMP0:%.*]] = tail call noundef i64 @llvm.abs.i64(i64 [[X:%.*]], i1 true)
// CHECK-NEXT: ret i64 [[TMP0]]
//
extern "C" __device__ long long test_llabs(long x) {
return llabs(x);
Expand Down Expand Up @@ -2557,33 +2557,65 @@ extern "C" __device__ double test_nan(const char *tag) {
return nan(tag);
}

// CHECK-LABEL: @test_nanf_emptystr(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret float 0x7FF8000000000000
// DEFAULT-LABEL: @test_nanf_emptystr(
// DEFAULT-NEXT: entry:
// DEFAULT-NEXT: ret float 0x7FF8000000000000
//
// FINITEONLY-LABEL: @test_nanf_emptystr(
// FINITEONLY-NEXT: entry:
// FINITEONLY-NEXT: ret float poison
//
// APPROX-LABEL: @test_nanf_emptystr(
// APPROX-NEXT: entry:
// APPROX-NEXT: ret float 0x7FF8000000000000
//
extern "C" __device__ float test_nanf_emptystr() {
return nanf("");
}

// CHECK-LABEL: @test_nan_emptystr(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret double 0x7FF8000000000000
// DEFAULT-LABEL: @test_nan_emptystr(
// DEFAULT-NEXT: entry:
// DEFAULT-NEXT: ret double 0x7FF8000000000000
//
// FINITEONLY-LABEL: @test_nan_emptystr(
// FINITEONLY-NEXT: entry:
// FINITEONLY-NEXT: ret double poison
//
// APPROX-LABEL: @test_nan_emptystr(
// APPROX-NEXT: entry:
// APPROX-NEXT: ret double 0x7FF8000000000000
//
extern "C" __device__ double test_nan_emptystr() {
return nan("");
}

// CHECK-LABEL: @test_nanf_fill(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret float 0x7FF8000000000000
// DEFAULT-LABEL: @test_nanf_fill(
// DEFAULT-NEXT: entry:
// DEFAULT-NEXT: ret float 0x7FF8000000000000
//
// FINITEONLY-LABEL: @test_nanf_fill(
// FINITEONLY-NEXT: entry:
// FINITEONLY-NEXT: ret float poison
//
// APPROX-LABEL: @test_nanf_fill(
// APPROX-NEXT: entry:
// APPROX-NEXT: ret float 0x7FF8000000000000
//
extern "C" __device__ float test_nanf_fill() {
return nanf("0x456");
}

// CHECK-LABEL: @test_nan_fill(
// CHECK-NEXT: entry:
// CHECK-NEXT: ret double 0x7FF8000000000000
// DEFAULT-LABEL: @test_nan_fill(
// DEFAULT-NEXT: entry:
// DEFAULT-NEXT: ret double 0x7FF8000000000000
//
// FINITEONLY-LABEL: @test_nan_fill(
// FINITEONLY-NEXT: entry:
// FINITEONLY-NEXT: ret double poison
//
// APPROX-LABEL: @test_nan_fill(
// APPROX-NEXT: entry:
// APPROX-NEXT: ret double 0x7FF8000000000000
//
extern "C" __device__ double test_nan_fill() {
return nan("0x123");
Expand Down
4 changes: 4 additions & 0 deletions llvm/include/llvm/Analysis/ValueTracking.h
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,10 @@ struct KnownFPClass {
/// definitely set or false if the sign bit is definitely unset.
std::optional<bool> SignBit;

bool operator==(KnownFPClass Other) const {
return KnownFPClasses == Other.KnownFPClasses && SignBit == Other.SignBit;
}

/// Return true if it's known this can never be one of the mask entries.
bool isKnownNever(FPClassTest Mask) const {
return (KnownFPClasses & Mask) == fcNone;
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/Transforms/InstCombine/InstCombineInternal.h
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,15 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
APInt &UndefElts, unsigned Depth = 0,
bool AllowMultipleUsers = false) override;

/// Attempts to replace V with a simpler value based on the demanded
/// floating-point classes
Value *SimplifyDemandedUseFPClass(Value *V, FPClassTest DemandedMask,
KnownFPClass &Known, unsigned Depth,
Instruction *CxtI);
bool SimplifyDemandedFPClass(Instruction *I, unsigned Op,
FPClassTest DemandedMask, KnownFPClass &Known,
unsigned Depth = 0);

/// Canonicalize the position of binops relative to shufflevector.
Instruction *foldVectorBinop(BinaryOperator &Inst);
Instruction *foldVectorSelect(SelectInst &Sel);
Expand Down
113 changes: 113 additions & 0 deletions llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1781,3 +1781,116 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,

return MadeChange ? I : nullptr;
}

/// For floating-point classes that resolve to a single bit pattern, return that
/// value.
static Constant *getFPClassConstant(Type *Ty, FPClassTest Mask) {
switch (Mask) {
case fcPosZero:
return ConstantFP::getZero(Ty);
case fcNegZero:
return ConstantFP::getZero(Ty, true);
case fcPosInf:
return ConstantFP::getInfinity(Ty);
case fcNegInf:
return ConstantFP::getInfinity(Ty, true);
case fcNone:
return PoisonValue::get(Ty);
default:
return nullptr;
}
}

Value *InstCombinerImpl::SimplifyDemandedUseFPClass(
Value *V, const FPClassTest DemandedMask, KnownFPClass &Known,
unsigned Depth, Instruction *CxtI) {
assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
Type *VTy = V->getType();

assert(Known == KnownFPClass() && "expected uninitialized state");

if (DemandedMask == fcNone)
return isa<UndefValue>(V) ? nullptr : PoisonValue::get(VTy);

if (Depth == MaxAnalysisRecursionDepth)
return nullptr;

Instruction *I = dyn_cast<Instruction>(V);
if (!I) {
// Handle constants and arguments
Known = computeKnownFPClass(V, fcAllFlags, CxtI, Depth + 1);
Value *FoldedToConst =
getFPClassConstant(VTy, DemandedMask & Known.KnownFPClasses);
return FoldedToConst == V ? nullptr : FoldedToConst;
}

if (!I->hasOneUse())
return nullptr;

// TODO: Should account for nofpclass/FastMathFlags on current instruction
switch (I->getOpcode()) {
case Instruction::FNeg: {
if (SimplifyDemandedFPClass(I, 0, llvm::fneg(DemandedMask), Known,
Depth + 1))
return I;
Known.fneg();
break;
}
case Instruction::Call: {
CallInst *CI = cast<CallInst>(I);
switch (CI->getIntrinsicID()) {
case Intrinsic::fabs:
if (SimplifyDemandedFPClass(I, 0, llvm::inverse_fabs(DemandedMask), Known,
Depth + 1))
return I;
Known.fabs();
break;
case Intrinsic::arithmetic_fence:
if (SimplifyDemandedFPClass(I, 0, DemandedMask, Known, Depth + 1))
return I;
break;
default:
Known = computeKnownFPClass(I, ~DemandedMask, CxtI, Depth + 1);
break;
}

break;
}
case Instruction::Select: {
KnownFPClass KnownLHS, KnownRHS;
if (SimplifyDemandedFPClass(I, 2, DemandedMask, KnownRHS, Depth + 1) ||
SimplifyDemandedFPClass(I, 1, DemandedMask, KnownLHS, Depth + 1))
return I;

if (KnownLHS.isKnownNever(DemandedMask))
return I->getOperand(2);
if (KnownRHS.isKnownNever(DemandedMask))
return I->getOperand(1);

// TODO: Recognize clamping patterns
Known = KnownLHS | KnownRHS;
break;
}
default:
Known = computeKnownFPClass(I, ~DemandedMask, CxtI, Depth + 1);
break;
}

return getFPClassConstant(VTy, DemandedMask & Known.KnownFPClasses);
}

bool InstCombinerImpl::SimplifyDemandedFPClass(Instruction *I, unsigned OpNo,
FPClassTest DemandedMask,
KnownFPClass &Known,
unsigned Depth) {
Use &U = I->getOperandUse(OpNo);
Value *NewVal =
SimplifyDemandedUseFPClass(U.get(), DemandedMask, Known, Depth, I);
if (!NewVal)
return false;
if (Instruction *OpInst = dyn_cast<Instruction>(U))
salvageDebugInfo(*OpInst);

replaceUse(U, NewVal);
return true;
}
18 changes: 16 additions & 2 deletions llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2732,8 +2732,22 @@ Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) {
}

Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) {
// Nothing for now.
return nullptr;
Value *RetVal = RI.getReturnValue();
if (!RetVal || !AttributeFuncs::isNoFPClassCompatibleType(RetVal->getType()))
return nullptr;

Function *F = RI.getFunction();
FPClassTest ReturnClass = F->getAttributes().getRetNoFPClass();
if (ReturnClass == fcNone)
return nullptr;

KnownFPClass KnownClass;
Value *Simplified =
SimplifyDemandedUseFPClass(RetVal, ~ReturnClass, KnownClass, 0, &RI);
if (!Simplified)
return nullptr;

return ReturnInst::Create(RI.getContext(), Simplified);
}

// WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()!
Expand Down

2 comments on commit 5a36904

@PiJoules
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi it looks like this patch breaks this code:

#include <array>
#include <cmath>
#include <limits>
#include <cstdio>
 
int main()
{
    std::array<double, 4> vals;
    std::fill(vals.begin(), vals.end(), 0.0f);
    vals.back() = std::numeric_limits<double>::infinity();
    printf("vals.back(): %f\n", vals.back());
}

When compiled with clang++ -ffast-math -O3 -std=c++17 the result is -nan when inf is expected. Even though -ffast-math is used, I would still expect inf to be stored in the array.

@arsenm
Copy link
Contributor Author

@arsenm arsenm commented on 5a36904 Oct 13, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem is not really with this patch, but how fast math flags and nofpclass(inf nan) get applied to anything eligible to be an FPMathOperator. We end up with this:

define noundef nofpclass(nan inf) double @_ZNSt3__114numeric_limitsIdE8infinityB7v160006Ev() {
entry:
  %call = call fast noundef nofpclass(nan inf) double @_ZNSt3__123__libcpp_numeric_limitsIdLb1EE8infinityB7v160006Ev() 
    ret double %call
 }

By the definitions for ninf and nofpclass(inf), this is a poison result. Not really sure what to do with this. It would be unfortunate to either special case functions where we can't unconditionally add fast math information and about as unfortunate if we had to only apply fast math flags to specially recognized calls.

Please sign in to comment.