Skip to content

Commit

Permalink
AMDGPU: Replace certain llvm.amdgcn.class uses with llvm.is.fpclass
Browse files Browse the repository at this point in the history
Most transforms should now be performed on llvm.is.fpclass. Unlike the
generic intrinsic, llvm.amdgcn.class supports variable test masks, so only
calls with a constant test mask are replaced.
  • Loading branch information
arsenm committed May 24, 2023
1 parent 5f64913 commit 9ef1333
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 77 deletions.
85 changes: 12 additions & 73 deletions llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -450,86 +450,25 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
Value *Src0 = II.getArgOperand(0);
Value *Src1 = II.getArgOperand(1);
const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1);
if (!CMask) {
if (isa<UndefValue>(Src0)) {
return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
}
if (CMask) {
II.setCalledOperand(Intrinsic::getDeclaration(
II.getModule(), Intrinsic::is_fpclass, Src0->getType()));

if (isa<UndefValue>(Src1)) {
return IC.replaceInstUsesWith(II,
ConstantInt::get(II.getType(), false));
}
break;
// Clamp any excess bits, as they're illegal for the generic intrinsic.
II.setArgOperand(1, ConstantInt::get(Src1->getType(),
CMask->getZExtValue() & fcAllFlags));
return &II;
}

uint32_t Mask = CMask->getZExtValue();

// If all tests are made, it doesn't matter what the value is.
if ((Mask & fcAllFlags) == fcAllFlags) {
return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), true));
}
// FIXME: Should propagate poison.
if (isa<UndefValue>(Src0))
return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));

if ((Mask & fcAllFlags) == 0) {
if (isa<UndefValue>(Src1)) {
return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), false));
}

if (Mask == fcNan && !II.isStrictFP()) {
// Equivalent of isnan. Replace with standard fcmp.
Value *FCmp = IC.Builder.CreateFCmpUNO(Src0, Src0);
FCmp->takeName(&II);
return IC.replaceInstUsesWith(II, FCmp);
}

if (Mask == fcZero && !II.isStrictFP()) {
// Equivalent of == 0.
Value *FCmp =
IC.Builder.CreateFCmpOEQ(Src0, ConstantFP::get(Src0->getType(), 0.0));

FCmp->takeName(&II);
return IC.replaceInstUsesWith(II, FCmp);
}

// fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
if ((Mask & fcNan) &&
isKnownNeverNaN(Src0, IC.getDataLayout(), &IC.getTargetLibraryInfo())) {
return IC.replaceOperand(
II, 1, ConstantInt::get(Src1->getType(), Mask & ~fcNan));
}

const ConstantFP *CVal = dyn_cast<ConstantFP>(Src0);
if (!CVal) {
if (isa<UndefValue>(Src0)) {
return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
}

// Clamp mask to used bits
if ((Mask & fcAllFlags) != Mask) {
CallInst *NewCall = IC.Builder.CreateCall(
II.getCalledFunction(),
{Src0, ConstantInt::get(Src1->getType(), Mask & fcAllFlags)});

NewCall->takeName(&II);
return IC.replaceInstUsesWith(II, NewCall);
}

break;
}

const APFloat &Val = CVal->getValueAPF();

bool Result =
((Mask & fcSNan) && Val.isNaN() && Val.isSignaling()) ||
((Mask & fcQNan) && Val.isNaN() && !Val.isSignaling()) ||
((Mask & fcNegInf) && Val.isInfinity() && Val.isNegative()) ||
((Mask & fcNegNormal) && Val.isNormal() && Val.isNegative()) ||
((Mask & fcNegSubnormal) && Val.isDenormal() && Val.isNegative()) ||
((Mask & fcNegZero) && Val.isZero() && Val.isNegative()) ||
((Mask & fcPosZero) && Val.isZero() && !Val.isNegative()) ||
((Mask & fcPosSubnormal) && Val.isDenormal() && !Val.isNegative()) ||
((Mask & fcPosNormal) && Val.isNormal() && !Val.isNegative()) ||
((Mask & fcPosInf) && Val.isInfinity() && !Val.isNegative());

return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Result));
break;
}
case Intrinsic::amdgcn_cvt_pkrtz: {
Value *Src0 = II.getArgOperand(0);
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -592,7 +592,7 @@ define i1 @test_class_poison_val_f32(i32 %arg) nounwind {

define i1 @test_class_over_max_mask_f32(float %x) nounwind {
; CHECK-LABEL: @test_class_over_max_mask_f32(
; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.amdgcn.class.f32(float [[X:%.*]], i32 1)
; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X:%.*]], i32 1)
; CHECK-NEXT: ret i1 [[VAL]]
;
%val = call i1 @llvm.amdgcn.class.f32(float %x, i32 1025)
Expand Down Expand Up @@ -675,7 +675,7 @@ define i1 @test_class_isnan_f32(float %x) nounwind {

define i1 @test_class_isnan_f32_strict(float %x) nounwind {
; CHECK-LABEL: @test_class_isnan_f32_strict(
; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.amdgcn.class.f32(float [[X:%.*]], i32 3) #[[ATTR15:[0-9]+]]
; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X:%.*]], i32 3) #[[ATTR15:[0-9]+]]
; CHECK-NEXT: ret i1 [[VAL]]
;
%val = call i1 @llvm.amdgcn.class.f32(float %x, i32 3) strictfp
Expand All @@ -693,7 +693,7 @@ define i1 @test_class_is_p0_n0_f32(float %x) nounwind {

define i1 @test_class_is_p0_n0_f32_strict(float %x) nounwind {
; CHECK-LABEL: @test_class_is_p0_n0_f32_strict(
; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.amdgcn.class.f32(float [[X:%.*]], i32 96) #[[ATTR15]]
; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X:%.*]], i32 96) #[[ATTR15]]
; CHECK-NEXT: ret i1 [[VAL]]
;
%val = call i1 @llvm.amdgcn.class.f32(float %x, i32 96) strictfp
Expand Down Expand Up @@ -914,7 +914,7 @@ define i1 @test_class_is_nan_nnan_src(float %x) {
define i1 @test_class_is_nan_other_nnan_src(float %x) {
; CHECK-LABEL: @test_class_is_nan_other_nnan_src(
; CHECK-NEXT: [[NNAN:%.*]] = fadd nnan float [[X:%.*]], 1.000000e+00
; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.amdgcn.class.f32(float [[NNAN]], i32 264)
; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f32(float [[NNAN]], i32 264)
; CHECK-NEXT: ret i1 [[CLASS]]
;
%nnan = fadd nnan float %x, 1.0
Expand Down

0 comments on commit 9ef1333

Please sign in to comment.