diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 89ffe065fa5aa8..118013a3876478 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3500,18 +3500,25 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Value *Src = II->getArgOperand(0); // TODO: Move to ConstantFolding/InstSimplify? - if (isa(Src)) - return replaceInstUsesWith(CI, Src); + if (isa(Src)) { + Type *Ty = II->getType(); + auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics())); + return replaceInstUsesWith(CI, QNaN); + } + + if (II->isStrictFP()) + break; if (const ConstantFP *C = dyn_cast(Src)) { const APFloat &ArgVal = C->getValueAPF(); APFloat Val(ArgVal.getSemantics(), 1); - APFloat::opStatus Status = Val.divide(ArgVal, - APFloat::rmNearestTiesToEven); - // Only do this if it was exact and therefore not dependent on the - // rounding mode. - if (Status == APFloat::opOK) - return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(), Val)); + Val.divide(ArgVal, APFloat::rmNearestTiesToEven); + + // This is more precise than the instruction may give. + // + // TODO: The instruction always flushes denormal results (except for f16), + // should this also? + return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(), Val)); } break; @@ -3520,8 +3527,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Value *Src = II->getArgOperand(0); // TODO: Move to ConstantFolding/InstSimplify? - if (isa(Src)) - return replaceInstUsesWith(CI, Src); + if (isa(Src)) { + Type *Ty = II->getType(); + auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics())); + return replaceInstUsesWith(CI, QNaN); + } + break; } case Intrinsic::amdgcn_frexp_mant: diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll index 032ca08ffc2b9c..a3a99e51bf41bb 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll @@ -10,7 +10,7 @@ declare double @llvm.amdgcn.rcp.f64(double) nounwind readnone define float @test_constant_fold_rcp_f32_undef() nounwind { ; CHECK-LABEL: @test_constant_fold_rcp_f32_undef( -; CHECK-NEXT: ret float undef +; CHECK-NEXT: ret float 0x7FF8000000000000 ; %val = call float @llvm.amdgcn.rcp.f32(float undef) nounwind readnone ret float %val @@ -50,8 +50,7 @@ define double @test_constant_fold_rcp_f64_half() nounwind { define float @test_constant_fold_rcp_f32_43() nounwind { ; CHECK-LABEL: @test_constant_fold_rcp_f32_43( -; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) -; CHECK-NEXT: ret float [[VAL]] +; CHECK-NEXT: ret float 0x3F97D05F40000000 ; %val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) nounwind readnone ret float %val @@ -59,13 +58,21 @@ define float @test_constant_fold_rcp_f32_43() nounwind { define double @test_constant_fold_rcp_f64_43() nounwind { ; CHECK-LABEL: @test_constant_fold_rcp_f64_43( -; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.rcp.f64(double 4.300000e+01) -; CHECK-NEXT: ret double [[VAL]] +; CHECK-NEXT: ret double 0x3F97D05F417D05F4 ; %val = call double @llvm.amdgcn.rcp.f64(double 4.300000e+01) nounwind readnone ret double %val } +define float @test_constant_fold_rcp_f32_43_strictfp() nounwind strictfp { +; CHECK-LABEL: @test_constant_fold_rcp_f32_43_strictfp( +; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) #7 +; CHECK-NEXT: ret float [[VAL]] +; + %val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) strictfp nounwind readnone + ret float %val +} + ; -------------------------------------------------------------------- ; llvm.amdgcn.rsq ; -------------------------------------------------------------------- @@ -74,7 +81,7 @@ declare float @llvm.amdgcn.rsq.f32(float) nounwind readnone define float @test_constant_fold_rsq_f32_undef() nounwind { ; CHECK-LABEL: @test_constant_fold_rsq_f32_undef( -; CHECK-NEXT: ret float undef +; CHECK-NEXT: ret float 0x7FF8000000000000 ; %val = call float @llvm.amdgcn.rsq.f32(float undef) nounwind readnone ret float %val @@ -2387,8 +2394,8 @@ declare i32 @llvm.amdgcn.ballot.i32(i1) nounwind readnone convergent define i64 @ballot_nocombine_64(i1 %i) { ; CHECK-LABEL: @ballot_nocombine_64( -; CHECK-NEXT: %b = call i64 @llvm.amdgcn.ballot.i64(i1 %i) -; CHECK-NEXT: ret i64 %b +; CHECK-NEXT: [[B:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[I:%.*]]) +; CHECK-NEXT: ret i64 [[B]] ; %b = call i64 @llvm.amdgcn.ballot.i64(i1 %i) ret i64 %b @@ -2413,8 +2420,8 @@ define i64 @ballot_one_64() { define i32 @ballot_nocombine_32(i1 %i) { ; CHECK-LABEL: @ballot_nocombine_32( -; CHECK-NEXT: %b = call i32 @llvm.amdgcn.ballot.i32(i1 %i) -; CHECK-NEXT: ret i32 %b +; CHECK-NEXT: [[B:%.*]] = call i32 @llvm.amdgcn.ballot.i32(i1 [[I:%.*]]) +; CHECK-NEXT: ret i32 [[B]] ; %b = call i32 @llvm.amdgcn.ballot.i32(i1 %i) ret i32 %b