From 699a23d4830d7fa70ab295c0e970c78fa962bd04 Mon Sep 17 00:00:00 2001 From: Lewis Crawford Date: Thu, 18 Sep 2025 09:14:07 +0000 Subject: [PATCH] [NVPTX] Fix NaN + overflow semantics of f2ll/d2i Fix the NaN-handling semantics of various NVVM intrinsics converting from fp types to integer types. Previously in ConstantFolding, NaN inputs would be constant-folded to 0. However, v9.0 of the PTX spec states that: In float-to-integer conversions, depending upon conversion types, NaN input results in following value: * Zero if source is not `.f64` and destination is not `.s64`, `.u64`. * Otherwise `1 << (BitWidth(dst) - 1)` corresponding to the value of `(MAXINT >> 1) + 1` for unsigned type or `MININT` for signed type. Also, support for constant-folding +/-Inf and values which overflow/underflow the integer output type has been added (they clamp to min/max int). Because of this NaN-handling semantic difference, we also need to disable transforming several intrinsics to FPToSI/FPToUI, as the LLVM instruction will return poison, but the intrinsics have defined behaviour for these edge-cases like NaN/Inf/overflow. 
--- llvm/include/llvm/IR/NVVMIntrinsicUtils.h | 64 +++++++++++++ llvm/lib/Analysis/ConstantFolding.cpp | 23 +++-- .../Target/NVPTX/NVPTXTargetTransformInfo.cpp | 21 ++--- .../InstCombine/NVPTX/nvvm-intrins.ll | 29 ++++-- .../InstSimplify/const-fold-nvvm-f2i-d2i.ll | 58 +++++------- .../InstSimplify/const-fold-nvvm-f2ll-d2ll.ll | 90 ++++++++----------- 6 files changed, 165 insertions(+), 120 deletions(-) diff --git a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h index cc4929a1ff8da..0e2d903bb2500 100644 --- a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h +++ b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h @@ -180,6 +180,70 @@ inline bool FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID) { "Checking invalid f2i/d2i intrinsic for signed int conversion"); } +inline bool FPToIntegerIntrinsicNaNZero(Intrinsic::ID IntrinsicID) { + switch (IntrinsicID) { + // f2i + case Intrinsic::nvvm_f2i_rm: + case Intrinsic::nvvm_f2i_rn: + case Intrinsic::nvvm_f2i_rp: + case Intrinsic::nvvm_f2i_rz: + case Intrinsic::nvvm_f2i_rm_ftz: + case Intrinsic::nvvm_f2i_rn_ftz: + case Intrinsic::nvvm_f2i_rp_ftz: + case Intrinsic::nvvm_f2i_rz_ftz: + // f2ui + case Intrinsic::nvvm_f2ui_rm: + case Intrinsic::nvvm_f2ui_rn: + case Intrinsic::nvvm_f2ui_rp: + case Intrinsic::nvvm_f2ui_rz: + case Intrinsic::nvvm_f2ui_rm_ftz: + case Intrinsic::nvvm_f2ui_rn_ftz: + case Intrinsic::nvvm_f2ui_rp_ftz: + case Intrinsic::nvvm_f2ui_rz_ftz: + return true; + // d2i + case Intrinsic::nvvm_d2i_rm: + case Intrinsic::nvvm_d2i_rn: + case Intrinsic::nvvm_d2i_rp: + case Intrinsic::nvvm_d2i_rz: + // d2ui + case Intrinsic::nvvm_d2ui_rm: + case Intrinsic::nvvm_d2ui_rn: + case Intrinsic::nvvm_d2ui_rp: + case Intrinsic::nvvm_d2ui_rz: + // f2ll + case Intrinsic::nvvm_f2ll_rm: + case Intrinsic::nvvm_f2ll_rn: + case Intrinsic::nvvm_f2ll_rp: + case Intrinsic::nvvm_f2ll_rz: + case Intrinsic::nvvm_f2ll_rm_ftz: + case Intrinsic::nvvm_f2ll_rn_ftz: + case Intrinsic::nvvm_f2ll_rp_ftz: + case 
Intrinsic::nvvm_f2ll_rz_ftz: + // f2ull + case Intrinsic::nvvm_f2ull_rm: + case Intrinsic::nvvm_f2ull_rn: + case Intrinsic::nvvm_f2ull_rp: + case Intrinsic::nvvm_f2ull_rz: + case Intrinsic::nvvm_f2ull_rm_ftz: + case Intrinsic::nvvm_f2ull_rn_ftz: + case Intrinsic::nvvm_f2ull_rp_ftz: + case Intrinsic::nvvm_f2ull_rz_ftz: + // d2ll + case Intrinsic::nvvm_d2ll_rm: + case Intrinsic::nvvm_d2ll_rn: + case Intrinsic::nvvm_d2ll_rp: + case Intrinsic::nvvm_d2ll_rz: + // d2ull + case Intrinsic::nvvm_d2ull_rm: + case Intrinsic::nvvm_d2ull_rn: + case Intrinsic::nvvm_d2ull_rp: + case Intrinsic::nvvm_d2ull_rz: + return false; + } + llvm_unreachable("Checking NaN result for invalid f2i/d2i intrinsic"); +} + inline APFloat::roundingMode GetFPToIntegerRoundingMode(Intrinsic::ID IntrinsicID) { switch (IntrinsicID) { diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index a3b2e62a1b8ba..b03f0bca3c26c 100755 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -2625,8 +2625,17 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, case Intrinsic::nvvm_d2ull_rp: case Intrinsic::nvvm_d2ull_rz: { // In float-to-integer conversion, NaN inputs are converted to 0. - if (U.isNaN()) - return ConstantInt::get(Ty, 0); + if (U.isNaN()) { + // In float-to-integer conversion, NaN inputs are converted to 0 + // when the source and destination bitwidths are both less than 64. + if (nvvm::FPToIntegerIntrinsicNaNZero(IntrinsicID)) + return ConstantInt::get(Ty, 0); + + // Otherwise, the most significant bit is set. + unsigned BitWidth = Ty->getIntegerBitWidth(); + uint64_t Val = 1ULL << (BitWidth - 1); + return ConstantInt::get(Ty, APInt(BitWidth, Val, /*IsSigned=*/false)); + } APFloat::roundingMode RMode = nvvm::GetFPToIntegerRoundingMode(IntrinsicID); @@ -2636,13 +2645,11 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, APSInt ResInt(Ty->getIntegerBitWidth(), !IsSigned); auto FloatToRound = IsFTZ ? 
FTZPreserveSign(U) : U; + // Return max/min value for integers if the result is +/-inf or + // is too large to fit in the result's integer bitwidth. bool IsExact = false; - APFloat::opStatus Status = - FloatToRound.convertToInteger(ResInt, RMode, &IsExact); - - if (Status != APFloat::opInvalidOp) - return ConstantInt::get(Ty, ResInt); - return nullptr; + FloatToRound.convertToInteger(ResInt, RMode, &IsExact); + return ConstantInt::get(Ty, ResInt); } } diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index f4f89613b358d..4647b3cf8039d 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -281,21 +281,12 @@ static Instruction *convertNvvmIntrinsicToLlvm(InstCombiner &IC, return {Intrinsic::trunc, FTZ_MustBeOn}; // NVVM intrinsics that map to LLVM cast operations. - // - // Note that llvm's target-generic conversion operators correspond to the rz - // (round to zero) versions of the nvvm conversion intrinsics, even though - // most everything else here uses the rn (round to nearest even) nvvm ops. - case Intrinsic::nvvm_d2i_rz: - case Intrinsic::nvvm_f2i_rz: - case Intrinsic::nvvm_d2ll_rz: - case Intrinsic::nvvm_f2ll_rz: - return {Instruction::FPToSI}; - case Intrinsic::nvvm_d2ui_rz: - case Intrinsic::nvvm_f2ui_rz: - case Intrinsic::nvvm_d2ull_rz: - case Intrinsic::nvvm_f2ull_rz: - return {Instruction::FPToUI}; - // Integer to floating-point uses RN rounding, not RZ + // Note - we cannot map intrinsics like nvvm_d2ll_rz to LLVM's + // FPToSI, as NaN to int conversion with FPToSI is considered UB and is + // eliminated. NVVM conversion intrinsics are translated to PTX cvt + // instructions which define the outcome for NaN rather than leaving as UB. + // Therefore, translate NVVM intrinsics to sitofp/uitofp, but not to + // fptosi/fptoui. 
case Intrinsic::nvvm_i2d_rn: case Intrinsic::nvvm_i2f_rn: case Intrinsic::nvvm_ll2d_rn: diff --git a/llvm/test/Transforms/InstCombine/NVPTX/nvvm-intrins.ll b/llvm/test/Transforms/InstCombine/NVPTX/nvvm-intrins.ll index 1819f4ed181c0..4d856699b2d24 100644 --- a/llvm/test/Transforms/InstCombine/NVPTX/nvvm-intrins.ll +++ b/llvm/test/Transforms/InstCombine/NVPTX/nvvm-intrins.ll @@ -185,52 +185,63 @@ define float @trunc_float_ftz(float %a) #0 { } ; Check NVVM intrinsics that correspond to LLVM cast operations. +; fp -> integer casts should not be converted, as the semantics +; for NaN/Inf/Overflow inputs are different. +; Only integer -> fp casts should be converted. ; CHECK-LABEL: @test_d2i define i32 @test_d2i(double %a) #0 { -; CHECK: fptosi double %a to i32 +; CHECK: call i32 @llvm.nvvm.d2i.rz(double %a) +; CHECK-NOT: fptosi double %a to i32 %ret = call i32 @llvm.nvvm.d2i.rz(double %a) ret i32 %ret } ; CHECK-LABEL: @test_f2i define i32 @test_f2i(float %a) #0 { -; CHECK: fptosi float %a to i32 +; CHECK: call i32 @llvm.nvvm.f2i.rz(float %a) +; CHECK-NOT: fptosi float %a to i32 %ret = call i32 @llvm.nvvm.f2i.rz(float %a) ret i32 %ret } ; CHECK-LABEL: @test_d2ll define i64 @test_d2ll(double %a) #0 { -; CHECK: fptosi double %a to i64 +; CHECK: call i64 @llvm.nvvm.d2ll.rz(double %a) +; CHECK-NOT: fptosi double %a to i64 %ret = call i64 @llvm.nvvm.d2ll.rz(double %a) ret i64 %ret } ; CHECK-LABEL: @test_f2ll define i64 @test_f2ll(float %a) #0 { -; CHECK: fptosi float %a to i64 +; CHECK: call i64 @llvm.nvvm.f2ll.rz(float %a) +; CHECK-NOT: fptosi float %a to i64 %ret = call i64 @llvm.nvvm.f2ll.rz(float %a) ret i64 %ret } ; CHECK-LABEL: @test_d2ui define i32 @test_d2ui(double %a) #0 { -; CHECK: fptoui double %a to i32 +; CHECK: call i32 @llvm.nvvm.d2ui.rz(double %a) +; CHECK-NOT: fptoui double %a to i32 %ret = call i32 @llvm.nvvm.d2ui.rz(double %a) ret i32 %ret } ; CHECK-LABEL: @test_f2ui define i32 @test_f2ui(float %a) #0 { -; CHECK: fptoui float %a to i32 +; CHECK: call i32 
@llvm.nvvm.f2ui.rz(float %a) +; CHECK-NOT: fptoui float %a to i32 %ret = call i32 @llvm.nvvm.f2ui.rz(float %a) ret i32 %ret } ; CHECK-LABEL: @test_d2ull define i64 @test_d2ull(double %a) #0 { -; CHECK: fptoui double %a to i64 +; CHECK: call i64 @llvm.nvvm.d2ull.rz(double %a) +; CHECK-NOT: fptoui double %a to i64 %ret = call i64 @llvm.nvvm.d2ull.rz(double %a) ret i64 %ret } ; CHECK-LABEL: @test_f2ull define i64 @test_f2ull(float %a) #0 { -; CHECK: fptoui float %a to i64 +; CHECK: call i64 @llvm.nvvm.f2ull.rz(float %a) +; CHECK-NOT: fptoui float %a to i64 %ret = call i64 @llvm.nvvm.f2ull.rz(float %a) ret i64 %ret } @@ -497,4 +508,4 @@ declare float @llvm.nvvm.ui2f.rn(i32) declare double @llvm.nvvm.ull2d.rn(i64) declare float @llvm.nvvm.ull2f.rn(i64) declare i32 @llvm.nvvm.fshr.clamp.i32(i32, i32, i32) -declare i32 @llvm.nvvm.fshl.clamp.i32(i32, i32, i32) \ No newline at end of file +declare i32 @llvm.nvvm.fshl.clamp.i32(i32, i32, i32) diff --git a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2i-d2i.ll b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2i-d2i.ll index 543c73137c1b6..b1a1e6b86c293 100644 --- a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2i-d2i.ll +++ b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2i-d2i.ll @@ -334,8 +334,7 @@ define i32 @test_neg_1_5_d2i_rz() { ;+-------------------------------------------------------------+ define i32 @test_neg_1_5_f2ui_rm() { ; CHECK-LABEL: define i32 @test_neg_1_5_f2ui_rm() { -; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.f2ui.rm(float -1.500000e+00) -; CHECK-NEXT: ret i32 [[RES]] +; CHECK-NEXT: ret i32 0 ; %res = call i32 @llvm.nvvm.f2ui.rm(float -1.5) ret i32 %res @@ -343,8 +342,7 @@ define i32 @test_neg_1_5_f2ui_rm() { define i32 @test_neg_1_5_f2ui_rn() { ; CHECK-LABEL: define i32 @test_neg_1_5_f2ui_rn() { -; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.f2ui.rn(float -1.500000e+00) -; CHECK-NEXT: ret i32 [[RES]] +; CHECK-NEXT: ret i32 0 ; %res = call i32 @llvm.nvvm.f2ui.rn(float -1.5) ret 
i32 %res @@ -353,8 +351,7 @@ define i32 @test_neg_1_5_f2ui_rn() { define i32 @test_neg_1_5_f2ui_rp() { ; CHECK-LABEL: define i32 @test_neg_1_5_f2ui_rp() { -; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.f2ui.rp(float -1.500000e+00) -; CHECK-NEXT: ret i32 [[RES]] +; CHECK-NEXT: ret i32 0 ; %res = call i32 @llvm.nvvm.f2ui.rp(float -1.5) ret i32 %res @@ -362,8 +359,7 @@ define i32 @test_neg_1_5_f2ui_rp() { define i32 @test_neg_1_5_f2ui_rz() { ; CHECK-LABEL: define i32 @test_neg_1_5_f2ui_rz() { -; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.f2ui.rz(float -1.500000e+00) -; CHECK-NEXT: ret i32 [[RES]] +; CHECK-NEXT: ret i32 0 ; %res = call i32 @llvm.nvvm.f2ui.rz(float -1.5) ret i32 %res @@ -374,8 +370,7 @@ define i32 @test_neg_1_5_f2ui_rz() { ;+-------------------------------------------------------------+ define i32 @test_neg_1_5_f2ui_rm_ftz() { ; CHECK-LABEL: define i32 @test_neg_1_5_f2ui_rm_ftz() { -; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.f2ui.rm.ftz(float -1.500000e+00) -; CHECK-NEXT: ret i32 [[RES]] +; CHECK-NEXT: ret i32 0 ; %res = call i32 @llvm.nvvm.f2ui.rm.ftz(float -1.5) ret i32 %res @@ -383,8 +378,7 @@ define i32 @test_neg_1_5_f2ui_rm_ftz() { define i32 @test_neg_1_5_f2ui_rn_ftz() { ; CHECK-LABEL: define i32 @test_neg_1_5_f2ui_rn_ftz() { -; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.f2ui.rn.ftz(float -1.500000e+00) -; CHECK-NEXT: ret i32 [[RES]] +; CHECK-NEXT: ret i32 0 ; %res = call i32 @llvm.nvvm.f2ui.rn.ftz(float -1.5) ret i32 %res @@ -392,8 +386,7 @@ define i32 @test_neg_1_5_f2ui_rn_ftz() { define i32 @test_neg_1_5_f2ui_rp_ftz() { ; CHECK-LABEL: define i32 @test_neg_1_5_f2ui_rp_ftz() { -; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.f2ui.rp.ftz(float -1.500000e+00) -; CHECK-NEXT: ret i32 [[RES]] +; CHECK-NEXT: ret i32 0 ; %res = call i32 @llvm.nvvm.f2ui.rp.ftz(float -1.5) ret i32 %res @@ -401,8 +394,7 @@ define i32 @test_neg_1_5_f2ui_rp_ftz() { define i32 @test_neg_1_5_f2ui_rz_ftz() { ; CHECK-LABEL: define i32 @test_neg_1_5_f2ui_rz_ftz() { 
-; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.f2ui.rz.ftz(float -1.500000e+00) -; CHECK-NEXT: ret i32 [[RES]] +; CHECK-NEXT: ret i32 0 ; %res = call i32 @llvm.nvvm.f2ui.rz.ftz(float -1.5) ret i32 %res @@ -412,8 +404,7 @@ define i32 @test_neg_1_5_f2ui_rz_ftz() { ;+-------------------------------------------------------------+ define i32 @test_neg_1_5_d2ui_rm() { ; CHECK-LABEL: define i32 @test_neg_1_5_d2ui_rm() { -; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.d2ui.rm(double -1.500000e+00) -; CHECK-NEXT: ret i32 [[RES]] +; CHECK-NEXT: ret i32 0 ; %res = call i32 @llvm.nvvm.d2ui.rm(double -1.5) ret i32 %res @@ -421,8 +412,7 @@ define i32 @test_neg_1_5_d2ui_rm() { define i32 @test_neg_1_5_d2ui_rn() { ; CHECK-LABEL: define i32 @test_neg_1_5_d2ui_rn() { -; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.d2ui.rn(double -1.500000e+00) -; CHECK-NEXT: ret i32 [[RES]] +; CHECK-NEXT: ret i32 0 ; %res = call i32 @llvm.nvvm.d2ui.rn(double -1.5) ret i32 %res @@ -431,8 +421,7 @@ define i32 @test_neg_1_5_d2ui_rn() { define i32 @test_neg_1_5_d2ui_rp() { ; CHECK-LABEL: define i32 @test_neg_1_5_d2ui_rp() { -; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.d2ui.rp(double -1.500000e+00) -; CHECK-NEXT: ret i32 [[RES]] +; CHECK-NEXT: ret i32 0 ; %res = call i32 @llvm.nvvm.d2ui.rp(double -1.5) ret i32 %res @@ -440,8 +429,7 @@ define i32 @test_neg_1_5_d2ui_rp() { define i32 @test_neg_1_5_d2ui_rz() { ; CHECK-LABEL: define i32 @test_neg_1_5_d2ui_rz() { -; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.d2ui.rz(double -1.500000e+00) -; CHECK-NEXT: ret i32 [[RES]] +; CHECK-NEXT: ret i32 0 ; %res = call i32 @llvm.nvvm.d2ui.rz(double -1.5) ret i32 %res @@ -526,7 +514,7 @@ define i32 @test_nan_f2i_rz_ftz() { ;+-------------------------------------------------------------+ define i32 @test_nan_d2i_rm() { ; CHECK-LABEL: define i32 @test_nan_d2i_rm() { -; CHECK-NEXT: ret i32 0 +; CHECK-NEXT: ret i32 -2147483648 ; %res = call i32 @llvm.nvvm.d2i.rm(double 0xFFF8000000000000) ret i32 %res @@ -534,7 
+522,7 @@ define i32 @test_nan_d2i_rm() { define i32 @test_nan_d2i_rn() { ; CHECK-LABEL: define i32 @test_nan_d2i_rn() { -; CHECK-NEXT: ret i32 0 +; CHECK-NEXT: ret i32 -2147483648 ; %res = call i32 @llvm.nvvm.d2i.rn(double 0xFFF8000000000000) ret i32 %res @@ -543,7 +531,7 @@ define i32 @test_nan_d2i_rn() { define i32 @test_nan_d2i_rp() { ; CHECK-LABEL: define i32 @test_nan_d2i_rp() { -; CHECK-NEXT: ret i32 0 +; CHECK-NEXT: ret i32 -2147483648 ; %res = call i32 @llvm.nvvm.d2i.rp(double 0xFFF8000000000000) ret i32 %res @@ -551,7 +539,7 @@ define i32 @test_nan_d2i_rp() { define i32 @test_nan_d2i_rz() { ; CHECK-LABEL: define i32 @test_nan_d2i_rz() { -; CHECK-NEXT: ret i32 0 +; CHECK-NEXT: ret i32 -2147483648 ; %res = call i32 @llvm.nvvm.d2i.rz(double 0xFFF8000000000000) ret i32 %res @@ -632,7 +620,7 @@ define i32 @test_nan_f2ui_rz_ftz() { ;+-------------------------------------------------------------+ define i32 @test_nan_d2ui_rm() { ; CHECK-LABEL: define i32 @test_nan_d2ui_rm() { -; CHECK-NEXT: ret i32 0 +; CHECK-NEXT: ret i32 -2147483648 ; %res = call i32 @llvm.nvvm.d2ui.rm(double 0xFFF8000000000000) ret i32 %res @@ -640,7 +628,7 @@ define i32 @test_nan_d2ui_rm() { define i32 @test_nan_d2ui_rn() { ; CHECK-LABEL: define i32 @test_nan_d2ui_rn() { -; CHECK-NEXT: ret i32 0 +; CHECK-NEXT: ret i32 -2147483648 ; %res = call i32 @llvm.nvvm.d2ui.rn(double 0xFFF8000000000000) ret i32 %res @@ -649,7 +637,7 @@ define i32 @test_nan_d2ui_rn() { define i32 @test_nan_d2ui_rp() { ; CHECK-LABEL: define i32 @test_nan_d2ui_rp() { -; CHECK-NEXT: ret i32 0 +; CHECK-NEXT: ret i32 -2147483648 ; %res = call i32 @llvm.nvvm.d2ui.rp(double 0xFFF8000000000000) ret i32 %res @@ -657,7 +645,7 @@ define i32 @test_nan_d2ui_rp() { define i32 @test_nan_d2ui_rz() { ; CHECK-LABEL: define i32 @test_nan_d2ui_rz() { -; CHECK-NEXT: ret i32 0 +; CHECK-NEXT: ret i32 -2147483648 ; %res = call i32 @llvm.nvvm.d2ui.rz(double 0xFFF8000000000000) ret i32 %res @@ -994,8 +982,7 @@ define i32 
@test_neg_subnormal_d2i_rz() { ;+-------------------------------------------------------------+ define i32 @test_neg_subnormal_f2ui_rm() { ; CHECK-LABEL: define i32 @test_neg_subnormal_f2ui_rm() { -; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.f2ui.rm(float 0xB80FFFFFC0000000) -; CHECK-NEXT: ret i32 [[RES]] +; CHECK-NEXT: ret i32 0 ; %res = call i32 @llvm.nvvm.f2ui.rm(float 0xB80FFFFFC0000000) ret i32 %res @@ -1065,8 +1052,7 @@ define i32 @test_neg_subnormal_f2ui_rz_ftz() { ;+-------------------------------------------------------------+ define i32 @test_neg_subnormal_d2ui_rm() { ; CHECK-LABEL: define i32 @test_neg_subnormal_d2ui_rm() { -; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.nvvm.d2ui.rm(double 0x800FFFFFFFFFFFFF) -; CHECK-NEXT: ret i32 [[RES]] +; CHECK-NEXT: ret i32 0 ; %res = call i32 @llvm.nvvm.d2ui.rm(double 0x800fffffffffffff) ret i32 %res diff --git a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2ll-d2ll.ll b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2ll-d2ll.ll index be38177dce2c3..ffadf26f3c5b5 100644 --- a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2ll-d2ll.ll +++ b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-f2ll-d2ll.ll @@ -334,8 +334,7 @@ define i64 @test_neg_1_5_d2ll_rz() { ;+-------------------------------------------------------------+ define i64 @test_neg_1_5_f2ull_rm() { ; CHECK-LABEL: define i64 @test_neg_1_5_f2ull_rm() { -; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.f2ull.rm(float -1.500000e+00) -; CHECK-NEXT: ret i64 [[RES]] +; CHECK-NEXT: ret i64 0 ; %res = call i64 @llvm.nvvm.f2ull.rm(float -1.5) ret i64 %res @@ -343,8 +342,7 @@ define i64 @test_neg_1_5_f2ull_rm() { define i64 @test_neg_1_5_f2ull_rn() { ; CHECK-LABEL: define i64 @test_neg_1_5_f2ull_rn() { -; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.f2ull.rn(float -1.500000e+00) -; CHECK-NEXT: ret i64 [[RES]] +; CHECK-NEXT: ret i64 0 ; %res = call i64 @llvm.nvvm.f2ull.rn(float -1.5) ret i64 %res @@ -353,8 +351,7 @@ define i64 @test_neg_1_5_f2ull_rn() 
{ define i64 @test_neg_1_5_f2ull_rp() { ; CHECK-LABEL: define i64 @test_neg_1_5_f2ull_rp() { -; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.f2ull.rp(float -1.500000e+00) -; CHECK-NEXT: ret i64 [[RES]] +; CHECK-NEXT: ret i64 0 ; %res = call i64 @llvm.nvvm.f2ull.rp(float -1.5) ret i64 %res @@ -362,8 +359,7 @@ define i64 @test_neg_1_5_f2ull_rp() { define i64 @test_neg_1_5_f2ull_rz() { ; CHECK-LABEL: define i64 @test_neg_1_5_f2ull_rz() { -; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.f2ull.rz(float -1.500000e+00) -; CHECK-NEXT: ret i64 [[RES]] +; CHECK-NEXT: ret i64 0 ; %res = call i64 @llvm.nvvm.f2ull.rz(float -1.5) ret i64 %res @@ -374,8 +370,7 @@ define i64 @test_neg_1_5_f2ull_rz() { ;+-------------------------------------------------------------+ define i64 @test_neg_1_5_f2ull_rm_ftz() { ; CHECK-LABEL: define i64 @test_neg_1_5_f2ull_rm_ftz() { -; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.f2ull.rm.ftz(float -1.500000e+00) -; CHECK-NEXT: ret i64 [[RES]] +; CHECK-NEXT: ret i64 0 ; %res = call i64 @llvm.nvvm.f2ull.rm.ftz(float -1.5) ret i64 %res @@ -383,8 +378,7 @@ define i64 @test_neg_1_5_f2ull_rm_ftz() { define i64 @test_neg_1_5_f2ull_rn_ftz() { ; CHECK-LABEL: define i64 @test_neg_1_5_f2ull_rn_ftz() { -; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.f2ull.rn.ftz(float -1.500000e+00) -; CHECK-NEXT: ret i64 [[RES]] +; CHECK-NEXT: ret i64 0 ; %res = call i64 @llvm.nvvm.f2ull.rn.ftz(float -1.5) ret i64 %res @@ -392,8 +386,7 @@ define i64 @test_neg_1_5_f2ull_rn_ftz() { define i64 @test_neg_1_5_f2ull_rp_ftz() { ; CHECK-LABEL: define i64 @test_neg_1_5_f2ull_rp_ftz() { -; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.f2ull.rp.ftz(float -1.500000e+00) -; CHECK-NEXT: ret i64 [[RES]] +; CHECK-NEXT: ret i64 0 ; %res = call i64 @llvm.nvvm.f2ull.rp.ftz(float -1.5) ret i64 %res @@ -401,8 +394,7 @@ define i64 @test_neg_1_5_f2ull_rp_ftz() { define i64 @test_neg_1_5_f2ull_rz_ftz() { ; CHECK-LABEL: define i64 @test_neg_1_5_f2ull_rz_ftz() { -; CHECK-NEXT: [[RES:%.*]] = call i64 
@llvm.nvvm.f2ull.rz.ftz(float -1.500000e+00) -; CHECK-NEXT: ret i64 [[RES]] +; CHECK-NEXT: ret i64 0 ; %res = call i64 @llvm.nvvm.f2ull.rz.ftz(float -1.5) ret i64 %res @@ -412,8 +404,7 @@ define i64 @test_neg_1_5_f2ull_rz_ftz() { ;+-------------------------------------------------------------+ define i64 @test_neg_1_5_d2ull_rm() { ; CHECK-LABEL: define i64 @test_neg_1_5_d2ull_rm() { -; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.d2ull.rm(double -1.500000e+00) -; CHECK-NEXT: ret i64 [[RES]] +; CHECK-NEXT: ret i64 0 ; %res = call i64 @llvm.nvvm.d2ull.rm(double -1.5) ret i64 %res @@ -421,8 +412,7 @@ define i64 @test_neg_1_5_d2ull_rm() { define i64 @test_neg_1_5_d2ull_rn() { ; CHECK-LABEL: define i64 @test_neg_1_5_d2ull_rn() { -; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.d2ull.rn(double -1.500000e+00) -; CHECK-NEXT: ret i64 [[RES]] +; CHECK-NEXT: ret i64 0 ; %res = call i64 @llvm.nvvm.d2ull.rn(double -1.5) ret i64 %res @@ -431,8 +421,7 @@ define i64 @test_neg_1_5_d2ull_rn() { define i64 @test_neg_1_5_d2ull_rp() { ; CHECK-LABEL: define i64 @test_neg_1_5_d2ull_rp() { -; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.d2ull.rp(double -1.500000e+00) -; CHECK-NEXT: ret i64 [[RES]] +; CHECK-NEXT: ret i64 0 ; %res = call i64 @llvm.nvvm.d2ull.rp(double -1.5) ret i64 %res @@ -440,8 +429,7 @@ define i64 @test_neg_1_5_d2ull_rp() { define i64 @test_neg_1_5_d2ull_rz() { ; CHECK-LABEL: define i64 @test_neg_1_5_d2ull_rz() { -; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.d2ull.rz(double -1.500000e+00) -; CHECK-NEXT: ret i64 [[RES]] +; CHECK-NEXT: ret i64 0 ; %res = call i64 @llvm.nvvm.d2ull.rz(double -1.5) ret i64 %res @@ -456,7 +444,7 @@ define i64 @test_neg_1_5_d2ull_rz() { ;+-------------------------------------------------------------+ define i64 @test_nan_f2ll_rm() { ; CHECK-LABEL: define i64 @test_nan_f2ll_rm() { -; CHECK-NEXT: ret i64 0 +; CHECK-NEXT: ret i64 -9223372036854775808 ; %res = call i64 @llvm.nvvm.f2ll.rm(float 0x7FFFFF0000000000) ret i64 %res @@ -464,7 +452,7 
@@ define i64 @test_nan_f2ll_rm() { define i64 @test_nan_f2ll_rn() { ; CHECK-LABEL: define i64 @test_nan_f2ll_rn() { -; CHECK-NEXT: ret i64 0 +; CHECK-NEXT: ret i64 -9223372036854775808 ; %res = call i64 @llvm.nvvm.f2ll.rn(float 0x7FFFFF0000000000) ret i64 %res @@ -473,7 +461,7 @@ define i64 @test_nan_f2ll_rn() { define i64 @test_nan_f2ll_rp() { ; CHECK-LABEL: define i64 @test_nan_f2ll_rp() { -; CHECK-NEXT: ret i64 0 +; CHECK-NEXT: ret i64 -9223372036854775808 ; %res = call i64 @llvm.nvvm.f2ll.rp(float 0x7FFFFF0000000000) ret i64 %res @@ -481,7 +469,7 @@ define i64 @test_nan_f2ll_rp() { define i64 @test_nan_f2ll_rz() { ; CHECK-LABEL: define i64 @test_nan_f2ll_rz() { -; CHECK-NEXT: ret i64 0 +; CHECK-NEXT: ret i64 -9223372036854775808 ; %res = call i64 @llvm.nvvm.f2ll.rz(float 0x7FFFFF0000000000) ret i64 %res @@ -492,7 +480,7 @@ define i64 @test_nan_f2ll_rz() { ;+-------------------------------------------------------------+ define i64 @test_nan_f2ll_rm_ftz() { ; CHECK-LABEL: define i64 @test_nan_f2ll_rm_ftz() { -; CHECK-NEXT: ret i64 0 +; CHECK-NEXT: ret i64 -9223372036854775808 ; %res = call i64 @llvm.nvvm.f2ll.rm.ftz(float 0x7FFFFF0000000000) ret i64 %res @@ -500,7 +488,7 @@ define i64 @test_nan_f2ll_rm_ftz() { define i64 @test_nan_f2ll_rn_ftz() { ; CHECK-LABEL: define i64 @test_nan_f2ll_rn_ftz() { -; CHECK-NEXT: ret i64 0 +; CHECK-NEXT: ret i64 -9223372036854775808 ; %res = call i64 @llvm.nvvm.f2ll.rn.ftz(float 0x7FFFFF0000000000) ret i64 %res @@ -508,7 +496,7 @@ define i64 @test_nan_f2ll_rn_ftz() { define i64 @test_nan_f2ll_rp_ftz() { ; CHECK-LABEL: define i64 @test_nan_f2ll_rp_ftz() { -; CHECK-NEXT: ret i64 0 +; CHECK-NEXT: ret i64 -9223372036854775808 ; %res = call i64 @llvm.nvvm.f2ll.rp.ftz(float 0x7FFFFF0000000000) ret i64 %res @@ -516,7 +504,7 @@ define i64 @test_nan_f2ll_rp_ftz() { define i64 @test_nan_f2ll_rz_ftz() { ; CHECK-LABEL: define i64 @test_nan_f2ll_rz_ftz() { -; CHECK-NEXT: ret i64 0 +; CHECK-NEXT: ret i64 -9223372036854775808 ; %res = call i64 
@llvm.nvvm.f2ll.rz.ftz(float 0x7FFFFF0000000000) ret i64 %res @@ -526,7 +514,7 @@ define i64 @test_nan_f2ll_rz_ftz() { ;+-------------------------------------------------------------+ define i64 @test_nan_d2ll_rm() { ; CHECK-LABEL: define i64 @test_nan_d2ll_rm() { -; CHECK-NEXT: ret i64 0 +; CHECK-NEXT: ret i64 -9223372036854775808 ; %res = call i64 @llvm.nvvm.d2ll.rm(double 0xFFF8000000000000) ret i64 %res @@ -534,7 +522,7 @@ define i64 @test_nan_d2ll_rm() { define i64 @test_nan_d2ll_rn() { ; CHECK-LABEL: define i64 @test_nan_d2ll_rn() { -; CHECK-NEXT: ret i64 0 +; CHECK-NEXT: ret i64 -9223372036854775808 ; %res = call i64 @llvm.nvvm.d2ll.rn(double 0xFFF8000000000000) ret i64 %res @@ -543,7 +531,7 @@ define i64 @test_nan_d2ll_rn() { define i64 @test_nan_d2ll_rp() { ; CHECK-LABEL: define i64 @test_nan_d2ll_rp() { -; CHECK-NEXT: ret i64 0 +; CHECK-NEXT: ret i64 -9223372036854775808 ; %res = call i64 @llvm.nvvm.d2ll.rp(double 0xFFF8000000000000) ret i64 %res @@ -551,7 +539,7 @@ define i64 @test_nan_d2ll_rp() { define i64 @test_nan_d2ll_rz() { ; CHECK-LABEL: define i64 @test_nan_d2ll_rz() { -; CHECK-NEXT: ret i64 0 +; CHECK-NEXT: ret i64 -9223372036854775808 ; %res = call i64 @llvm.nvvm.d2ll.rz(double 0xFFF8000000000000) ret i64 %res @@ -562,7 +550,7 @@ define i64 @test_nan_d2ll_rz() { ;+-------------------------------------------------------------+ define i64 @test_nan_f2ull_rm() { ; CHECK-LABEL: define i64 @test_nan_f2ull_rm() { -; CHECK-NEXT: ret i64 0 +; CHECK-NEXT: ret i64 -9223372036854775808 ; %res = call i64 @llvm.nvvm.f2ull.rm(float 0x7FFFFF0000000000) ret i64 %res @@ -570,7 +558,7 @@ define i64 @test_nan_f2ull_rm() { define i64 @test_nan_f2ull_rn() { ; CHECK-LABEL: define i64 @test_nan_f2ull_rn() { -; CHECK-NEXT: ret i64 0 +; CHECK-NEXT: ret i64 -9223372036854775808 ; %res = call i64 @llvm.nvvm.f2ull.rn(float 0x7FFFFF0000000000) ret i64 %res @@ -579,7 +567,7 @@ define i64 @test_nan_f2ull_rn() { define i64 @test_nan_f2ull_rp() { ; CHECK-LABEL: define i64 
@test_nan_f2ull_rp() { -; CHECK-NEXT: ret i64 0 +; CHECK-NEXT: ret i64 -9223372036854775808 ; %res = call i64 @llvm.nvvm.f2ull.rp(float 0x7FFFFF0000000000) ret i64 %res @@ -587,7 +575,7 @@ define i64 @test_nan_f2ull_rp() { define i64 @test_nan_f2ull_rz() { ; CHECK-LABEL: define i64 @test_nan_f2ull_rz() { -; CHECK-NEXT: ret i64 0 +; CHECK-NEXT: ret i64 -9223372036854775808 ; %res = call i64 @llvm.nvvm.f2ull.rz(float 0x7FFFFF0000000000) ret i64 %res @@ -598,7 +586,7 @@ define i64 @test_nan_f2ull_rz() { ;+-------------------------------------------------------------+ define i64 @test_nan_f2ull_rm_ftz() { ; CHECK-LABEL: define i64 @test_nan_f2ull_rm_ftz() { -; CHECK-NEXT: ret i64 0 +; CHECK-NEXT: ret i64 -9223372036854775808 ; %res = call i64 @llvm.nvvm.f2ull.rm.ftz(float 0x7FFFFF0000000000) ret i64 %res @@ -606,7 +594,7 @@ define i64 @test_nan_f2ull_rm_ftz() { define i64 @test_nan_f2ull_rn_ftz() { ; CHECK-LABEL: define i64 @test_nan_f2ull_rn_ftz() { -; CHECK-NEXT: ret i64 0 +; CHECK-NEXT: ret i64 -9223372036854775808 ; %res = call i64 @llvm.nvvm.f2ull.rn.ftz(float 0x7FFFFF0000000000) ret i64 %res @@ -614,7 +602,7 @@ define i64 @test_nan_f2ull_rn_ftz() { define i64 @test_nan_f2ull_rp_ftz() { ; CHECK-LABEL: define i64 @test_nan_f2ull_rp_ftz() { -; CHECK-NEXT: ret i64 0 +; CHECK-NEXT: ret i64 -9223372036854775808 ; %res = call i64 @llvm.nvvm.f2ull.rp.ftz(float 0x7FFFFF0000000000) ret i64 %res @@ -622,7 +610,7 @@ define i64 @test_nan_f2ull_rp_ftz() { define i64 @test_nan_f2ull_rz_ftz() { ; CHECK-LABEL: define i64 @test_nan_f2ull_rz_ftz() { -; CHECK-NEXT: ret i64 0 +; CHECK-NEXT: ret i64 -9223372036854775808 ; %res = call i64 @llvm.nvvm.f2ull.rz.ftz(float 0x7FFFFF0000000000) ret i64 %res @@ -632,7 +620,7 @@ define i64 @test_nan_f2ull_rz_ftz() { ;+-------------------------------------------------------------+ define i64 @test_nan_d2ull_rm() { ; CHECK-LABEL: define i64 @test_nan_d2ull_rm() { -; CHECK-NEXT: ret i64 0 +; CHECK-NEXT: ret i64 -9223372036854775808 ; %res = call 
i64 @llvm.nvvm.d2ull.rm(double 0xFFF8000000000000) ret i64 %res @@ -640,7 +628,7 @@ define i64 @test_nan_d2ull_rm() { define i64 @test_nan_d2ull_rn() { ; CHECK-LABEL: define i64 @test_nan_d2ull_rn() { -; CHECK-NEXT: ret i64 0 +; CHECK-NEXT: ret i64 -9223372036854775808 ; %res = call i64 @llvm.nvvm.d2ull.rn(double 0xFFF8000000000000) ret i64 %res @@ -649,7 +637,7 @@ define i64 @test_nan_d2ull_rn() { define i64 @test_nan_d2ull_rp() { ; CHECK-LABEL: define i64 @test_nan_d2ull_rp() { -; CHECK-NEXT: ret i64 0 +; CHECK-NEXT: ret i64 -9223372036854775808 ; %res = call i64 @llvm.nvvm.d2ull.rp(double 0xFFF8000000000000) ret i64 %res @@ -657,7 +645,7 @@ define i64 @test_nan_d2ull_rp() { define i64 @test_nan_d2ull_rz() { ; CHECK-LABEL: define i64 @test_nan_d2ull_rz() { -; CHECK-NEXT: ret i64 0 +; CHECK-NEXT: ret i64 -9223372036854775808 ; %res = call i64 @llvm.nvvm.d2ull.rz(double 0xFFF8000000000000) ret i64 %res @@ -994,8 +982,7 @@ define i64 @test_neg_subnormal_d2ll_rz() { ;+-------------------------------------------------------------+ define i64 @test_neg_subnormal_f2ull_rm() { ; CHECK-LABEL: define i64 @test_neg_subnormal_f2ull_rm() { -; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.f2ull.rm(float 0xB80FFFFFC0000000) -; CHECK-NEXT: ret i64 [[RES]] +; CHECK-NEXT: ret i64 0 ; %res = call i64 @llvm.nvvm.f2ull.rm(float 0xB80FFFFFC0000000) ret i64 %res @@ -1065,8 +1052,7 @@ define i64 @test_neg_subnormal_f2ull_rz_ftz() { ;+-------------------------------------------------------------+ define i64 @test_neg_subnormal_d2ull_rm() { ; CHECK-LABEL: define i64 @test_neg_subnormal_d2ull_rm() { -; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.nvvm.d2ull.rm(double 0x800FFFFFFFFFFFFF) -; CHECK-NEXT: ret i64 [[RES]] +; CHECK-NEXT: ret i64 0 ; %res = call i64 @llvm.nvvm.d2ull.rm(double 0x800fffffffffffff) ret i64 %res