Skip to content

Commit 25c0da8

Browse files
[NVPTX] Fix NaN + overflow semantics of f2ll/d2i (#159530)
Fix the NaN-handling semantics of various NVVM intrinsics converting from fp types to integer types. Previously in ConstantFolding, NaN inputs would be constant-folded to 0. However, v9.0 of the PTX spec states that: In float-to-integer conversions, depending upon conversion types, NaN input results in the following value: * Zero if source is not `.f64` and destination is not `.s64`, `.u64`. * Otherwise `1 << (BitWidth(dst) - 1)` corresponding to the value of `(MAXINT >> 1) + 1` for unsigned type or `MININT` for signed type. Also, support for constant-folding +/-Inf and values which overflow/underflow the integer output type has been added (they clamp to min/max int). Because of this NaN-handling semantic difference, we also need to disable transforming several intrinsics to FPToSI/FPToUI, as the LLVM instruction will return poison, but the intrinsics have defined behaviour for edge cases such as NaN/Inf/overflow.
1 parent ec27c2d commit 25c0da8

File tree

6 files changed

+165
-120
lines changed

6 files changed

+165
-120
lines changed

llvm/include/llvm/IR/NVVMIntrinsicUtils.h

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,70 @@ inline bool FPToIntegerIntrinsicResultIsSigned(Intrinsic::ID IntrinsicID) {
189189
"Checking invalid f2i/d2i intrinsic for signed int conversion");
190190
}
191191

192+
/// Reports how an NVVM fp-to-integer conversion intrinsic treats a NaN input.
///
/// Returns true when a NaN input converts to 0, which is the case for the
/// conversions from float to a 32-bit integer (f2i / f2ui). Returns false for
/// every conversion that has a double source or a 64-bit destination; for
/// those the constant folder produces `1 << (BitWidth(dst) - 1)` instead,
/// i.e. only the most significant bit of the result is set.
///
/// \param IntrinsicID one of the nvvm f2i/f2ui/d2i/d2ui/f2ll/f2ull/d2ll/d2ull
///        conversion intrinsics listed below. Any other ID falls out of the
///        switch and hits llvm_unreachable.
inline bool FPToIntegerIntrinsicNaNZero(Intrinsic::ID IntrinsicID) {
193+
switch (IntrinsicID) {
194+
// f2i: float source, 32-bit signed result
195+
case Intrinsic::nvvm_f2i_rm:
196+
case Intrinsic::nvvm_f2i_rn:
197+
case Intrinsic::nvvm_f2i_rp:
198+
case Intrinsic::nvvm_f2i_rz:
199+
case Intrinsic::nvvm_f2i_rm_ftz:
200+
case Intrinsic::nvvm_f2i_rn_ftz:
201+
case Intrinsic::nvvm_f2i_rp_ftz:
202+
case Intrinsic::nvvm_f2i_rz_ftz:
203+
// f2ui: float source, 32-bit unsigned result
204+
case Intrinsic::nvvm_f2ui_rm:
205+
case Intrinsic::nvvm_f2ui_rn:
206+
case Intrinsic::nvvm_f2ui_rp:
207+
case Intrinsic::nvvm_f2ui_rz:
208+
case Intrinsic::nvvm_f2ui_rm_ftz:
209+
case Intrinsic::nvvm_f2ui_rn_ftz:
210+
case Intrinsic::nvvm_f2ui_rp_ftz:
211+
case Intrinsic::nvvm_f2ui_rz_ftz:
212+
// Source is not f64 and destination is not 64 bits wide: NaN becomes 0.
return true;
213+
// d2i: double source, 32-bit signed result
214+
case Intrinsic::nvvm_d2i_rm:
215+
case Intrinsic::nvvm_d2i_rn:
216+
case Intrinsic::nvvm_d2i_rp:
217+
case Intrinsic::nvvm_d2i_rz:
218+
// d2ui: double source, 32-bit unsigned result
219+
case Intrinsic::nvvm_d2ui_rm:
220+
case Intrinsic::nvvm_d2ui_rn:
221+
case Intrinsic::nvvm_d2ui_rp:
222+
case Intrinsic::nvvm_d2ui_rz:
223+
// f2ll: float source, 64-bit signed result
224+
case Intrinsic::nvvm_f2ll_rm:
225+
case Intrinsic::nvvm_f2ll_rn:
226+
case Intrinsic::nvvm_f2ll_rp:
227+
case Intrinsic::nvvm_f2ll_rz:
228+
case Intrinsic::nvvm_f2ll_rm_ftz:
229+
case Intrinsic::nvvm_f2ll_rn_ftz:
230+
case Intrinsic::nvvm_f2ll_rp_ftz:
231+
case Intrinsic::nvvm_f2ll_rz_ftz:
232+
// f2ull: float source, 64-bit unsigned result
233+
case Intrinsic::nvvm_f2ull_rm:
234+
case Intrinsic::nvvm_f2ull_rn:
235+
case Intrinsic::nvvm_f2ull_rp:
236+
case Intrinsic::nvvm_f2ull_rz:
237+
case Intrinsic::nvvm_f2ull_rm_ftz:
238+
case Intrinsic::nvvm_f2ull_rn_ftz:
239+
case Intrinsic::nvvm_f2ull_rp_ftz:
240+
case Intrinsic::nvvm_f2ull_rz_ftz:
241+
// d2ll: double source, 64-bit signed result
242+
case Intrinsic::nvvm_d2ll_rm:
243+
case Intrinsic::nvvm_d2ll_rn:
244+
case Intrinsic::nvvm_d2ll_rp:
245+
case Intrinsic::nvvm_d2ll_rz:
246+
// d2ull: double source, 64-bit unsigned result
247+
case Intrinsic::nvvm_d2ull_rm:
248+
case Intrinsic::nvvm_d2ull_rn:
249+
case Intrinsic::nvvm_d2ull_rp:
250+
case Intrinsic::nvvm_d2ull_rz:
251+
// f64 source or 64-bit destination: NaN does not become 0; the caller
// materializes a result with only the most significant bit set.
return false;
252+
}
253+
// Only reached when IntrinsicID is not one of the conversions listed above.
llvm_unreachable("Checking NaN result for invalid f2i/d2i intrinsic");
254+
}
255+
192256
inline APFloat::roundingMode
193257
GetFPToIntegerRoundingMode(Intrinsic::ID IntrinsicID) {
194258
switch (IntrinsicID) {

llvm/lib/Analysis/ConstantFolding.cpp

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2625,8 +2625,17 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
26252625
case Intrinsic::nvvm_d2ull_rp:
26262626
case Intrinsic::nvvm_d2ull_rz: {
26272627
// In float-to-integer conversion, NaN inputs are converted to 0.
2628-
if (U.isNaN())
2629-
return ConstantInt::get(Ty, 0);
2628+
if (U.isNaN()) {
2629+
// In float-to-integer conversion, NaN inputs are converted to 0
2630+
// when the source and destination bitwidths are both less than 64.
2631+
if (nvvm::FPToIntegerIntrinsicNaNZero(IntrinsicID))
2632+
return ConstantInt::get(Ty, 0);
2633+
2634+
// Otherwise, the most significant bit is set.
2635+
unsigned BitWidth = Ty->getIntegerBitWidth();
2636+
uint64_t Val = 1ULL << (BitWidth - 1);
2637+
return ConstantInt::get(Ty, APInt(BitWidth, Val, /*IsSigned=*/false));
2638+
}
26302639

26312640
APFloat::roundingMode RMode =
26322641
nvvm::GetFPToIntegerRoundingMode(IntrinsicID);
@@ -2636,13 +2645,11 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
26362645
APSInt ResInt(Ty->getIntegerBitWidth(), !IsSigned);
26372646
auto FloatToRound = IsFTZ ? FTZPreserveSign(U) : U;
26382647

2648+
// Return max/min value for integers if the result is +/-inf or
2649+
// is too large to fit in the result's integer bitwidth.
26392650
bool IsExact = false;
2640-
APFloat::opStatus Status =
2641-
FloatToRound.convertToInteger(ResInt, RMode, &IsExact);
2642-
2643-
if (Status != APFloat::opInvalidOp)
2644-
return ConstantInt::get(Ty, ResInt);
2645-
return nullptr;
2651+
FloatToRound.convertToInteger(ResInt, RMode, &IsExact);
2652+
return ConstantInt::get(Ty, ResInt);
26462653
}
26472654
}
26482655

llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -281,21 +281,12 @@ static Instruction *convertNvvmIntrinsicToLlvm(InstCombiner &IC,
281281
return {Intrinsic::trunc, FTZ_MustBeOn};
282282

283283
// NVVM intrinsics that map to LLVM cast operations.
284-
//
285-
// Note that llvm's target-generic conversion operators correspond to the rz
286-
// (round to zero) versions of the nvvm conversion intrinsics, even though
287-
// most everything else here uses the rn (round to nearest even) nvvm ops.
288-
case Intrinsic::nvvm_d2i_rz:
289-
case Intrinsic::nvvm_f2i_rz:
290-
case Intrinsic::nvvm_d2ll_rz:
291-
case Intrinsic::nvvm_f2ll_rz:
292-
return {Instruction::FPToSI};
293-
case Intrinsic::nvvm_d2ui_rz:
294-
case Intrinsic::nvvm_f2ui_rz:
295-
case Intrinsic::nvvm_d2ull_rz:
296-
case Intrinsic::nvvm_f2ull_rz:
297-
return {Instruction::FPToUI};
298-
// Integer to floating-point uses RN rounding, not RZ
284+
// Note - we cannot map intrinsics like nvvm_d2ll_rz to LLVM's
285+
// FPToSI, as NaN to int conversion with FPToSI is considered UB and is
286+
// eliminated. NVVM conversion intrinsics are translated to PTX cvt
287+
// instructions which define the outcome for NaN rather than leaving as UB.
288+
// Therefore, translate NVVM intrinsics to sitofp/uitofp, but not to
289+
// fptosi/fptoui.
299290
case Intrinsic::nvvm_i2d_rn:
300291
case Intrinsic::nvvm_i2f_rn:
301292
case Intrinsic::nvvm_ll2d_rn:

llvm/test/Transforms/InstCombine/NVPTX/nvvm-intrins.ll

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -185,52 +185,63 @@ define float @trunc_float_ftz(float %a) #0 {
185185
}
186186

187187
; Check NVVM intrinsics that correspond to LLVM cast operations.
188+
; fp -> integer casts should not be converted, as the semantics
189+
; for NaN/Inf/Overflow inputs are different.
190+
; Only integer -> fp casts should be converted.
188191

189192
; CHECK-LABEL: @test_d2i
190193
define i32 @test_d2i(double %a) #0 {
191-
; CHECK: fptosi double %a to i32
194+
; CHECK: call i32 @llvm.nvvm.d2i.rz(double %a)
195+
; CHECK-NOT: fptosi double %a to i32
192196
%ret = call i32 @llvm.nvvm.d2i.rz(double %a)
193197
ret i32 %ret
194198
}
195199
; CHECK-LABEL: @test_f2i
196200
define i32 @test_f2i(float %a) #0 {
197-
; CHECK: fptosi float %a to i32
201+
; CHECK: call i32 @llvm.nvvm.f2i.rz(float %a)
202+
; CHECK-NOT: fptosi float %a to i32
198203
%ret = call i32 @llvm.nvvm.f2i.rz(float %a)
199204
ret i32 %ret
200205
}
201206
; CHECK-LABEL: @test_d2ll
202207
define i64 @test_d2ll(double %a) #0 {
203-
; CHECK: fptosi double %a to i64
208+
; CHECK: call i64 @llvm.nvvm.d2ll.rz(double %a)
209+
; CHECK-NOT: fptosi double %a to i64
204210
%ret = call i64 @llvm.nvvm.d2ll.rz(double %a)
205211
ret i64 %ret
206212
}
207213
; CHECK-LABEL: @test_f2ll
208214
define i64 @test_f2ll(float %a) #0 {
209-
; CHECK: fptosi float %a to i64
215+
; CHECK: call i64 @llvm.nvvm.f2ll.rz(float %a)
216+
; CHECK-NOT: fptosi float %a to i64
210217
%ret = call i64 @llvm.nvvm.f2ll.rz(float %a)
211218
ret i64 %ret
212219
}
213220
; CHECK-LABEL: @test_d2ui
214221
define i32 @test_d2ui(double %a) #0 {
215-
; CHECK: fptoui double %a to i32
222+
; CHECK: call i32 @llvm.nvvm.d2ui.rz(double %a)
223+
; CHECK-NOT: fptoui double %a to i32
216224
%ret = call i32 @llvm.nvvm.d2ui.rz(double %a)
217225
ret i32 %ret
218226
}
219227
; CHECK-LABEL: @test_f2ui
220228
define i32 @test_f2ui(float %a) #0 {
221-
; CHECK: fptoui float %a to i32
229+
; CHECK: call i32 @llvm.nvvm.f2ui.rz(float %a)
230+
; CHECK-NOT: fptoui float %a to i32
222231
%ret = call i32 @llvm.nvvm.f2ui.rz(float %a)
223232
ret i32 %ret
224233
}
225234
; CHECK-LABEL: @test_d2ull
226235
define i64 @test_d2ull(double %a) #0 {
227-
; CHECK: fptoui double %a to i64
236+
; CHECK: call i64 @llvm.nvvm.d2ull.rz(double %a)
237+
; CHECK-NOT: fptoui double %a to i64
228238
%ret = call i64 @llvm.nvvm.d2ull.rz(double %a)
229239
ret i64 %ret
230240
}
231241
; CHECK-LABEL: @test_f2ull
232242
define i64 @test_f2ull(float %a) #0 {
233-
; CHECK: fptoui float %a to i64
243+
; CHECK: call i64 @llvm.nvvm.f2ull.rz(float %a)
244+
; CHECK-NOT: fptoui float %a to i64
234245
%ret = call i64 @llvm.nvvm.f2ull.rz(float %a)
235246
ret i64 %ret
236247
}
@@ -497,4 +508,4 @@ declare float @llvm.nvvm.ui2f.rn(i32)
497508
declare double @llvm.nvvm.ull2d.rn(i64)
498509
declare float @llvm.nvvm.ull2f.rn(i64)
499510
declare i32 @llvm.nvvm.fshr.clamp.i32(i32, i32, i32)
500-
declare i32 @llvm.nvvm.fshl.clamp.i32(i32, i32, i32)
511+
declare i32 @llvm.nvvm.fshl.clamp.i32(i32, i32, i32)

0 commit comments

Comments
 (0)