[IR] Allow fast math flags on fptosi, fptoui and sitofp #160475

paperchalice · 2025-09-24T09:28:27Z

This allows optimizations like foldFPToIntToFP in CodeGen to use fast math flags.
uitofp is special because the bit position of reassoc is already taken by nneg; support for it will be added later.

llvmbot · 2025-09-25T00:27:32Z

@llvm/pr-subscribers-llvm-transforms
@llvm/pr-subscribers-hlsl

@llvm/pr-subscribers-backend-amdgpu

Author: None (paperchalice)

Changes

This allows optimizations like FoldIntToFPToInt in CodeGen to use fast math flags.

Patch is 190.48 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/160475.diff

31 Files Affected:

(modified) clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu (+1-1)
(modified) clang/test/CodeGenCUDA/managed-var.cu (+1-1)
(modified) clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl (+3-3)
(modified) clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl (+4-4)
(modified) clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl (+12-12)
(modified) clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl (+8-8)
(modified) clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl (+4-4)
(modified) clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl (+1-1)
(modified) clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl (+3-3)
(modified) clang/test/CodeGenHLSL/builtins/dot2add.hlsl (+6-6)
(modified) clang/test/CodeGenHLSL/builtins/lerp-overloads.hlsl (+24-24)
(modified) clang/test/CodeGenHLSL/builtins/pow-overloads.hlsl (+16-16)
(modified) clang/test/Headers/__clang_hip_math.hip (+40-40)
(modified) clang/test/Headers/openmp_device_math_isnan.cpp (+2-2)
(modified) clang/test/SemaHLSL/VectorOverloadResolution.hlsl (+3-3)
(modified) llvm/docs/LangRef.rst (+8-1)
(modified) llvm/include/llvm/IR/Operator.h (+3)
(modified) llvm/lib/AsmParser/LLParser.cpp (+3-3)
(modified) llvm/test/Assembler/fast-math-flags.ll (+66)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll (+132-132)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll (+20-20)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll (+9-9)
(modified) llvm/test/CodeGen/AMDGPU/divrem24-assume.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll (+2-2)
(modified) llvm/test/Transforms/InstCombine/log-pow.ll (+3-3)
(modified) llvm/test/Transforms/LoopVectorize/PowerPC/vplan-scalarivsext-crash.ll (+2-2)
(modified) llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll (+38-38)
(modified) llvm/test/Transforms/LoopVectorize/float-induction.ll (+11-11)
(modified) llvm/test/Transforms/LoopVectorize/iv_outside_user.ll (+9-9)
(modified) llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll (+1-1)

diff --git a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
index 22c40e6d38ea2..b8819b8e550ea 100644
--- a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
@@ -164,7 +164,7 @@ __device__ double ffp4(double *p, float f) {
 
 __device__ double ffp5(double *p, int i) {
   // FUN-LABEL: @_Z4ffp5Pdi
-  // CHECK: sitofp i32 {{.*}} to double
+  // CHECK: sitofp contract i32 {{.*}} to double
   // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, [[DEFMD]]
   // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, [[DEFMD]]
   __atomic_fetch_sub(p, i, memory_order_relaxed);
diff --git a/clang/test/CodeGenCUDA/managed-var.cu b/clang/test/CodeGenCUDA/managed-var.cu
index 0e7a7be85ac8e..368adece297b7 100644
--- a/clang/test/CodeGenCUDA/managed-var.cu
+++ b/clang/test/CodeGenCUDA/managed-var.cu
@@ -145,7 +145,7 @@ float load3() {
 // HOST:  %4 = ptrtoint ptr %3 to i64
 // HOST:  %5 = sub i64 %4, %1
 // HOST:  %sub.ptr.div = sdiv exact i64 %5, 4
-// HOST:  %conv = sitofp i64 %sub.ptr.div to float
+// HOST:  %conv = sitofp contract i64 %sub.ptr.div to float
 // HOST:  ret float %conv
 float addr_taken2() {
   return (float)reinterpret_cast<long>(&(v2[1].y)-&(v[1].x));
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl
index 512fcd435191a..64fda1301d8cb 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl
@@ -38,7 +38,7 @@ export void call8() {
 // CHECK-NEXT: store <1 x float> splat (float 1.000000e+00), ptr [[B]], align 4
 // CHECK-NEXT: [[L:%.*]] = load <1 x float>, ptr [[B]], align 4
 // CHECK-NEXT: [[VL:%.*]] = extractelement <1 x float> [[L]], i32 0
-// CHECK-NEXT: [[C:%.*]] = fptosi float [[VL]] to i32
+// CHECK-NEXT: [[C:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[VL]] to i32
 // CHECK-NEXT: [[SI:%.*]] = insertelement <4 x i32> poison, i32 [[C]], i64 0
 // CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[SI]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT: store <4 x i32> [[S]], ptr [[A]], align 16
@@ -62,7 +62,7 @@ struct S {
 // CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0
 // CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1
 // CHECK-NEXT: store i32 [[VL]], ptr [[G1]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[VL]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VL]] to float
 // CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
 export void call3() {
   int1 A = {1};
@@ -79,7 +79,7 @@ export void call3() {
 // CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0
 // CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1
 // CHECK-NEXT: store i32 [[VL]], ptr [[G1]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[VL]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VL]] to float
 // CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
 export void call5() {
   int1 A = {1};
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
index ac02ddf5765ed..54b37401466ba 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
@@ -46,7 +46,7 @@ export void call1() {
 // CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x float], ptr [[B]], i32 0, i32 0
 // CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [1 x i32], ptr [[Tmp]], i32 0, i32 0
 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G2]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[L]] to float
 // CHECK-NEXT: store float [[C]], ptr [[G1]], align 4
 export void call2() {
   int A[1] = {0};
@@ -63,7 +63,7 @@ export void call2() {
 // CHECK-NEXT: [[C:%.*]] = load <1 x float>, ptr [[A]], align 4
 // CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x i32], ptr [[B]], i32 0, i32 0
 // CHECK-NEXT: [[V:%.*]] = extractelement <1 x float> [[C]], i64 0
-// CHECK-NEXT: [[C:%.*]] = fptosi float [[V]] to i32
+// CHECK-NEXT: [[C:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[V]] to i32
 // CHECK-NEXT: store i32 [[C]], ptr [[G1]], align 4
 export void call3() {
   float1 A = {1.2};
@@ -84,11 +84,11 @@ export void call3() {
 // CHECK-NEXT: [[VG:%.*]] = getelementptr inbounds [1 x <2 x float>], ptr [[Tmp]], i32 0, i32 0
 // CHECK-NEXT: [[L:%.*]] = load <2 x float>, ptr [[VG]], align 8
 // CHECK-NEXT: [[VL:%.*]] = extractelement <2 x float> [[L]], i32 0
-// CHECK-NEXT: [[C:%.*]] = fptosi float [[VL]] to i32
+// CHECK-NEXT: [[C:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[VL]] to i32
 // CHECK-NEXT: store i32 [[C]], ptr [[G1]], align 4
 // CHECK-NEXT: [[L4:%.*]] = load <2 x float>, ptr [[VG]], align 8
 // CHECK-NEXT: [[VL5:%.*]] = extractelement <2 x float> [[L4]], i32 1
-// CHECK-NEXT: [[C6:%.*]] = fptosi float [[VL5]] to i32
+// CHECK-NEXT: [[C6:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[VL5]] to i32
 // CHECK-NEXT: store i32 [[C6]], ptr [[G2]], align 4
 export void call5() {
   float2 A[1] = {{1.2,3.4}};
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
index 7e83e5f168538..7633fd361a274 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
@@ -98,7 +98,7 @@ TwoFloats case2() {
 // CHECK-NEXT:    store i32 [[VAL]], ptr [[VAL_ADDR]], align 4
 // CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[VAL_ADDR]], align 4
-// CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[TMP0]] to float
 // CHECK-NEXT:    store float [[CONV]], ptr [[X]], align 1
 // CHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 1
 // CHECK-NEXT:    store float 2.000000e+00, ptr [[Y]], align 1
@@ -119,12 +119,12 @@ TwoFloats case3(int Val) {
 // CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[TWOVALS_ADDR]], align 8
 // CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <2 x i32> [[TMP0]], i64 0
-// CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[VECEXT]] to float
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VECEXT]] to float
 // CHECK-NEXT:    store float [[CONV]], ptr [[X]], align 1
 // CHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 1
 // CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[TWOVALS_ADDR]], align 8
 // CHECK-NEXT:    [[VECEXT1:%.*]] = extractelement <2 x i32> [[TMP1]], i64 1
-// CHECK-NEXT:    [[CONV2:%.*]] = sitofp i32 [[VECEXT1]] to float
+// CHECK-NEXT:    [[CONV2:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VECEXT1]] to float
 // CHECK-NEXT:    store float [[CONV2]], ptr [[Y]], align 1
 // CHECK-NEXT:    ret void
 //
@@ -162,12 +162,12 @@ TwoInts case5(int2 TwoVals) {
 // CHECK-NEXT:    [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[TF4]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[X]], align 1
-// CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
+// CHECK-NEXT:    [[CONV:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[TMP0]] to i32
 // CHECK-NEXT:    store i32 [[CONV]], ptr [[Z]], align 1
 // CHECK-NEXT:    [[W:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[AGG_RESULT]], i32 0, i32 1
 // CHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[TF4]], i32 0, i32 1
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[Y]], align 1
-// CHECK-NEXT:    [[CONV1:%.*]] = fptosi float [[TMP1]] to i32
+// CHECK-NEXT:    [[CONV1:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[TMP1]] to i32
 // CHECK-NEXT:    store i32 [[CONV1]], ptr [[W]], align 1
 // CHECK-NEXT:    ret void
 //
@@ -202,7 +202,7 @@ TwoInts case6(TwoFloats TF4) {
 // CHECK-NEXT:    store i32 [[TMP4]], ptr [[TAILSTATE]], align 1
 // CHECK-NEXT:    [[HAIRCOUNT:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[AGG_RESULT]], i32 0, i32 2
 // CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[VAL_ADDR]], align 4
-// CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP5]] to float
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[TMP5]] to float
 // CHECK-NEXT:    store float [[CONV]], ptr [[HAIRCOUNT]], align 1
 // CHECK-NEXT:    [[EARDIRECTION:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[AGG_RESULT]], i32 0, i32 3
 // CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[TF1]], i32 0, i32 0
@@ -272,7 +272,7 @@ Doggo case7(TwoInts TI1, TwoInts TI2, int Val, TwoFloats TF1, TwoFloats TF2,
 // CHECK-NEXT:    [[COUNTER:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[AGG_RESULT]], i32 0, i32 2
 // CHECK-NEXT:    [[HAIRCOUNT:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[D1]], i32 0, i32 2
 // CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[HAIRCOUNT]], align 1
-// CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP5]] to i64
+// CHECK-NEXT:    [[CONV:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[TMP5]] to i64
 // CHECK-NEXT:    store i64 [[CONV]], ptr [[COUNTER]], align 1
 // CHECK-NEXT:    [[LEFTDIR:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[AGG_RESULT]], i32 0, i32 3
 // CHECK-NEXT:    [[EARDIRECTION:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[D1]], i32 0, i32 3
@@ -428,7 +428,7 @@ AnimalBits case8(Doggo D1) {
 // CHECK-NEXT:    [[HAIRCOUNT58:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[ARRAYINIT_ELEMENT44]], i32 0, i32 2
 // CHECK-NEXT:    [[COUNTER:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 2
 // CHECK-NEXT:    [[TMP19:%.*]] = load i64, ptr [[COUNTER]], align 1
-// CHECK-NEXT:    [[CONV:%.*]] = sitofp i64 [[TMP19]] to float
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 [[TMP19]] to float
 // CHECK-NEXT:    store float [[CONV]], ptr [[HAIRCOUNT58]], align 1
 // CHECK-NEXT:    [[EARDIRECTION59:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[ARRAYINIT_ELEMENT44]], i32 0, i32 3
 // CHECK-NEXT:    [[LEFTDIR:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 3
@@ -563,7 +563,7 @@ AnimalBits case8(Doggo D1) {
 // CHECK-NEXT:    [[HAIRCOUNT149:%.*]] = getelementptr inbounds nuw [[STRUCT_KITTEH]], ptr [[ARRAYINIT_ELEMENT133]], i32 0, i32 2
 // CHECK-NEXT:    [[COUNTER150:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 2
 // CHECK-NEXT:    [[TMP47:%.*]] = load i64, ptr [[COUNTER150]], align 1
-// CHECK-NEXT:    [[CONV151:%.*]] = sitofp i64 [[TMP47]] to float
+// CHECK-NEXT:    [[CONV151:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 [[TMP47]] to float
 // CHECK-NEXT:    store float [[CONV151]], ptr [[HAIRCOUNT149]], align 1
 // CHECK-NEXT:    [[CLAWS152:%.*]] = getelementptr inbounds nuw [[STRUCT_KITTEH]], ptr [[ARRAYINIT_ELEMENT133]], i32 0, i32 3
 // CHECK-NEXT:    [[LEFTDIR153:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 3
@@ -698,7 +698,7 @@ AnimalBits case8(Doggo D1) {
 // CHECK-NEXT:    [[HAIRCOUNT246:%.*]] = getelementptr inbounds nuw [[STRUCT_KITTEH]], ptr [[ARRAYINIT_ELEMENT230]], i32 0, i32 2
 // CHECK-NEXT:    [[COUNTER247:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 2
 // CHECK-NEXT:    [[TMP75:%.*]] = load i64, ptr [[COUNTER247]], align 1
-// CHECK-NEXT:    [[CONV248:%.*]] = sitofp i64 [[TMP75]] to float
+// CHECK-NEXT:    [[CONV248:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 [[TMP75]] to float
 // CHECK-NEXT:    store float [[CONV248]], ptr [[HAIRCOUNT246]], align 1
 // CHECK-NEXT:    [[CLAWS249:%.*]] = getelementptr inbounds nuw [[STRUCT_KITTEH]], ptr [[ARRAYINIT_ELEMENT230]], i32 0, i32 3
 // CHECK-NEXT:    [[LEFTDIR250:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 3
@@ -888,13 +888,13 @@ TwoInts case14(SlicyBits SB) {
 // CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[BF_LOAD:%.*]] = load i8, ptr [[SB]], align 1
 // CHECK-NEXT:    [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32
-// CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[BF_CAST]] to float
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[BF_CAST]] to float
 // CHECK-NEXT:    store float [[CONV]], ptr [[X]], align 1
 // CHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 1
 // CHECK-NEXT:    [[W:%.*]] = getelementptr inbounds nuw [[STRUCT_SLICYBITS]], ptr [[SB]], i32 0, i32 1
 // CHECK-NEXT:    [[BF_LOAD1:%.*]] = load i8, ptr [[W]], align 1
 // CHECK-NEXT:    [[BF_CAST2:%.*]] = sext i8 [[BF_LOAD1]] to i32
-// CHECK-NEXT:    [[CONV3:%.*]] = sitofp i32 [[BF_CAST2]] to float
+// CHECK-NEXT:    [[CONV3:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[BF_CAST2]] to float
 // CHECK-NEXT:    store float [[CONV3]], ptr [[Y]], align 1
 // CHECK-NEXT:    ret void
 //
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
index d0ba8f447b732..6f42b5611ce41 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
@@ -13,14 +13,14 @@ void trunc_Param(inout int X) {}
 // CHECK: [[F:%.*]] = alloca float
 // CHECK: [[ArgTmp:%.*]] = alloca i32
 // CHECK: [[FVal:%.*]] = load float, ptr {{.*}}
-// CHECK: [[IVal:%.*]] = fptosi float [[FVal]] to i32
+// CHECK: [[IVal:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[FVal]] to i32
 // CHECK: store i32 [[IVal]], ptr [[ArgTmp]]
 // CHECK: call void {{.*}}trunc_Param{{.*}}(ptr noalias noundef nonnull align 4 dereferenceable(4) [[ArgTmp]])
 // CHECK: [[IRet:%.*]] = load i32, ptr [[ArgTmp]]
-// CHECK: [[FRet:%.*]] = sitofp i32 [[IRet]] to float
+// CHECK: [[FRet:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[IRet]] to float
 // CHECK: store float [[FRet]], ptr [[F]]
-// OPT: [[IVal:%.*]] = fptosi float {{.*}} to i32
-// OPT: [[FVal:%.*]] = sitofp i32 [[IVal]] to float
+// OPT: [[IVal:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float {{.*}} to i32
+// OPT: [[FVal:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[IVal]] to float
 // OPT: ret float [[FVal]]
 export float case1(float F) {
   trunc_Param(F);
@@ -202,15 +202,15 @@ void trunc_vec(inout int3 V) {}
 // CHECK: [[V:%.*]] = alloca <3 x float>
 // CHECK: [[Tmp:%.*]] = alloca <3 x i32>
 // CHECK: [[FVal:%.*]] = load <3 x float>, ptr [[V]]
-// CHECK: [[IVal:%.*]] = fptosi <3 x float> [[FVal]] to <3 x i32>
+// CHECK: [[IVal:%.*]] = fptosi reassoc nnan ninf nsz arcp afn <3 x float> [[FVal]] to <3 x i32>
 // CHECK: store <3 x i32> [[IVal]], ptr [[Tmp]]
 // CHECK: call void {{.*}}trunc_vec{{.*}}(ptr noalias noundef nonnull align 16 dereferenceable(16) [[Tmp]])
 // CHECK: [[IRet:%.*]] = load <3 x i32>, ptr [[Tmp]]
-// CHECK: [[FRet:%.*]] = sitofp <3 x i32> [[IRet]] to <3 x float>
+// CHECK: [[FRet:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i32> [[IRet]] to <3 x float>
 // CHECK: store <3 x float> [[FRet]], ptr [[V]]
 
-// OPT: [[IVal:%.*]] = fptosi <3 x float> {{.*}} to <3 x i32>
-// OPT: [[FVal:%.*]] = sitofp <3 x i32> [[IVal]] to <3 x float>
+// OPT: [[IVal:%.*]] = fptosi reassoc nnan ninf nsz arcp afn <3 x float> {{.*}} to <3 x i32>
+// OPT: [[FVal:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i32> [[IVal]] to <3 x float>
 // OPT: ret <3 x float> [[FVal]]
 
 export float3 case8(float3 V) {
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl
index 81b9f5b28cc7e..e4229951e58b5 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl
@@ -32,7 +32,7 @@ export void call0() {
 // CHECK-NEXT: [[VL:%.*]] = extractelement <2 x i32> [[L]], i64 0
 // CHECK-NEXT: store i32 [[VL]], ptr [[G1]], align 4
 // CHECK-NEXT: [[VL2:%.*]] = extractelement <2 x i32> [[L]], i64 1
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[VL2]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VL2]] to float
 // CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
 export void call1() {
   int2 A = {1,2};
@@ -54,7 +54,7 @@ export void call1() {
 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G3]], align 4
 // CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4
 // CHECK-NEXT: [[L4:%.*]] = load i32, ptr [[G4]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L4]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[L4]] to float
 // CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
 export void call2() {
   int A[2] = {1,2};
@@ -104,7 +104,7 @@ export void call6() {
 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G3]], align 4
 // CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4
 // CHECK-NEXT: [[L4:%.*]] = load i32, ptr [[G4]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L4]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[L4]] to float
 // CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
 export void call7() {
   int A[2] = {1,2};
@@ -132,7 +132,7 @@ struct T {
 // CHECK-NEXT: %load = load i32, ptr %gep2, align 4
 // CHECK-NEXT: store i32 %load, ptr %gep, align 4
 // CHECK-NEXT: %load5 = load i32, ptr %gep3, align 4
-// CHECK-NEXT: %conv = sitofp i32 %load5 to float
+// CHECK-NEXT: %conv = sitofp reassoc nnan ninf nsz arcp afn i32 %load5 to float
 // CHECK-NEXT: store float %conv, ptr %gep1, align 4
 export void call8() {
   T t = {1,2,3};
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
index 253b38a7ca072..a5843b0a95642 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
@@ -40,7 +40,7 @@ struct S {
 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G1]], align 4
 // CHECK-NEXT: [[C:%.*]] = insertelement <2 x i32> [[B]], i32 [[L]], i64 0
 // CHECK-NEXT: [[L2:%.*]] = load float, ptr [[G2]], align 4
-// CHECK-NEXT: [[D:%.*]] = fptosi float [[L2]] to i32
+// CHECK-NEXT: [[D:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[L2]] to i32
 // CHECK-NEXT: [[E:%.*]] = insertelement <2 x i32> [[C]], i32 [[D]], i64 1
 // CHECK-NEXT: store <2 x i32> [[E]], ptr [[A]], align 8
 export void call3() {
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl
index 6770efefe94fe..3e8c06ed074af 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl
@@ -43,7 +43,7 @@ void d4_to_f2() {
 // CHECK: [[i2:%.*]] = alloca <2 x i32>
 // CHECK: store <2 x float> splat (float 4.000000e+00), ptr [[f2]]
 // CHECK: [[vecf2:%.*]] = load <2 x float>, ptr [[f2]]
-// CHECK: [[veci2:%.*]] = fptosi <2 x float> [[vecf2]] to <2 x i32>
+// CHECK: [[veci2:%.*]] = fptosi reassoc nnan ninf nsz arcp afn <2 x float> [[vecf2]] to <2 x i32>
 // CHECK: store <2 x i32> [[veci2]], ptr [[i2]]
 void f2_to_i2() {
   vector<float,2> f2 = 4.0;
@@ -55,7 +55,7 @@ void f2_to_i2() {
 // CHECK: [[i2:%.*]] = alloca <2 x i32>
 // CHECK: store <4 x double> splat (double 5.000000e+00), ptr [[d4]]
 // CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]]
-// CHECK: [[veci4:%.*]] = fptosi <4 x double> [[vecd4]] to <4 x i32>
+// CHECK: [[veci4:%.*]] = fptosi reassoc nnan ninf nsz arcp afn <4 x double> [[vecd4]] to <4 x i32>
 // CHECK: [[ve...
[truncated]

llvmbot · 2025-09-25T00:27:33Z

@llvm/pr-subscribers-llvm-ir

Author: None (paperchalice)

Changes

This allows optimizations like FoldIntToFPToInt in CodeGen to use fast math flags.

Patch is 190.48 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/160475.diff

31 Files Affected:

(modified) clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu (+1-1)
(modified) clang/test/CodeGenCUDA/managed-var.cu (+1-1)
(modified) clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl (+3-3)
(modified) clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl (+4-4)
(modified) clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl (+12-12)
(modified) clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl (+8-8)
(modified) clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl (+4-4)
(modified) clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl (+1-1)
(modified) clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl (+3-3)
(modified) clang/test/CodeGenHLSL/builtins/dot2add.hlsl (+6-6)
(modified) clang/test/CodeGenHLSL/builtins/lerp-overloads.hlsl (+24-24)
(modified) clang/test/CodeGenHLSL/builtins/pow-overloads.hlsl (+16-16)
(modified) clang/test/Headers/__clang_hip_math.hip (+40-40)
(modified) clang/test/Headers/openmp_device_math_isnan.cpp (+2-2)
(modified) clang/test/SemaHLSL/VectorOverloadResolution.hlsl (+3-3)
(modified) llvm/docs/LangRef.rst (+8-1)
(modified) llvm/include/llvm/IR/Operator.h (+3)
(modified) llvm/lib/AsmParser/LLParser.cpp (+3-3)
(modified) llvm/test/Assembler/fast-math-flags.ll (+66)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll (+132-132)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll (+20-20)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll (+9-9)
(modified) llvm/test/CodeGen/AMDGPU/divrem24-assume.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll (+2-2)
(modified) llvm/test/Transforms/InstCombine/log-pow.ll (+3-3)
(modified) llvm/test/Transforms/LoopVectorize/PowerPC/vplan-scalarivsext-crash.ll (+2-2)
(modified) llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll (+38-38)
(modified) llvm/test/Transforms/LoopVectorize/float-induction.ll (+11-11)
(modified) llvm/test/Transforms/LoopVectorize/iv_outside_user.ll (+9-9)
(modified) llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll (+1-1)

diff --git a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
index 22c40e6d38ea2..b8819b8e550ea 100644
--- a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
@@ -164,7 +164,7 @@ __device__ double ffp4(double *p, float f) {
 
 __device__ double ffp5(double *p, int i) {
   // FUN-LABEL: @_Z4ffp5Pdi
-  // CHECK: sitofp i32 {{.*}} to double
+  // CHECK: sitofp contract i32 {{.*}} to double
   // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, [[DEFMD]]
   // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, [[DEFMD]]
   __atomic_fetch_sub(p, i, memory_order_relaxed);
diff --git a/clang/test/CodeGenCUDA/managed-var.cu b/clang/test/CodeGenCUDA/managed-var.cu
index 0e7a7be85ac8e..368adece297b7 100644
--- a/clang/test/CodeGenCUDA/managed-var.cu
+++ b/clang/test/CodeGenCUDA/managed-var.cu
@@ -145,7 +145,7 @@ float load3() {
 // HOST:  %4 = ptrtoint ptr %3 to i64
 // HOST:  %5 = sub i64 %4, %1
 // HOST:  %sub.ptr.div = sdiv exact i64 %5, 4
-// HOST:  %conv = sitofp i64 %sub.ptr.div to float
+// HOST:  %conv = sitofp contract i64 %sub.ptr.div to float
 // HOST:  ret float %conv
 float addr_taken2() {
   return (float)reinterpret_cast<long>(&(v2[1].y)-&(v[1].x));
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl
index 512fcd435191a..64fda1301d8cb 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl
@@ -38,7 +38,7 @@ export void call8() {
 // CHECK-NEXT: store <1 x float> splat (float 1.000000e+00), ptr [[B]], align 4
 // CHECK-NEXT: [[L:%.*]] = load <1 x float>, ptr [[B]], align 4
 // CHECK-NEXT: [[VL:%.*]] = extractelement <1 x float> [[L]], i32 0
-// CHECK-NEXT: [[C:%.*]] = fptosi float [[VL]] to i32
+// CHECK-NEXT: [[C:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[VL]] to i32
 // CHECK-NEXT: [[SI:%.*]] = insertelement <4 x i32> poison, i32 [[C]], i64 0
 // CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[SI]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT: store <4 x i32> [[S]], ptr [[A]], align 16
@@ -62,7 +62,7 @@ struct S {
 // CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0
 // CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1
 // CHECK-NEXT: store i32 [[VL]], ptr [[G1]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[VL]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VL]] to float
 // CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
 export void call3() {
   int1 A = {1};
@@ -79,7 +79,7 @@ export void call3() {
 // CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0
 // CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1
 // CHECK-NEXT: store i32 [[VL]], ptr [[G1]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[VL]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VL]] to float
 // CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
 export void call5() {
   int1 A = {1};
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
index ac02ddf5765ed..54b37401466ba 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
@@ -46,7 +46,7 @@ export void call1() {
 // CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x float], ptr [[B]], i32 0, i32 0
 // CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [1 x i32], ptr [[Tmp]], i32 0, i32 0
 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G2]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[L]] to float
 // CHECK-NEXT: store float [[C]], ptr [[G1]], align 4
 export void call2() {
   int A[1] = {0};
@@ -63,7 +63,7 @@ export void call2() {
 // CHECK-NEXT: [[C:%.*]] = load <1 x float>, ptr [[A]], align 4
 // CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x i32], ptr [[B]], i32 0, i32 0
 // CHECK-NEXT: [[V:%.*]] = extractelement <1 x float> [[C]], i64 0
-// CHECK-NEXT: [[C:%.*]] = fptosi float [[V]] to i32
+// CHECK-NEXT: [[C:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[V]] to i32
 // CHECK-NEXT: store i32 [[C]], ptr [[G1]], align 4
 export void call3() {
   float1 A = {1.2};
@@ -84,11 +84,11 @@ export void call3() {
 // CHECK-NEXT: [[VG:%.*]] = getelementptr inbounds [1 x <2 x float>], ptr [[Tmp]], i32 0, i32 0
 // CHECK-NEXT: [[L:%.*]] = load <2 x float>, ptr [[VG]], align 8
 // CHECK-NEXT: [[VL:%.*]] = extractelement <2 x float> [[L]], i32 0
-// CHECK-NEXT: [[C:%.*]] = fptosi float [[VL]] to i32
+// CHECK-NEXT: [[C:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[VL]] to i32
 // CHECK-NEXT: store i32 [[C]], ptr [[G1]], align 4
 // CHECK-NEXT: [[L4:%.*]] = load <2 x float>, ptr [[VG]], align 8
 // CHECK-NEXT: [[VL5:%.*]] = extractelement <2 x float> [[L4]], i32 1
-// CHECK-NEXT: [[C6:%.*]] = fptosi float [[VL5]] to i32
+// CHECK-NEXT: [[C6:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[VL5]] to i32
 // CHECK-NEXT: store i32 [[C6]], ptr [[G2]], align 4
 export void call5() {
   float2 A[1] = {{1.2,3.4}};
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
index 7e83e5f168538..7633fd361a274 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
@@ -98,7 +98,7 @@ TwoFloats case2() {
 // CHECK-NEXT:    store i32 [[VAL]], ptr [[VAL_ADDR]], align 4
 // CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[VAL_ADDR]], align 4
-// CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[TMP0]] to float
 // CHECK-NEXT:    store float [[CONV]], ptr [[X]], align 1
 // CHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 1
 // CHECK-NEXT:    store float 2.000000e+00, ptr [[Y]], align 1
@@ -119,12 +119,12 @@ TwoFloats case3(int Val) {
 // CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[TWOVALS_ADDR]], align 8
 // CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <2 x i32> [[TMP0]], i64 0
-// CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[VECEXT]] to float
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VECEXT]] to float
 // CHECK-NEXT:    store float [[CONV]], ptr [[X]], align 1
 // CHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 1
 // CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[TWOVALS_ADDR]], align 8
 // CHECK-NEXT:    [[VECEXT1:%.*]] = extractelement <2 x i32> [[TMP1]], i64 1
-// CHECK-NEXT:    [[CONV2:%.*]] = sitofp i32 [[VECEXT1]] to float
+// CHECK-NEXT:    [[CONV2:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VECEXT1]] to float
 // CHECK-NEXT:    store float [[CONV2]], ptr [[Y]], align 1
 // CHECK-NEXT:    ret void
 //
@@ -162,12 +162,12 @@ TwoInts case5(int2 TwoVals) {
 // CHECK-NEXT:    [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[TF4]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[X]], align 1
-// CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
+// CHECK-NEXT:    [[CONV:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[TMP0]] to i32
 // CHECK-NEXT:    store i32 [[CONV]], ptr [[Z]], align 1
 // CHECK-NEXT:    [[W:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[AGG_RESULT]], i32 0, i32 1
 // CHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[TF4]], i32 0, i32 1
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[Y]], align 1
-// CHECK-NEXT:    [[CONV1:%.*]] = fptosi float [[TMP1]] to i32
+// CHECK-NEXT:    [[CONV1:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[TMP1]] to i32
 // CHECK-NEXT:    store i32 [[CONV1]], ptr [[W]], align 1
 // CHECK-NEXT:    ret void
 //
@@ -202,7 +202,7 @@ TwoInts case6(TwoFloats TF4) {
 // CHECK-NEXT:    store i32 [[TMP4]], ptr [[TAILSTATE]], align 1
 // CHECK-NEXT:    [[HAIRCOUNT:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[AGG_RESULT]], i32 0, i32 2
 // CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[VAL_ADDR]], align 4
-// CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP5]] to float
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[TMP5]] to float
 // CHECK-NEXT:    store float [[CONV]], ptr [[HAIRCOUNT]], align 1
 // CHECK-NEXT:    [[EARDIRECTION:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[AGG_RESULT]], i32 0, i32 3
 // CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[TF1]], i32 0, i32 0
@@ -272,7 +272,7 @@ Doggo case7(TwoInts TI1, TwoInts TI2, int Val, TwoFloats TF1, TwoFloats TF2,
 // CHECK-NEXT:    [[COUNTER:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[AGG_RESULT]], i32 0, i32 2
 // CHECK-NEXT:    [[HAIRCOUNT:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[D1]], i32 0, i32 2
 // CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[HAIRCOUNT]], align 1
-// CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP5]] to i64
+// CHECK-NEXT:    [[CONV:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[TMP5]] to i64
 // CHECK-NEXT:    store i64 [[CONV]], ptr [[COUNTER]], align 1
 // CHECK-NEXT:    [[LEFTDIR:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[AGG_RESULT]], i32 0, i32 3
 // CHECK-NEXT:    [[EARDIRECTION:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[D1]], i32 0, i32 3
@@ -428,7 +428,7 @@ AnimalBits case8(Doggo D1) {
 // CHECK-NEXT:    [[HAIRCOUNT58:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[ARRAYINIT_ELEMENT44]], i32 0, i32 2
 // CHECK-NEXT:    [[COUNTER:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 2
 // CHECK-NEXT:    [[TMP19:%.*]] = load i64, ptr [[COUNTER]], align 1
-// CHECK-NEXT:    [[CONV:%.*]] = sitofp i64 [[TMP19]] to float
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 [[TMP19]] to float
 // CHECK-NEXT:    store float [[CONV]], ptr [[HAIRCOUNT58]], align 1
 // CHECK-NEXT:    [[EARDIRECTION59:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[ARRAYINIT_ELEMENT44]], i32 0, i32 3
 // CHECK-NEXT:    [[LEFTDIR:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 3
@@ -563,7 +563,7 @@ AnimalBits case8(Doggo D1) {
 // CHECK-NEXT:    [[HAIRCOUNT149:%.*]] = getelementptr inbounds nuw [[STRUCT_KITTEH]], ptr [[ARRAYINIT_ELEMENT133]], i32 0, i32 2
 // CHECK-NEXT:    [[COUNTER150:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 2
 // CHECK-NEXT:    [[TMP47:%.*]] = load i64, ptr [[COUNTER150]], align 1
-// CHECK-NEXT:    [[CONV151:%.*]] = sitofp i64 [[TMP47]] to float
+// CHECK-NEXT:    [[CONV151:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 [[TMP47]] to float
 // CHECK-NEXT:    store float [[CONV151]], ptr [[HAIRCOUNT149]], align 1
 // CHECK-NEXT:    [[CLAWS152:%.*]] = getelementptr inbounds nuw [[STRUCT_KITTEH]], ptr [[ARRAYINIT_ELEMENT133]], i32 0, i32 3
 // CHECK-NEXT:    [[LEFTDIR153:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 3
@@ -698,7 +698,7 @@ AnimalBits case8(Doggo D1) {
 // CHECK-NEXT:    [[HAIRCOUNT246:%.*]] = getelementptr inbounds nuw [[STRUCT_KITTEH]], ptr [[ARRAYINIT_ELEMENT230]], i32 0, i32 2
 // CHECK-NEXT:    [[COUNTER247:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 2
 // CHECK-NEXT:    [[TMP75:%.*]] = load i64, ptr [[COUNTER247]], align 1
-// CHECK-NEXT:    [[CONV248:%.*]] = sitofp i64 [[TMP75]] to float
+// CHECK-NEXT:    [[CONV248:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 [[TMP75]] to float
 // CHECK-NEXT:    store float [[CONV248]], ptr [[HAIRCOUNT246]], align 1
 // CHECK-NEXT:    [[CLAWS249:%.*]] = getelementptr inbounds nuw [[STRUCT_KITTEH]], ptr [[ARRAYINIT_ELEMENT230]], i32 0, i32 3
 // CHECK-NEXT:    [[LEFTDIR250:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 3
@@ -888,13 +888,13 @@ TwoInts case14(SlicyBits SB) {
 // CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[BF_LOAD:%.*]] = load i8, ptr [[SB]], align 1
 // CHECK-NEXT:    [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32
-// CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[BF_CAST]] to float
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[BF_CAST]] to float
 // CHECK-NEXT:    store float [[CONV]], ptr [[X]], align 1
 // CHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 1
 // CHECK-NEXT:    [[W:%.*]] = getelementptr inbounds nuw [[STRUCT_SLICYBITS]], ptr [[SB]], i32 0, i32 1
 // CHECK-NEXT:    [[BF_LOAD1:%.*]] = load i8, ptr [[W]], align 1
 // CHECK-NEXT:    [[BF_CAST2:%.*]] = sext i8 [[BF_LOAD1]] to i32
-// CHECK-NEXT:    [[CONV3:%.*]] = sitofp i32 [[BF_CAST2]] to float
+// CHECK-NEXT:    [[CONV3:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[BF_CAST2]] to float
 // CHECK-NEXT:    store float [[CONV3]], ptr [[Y]], align 1
 // CHECK-NEXT:    ret void
 //
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
index d0ba8f447b732..6f42b5611ce41 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
@@ -13,14 +13,14 @@ void trunc_Param(inout int X) {}
 // CHECK: [[F:%.*]] = alloca float
 // CHECK: [[ArgTmp:%.*]] = alloca i32
 // CHECK: [[FVal:%.*]] = load float, ptr {{.*}}
-// CHECK: [[IVal:%.*]] = fptosi float [[FVal]] to i32
+// CHECK: [[IVal:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[FVal]] to i32
 // CHECK: store i32 [[IVal]], ptr [[ArgTmp]]
 // CHECK: call void {{.*}}trunc_Param{{.*}}(ptr noalias noundef nonnull align 4 dereferenceable(4) [[ArgTmp]])
 // CHECK: [[IRet:%.*]] = load i32, ptr [[ArgTmp]]
-// CHECK: [[FRet:%.*]] = sitofp i32 [[IRet]] to float
+// CHECK: [[FRet:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[IRet]] to float
 // CHECK: store float [[FRet]], ptr [[F]]
-// OPT: [[IVal:%.*]] = fptosi float {{.*}} to i32
-// OPT: [[FVal:%.*]] = sitofp i32 [[IVal]] to float
+// OPT: [[IVal:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float {{.*}} to i32
+// OPT: [[FVal:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[IVal]] to float
 // OPT: ret float [[FVal]]
 export float case1(float F) {
   trunc_Param(F);
@@ -202,15 +202,15 @@ void trunc_vec(inout int3 V) {}
 // CHECK: [[V:%.*]] = alloca <3 x float>
 // CHECK: [[Tmp:%.*]] = alloca <3 x i32>
 // CHECK: [[FVal:%.*]] = load <3 x float>, ptr [[V]]
-// CHECK: [[IVal:%.*]] = fptosi <3 x float> [[FVal]] to <3 x i32>
+// CHECK: [[IVal:%.*]] = fptosi reassoc nnan ninf nsz arcp afn <3 x float> [[FVal]] to <3 x i32>
 // CHECK: store <3 x i32> [[IVal]], ptr [[Tmp]]
 // CHECK: call void {{.*}}trunc_vec{{.*}}(ptr noalias noundef nonnull align 16 dereferenceable(16) [[Tmp]])
 // CHECK: [[IRet:%.*]] = load <3 x i32>, ptr [[Tmp]]
-// CHECK: [[FRet:%.*]] = sitofp <3 x i32> [[IRet]] to <3 x float>
+// CHECK: [[FRet:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i32> [[IRet]] to <3 x float>
 // CHECK: store <3 x float> [[FRet]], ptr [[V]]
 
-// OPT: [[IVal:%.*]] = fptosi <3 x float> {{.*}} to <3 x i32>
-// OPT: [[FVal:%.*]] = sitofp <3 x i32> [[IVal]] to <3 x float>
+// OPT: [[IVal:%.*]] = fptosi reassoc nnan ninf nsz arcp afn <3 x float> {{.*}} to <3 x i32>
+// OPT: [[FVal:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i32> [[IVal]] to <3 x float>
 // OPT: ret <3 x float> [[FVal]]
 
 export float3 case8(float3 V) {
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl
index 81b9f5b28cc7e..e4229951e58b5 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl
@@ -32,7 +32,7 @@ export void call0() {
 // CHECK-NEXT: [[VL:%.*]] = extractelement <2 x i32> [[L]], i64 0
 // CHECK-NEXT: store i32 [[VL]], ptr [[G1]], align 4
 // CHECK-NEXT: [[VL2:%.*]] = extractelement <2 x i32> [[L]], i64 1
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[VL2]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VL2]] to float
 // CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
 export void call1() {
   int2 A = {1,2};
@@ -54,7 +54,7 @@ export void call1() {
 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G3]], align 4
 // CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4
 // CHECK-NEXT: [[L4:%.*]] = load i32, ptr [[G4]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L4]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[L4]] to float
 // CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
 export void call2() {
   int A[2] = {1,2};
@@ -104,7 +104,7 @@ export void call6() {
 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G3]], align 4
 // CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4
 // CHECK-NEXT: [[L4:%.*]] = load i32, ptr [[G4]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L4]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[L4]] to float
 // CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
 export void call7() {
   int A[2] = {1,2};
@@ -132,7 +132,7 @@ struct T {
 // CHECK-NEXT: %load = load i32, ptr %gep2, align 4
 // CHECK-NEXT: store i32 %load, ptr %gep, align 4
 // CHECK-NEXT: %load5 = load i32, ptr %gep3, align 4
-// CHECK-NEXT: %conv = sitofp i32 %load5 to float
+// CHECK-NEXT: %conv = sitofp reassoc nnan ninf nsz arcp afn i32 %load5 to float
 // CHECK-NEXT: store float %conv, ptr %gep1, align 4
 export void call8() {
   T t = {1,2,3};
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
index 253b38a7ca072..a5843b0a95642 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
@@ -40,7 +40,7 @@ struct S {
 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G1]], align 4
 // CHECK-NEXT: [[C:%.*]] = insertelement <2 x i32> [[B]], i32 [[L]], i64 0
 // CHECK-NEXT: [[L2:%.*]] = load float, ptr [[G2]], align 4
-// CHECK-NEXT: [[D:%.*]] = fptosi float [[L2]] to i32
+// CHECK-NEXT: [[D:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[L2]] to i32
 // CHECK-NEXT: [[E:%.*]] = insertelement <2 x i32> [[C]], i32 [[D]], i64 1
 // CHECK-NEXT: store <2 x i32> [[E]], ptr [[A]], align 8
 export void call3() {
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl
index 6770efefe94fe..3e8c06ed074af 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl
@@ -43,7 +43,7 @@ void d4_to_f2() {
 // CHECK: [[i2:%.*]] = alloca <2 x i32>
 // CHECK: store <2 x float> splat (float 4.000000e+00), ptr [[f2]]
 // CHECK: [[vecf2:%.*]] = load <2 x float>, ptr [[f2]]
-// CHECK: [[veci2:%.*]] = fptosi <2 x float> [[vecf2]] to <2 x i32>
+// CHECK: [[veci2:%.*]] = fptosi reassoc nnan ninf nsz arcp afn <2 x float> [[vecf2]] to <2 x i32>
 // CHECK: store <2 x i32> [[veci2]], ptr [[i2]]
 void f2_to_i2() {
   vector<float,2> f2 = 4.0;
@@ -55,7 +55,7 @@ void f2_to_i2() {
 // CHECK: [[i2:%.*]] = alloca <2 x i32>
 // CHECK: store <4 x double> splat (double 5.000000e+00), ptr [[d4]]
 // CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]]
-// CHECK: [[veci4:%.*]] = fptosi <4 x double> [[vecd4]] to <4 x i32>
+// CHECK: [[veci4:%.*]] = fptosi reassoc nnan ninf nsz arcp afn <4 x double> [[vecd4]] to <4 x i32>
 // CHECK: [[ve...
[truncated]

llvmbot · 2025-09-25T00:27:33Z

@llvm/pr-subscribers-backend-powerpc

Author: None (paperchalice)

Changes

This allows optimizations like FoldIntToFPToInt in CodeGen to use fast math flags.

Patch is 190.48 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/160475.diff

31 Files Affected:

(modified) clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu (+1-1)
(modified) clang/test/CodeGenCUDA/managed-var.cu (+1-1)
(modified) clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl (+3-3)
(modified) clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl (+4-4)
(modified) clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl (+12-12)
(modified) clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl (+8-8)
(modified) clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl (+4-4)
(modified) clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl (+1-1)
(modified) clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl (+3-3)
(modified) clang/test/CodeGenHLSL/builtins/dot2add.hlsl (+6-6)
(modified) clang/test/CodeGenHLSL/builtins/lerp-overloads.hlsl (+24-24)
(modified) clang/test/CodeGenHLSL/builtins/pow-overloads.hlsl (+16-16)
(modified) clang/test/Headers/__clang_hip_math.hip (+40-40)
(modified) clang/test/Headers/openmp_device_math_isnan.cpp (+2-2)
(modified) clang/test/SemaHLSL/VectorOverloadResolution.hlsl (+3-3)
(modified) llvm/docs/LangRef.rst (+8-1)
(modified) llvm/include/llvm/IR/Operator.h (+3)
(modified) llvm/lib/AsmParser/LLParser.cpp (+3-3)
(modified) llvm/test/Assembler/fast-math-flags.ll (+66)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll (+132-132)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll (+20-20)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll (+9-9)
(modified) llvm/test/CodeGen/AMDGPU/divrem24-assume.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll (+2-2)
(modified) llvm/test/Transforms/InstCombine/log-pow.ll (+3-3)
(modified) llvm/test/Transforms/LoopVectorize/PowerPC/vplan-scalarivsext-crash.ll (+2-2)
(modified) llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll (+38-38)
(modified) llvm/test/Transforms/LoopVectorize/float-induction.ll (+11-11)
(modified) llvm/test/Transforms/LoopVectorize/iv_outside_user.ll (+9-9)
(modified) llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll (+1-1)

diff --git a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
index 22c40e6d38ea2..b8819b8e550ea 100644
--- a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
@@ -164,7 +164,7 @@ __device__ double ffp4(double *p, float f) {
 
 __device__ double ffp5(double *p, int i) {
   // FUN-LABEL: @_Z4ffp5Pdi
-  // CHECK: sitofp i32 {{.*}} to double
+  // CHECK: sitofp contract i32 {{.*}} to double
   // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, [[DEFMD]]
   // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, [[DEFMD]]
   __atomic_fetch_sub(p, i, memory_order_relaxed);
diff --git a/clang/test/CodeGenCUDA/managed-var.cu b/clang/test/CodeGenCUDA/managed-var.cu
index 0e7a7be85ac8e..368adece297b7 100644
--- a/clang/test/CodeGenCUDA/managed-var.cu
+++ b/clang/test/CodeGenCUDA/managed-var.cu
@@ -145,7 +145,7 @@ float load3() {
 // HOST:  %4 = ptrtoint ptr %3 to i64
 // HOST:  %5 = sub i64 %4, %1
 // HOST:  %sub.ptr.div = sdiv exact i64 %5, 4
-// HOST:  %conv = sitofp i64 %sub.ptr.div to float
+// HOST:  %conv = sitofp contract i64 %sub.ptr.div to float
 // HOST:  ret float %conv
 float addr_taken2() {
   return (float)reinterpret_cast<long>(&(v2[1].y)-&(v[1].x));
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl
index 512fcd435191a..64fda1301d8cb 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl
@@ -38,7 +38,7 @@ export void call8() {
 // CHECK-NEXT: store <1 x float> splat (float 1.000000e+00), ptr [[B]], align 4
 // CHECK-NEXT: [[L:%.*]] = load <1 x float>, ptr [[B]], align 4
 // CHECK-NEXT: [[VL:%.*]] = extractelement <1 x float> [[L]], i32 0
-// CHECK-NEXT: [[C:%.*]] = fptosi float [[VL]] to i32
+// CHECK-NEXT: [[C:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[VL]] to i32
 // CHECK-NEXT: [[SI:%.*]] = insertelement <4 x i32> poison, i32 [[C]], i64 0
 // CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[SI]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT: store <4 x i32> [[S]], ptr [[A]], align 16
@@ -62,7 +62,7 @@ struct S {
 // CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0
 // CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1
 // CHECK-NEXT: store i32 [[VL]], ptr [[G1]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[VL]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VL]] to float
 // CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
 export void call3() {
   int1 A = {1};
@@ -79,7 +79,7 @@ export void call3() {
 // CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0
 // CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1
 // CHECK-NEXT: store i32 [[VL]], ptr [[G1]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[VL]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VL]] to float
 // CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
 export void call5() {
   int1 A = {1};
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
index ac02ddf5765ed..54b37401466ba 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
@@ -46,7 +46,7 @@ export void call1() {
 // CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x float], ptr [[B]], i32 0, i32 0
 // CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [1 x i32], ptr [[Tmp]], i32 0, i32 0
 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G2]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[L]] to float
 // CHECK-NEXT: store float [[C]], ptr [[G1]], align 4
 export void call2() {
   int A[1] = {0};
@@ -63,7 +63,7 @@ export void call2() {
 // CHECK-NEXT: [[C:%.*]] = load <1 x float>, ptr [[A]], align 4
 // CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x i32], ptr [[B]], i32 0, i32 0
 // CHECK-NEXT: [[V:%.*]] = extractelement <1 x float> [[C]], i64 0
-// CHECK-NEXT: [[C:%.*]] = fptosi float [[V]] to i32
+// CHECK-NEXT: [[C:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[V]] to i32
 // CHECK-NEXT: store i32 [[C]], ptr [[G1]], align 4
 export void call3() {
   float1 A = {1.2};
@@ -84,11 +84,11 @@ export void call3() {
 // CHECK-NEXT: [[VG:%.*]] = getelementptr inbounds [1 x <2 x float>], ptr [[Tmp]], i32 0, i32 0
 // CHECK-NEXT: [[L:%.*]] = load <2 x float>, ptr [[VG]], align 8
 // CHECK-NEXT: [[VL:%.*]] = extractelement <2 x float> [[L]], i32 0
-// CHECK-NEXT: [[C:%.*]] = fptosi float [[VL]] to i32
+// CHECK-NEXT: [[C:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[VL]] to i32
 // CHECK-NEXT: store i32 [[C]], ptr [[G1]], align 4
 // CHECK-NEXT: [[L4:%.*]] = load <2 x float>, ptr [[VG]], align 8
 // CHECK-NEXT: [[VL5:%.*]] = extractelement <2 x float> [[L4]], i32 1
-// CHECK-NEXT: [[C6:%.*]] = fptosi float [[VL5]] to i32
+// CHECK-NEXT: [[C6:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[VL5]] to i32
 // CHECK-NEXT: store i32 [[C6]], ptr [[G2]], align 4
 export void call5() {
   float2 A[1] = {{1.2,3.4}};
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
index 7e83e5f168538..7633fd361a274 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
@@ -98,7 +98,7 @@ TwoFloats case2() {
 // CHECK-NEXT:    store i32 [[VAL]], ptr [[VAL_ADDR]], align 4
 // CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[VAL_ADDR]], align 4
-// CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[TMP0]] to float
 // CHECK-NEXT:    store float [[CONV]], ptr [[X]], align 1
 // CHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 1
 // CHECK-NEXT:    store float 2.000000e+00, ptr [[Y]], align 1
@@ -119,12 +119,12 @@ TwoFloats case3(int Val) {
 // CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[TWOVALS_ADDR]], align 8
 // CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <2 x i32> [[TMP0]], i64 0
-// CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[VECEXT]] to float
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VECEXT]] to float
 // CHECK-NEXT:    store float [[CONV]], ptr [[X]], align 1
 // CHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 1
 // CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[TWOVALS_ADDR]], align 8
 // CHECK-NEXT:    [[VECEXT1:%.*]] = extractelement <2 x i32> [[TMP1]], i64 1
-// CHECK-NEXT:    [[CONV2:%.*]] = sitofp i32 [[VECEXT1]] to float
+// CHECK-NEXT:    [[CONV2:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VECEXT1]] to float
 // CHECK-NEXT:    store float [[CONV2]], ptr [[Y]], align 1
 // CHECK-NEXT:    ret void
 //
@@ -162,12 +162,12 @@ TwoInts case5(int2 TwoVals) {
 // CHECK-NEXT:    [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[TF4]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[X]], align 1
-// CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
+// CHECK-NEXT:    [[CONV:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[TMP0]] to i32
 // CHECK-NEXT:    store i32 [[CONV]], ptr [[Z]], align 1
 // CHECK-NEXT:    [[W:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[AGG_RESULT]], i32 0, i32 1
 // CHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[TF4]], i32 0, i32 1
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[Y]], align 1
-// CHECK-NEXT:    [[CONV1:%.*]] = fptosi float [[TMP1]] to i32
+// CHECK-NEXT:    [[CONV1:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[TMP1]] to i32
 // CHECK-NEXT:    store i32 [[CONV1]], ptr [[W]], align 1
 // CHECK-NEXT:    ret void
 //
@@ -202,7 +202,7 @@ TwoInts case6(TwoFloats TF4) {
 // CHECK-NEXT:    store i32 [[TMP4]], ptr [[TAILSTATE]], align 1
 // CHECK-NEXT:    [[HAIRCOUNT:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[AGG_RESULT]], i32 0, i32 2
 // CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[VAL_ADDR]], align 4
-// CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP5]] to float
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[TMP5]] to float
 // CHECK-NEXT:    store float [[CONV]], ptr [[HAIRCOUNT]], align 1
 // CHECK-NEXT:    [[EARDIRECTION:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[AGG_RESULT]], i32 0, i32 3
 // CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[TF1]], i32 0, i32 0
@@ -272,7 +272,7 @@ Doggo case7(TwoInts TI1, TwoInts TI2, int Val, TwoFloats TF1, TwoFloats TF2,
 // CHECK-NEXT:    [[COUNTER:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[AGG_RESULT]], i32 0, i32 2
 // CHECK-NEXT:    [[HAIRCOUNT:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[D1]], i32 0, i32 2
 // CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[HAIRCOUNT]], align 1
-// CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP5]] to i64
+// CHECK-NEXT:    [[CONV:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[TMP5]] to i64
 // CHECK-NEXT:    store i64 [[CONV]], ptr [[COUNTER]], align 1
 // CHECK-NEXT:    [[LEFTDIR:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[AGG_RESULT]], i32 0, i32 3
 // CHECK-NEXT:    [[EARDIRECTION:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[D1]], i32 0, i32 3
@@ -428,7 +428,7 @@ AnimalBits case8(Doggo D1) {
 // CHECK-NEXT:    [[HAIRCOUNT58:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[ARRAYINIT_ELEMENT44]], i32 0, i32 2
 // CHECK-NEXT:    [[COUNTER:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 2
 // CHECK-NEXT:    [[TMP19:%.*]] = load i64, ptr [[COUNTER]], align 1
-// CHECK-NEXT:    [[CONV:%.*]] = sitofp i64 [[TMP19]] to float
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 [[TMP19]] to float
 // CHECK-NEXT:    store float [[CONV]], ptr [[HAIRCOUNT58]], align 1
 // CHECK-NEXT:    [[EARDIRECTION59:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[ARRAYINIT_ELEMENT44]], i32 0, i32 3
 // CHECK-NEXT:    [[LEFTDIR:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 3
@@ -563,7 +563,7 @@ AnimalBits case8(Doggo D1) {
 // CHECK-NEXT:    [[HAIRCOUNT149:%.*]] = getelementptr inbounds nuw [[STRUCT_KITTEH]], ptr [[ARRAYINIT_ELEMENT133]], i32 0, i32 2
 // CHECK-NEXT:    [[COUNTER150:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 2
 // CHECK-NEXT:    [[TMP47:%.*]] = load i64, ptr [[COUNTER150]], align 1
-// CHECK-NEXT:    [[CONV151:%.*]] = sitofp i64 [[TMP47]] to float
+// CHECK-NEXT:    [[CONV151:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 [[TMP47]] to float
 // CHECK-NEXT:    store float [[CONV151]], ptr [[HAIRCOUNT149]], align 1
 // CHECK-NEXT:    [[CLAWS152:%.*]] = getelementptr inbounds nuw [[STRUCT_KITTEH]], ptr [[ARRAYINIT_ELEMENT133]], i32 0, i32 3
 // CHECK-NEXT:    [[LEFTDIR153:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 3
@@ -698,7 +698,7 @@ AnimalBits case8(Doggo D1) {
 // CHECK-NEXT:    [[HAIRCOUNT246:%.*]] = getelementptr inbounds nuw [[STRUCT_KITTEH]], ptr [[ARRAYINIT_ELEMENT230]], i32 0, i32 2
 // CHECK-NEXT:    [[COUNTER247:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 2
 // CHECK-NEXT:    [[TMP75:%.*]] = load i64, ptr [[COUNTER247]], align 1
-// CHECK-NEXT:    [[CONV248:%.*]] = sitofp i64 [[TMP75]] to float
+// CHECK-NEXT:    [[CONV248:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 [[TMP75]] to float
 // CHECK-NEXT:    store float [[CONV248]], ptr [[HAIRCOUNT246]], align 1
 // CHECK-NEXT:    [[CLAWS249:%.*]] = getelementptr inbounds nuw [[STRUCT_KITTEH]], ptr [[ARRAYINIT_ELEMENT230]], i32 0, i32 3
 // CHECK-NEXT:    [[LEFTDIR250:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 3
@@ -888,13 +888,13 @@ TwoInts case14(SlicyBits SB) {
 // CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[BF_LOAD:%.*]] = load i8, ptr [[SB]], align 1
 // CHECK-NEXT:    [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32
-// CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[BF_CAST]] to float
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[BF_CAST]] to float
 // CHECK-NEXT:    store float [[CONV]], ptr [[X]], align 1
 // CHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 1
 // CHECK-NEXT:    [[W:%.*]] = getelementptr inbounds nuw [[STRUCT_SLICYBITS]], ptr [[SB]], i32 0, i32 1
 // CHECK-NEXT:    [[BF_LOAD1:%.*]] = load i8, ptr [[W]], align 1
 // CHECK-NEXT:    [[BF_CAST2:%.*]] = sext i8 [[BF_LOAD1]] to i32
-// CHECK-NEXT:    [[CONV3:%.*]] = sitofp i32 [[BF_CAST2]] to float
+// CHECK-NEXT:    [[CONV3:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[BF_CAST2]] to float
 // CHECK-NEXT:    store float [[CONV3]], ptr [[Y]], align 1
 // CHECK-NEXT:    ret void
 //
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
index d0ba8f447b732..6f42b5611ce41 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
@@ -13,14 +13,14 @@ void trunc_Param(inout int X) {}
 // CHECK: [[F:%.*]] = alloca float
 // CHECK: [[ArgTmp:%.*]] = alloca i32
 // CHECK: [[FVal:%.*]] = load float, ptr {{.*}}
-// CHECK: [[IVal:%.*]] = fptosi float [[FVal]] to i32
+// CHECK: [[IVal:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[FVal]] to i32
 // CHECK: store i32 [[IVal]], ptr [[ArgTmp]]
 // CHECK: call void {{.*}}trunc_Param{{.*}}(ptr noalias noundef nonnull align 4 dereferenceable(4) [[ArgTmp]])
 // CHECK: [[IRet:%.*]] = load i32, ptr [[ArgTmp]]
-// CHECK: [[FRet:%.*]] = sitofp i32 [[IRet]] to float
+// CHECK: [[FRet:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[IRet]] to float
 // CHECK: store float [[FRet]], ptr [[F]]
-// OPT: [[IVal:%.*]] = fptosi float {{.*}} to i32
-// OPT: [[FVal:%.*]] = sitofp i32 [[IVal]] to float
+// OPT: [[IVal:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float {{.*}} to i32
+// OPT: [[FVal:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[IVal]] to float
 // OPT: ret float [[FVal]]
 export float case1(float F) {
   trunc_Param(F);
@@ -202,15 +202,15 @@ void trunc_vec(inout int3 V) {}
 // CHECK: [[V:%.*]] = alloca <3 x float>
 // CHECK: [[Tmp:%.*]] = alloca <3 x i32>
 // CHECK: [[FVal:%.*]] = load <3 x float>, ptr [[V]]
-// CHECK: [[IVal:%.*]] = fptosi <3 x float> [[FVal]] to <3 x i32>
+// CHECK: [[IVal:%.*]] = fptosi reassoc nnan ninf nsz arcp afn <3 x float> [[FVal]] to <3 x i32>
 // CHECK: store <3 x i32> [[IVal]], ptr [[Tmp]]
 // CHECK: call void {{.*}}trunc_vec{{.*}}(ptr noalias noundef nonnull align 16 dereferenceable(16) [[Tmp]])
 // CHECK: [[IRet:%.*]] = load <3 x i32>, ptr [[Tmp]]
-// CHECK: [[FRet:%.*]] = sitofp <3 x i32> [[IRet]] to <3 x float>
+// CHECK: [[FRet:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i32> [[IRet]] to <3 x float>
 // CHECK: store <3 x float> [[FRet]], ptr [[V]]
 
-// OPT: [[IVal:%.*]] = fptosi <3 x float> {{.*}} to <3 x i32>
-// OPT: [[FVal:%.*]] = sitofp <3 x i32> [[IVal]] to <3 x float>
+// OPT: [[IVal:%.*]] = fptosi reassoc nnan ninf nsz arcp afn <3 x float> {{.*}} to <3 x i32>
+// OPT: [[FVal:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i32> [[IVal]] to <3 x float>
 // OPT: ret <3 x float> [[FVal]]
 
 export float3 case8(float3 V) {
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl
index 81b9f5b28cc7e..e4229951e58b5 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl
@@ -32,7 +32,7 @@ export void call0() {
 // CHECK-NEXT: [[VL:%.*]] = extractelement <2 x i32> [[L]], i64 0
 // CHECK-NEXT: store i32 [[VL]], ptr [[G1]], align 4
 // CHECK-NEXT: [[VL2:%.*]] = extractelement <2 x i32> [[L]], i64 1
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[VL2]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VL2]] to float
 // CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
 export void call1() {
   int2 A = {1,2};
@@ -54,7 +54,7 @@ export void call1() {
 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G3]], align 4
 // CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4
 // CHECK-NEXT: [[L4:%.*]] = load i32, ptr [[G4]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L4]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[L4]] to float
 // CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
 export void call2() {
   int A[2] = {1,2};
@@ -104,7 +104,7 @@ export void call6() {
 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G3]], align 4
 // CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4
 // CHECK-NEXT: [[L4:%.*]] = load i32, ptr [[G4]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L4]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[L4]] to float
 // CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
 export void call7() {
   int A[2] = {1,2};
@@ -132,7 +132,7 @@ struct T {
 // CHECK-NEXT: %load = load i32, ptr %gep2, align 4
 // CHECK-NEXT: store i32 %load, ptr %gep, align 4
 // CHECK-NEXT: %load5 = load i32, ptr %gep3, align 4
-// CHECK-NEXT: %conv = sitofp i32 %load5 to float
+// CHECK-NEXT: %conv = sitofp reassoc nnan ninf nsz arcp afn i32 %load5 to float
 // CHECK-NEXT: store float %conv, ptr %gep1, align 4
 export void call8() {
   T t = {1,2,3};
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
index 253b38a7ca072..a5843b0a95642 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
@@ -40,7 +40,7 @@ struct S {
 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G1]], align 4
 // CHECK-NEXT: [[C:%.*]] = insertelement <2 x i32> [[B]], i32 [[L]], i64 0
 // CHECK-NEXT: [[L2:%.*]] = load float, ptr [[G2]], align 4
-// CHECK-NEXT: [[D:%.*]] = fptosi float [[L2]] to i32
+// CHECK-NEXT: [[D:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[L2]] to i32
 // CHECK-NEXT: [[E:%.*]] = insertelement <2 x i32> [[C]], i32 [[D]], i64 1
 // CHECK-NEXT: store <2 x i32> [[E]], ptr [[A]], align 8
 export void call3() {
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl
index 6770efefe94fe..3e8c06ed074af 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl
@@ -43,7 +43,7 @@ void d4_to_f2() {
 // CHECK: [[i2:%.*]] = alloca <2 x i32>
 // CHECK: store <2 x float> splat (float 4.000000e+00), ptr [[f2]]
 // CHECK: [[vecf2:%.*]] = load <2 x float>, ptr [[f2]]
-// CHECK: [[veci2:%.*]] = fptosi <2 x float> [[vecf2]] to <2 x i32>
+// CHECK: [[veci2:%.*]] = fptosi reassoc nnan ninf nsz arcp afn <2 x float> [[vecf2]] to <2 x i32>
 // CHECK: store <2 x i32> [[veci2]], ptr [[i2]]
 void f2_to_i2() {
   vector<float,2> f2 = 4.0;
@@ -55,7 +55,7 @@ void f2_to_i2() {
 // CHECK: [[i2:%.*]] = alloca <2 x i32>
 // CHECK: store <4 x double> splat (double 5.000000e+00), ptr [[d4]]
 // CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]]
-// CHECK: [[veci4:%.*]] = fptosi <4 x double> [[vecd4]] to <4 x i32>
+// CHECK: [[veci4:%.*]] = fptosi reassoc nnan ninf nsz arcp afn <4 x double> [[vecd4]] to <4 x i32>
 // CHECK: [[ve...
[truncated]

llvmbot · 2025-09-25T00:27:34Z

@llvm/pr-subscribers-clang

Author: None (paperchalice)

Changes

This allows optimizations like FoldIntToFPToInt in CodeGen to use fast math flags.

Patch is 190.48 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/160475.diff

31 Files Affected:

(modified) clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu (+1-1)
(modified) clang/test/CodeGenCUDA/managed-var.cu (+1-1)
(modified) clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl (+3-3)
(modified) clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl (+4-4)
(modified) clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl (+12-12)
(modified) clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl (+8-8)
(modified) clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl (+4-4)
(modified) clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl (+1-1)
(modified) clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl (+3-3)
(modified) clang/test/CodeGenHLSL/builtins/dot2add.hlsl (+6-6)
(modified) clang/test/CodeGenHLSL/builtins/lerp-overloads.hlsl (+24-24)
(modified) clang/test/CodeGenHLSL/builtins/pow-overloads.hlsl (+16-16)
(modified) clang/test/Headers/__clang_hip_math.hip (+40-40)
(modified) clang/test/Headers/openmp_device_math_isnan.cpp (+2-2)
(modified) clang/test/SemaHLSL/VectorOverloadResolution.hlsl (+3-3)
(modified) llvm/docs/LangRef.rst (+8-1)
(modified) llvm/include/llvm/IR/Operator.h (+3)
(modified) llvm/lib/AsmParser/LLParser.cpp (+3-3)
(modified) llvm/test/Assembler/fast-math-flags.ll (+66)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll (+132-132)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll (+20-20)
(modified) llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll (+9-9)
(modified) llvm/test/CodeGen/AMDGPU/divrem24-assume.ll (+2-2)
(modified) llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll (+2-2)
(modified) llvm/test/Transforms/InstCombine/log-pow.ll (+3-3)
(modified) llvm/test/Transforms/LoopVectorize/PowerPC/vplan-scalarivsext-crash.ll (+2-2)
(modified) llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll (+38-38)
(modified) llvm/test/Transforms/LoopVectorize/float-induction.ll (+11-11)
(modified) llvm/test/Transforms/LoopVectorize/iv_outside_user.ll (+9-9)
(modified) llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll (+1-1)

diff --git a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
index 22c40e6d38ea2..b8819b8e550ea 100644
--- a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
@@ -164,7 +164,7 @@ __device__ double ffp4(double *p, float f) {
 
 __device__ double ffp5(double *p, int i) {
   // FUN-LABEL: @_Z4ffp5Pdi
-  // CHECK: sitofp i32 {{.*}} to double
+  // CHECK: sitofp contract i32 {{.*}} to double
   // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, [[DEFMD]]
   // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, [[DEFMD]]
   __atomic_fetch_sub(p, i, memory_order_relaxed);
diff --git a/clang/test/CodeGenCUDA/managed-var.cu b/clang/test/CodeGenCUDA/managed-var.cu
index 0e7a7be85ac8e..368adece297b7 100644
--- a/clang/test/CodeGenCUDA/managed-var.cu
+++ b/clang/test/CodeGenCUDA/managed-var.cu
@@ -145,7 +145,7 @@ float load3() {
 // HOST:  %4 = ptrtoint ptr %3 to i64
 // HOST:  %5 = sub i64 %4, %1
 // HOST:  %sub.ptr.div = sdiv exact i64 %5, 4
-// HOST:  %conv = sitofp i64 %sub.ptr.div to float
+// HOST:  %conv = sitofp contract i64 %sub.ptr.div to float
 // HOST:  ret float %conv
 float addr_taken2() {
   return (float)reinterpret_cast<long>(&(v2[1].y)-&(v[1].x));
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl
index 512fcd435191a..64fda1301d8cb 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl
@@ -38,7 +38,7 @@ export void call8() {
 // CHECK-NEXT: store <1 x float> splat (float 1.000000e+00), ptr [[B]], align 4
 // CHECK-NEXT: [[L:%.*]] = load <1 x float>, ptr [[B]], align 4
 // CHECK-NEXT: [[VL:%.*]] = extractelement <1 x float> [[L]], i32 0
-// CHECK-NEXT: [[C:%.*]] = fptosi float [[VL]] to i32
+// CHECK-NEXT: [[C:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[VL]] to i32
 // CHECK-NEXT: [[SI:%.*]] = insertelement <4 x i32> poison, i32 [[C]], i64 0
 // CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[SI]], <4 x i32> poison, <4 x i32> zeroinitializer
 // CHECK-NEXT: store <4 x i32> [[S]], ptr [[A]], align 16
@@ -62,7 +62,7 @@ struct S {
 // CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0
 // CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1
 // CHECK-NEXT: store i32 [[VL]], ptr [[G1]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[VL]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VL]] to float
 // CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
 export void call3() {
   int1 A = {1};
@@ -79,7 +79,7 @@ export void call3() {
 // CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0
 // CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1
 // CHECK-NEXT: store i32 [[VL]], ptr [[G1]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[VL]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VL]] to float
 // CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
 export void call5() {
   int1 A = {1};
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
index ac02ddf5765ed..54b37401466ba 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl
@@ -46,7 +46,7 @@ export void call1() {
 // CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x float], ptr [[B]], i32 0, i32 0
 // CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [1 x i32], ptr [[Tmp]], i32 0, i32 0
 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G2]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[L]] to float
 // CHECK-NEXT: store float [[C]], ptr [[G1]], align 4
 export void call2() {
   int A[1] = {0};
@@ -63,7 +63,7 @@ export void call2() {
 // CHECK-NEXT: [[C:%.*]] = load <1 x float>, ptr [[A]], align 4
 // CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x i32], ptr [[B]], i32 0, i32 0
 // CHECK-NEXT: [[V:%.*]] = extractelement <1 x float> [[C]], i64 0
-// CHECK-NEXT: [[C:%.*]] = fptosi float [[V]] to i32
+// CHECK-NEXT: [[C:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[V]] to i32
 // CHECK-NEXT: store i32 [[C]], ptr [[G1]], align 4
 export void call3() {
   float1 A = {1.2};
@@ -84,11 +84,11 @@ export void call3() {
 // CHECK-NEXT: [[VG:%.*]] = getelementptr inbounds [1 x <2 x float>], ptr [[Tmp]], i32 0, i32 0
 // CHECK-NEXT: [[L:%.*]] = load <2 x float>, ptr [[VG]], align 8
 // CHECK-NEXT: [[VL:%.*]] = extractelement <2 x float> [[L]], i32 0
-// CHECK-NEXT: [[C:%.*]] = fptosi float [[VL]] to i32
+// CHECK-NEXT: [[C:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[VL]] to i32
 // CHECK-NEXT: store i32 [[C]], ptr [[G1]], align 4
 // CHECK-NEXT: [[L4:%.*]] = load <2 x float>, ptr [[VG]], align 8
 // CHECK-NEXT: [[VL5:%.*]] = extractelement <2 x float> [[L4]], i32 1
-// CHECK-NEXT: [[C6:%.*]] = fptosi float [[VL5]] to i32
+// CHECK-NEXT: [[C6:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[VL5]] to i32
 // CHECK-NEXT: store i32 [[C6]], ptr [[G2]], align 4
 export void call5() {
   float2 A[1] = {{1.2,3.4}};
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
index 7e83e5f168538..7633fd361a274 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl
@@ -98,7 +98,7 @@ TwoFloats case2() {
 // CHECK-NEXT:    store i32 [[VAL]], ptr [[VAL_ADDR]], align 4
 // CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[VAL_ADDR]], align 4
-// CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[TMP0]] to float
 // CHECK-NEXT:    store float [[CONV]], ptr [[X]], align 1
 // CHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 1
 // CHECK-NEXT:    store float 2.000000e+00, ptr [[Y]], align 1
@@ -119,12 +119,12 @@ TwoFloats case3(int Val) {
 // CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[TWOVALS_ADDR]], align 8
 // CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <2 x i32> [[TMP0]], i64 0
-// CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[VECEXT]] to float
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VECEXT]] to float
 // CHECK-NEXT:    store float [[CONV]], ptr [[X]], align 1
 // CHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 1
 // CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[TWOVALS_ADDR]], align 8
 // CHECK-NEXT:    [[VECEXT1:%.*]] = extractelement <2 x i32> [[TMP1]], i64 1
-// CHECK-NEXT:    [[CONV2:%.*]] = sitofp i32 [[VECEXT1]] to float
+// CHECK-NEXT:    [[CONV2:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VECEXT1]] to float
 // CHECK-NEXT:    store float [[CONV2]], ptr [[Y]], align 1
 // CHECK-NEXT:    ret void
 //
@@ -162,12 +162,12 @@ TwoInts case5(int2 TwoVals) {
 // CHECK-NEXT:    [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[TF4]], i32 0, i32 0
 // CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[X]], align 1
-// CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP0]] to i32
+// CHECK-NEXT:    [[CONV:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[TMP0]] to i32
 // CHECK-NEXT:    store i32 [[CONV]], ptr [[Z]], align 1
 // CHECK-NEXT:    [[W:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[AGG_RESULT]], i32 0, i32 1
 // CHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[TF4]], i32 0, i32 1
 // CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[Y]], align 1
-// CHECK-NEXT:    [[CONV1:%.*]] = fptosi float [[TMP1]] to i32
+// CHECK-NEXT:    [[CONV1:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[TMP1]] to i32
 // CHECK-NEXT:    store i32 [[CONV1]], ptr [[W]], align 1
 // CHECK-NEXT:    ret void
 //
@@ -202,7 +202,7 @@ TwoInts case6(TwoFloats TF4) {
 // CHECK-NEXT:    store i32 [[TMP4]], ptr [[TAILSTATE]], align 1
 // CHECK-NEXT:    [[HAIRCOUNT:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[AGG_RESULT]], i32 0, i32 2
 // CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[VAL_ADDR]], align 4
-// CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP5]] to float
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[TMP5]] to float
 // CHECK-NEXT:    store float [[CONV]], ptr [[HAIRCOUNT]], align 1
 // CHECK-NEXT:    [[EARDIRECTION:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[AGG_RESULT]], i32 0, i32 3
 // CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[TF1]], i32 0, i32 0
@@ -272,7 +272,7 @@ Doggo case7(TwoInts TI1, TwoInts TI2, int Val, TwoFloats TF1, TwoFloats TF2,
 // CHECK-NEXT:    [[COUNTER:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[AGG_RESULT]], i32 0, i32 2
 // CHECK-NEXT:    [[HAIRCOUNT:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[D1]], i32 0, i32 2
 // CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr [[HAIRCOUNT]], align 1
-// CHECK-NEXT:    [[CONV:%.*]] = fptosi float [[TMP5]] to i64
+// CHECK-NEXT:    [[CONV:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[TMP5]] to i64
 // CHECK-NEXT:    store i64 [[CONV]], ptr [[COUNTER]], align 1
 // CHECK-NEXT:    [[LEFTDIR:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[AGG_RESULT]], i32 0, i32 3
 // CHECK-NEXT:    [[EARDIRECTION:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[D1]], i32 0, i32 3
@@ -428,7 +428,7 @@ AnimalBits case8(Doggo D1) {
 // CHECK-NEXT:    [[HAIRCOUNT58:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[ARRAYINIT_ELEMENT44]], i32 0, i32 2
 // CHECK-NEXT:    [[COUNTER:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 2
 // CHECK-NEXT:    [[TMP19:%.*]] = load i64, ptr [[COUNTER]], align 1
-// CHECK-NEXT:    [[CONV:%.*]] = sitofp i64 [[TMP19]] to float
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 [[TMP19]] to float
 // CHECK-NEXT:    store float [[CONV]], ptr [[HAIRCOUNT58]], align 1
 // CHECK-NEXT:    [[EARDIRECTION59:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[ARRAYINIT_ELEMENT44]], i32 0, i32 3
 // CHECK-NEXT:    [[LEFTDIR:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 3
@@ -563,7 +563,7 @@ AnimalBits case8(Doggo D1) {
 // CHECK-NEXT:    [[HAIRCOUNT149:%.*]] = getelementptr inbounds nuw [[STRUCT_KITTEH]], ptr [[ARRAYINIT_ELEMENT133]], i32 0, i32 2
 // CHECK-NEXT:    [[COUNTER150:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 2
 // CHECK-NEXT:    [[TMP47:%.*]] = load i64, ptr [[COUNTER150]], align 1
-// CHECK-NEXT:    [[CONV151:%.*]] = sitofp i64 [[TMP47]] to float
+// CHECK-NEXT:    [[CONV151:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 [[TMP47]] to float
 // CHECK-NEXT:    store float [[CONV151]], ptr [[HAIRCOUNT149]], align 1
 // CHECK-NEXT:    [[CLAWS152:%.*]] = getelementptr inbounds nuw [[STRUCT_KITTEH]], ptr [[ARRAYINIT_ELEMENT133]], i32 0, i32 3
 // CHECK-NEXT:    [[LEFTDIR153:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 3
@@ -698,7 +698,7 @@ AnimalBits case8(Doggo D1) {
 // CHECK-NEXT:    [[HAIRCOUNT246:%.*]] = getelementptr inbounds nuw [[STRUCT_KITTEH]], ptr [[ARRAYINIT_ELEMENT230]], i32 0, i32 2
 // CHECK-NEXT:    [[COUNTER247:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 2
 // CHECK-NEXT:    [[TMP75:%.*]] = load i64, ptr [[COUNTER247]], align 1
-// CHECK-NEXT:    [[CONV248:%.*]] = sitofp i64 [[TMP75]] to float
+// CHECK-NEXT:    [[CONV248:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 [[TMP75]] to float
 // CHECK-NEXT:    store float [[CONV248]], ptr [[HAIRCOUNT246]], align 1
 // CHECK-NEXT:    [[CLAWS249:%.*]] = getelementptr inbounds nuw [[STRUCT_KITTEH]], ptr [[ARRAYINIT_ELEMENT230]], i32 0, i32 3
 // CHECK-NEXT:    [[LEFTDIR250:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 3
@@ -888,13 +888,13 @@ TwoInts case14(SlicyBits SB) {
 // CHECK-NEXT:    [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0
 // CHECK-NEXT:    [[BF_LOAD:%.*]] = load i8, ptr [[SB]], align 1
 // CHECK-NEXT:    [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32
-// CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[BF_CAST]] to float
+// CHECK-NEXT:    [[CONV:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[BF_CAST]] to float
 // CHECK-NEXT:    store float [[CONV]], ptr [[X]], align 1
 // CHECK-NEXT:    [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 1
 // CHECK-NEXT:    [[W:%.*]] = getelementptr inbounds nuw [[STRUCT_SLICYBITS]], ptr [[SB]], i32 0, i32 1
 // CHECK-NEXT:    [[BF_LOAD1:%.*]] = load i8, ptr [[W]], align 1
 // CHECK-NEXT:    [[BF_CAST2:%.*]] = sext i8 [[BF_LOAD1]] to i32
-// CHECK-NEXT:    [[CONV3:%.*]] = sitofp i32 [[BF_CAST2]] to float
+// CHECK-NEXT:    [[CONV3:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[BF_CAST2]] to float
 // CHECK-NEXT:    store float [[CONV3]], ptr [[Y]], align 1
 // CHECK-NEXT:    ret void
 //
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
index d0ba8f447b732..6f42b5611ce41 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl
@@ -13,14 +13,14 @@ void trunc_Param(inout int X) {}
 // CHECK: [[F:%.*]] = alloca float
 // CHECK: [[ArgTmp:%.*]] = alloca i32
 // CHECK: [[FVal:%.*]] = load float, ptr {{.*}}
-// CHECK: [[IVal:%.*]] = fptosi float [[FVal]] to i32
+// CHECK: [[IVal:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[FVal]] to i32
 // CHECK: store i32 [[IVal]], ptr [[ArgTmp]]
 // CHECK: call void {{.*}}trunc_Param{{.*}}(ptr noalias noundef nonnull align 4 dereferenceable(4) [[ArgTmp]])
 // CHECK: [[IRet:%.*]] = load i32, ptr [[ArgTmp]]
-// CHECK: [[FRet:%.*]] = sitofp i32 [[IRet]] to float
+// CHECK: [[FRet:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[IRet]] to float
 // CHECK: store float [[FRet]], ptr [[F]]
-// OPT: [[IVal:%.*]] = fptosi float {{.*}} to i32
-// OPT: [[FVal:%.*]] = sitofp i32 [[IVal]] to float
+// OPT: [[IVal:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float {{.*}} to i32
+// OPT: [[FVal:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[IVal]] to float
 // OPT: ret float [[FVal]]
 export float case1(float F) {
   trunc_Param(F);
@@ -202,15 +202,15 @@ void trunc_vec(inout int3 V) {}
 // CHECK: [[V:%.*]] = alloca <3 x float>
 // CHECK: [[Tmp:%.*]] = alloca <3 x i32>
 // CHECK: [[FVal:%.*]] = load <3 x float>, ptr [[V]]
-// CHECK: [[IVal:%.*]] = fptosi <3 x float> [[FVal]] to <3 x i32>
+// CHECK: [[IVal:%.*]] = fptosi reassoc nnan ninf nsz arcp afn <3 x float> [[FVal]] to <3 x i32>
 // CHECK: store <3 x i32> [[IVal]], ptr [[Tmp]]
 // CHECK: call void {{.*}}trunc_vec{{.*}}(ptr noalias noundef nonnull align 16 dereferenceable(16) [[Tmp]])
 // CHECK: [[IRet:%.*]] = load <3 x i32>, ptr [[Tmp]]
-// CHECK: [[FRet:%.*]] = sitofp <3 x i32> [[IRet]] to <3 x float>
+// CHECK: [[FRet:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i32> [[IRet]] to <3 x float>
 // CHECK: store <3 x float> [[FRet]], ptr [[V]]
 
-// OPT: [[IVal:%.*]] = fptosi <3 x float> {{.*}} to <3 x i32>
-// OPT: [[FVal:%.*]] = sitofp <3 x i32> [[IVal]] to <3 x float>
+// OPT: [[IVal:%.*]] = fptosi reassoc nnan ninf nsz arcp afn <3 x float> {{.*}} to <3 x i32>
+// OPT: [[FVal:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i32> [[IVal]] to <3 x float>
 // OPT: ret <3 x float> [[FVal]]
 
 export float3 case8(float3 V) {
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl
index 81b9f5b28cc7e..e4229951e58b5 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl
@@ -32,7 +32,7 @@ export void call0() {
 // CHECK-NEXT: [[VL:%.*]] = extractelement <2 x i32> [[L]], i64 0
 // CHECK-NEXT: store i32 [[VL]], ptr [[G1]], align 4
 // CHECK-NEXT: [[VL2:%.*]] = extractelement <2 x i32> [[L]], i64 1
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[VL2]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VL2]] to float
 // CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
 export void call1() {
   int2 A = {1,2};
@@ -54,7 +54,7 @@ export void call1() {
 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G3]], align 4
 // CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4
 // CHECK-NEXT: [[L4:%.*]] = load i32, ptr [[G4]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L4]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[L4]] to float
 // CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
 export void call2() {
   int A[2] = {1,2};
@@ -104,7 +104,7 @@ export void call6() {
 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G3]], align 4
 // CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4
 // CHECK-NEXT: [[L4:%.*]] = load i32, ptr [[G4]], align 4
-// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L4]] to float
+// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[L4]] to float
 // CHECK-NEXT: store float [[C]], ptr [[G2]], align 4
 export void call7() {
   int A[2] = {1,2};
@@ -132,7 +132,7 @@ struct T {
 // CHECK-NEXT: %load = load i32, ptr %gep2, align 4
 // CHECK-NEXT: store i32 %load, ptr %gep, align 4
 // CHECK-NEXT: %load5 = load i32, ptr %gep3, align 4
-// CHECK-NEXT: %conv = sitofp i32 %load5 to float
+// CHECK-NEXT: %conv = sitofp reassoc nnan ninf nsz arcp afn i32 %load5 to float
 // CHECK-NEXT: store float %conv, ptr %gep1, align 4
 export void call8() {
   T t = {1,2,3};
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
index 253b38a7ca072..a5843b0a95642 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl
@@ -40,7 +40,7 @@ struct S {
 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G1]], align 4
 // CHECK-NEXT: [[C:%.*]] = insertelement <2 x i32> [[B]], i32 [[L]], i64 0
 // CHECK-NEXT: [[L2:%.*]] = load float, ptr [[G2]], align 4
-// CHECK-NEXT: [[D:%.*]] = fptosi float [[L2]] to i32
+// CHECK-NEXT: [[D:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[L2]] to i32
 // CHECK-NEXT: [[E:%.*]] = insertelement <2 x i32> [[C]], i32 [[D]], i64 1
 // CHECK-NEXT: store <2 x i32> [[E]], ptr [[A]], align 8
 export void call3() {
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl
index 6770efefe94fe..3e8c06ed074af 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl
@@ -43,7 +43,7 @@ void d4_to_f2() {
 // CHECK: [[i2:%.*]] = alloca <2 x i32>
 // CHECK: store <2 x float> splat (float 4.000000e+00), ptr [[f2]]
 // CHECK: [[vecf2:%.*]] = load <2 x float>, ptr [[f2]]
-// CHECK: [[veci2:%.*]] = fptosi <2 x float> [[vecf2]] to <2 x i32>
+// CHECK: [[veci2:%.*]] = fptosi reassoc nnan ninf nsz arcp afn <2 x float> [[vecf2]] to <2 x i32>
 // CHECK: store <2 x i32> [[veci2]], ptr [[i2]]
 void f2_to_i2() {
   vector<float,2> f2 = 4.0;
@@ -55,7 +55,7 @@ void f2_to_i2() {
 // CHECK: [[i2:%.*]] = alloca <2 x i32>
 // CHECK: store <4 x double> splat (double 5.000000e+00), ptr [[d4]]
 // CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]]
-// CHECK: [[veci4:%.*]] = fptosi <4 x double> [[vecd4]] to <4 x i32>
+// CHECK: [[veci4:%.*]] = fptosi reassoc nnan ninf nsz arcp afn <4 x double> [[vecd4]] to <4 x i32>
 // CHECK: [[ve...
[truncated]

arsenm · 2025-09-25T00:32:15Z

llvm/test/Assembler/fast-math-flags.ll

  %h_vec = fptrunc <3 x float> %vec to <3 x half>
 ; CHECK: %h_scalable = fptrunc <vscale x 3 x float> %scalable to <vscale x 3 x half>
  %h_scalable = fptrunc <vscale x 3 x float> %scalable to <vscale x 3 x half>
+; CHECK: %i = fptoui float %x to i32


This also needs a bitcode compatibility test change.

arsenm · 2025-09-25T00:33:28Z

llvm/docs/LangRef.rst

      %X = fpext float 3.125 to double         ; yields double:3.125000e+00
      %Y = fpext double %X to fp128            ; yields fp128:0xL00000000000000004000900000000000

+.. _i_fptoui:


Why sitofp but not uitofp?

Can you add some notes on the interpretation of these? I assume afn is the only meaningful one?

paperchalice · 2025-09-25T00:41:24Z

uitofp has the nneg flag, which occupies the same bit position as the fast-math flag reassoc, so the two conflict.
Currently I have two ideas:

Treat uitofp specially and let it support all of the other flags.
Add an llvm.uitofp intrinsic whose return type is a floating-point type.

arsenm · 2025-09-25T00:45:56Z

FoldIntToFPToInt

I don't see this checking UnsafeFPMath now?

paperchalice · 2025-09-25T00:49:37Z

FoldIntToFPToInt

I don't see this checking UnsafeFPMath now?

Oops, I meant foldFPToIntToFP. It currently depends on DAG.getTarget().Options.NoSignedZerosFPMath; I plan to use contract + nsz instead.

nikic · 2025-09-25T12:55:06Z

I don't think we should do this until we have freed up the necessary flag space to also support uitofp as well.

jcranmer-intel · 2025-09-25T19:13:59Z

I don't remember the example off-hand, but there was a fast-math transform I wanted at one point that needed a flag on fptoui.

I don't think we should do this until we have freed up the necessary flag space to also support uitofp as well.

I can see an argument for doing fptoui and fptosi without the inverses, but agreed that sitofp without uitofp is a bad idea.

paperchalice · 2025-09-29T00:21:36Z

I don't remember the example off-hand, but there was a fast-math transform I wanted at one point that needed a flag on fptoui.

I don't think we should do this until we have freed up the necessary flag space to also support uitofp as well.

I can see an argument for doing fptoui and fptosi without the inverses, but agreed that sitofp without uitofp is a bad idea.

PowerPC also needs sitofp/uitofp with afn to allow double rounding.

paperchalice force-pushed the i2fp branch from ea2a7e3 to 8bf6ae5 Compare September 24, 2025 14:17

[IR] Allow fast math flags on fptosi, fptoui and sitofp

48f4e6d

paperchalice force-pushed the i2fp branch from 8bf6ae5 to 48f4e6d Compare September 25, 2025 00:06

paperchalice marked this pull request as ready for review September 25, 2025 00:26

llvmbot added clang Clang issues not falling into any other category backend:AMDGPU backend:PowerPC llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes HLSL HLSL Language Support llvm:ir llvm:transforms labels Sep 25, 2025

paperchalice requested review from arsenm, dtcxzyw, jcranmer-intel and nikic September 25, 2025 00:28

arsenm added the floating-point Floating-point math label Sep 25, 2025

arsenm reviewed Sep 25, 2025

View reviewed changes

add compatibility test

6a6eea6

[IR] Allow fast math flags on fptosi, fptoui and sitofp #160475

Are you sure you want to change the base?

[IR] Allow fast math flags on fptosi, fptoui and sitofp #160475

Uh oh!

Conversation

paperchalice commented Sep 24, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Sep 25, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Sep 25, 2025

Uh oh!

llvmbot commented Sep 25, 2025

Uh oh!

llvmbot commented Sep 25, 2025

Uh oh!

arsenm Sep 25, 2025

Choose a reason for hiding this comment

Uh oh!

arsenm Sep 25, 2025

Choose a reason for hiding this comment

Uh oh!

paperchalice commented Sep 25, 2025

Uh oh!

arsenm commented Sep 25, 2025

Uh oh!

paperchalice commented Sep 25, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

nikic commented Sep 25, 2025

Uh oh!

jcranmer-intel commented Sep 25, 2025

Uh oh!

paperchalice commented Sep 29, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

5 participants

paperchalice commented Sep 24, 2025 •

edited

Loading

llvmbot commented Sep 25, 2025 •

edited

Loading

paperchalice commented Sep 25, 2025 •

edited

Loading