diff --git a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu index 22c40e6d38ea2..b8819b8e550ea 100644 --- a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu +++ b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu @@ -164,7 +164,7 @@ __device__ double ffp4(double *p, float f) { __device__ double ffp5(double *p, int i) { // FUN-LABEL: @_Z4ffp5Pdi - // CHECK: sitofp i32 {{.*}} to double + // CHECK: sitofp contract i32 {{.*}} to double // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, [[DEFMD]] // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, [[DEFMD]] __atomic_fetch_sub(p, i, memory_order_relaxed); diff --git a/clang/test/CodeGenCUDA/managed-var.cu b/clang/test/CodeGenCUDA/managed-var.cu index 0e7a7be85ac8e..368adece297b7 100644 --- a/clang/test/CodeGenCUDA/managed-var.cu +++ b/clang/test/CodeGenCUDA/managed-var.cu @@ -145,7 +145,7 @@ float load3() { // HOST: %4 = ptrtoint ptr %3 to i64 // HOST: %5 = sub i64 %4, %1 // HOST: %sub.ptr.div = sdiv exact i64 %5, 4 -// HOST: %conv = sitofp i64 %sub.ptr.div to float +// HOST: %conv = sitofp contract i64 %sub.ptr.div to float // HOST: ret float %conv float addr_taken2() { return (float)reinterpret_cast(&(v2[1].y)-&(v[1].x)); diff --git a/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl index 512fcd435191a..64fda1301d8cb 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/AggregateSplatCast.hlsl @@ -38,7 +38,7 @@ export void call8() { // CHECK-NEXT: store <1 x float> splat (float 1.000000e+00), ptr [[B]], align 4 // CHECK-NEXT: [[L:%.*]] = load <1 x float>, ptr [[B]], align 4 // CHECK-NEXT: [[VL:%.*]] = extractelement <1 x float> [[L]], i32 0 -// CHECK-NEXT: [[C:%.*]] = fptosi float [[VL]] to i32 +// CHECK-NEXT: [[C:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[VL]] to i32 // CHECK-NEXT: [[SI:%.*]] = insertelement <4 x i32> poison, 
i32 [[C]], i64 0 // CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[SI]], <4 x i32> poison, <4 x i32> zeroinitializer // CHECK-NEXT: store <4 x i32> [[S]], ptr [[A]], align 16 @@ -62,7 +62,7 @@ struct S { // CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0 // CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1 // CHECK-NEXT: store i32 [[VL]], ptr [[G1]], align 4 -// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[VL]] to float +// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VL]] to float // CHECK-NEXT: store float [[C]], ptr [[G2]], align 4 export void call3() { int1 A = {1}; @@ -79,7 +79,7 @@ export void call3() { // CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0 // CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1 // CHECK-NEXT: store i32 [[VL]], ptr [[G1]], align 4 -// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[VL]] to float +// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VL]] to float // CHECK-NEXT: store float [[C]], ptr [[G2]], align 4 export void call5() { int1 A = {1}; diff --git a/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl index ac02ddf5765ed..54b37401466ba 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl @@ -46,7 +46,7 @@ export void call1() { // CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x float], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [1 x i32], ptr [[Tmp]], i32 0, i32 0 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G2]], align 4 -// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L]] to float +// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[L]] to float // CHECK-NEXT: store float [[C]], ptr [[G1]], align 4 export void call2() { int A[1] = {0}; @@ -63,7 +63,7 
@@ export void call2() { // CHECK-NEXT: [[C:%.*]] = load <1 x float>, ptr [[A]], align 4 // CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x i32], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: [[V:%.*]] = extractelement <1 x float> [[C]], i64 0 -// CHECK-NEXT: [[C:%.*]] = fptosi float [[V]] to i32 +// CHECK-NEXT: [[C:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[V]] to i32 // CHECK-NEXT: store i32 [[C]], ptr [[G1]], align 4 export void call3() { float1 A = {1.2}; @@ -84,11 +84,11 @@ export void call3() { // CHECK-NEXT: [[VG:%.*]] = getelementptr inbounds [1 x <2 x float>], ptr [[Tmp]], i32 0, i32 0 // CHECK-NEXT: [[L:%.*]] = load <2 x float>, ptr [[VG]], align 8 // CHECK-NEXT: [[VL:%.*]] = extractelement <2 x float> [[L]], i32 0 -// CHECK-NEXT: [[C:%.*]] = fptosi float [[VL]] to i32 +// CHECK-NEXT: [[C:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[VL]] to i32 // CHECK-NEXT: store i32 [[C]], ptr [[G1]], align 4 // CHECK-NEXT: [[L4:%.*]] = load <2 x float>, ptr [[VG]], align 8 // CHECK-NEXT: [[VL5:%.*]] = extractelement <2 x float> [[L4]], i32 1 -// CHECK-NEXT: [[C6:%.*]] = fptosi float [[VL5]] to i32 +// CHECK-NEXT: [[C6:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[VL5]] to i32 // CHECK-NEXT: store i32 [[C6]], ptr [[G2]], align 4 export void call5() { float2 A[1] = {{1.2,3.4}}; diff --git a/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl index 7e83e5f168538..7633fd361a274 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/InitLists.hlsl @@ -98,7 +98,7 @@ TwoFloats case2() { // CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR]], align 4 // CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[VAL_ADDR]], align 4 -// CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float +// CHECK-NEXT: [[CONV:%.*]] = sitofp reassoc nnan ninf nsz arcp afn 
i32 [[TMP0]] to float // CHECK-NEXT: store float [[CONV]], ptr [[X]], align 1 // CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 1 // CHECK-NEXT: store float 2.000000e+00, ptr [[Y]], align 1 @@ -119,12 +119,12 @@ TwoFloats case3(int Val) { // CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[TWOVALS_ADDR]], align 8 // CHECK-NEXT: [[VECEXT:%.*]] = extractelement <2 x i32> [[TMP0]], i64 0 -// CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[VECEXT]] to float +// CHECK-NEXT: [[CONV:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VECEXT]] to float // CHECK-NEXT: store float [[CONV]], ptr [[X]], align 1 // CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 1 // CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[TWOVALS_ADDR]], align 8 // CHECK-NEXT: [[VECEXT1:%.*]] = extractelement <2 x i32> [[TMP1]], i64 1 -// CHECK-NEXT: [[CONV2:%.*]] = sitofp i32 [[VECEXT1]] to float +// CHECK-NEXT: [[CONV2:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VECEXT1]] to float // CHECK-NEXT: store float [[CONV2]], ptr [[Y]], align 1 // CHECK-NEXT: ret void // @@ -162,12 +162,12 @@ TwoInts case5(int2 TwoVals) { // CHECK-NEXT: [[Z:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[AGG_RESULT]], i32 0, i32 0 // CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[TF4]], i32 0, i32 0 // CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[X]], align 1 -// CHECK-NEXT: [[CONV:%.*]] = fptosi float [[TMP0]] to i32 +// CHECK-NEXT: [[CONV:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[TMP0]] to i32 // CHECK-NEXT: store i32 [[CONV]], ptr [[Z]], align 1 // CHECK-NEXT: [[W:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOINTS]], ptr [[AGG_RESULT]], i32 0, i32 1 // CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[TF4]], i32 0, 
i32 1 // CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[Y]], align 1 -// CHECK-NEXT: [[CONV1:%.*]] = fptosi float [[TMP1]] to i32 +// CHECK-NEXT: [[CONV1:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[TMP1]] to i32 // CHECK-NEXT: store i32 [[CONV1]], ptr [[W]], align 1 // CHECK-NEXT: ret void // @@ -202,7 +202,7 @@ TwoInts case6(TwoFloats TF4) { // CHECK-NEXT: store i32 [[TMP4]], ptr [[TAILSTATE]], align 1 // CHECK-NEXT: [[HAIRCOUNT:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[AGG_RESULT]], i32 0, i32 2 // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[VAL_ADDR]], align 4 -// CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to float +// CHECK-NEXT: [[CONV:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[TMP5]] to float // CHECK-NEXT: store float [[CONV]], ptr [[HAIRCOUNT]], align 1 // CHECK-NEXT: [[EARDIRECTION:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[AGG_RESULT]], i32 0, i32 3 // CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[TF1]], i32 0, i32 0 @@ -272,7 +272,7 @@ Doggo case7(TwoInts TI1, TwoInts TI2, int Val, TwoFloats TF1, TwoFloats TF2, // CHECK-NEXT: [[COUNTER:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[AGG_RESULT]], i32 0, i32 2 // CHECK-NEXT: [[HAIRCOUNT:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[D1]], i32 0, i32 2 // CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[HAIRCOUNT]], align 1 -// CHECK-NEXT: [[CONV:%.*]] = fptosi float [[TMP5]] to i64 +// CHECK-NEXT: [[CONV:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[TMP5]] to i64 // CHECK-NEXT: store i64 [[CONV]], ptr [[COUNTER]], align 1 // CHECK-NEXT: [[LEFTDIR:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[AGG_RESULT]], i32 0, i32 3 // CHECK-NEXT: [[EARDIRECTION:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[D1]], i32 0, i32 3 @@ -428,7 +428,7 @@ AnimalBits case8(Doggo D1) { // CHECK-NEXT: [[HAIRCOUNT58:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr 
[[ARRAYINIT_ELEMENT44]], i32 0, i32 2 // CHECK-NEXT: [[COUNTER:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 2 // CHECK-NEXT: [[TMP19:%.*]] = load i64, ptr [[COUNTER]], align 1 -// CHECK-NEXT: [[CONV:%.*]] = sitofp i64 [[TMP19]] to float +// CHECK-NEXT: [[CONV:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 [[TMP19]] to float // CHECK-NEXT: store float [[CONV]], ptr [[HAIRCOUNT58]], align 1 // CHECK-NEXT: [[EARDIRECTION59:%.*]] = getelementptr inbounds nuw [[STRUCT_DOGGO]], ptr [[ARRAYINIT_ELEMENT44]], i32 0, i32 3 // CHECK-NEXT: [[LEFTDIR:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 3 @@ -563,7 +563,7 @@ AnimalBits case8(Doggo D1) { // CHECK-NEXT: [[HAIRCOUNT149:%.*]] = getelementptr inbounds nuw [[STRUCT_KITTEH]], ptr [[ARRAYINIT_ELEMENT133]], i32 0, i32 2 // CHECK-NEXT: [[COUNTER150:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 2 // CHECK-NEXT: [[TMP47:%.*]] = load i64, ptr [[COUNTER150]], align 1 -// CHECK-NEXT: [[CONV151:%.*]] = sitofp i64 [[TMP47]] to float +// CHECK-NEXT: [[CONV151:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 [[TMP47]] to float // CHECK-NEXT: store float [[CONV151]], ptr [[HAIRCOUNT149]], align 1 // CHECK-NEXT: [[CLAWS152:%.*]] = getelementptr inbounds nuw [[STRUCT_KITTEH]], ptr [[ARRAYINIT_ELEMENT133]], i32 0, i32 3 // CHECK-NEXT: [[LEFTDIR153:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 3 @@ -698,7 +698,7 @@ AnimalBits case8(Doggo D1) { // CHECK-NEXT: [[HAIRCOUNT246:%.*]] = getelementptr inbounds nuw [[STRUCT_KITTEH]], ptr [[ARRAYINIT_ELEMENT230]], i32 0, i32 2 // CHECK-NEXT: [[COUNTER247:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 2 // CHECK-NEXT: [[TMP75:%.*]] = load i64, ptr [[COUNTER247]], align 1 -// CHECK-NEXT: [[CONV248:%.*]] = sitofp i64 [[TMP75]] to float +// CHECK-NEXT: [[CONV248:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 [[TMP75]] to float 
// CHECK-NEXT: store float [[CONV248]], ptr [[HAIRCOUNT246]], align 1 // CHECK-NEXT: [[CLAWS249:%.*]] = getelementptr inbounds nuw [[STRUCT_KITTEH]], ptr [[ARRAYINIT_ELEMENT230]], i32 0, i32 3 // CHECK-NEXT: [[LEFTDIR250:%.*]] = getelementptr inbounds nuw [[STRUCT_ANIMALBITS]], ptr [[A1]], i32 0, i32 3 @@ -888,13 +888,13 @@ TwoInts case14(SlicyBits SB) { // CHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 0 // CHECK-NEXT: [[BF_LOAD:%.*]] = load i8, ptr [[SB]], align 1 // CHECK-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_LOAD]] to i32 -// CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[BF_CAST]] to float +// CHECK-NEXT: [[CONV:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[BF_CAST]] to float // CHECK-NEXT: store float [[CONV]], ptr [[X]], align 1 // CHECK-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TWOFLOATS]], ptr [[AGG_RESULT]], i32 0, i32 1 // CHECK-NEXT: [[W:%.*]] = getelementptr inbounds nuw [[STRUCT_SLICYBITS]], ptr [[SB]], i32 0, i32 1 // CHECK-NEXT: [[BF_LOAD1:%.*]] = load i8, ptr [[W]], align 1 // CHECK-NEXT: [[BF_CAST2:%.*]] = sext i8 [[BF_LOAD1]] to i32 -// CHECK-NEXT: [[CONV3:%.*]] = sitofp i32 [[BF_CAST2]] to float +// CHECK-NEXT: [[CONV3:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[BF_CAST2]] to float // CHECK-NEXT: store float [[CONV3]], ptr [[Y]], align 1 // CHECK-NEXT: ret void // diff --git a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl index d0ba8f447b732..6f42b5611ce41 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/OutputArguments.hlsl @@ -13,14 +13,14 @@ void trunc_Param(inout int X) {} // CHECK: [[F:%.*]] = alloca float // CHECK: [[ArgTmp:%.*]] = alloca i32 // CHECK: [[FVal:%.*]] = load float, ptr {{.*}} -// CHECK: [[IVal:%.*]] = fptosi float [[FVal]] to i32 +// CHECK: [[IVal:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[FVal]] to i32 // 
CHECK: store i32 [[IVal]], ptr [[ArgTmp]] // CHECK: call void {{.*}}trunc_Param{{.*}}(ptr noalias noundef nonnull align 4 dereferenceable(4) [[ArgTmp]]) // CHECK: [[IRet:%.*]] = load i32, ptr [[ArgTmp]] -// CHECK: [[FRet:%.*]] = sitofp i32 [[IRet]] to float +// CHECK: [[FRet:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[IRet]] to float // CHECK: store float [[FRet]], ptr [[F]] -// OPT: [[IVal:%.*]] = fptosi float {{.*}} to i32 -// OPT: [[FVal:%.*]] = sitofp i32 [[IVal]] to float +// OPT: [[IVal:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float {{.*}} to i32 +// OPT: [[FVal:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[IVal]] to float // OPT: ret float [[FVal]] export float case1(float F) { trunc_Param(F); @@ -202,15 +202,15 @@ void trunc_vec(inout int3 V) {} // CHECK: [[V:%.*]] = alloca <3 x float> // CHECK: [[Tmp:%.*]] = alloca <3 x i32> // CHECK: [[FVal:%.*]] = load <3 x float>, ptr [[V]] -// CHECK: [[IVal:%.*]] = fptosi <3 x float> [[FVal]] to <3 x i32> +// CHECK: [[IVal:%.*]] = fptosi reassoc nnan ninf nsz arcp afn <3 x float> [[FVal]] to <3 x i32> // CHECK: store <3 x i32> [[IVal]], ptr [[Tmp]] // CHECK: call void {{.*}}trunc_vec{{.*}}(ptr noalias noundef nonnull align 16 dereferenceable(16) [[Tmp]]) // CHECK: [[IRet:%.*]] = load <3 x i32>, ptr [[Tmp]] -// CHECK: [[FRet:%.*]] = sitofp <3 x i32> [[IRet]] to <3 x float> +// CHECK: [[FRet:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i32> [[IRet]] to <3 x float> // CHECK: store <3 x float> [[FRet]], ptr [[V]] -// OPT: [[IVal:%.*]] = fptosi <3 x float> {{.*}} to <3 x i32> -// OPT: [[FVal:%.*]] = sitofp <3 x i32> [[IVal]] to <3 x float> +// OPT: [[IVal:%.*]] = fptosi reassoc nnan ninf nsz arcp afn <3 x float> {{.*}} to <3 x i32> +// OPT: [[FVal:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i32> [[IVal]] to <3 x float> // OPT: ret <3 x float> [[FVal]] export float3 case8(float3 V) { diff --git a/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl 
b/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl index 81b9f5b28cc7e..e4229951e58b5 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl @@ -32,7 +32,7 @@ export void call0() { // CHECK-NEXT: [[VL:%.*]] = extractelement <2 x i32> [[L]], i64 0 // CHECK-NEXT: store i32 [[VL]], ptr [[G1]], align 4 // CHECK-NEXT: [[VL2:%.*]] = extractelement <2 x i32> [[L]], i64 1 -// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[VL2]] to float +// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[VL2]] to float // CHECK-NEXT: store float [[C]], ptr [[G2]], align 4 export void call1() { int2 A = {1,2}; @@ -54,7 +54,7 @@ export void call1() { // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G3]], align 4 // CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4 // CHECK-NEXT: [[L4:%.*]] = load i32, ptr [[G4]], align 4 -// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L4]] to float +// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[L4]] to float // CHECK-NEXT: store float [[C]], ptr [[G2]], align 4 export void call2() { int A[2] = {1,2}; @@ -104,7 +104,7 @@ export void call6() { // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G3]], align 4 // CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4 // CHECK-NEXT: [[L4:%.*]] = load i32, ptr [[G4]], align 4 -// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L4]] to float +// CHECK-NEXT: [[C:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 [[L4]] to float // CHECK-NEXT: store float [[C]], ptr [[G2]], align 4 export void call7() { int A[2] = {1,2}; @@ -132,7 +132,7 @@ struct T { // CHECK-NEXT: %load = load i32, ptr %gep2, align 4 // CHECK-NEXT: store i32 %load, ptr %gep, align 4 // CHECK-NEXT: %load5 = load i32, ptr %gep3, align 4 -// CHECK-NEXT: %conv = sitofp i32 %load5 to float +// CHECK-NEXT: %conv = sitofp reassoc nnan ninf nsz arcp afn i32 %load5 to float // CHECK-NEXT: store float %conv, ptr %gep1, align 4 export void call8() { T t = 
{1,2,3}; diff --git a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl index 253b38a7ca072..a5843b0a95642 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl @@ -40,7 +40,7 @@ struct S { // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G1]], align 4 // CHECK-NEXT: [[C:%.*]] = insertelement <2 x i32> [[B]], i32 [[L]], i64 0 // CHECK-NEXT: [[L2:%.*]] = load float, ptr [[G2]], align 4 -// CHECK-NEXT: [[D:%.*]] = fptosi float [[L2]] to i32 +// CHECK-NEXT: [[D:%.*]] = fptosi reassoc nnan ninf nsz arcp afn float [[L2]] to i32 // CHECK-NEXT: [[E:%.*]] = insertelement <2 x i32> [[C]], i32 [[D]], i64 1 // CHECK-NEXT: store <2 x i32> [[E]], ptr [[A]], align 8 export void call3() { diff --git a/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl index 6770efefe94fe..3e8c06ed074af 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl @@ -43,7 +43,7 @@ void d4_to_f2() { // CHECK: [[i2:%.*]] = alloca <2 x i32> // CHECK: store <2 x float> splat (float 4.000000e+00), ptr [[f2]] // CHECK: [[vecf2:%.*]] = load <2 x float>, ptr [[f2]] -// CHECK: [[veci2:%.*]] = fptosi <2 x float> [[vecf2]] to <2 x i32> +// CHECK: [[veci2:%.*]] = fptosi reassoc nnan ninf nsz arcp afn <2 x float> [[vecf2]] to <2 x i32> // CHECK: store <2 x i32> [[veci2]], ptr [[i2]] void f2_to_i2() { vector f2 = 4.0; @@ -55,7 +55,7 @@ void f2_to_i2() { // CHECK: [[i2:%.*]] = alloca <2 x i32> // CHECK: store <4 x double> splat (double 5.000000e+00), ptr [[d4]] // CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]] -// CHECK: [[veci4:%.*]] = fptosi <4 x double> [[vecd4]] to <4 x i32> +// CHECK: [[veci4:%.*]] = fptosi reassoc nnan ninf nsz arcp afn <4 x 
double> [[vecd4]] to <4 x i32> // CHECK: [[veci2:%.*]] = shufflevector <4 x i32> [[veci4]], <4 x i32> poison, <2 x i32> // CHECK: store <2 x i32> [[veci2]], ptr [[i2]] void d4_to_i2() { @@ -68,7 +68,7 @@ void d4_to_i2() { // CHECK: [[l4:%.*]] = alloca <4 x i64> // CHECK: store <4 x double> splat (double 6.000000e+00), ptr [[d4]] // CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]] -// CHECK: [[vecl4:%.*]] = fptosi <4 x double> [[vecd4]] to <4 x i64> +// CHECK: [[vecl4:%.*]] = fptosi reassoc nnan ninf nsz arcp afn <4 x double> [[vecd4]] to <4 x i64> // CHECK: store <4 x i64> [[vecl4]], ptr [[l4]] void d4_to_l4() { vector d4 = 6.0; diff --git a/clang/test/CodeGenHLSL/builtins/dot2add.hlsl b/clang/test/CodeGenHLSL/builtins/dot2add.hlsl index e80ffba2bcfdb..00897f6ab9dd9 100644 --- a/clang/test/CodeGenHLSL/builtins/dot2add.hlsl +++ b/clang/test/CodeGenHLSL/builtins/dot2add.hlsl @@ -106,8 +106,8 @@ float test_double_arg1_arg2_type(double2 p1, double2 p2, float p3) { // CHECK-LABEL: define {{.*}}test_int16_arg1_arg2_type float test_int16_arg1_arg2_type(int16_t2 p1, int16_t2 p2, float p3) { - // CHECK: %conv = sitofp <2 x i16> %{{.*}} to <2 x half> - // CHECK: %conv1 = sitofp <2 x i16> %{{.*}} to <2 x half> + // CHECK: %conv = sitofp reassoc nnan ninf nsz arcp afn <2 x i16> %{{.*}} to <2 x half> + // CHECK: %conv1 = sitofp reassoc nnan ninf nsz arcp afn <2 x i16> %{{.*}} to <2 x half> // CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}) // CHECK-SPIRV: %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float // CHECK-SPIRV: %[[C:.*]] = load float, ptr %c.addr.i, align 4 @@ -123,8 +123,8 @@ float test_int16_arg1_arg2_type(int16_t2 p1, int16_t2 p2, float p3) { // CHECK-LABEL: define {{.*}}test_int32_arg1_arg2_type float test_int32_arg1_arg2_type(int32_t2 p1, int32_t2 p2, float p3) { - // CHECK: %conv = sitofp <2 x i32> %{{.*}} to <2 x half> - // CHECK: %conv1 = sitofp <2 x 
i32> %{{.*}} to <2 x half> + // CHECK: %conv = sitofp reassoc nnan ninf nsz arcp afn <2 x i32> %{{.*}} to <2 x half> + // CHECK: %conv1 = sitofp reassoc nnan ninf nsz arcp afn <2 x i32> %{{.*}} to <2 x half> // CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}) // CHECK-SPIRV: %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float // CHECK-SPIRV: %[[C:.*]] = load float, ptr %c.addr.i, align 4 @@ -140,8 +140,8 @@ float test_int32_arg1_arg2_type(int32_t2 p1, int32_t2 p2, float p3) { // CHECK-LABEL: define {{.*}}test_int64_arg1_arg2_type float test_int64_arg1_arg2_type(int64_t2 p1, int64_t2 p2, float p3) { - // CHECK: %conv = sitofp <2 x i64> %{{.*}} to <2 x half> - // CHECK: %conv1 = sitofp <2 x i64> %{{.*}} to <2 x half> + // CHECK: %conv = sitofp reassoc nnan ninf nsz arcp afn <2 x i64> %{{.*}} to <2 x half> + // CHECK: %conv1 = sitofp reassoc nnan ninf nsz arcp afn <2 x i64> %{{.*}} to <2 x half> // CHECK-SPIRV: %[[MUL:.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.spv.fdot.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}) // CHECK-SPIRV: %[[CONV:.*]] = fpext reassoc nnan ninf nsz arcp afn half %[[MUL]] to float // CHECK-SPIRV: %[[C:.*]] = load float, ptr %c.addr.i, align 4 diff --git a/clang/test/CodeGenHLSL/builtins/lerp-overloads.hlsl b/clang/test/CodeGenHLSL/builtins/lerp-overloads.hlsl index 3b13e43873c77..589f18e67deb8 100644 --- a/clang/test/CodeGenHLSL/builtins/lerp-overloads.hlsl +++ b/clang/test/CodeGenHLSL/builtins/lerp-overloads.hlsl @@ -36,33 +36,33 @@ float3 test_lerp_double3(double3 p0) { return lerp(p0, p0, p0); } float4 test_lerp_double4(double4 p0) { return lerp(p0, p0, p0); } // CHECK: define [[FNATTRS]] float @_Z13test_lerp_inti( -// CHECK: [[CONV0:%.*]] = sitofp i32 %{{.*}} to float -// CHECK: [[CONV1:%.*]] = sitofp i32 %{{.*}} to float -// CHECK: [[CONV2:%.*]] = sitofp i32 %{{.*}} to float +// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf 
nsz arcp afn i32 %{{.*}} to float +// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 %{{.*}} to float +// CHECK: [[CONV2:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 %{{.*}} to float // CHECK: [[LERP:%.*]] = call {{.*}} float @llvm.[[TARGET]].lerp.f32(float [[CONV0]], float [[CONV1]], float [[CONV2]]) // CHECK: ret float [[LERP]] float test_lerp_int(int p0) { return lerp(p0, p0, p0); } // CHECK: define [[FNATTRS]] <2 x float> @_Z14test_lerp_int2Dv2_i( -// CHECK: [[CONV0:%.*]] = sitofp <2 x i32> %{{.*}} to <2 x float> -// CHECK: [[CONV1:%.*]] = sitofp <2 x i32> %{{.*}} to <2 x float> -// CHECK: [[CONV2:%.*]] = sitofp <2 x i32> %{{.*}} to <2 x float> +// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <2 x i32> %{{.*}} to <2 x float> +// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <2 x i32> %{{.*}} to <2 x float> +// CHECK: [[CONV2:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <2 x i32> %{{.*}} to <2 x float> // CHECK: [[LERP:%.*]] = call {{.*}} <2 x float> @llvm.[[TARGET]].lerp.v2f32(<2 x float> [[CONV0]], <2 x float> [[CONV1]], <2 x float> [[CONV2]]) // CHECK: ret <2 x float> [[LERP]] float2 test_lerp_int2(int2 p0) { return lerp(p0, p0, p0); } // CHECK: define [[FNATTRS]] <3 x float> @_Z14test_lerp_int3Dv3_i( -// CHECK: [[CONV0:%.*]] = sitofp <3 x i32> %{{.*}} to <3 x float> -// CHECK: [[CONV1:%.*]] = sitofp <3 x i32> %{{.*}} to <3 x float> -// CHECK: [[CONV2:%.*]] = sitofp <3 x i32> %{{.*}} to <3 x float> +// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i32> %{{.*}} to <3 x float> +// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i32> %{{.*}} to <3 x float> +// CHECK: [[CONV2:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i32> %{{.*}} to <3 x float> // CHECK: [[LERP:%.*]] = call {{.*}} <3 x float> @llvm.[[TARGET]].lerp.v3f32(<3 x float> [[CONV0]], <3 x float> [[CONV1]], <3 x float> [[CONV2]]) // CHECK: ret <3 x float> [[LERP]] float3 test_lerp_int3(int3 p0) { return 
lerp(p0, p0, p0); } // CHECK: define [[FNATTRS]] <4 x float> @_Z14test_lerp_int4Dv4_i( -// CHECK: [[CONV0:%.*]] = sitofp <4 x i32> %{{.*}} to <4 x float> -// CHECK: [[CONV1:%.*]] = sitofp <4 x i32> %{{.*}} to <4 x float> -// CHECK: [[CONV2:%.*]] = sitofp <4 x i32> %{{.*}} to <4 x float> +// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <4 x i32> %{{.*}} to <4 x float> +// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <4 x i32> %{{.*}} to <4 x float> +// CHECK: [[CONV2:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <4 x i32> %{{.*}} to <4 x float> // CHECK: [[LERP:%.*]] = call {{.*}} <4 x float> @llvm.[[TARGET]].lerp.v4f32(<4 x float> [[CONV0]], <4 x float> [[CONV1]], <4 x float> [[CONV2]]) // CHECK: ret <4 x float> [[LERP]] float4 test_lerp_int4(int4 p0) { return lerp(p0, p0, p0); } @@ -100,33 +100,33 @@ float3 test_lerp_uint3(uint3 p0) { return lerp(p0, p0, p0); } float4 test_lerp_uint4(uint4 p0) { return lerp(p0, p0, p0); } // CHECK: define [[FNATTRS]] float @_Z17test_lerp_int64_tl( -// CHECK: [[CONV0:%.*]] = sitofp i64 %{{.*}} to float -// CHECK: [[CONV1:%.*]] = sitofp i64 %{{.*}} to float -// CHECK: [[CONV2:%.*]] = sitofp i64 %{{.*}} to float +// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 %{{.*}} to float +// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 %{{.*}} to float +// CHECK: [[CONV2:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 %{{.*}} to float // CHECK: [[LERP:%.*]] = call {{.*}} float @llvm.[[TARGET]].lerp.f32(float [[CONV0]], float [[CONV1]], float [[CONV2]]) // CHECK: ret float [[LERP]] float test_lerp_int64_t(int64_t p0) { return lerp(p0, p0, p0); } // CHECK: define [[FNATTRS]] <2 x float> @_Z18test_lerp_int64_t2Dv2_l( -// CHECK: [[CONV0:%.*]] = sitofp <2 x i64> %{{.*}} to <2 x float> -// CHECK: [[CONV1:%.*]] = sitofp <2 x i64> %{{.*}} to <2 x float> -// CHECK: [[CONV2:%.*]] = sitofp <2 x i64> %{{.*}} to <2 x float> +// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp 
afn <2 x i64> %{{.*}} to <2 x float> +// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <2 x i64> %{{.*}} to <2 x float> +// CHECK: [[CONV2:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <2 x i64> %{{.*}} to <2 x float> // CHECK: [[LERP:%.*]] = call {{.*}} <2 x float> @llvm.[[TARGET]].lerp.v2f32(<2 x float> [[CONV0]], <2 x float> [[CONV1]], <2 x float> [[CONV2]]) // CHECK: ret <2 x float> [[LERP]] float2 test_lerp_int64_t2(int64_t2 p0) { return lerp(p0, p0, p0); } // CHECK: define [[FNATTRS]] <3 x float> @_Z18test_lerp_int64_t3Dv3_l( -// CHECK: [[CONV0:%.*]] = sitofp <3 x i64> %{{.*}} to <3 x float> -// CHECK: [[CONV1:%.*]] = sitofp <3 x i64> %{{.*}} to <3 x float> -// CHECK: [[CONV2:%.*]] = sitofp <3 x i64> %{{.*}} to <3 x float> +// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i64> %{{.*}} to <3 x float> +// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i64> %{{.*}} to <3 x float> +// CHECK: [[CONV2:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i64> %{{.*}} to <3 x float> // CHECK: [[LERP:%.*]] = call {{.*}} <3 x float> @llvm.[[TARGET]].lerp.v3f32(<3 x float> [[CONV0]], <3 x float> [[CONV1]], <3 x float> [[CONV2]]) // CHECK: ret <3 x float> [[LERP]] float3 test_lerp_int64_t3(int64_t3 p0) { return lerp(p0, p0, p0); } // CHECK: define [[FNATTRS]] <4 x float> @_Z18test_lerp_int64_t4Dv4_l( -// CHECK: [[CONV0:%.*]] = sitofp <4 x i64> %{{.*}} to <4 x float> -// CHECK: [[CONV1:%.*]] = sitofp <4 x i64> %{{.*}} to <4 x float> -// CHECK: [[CONV2:%.*]] = sitofp <4 x i64> %{{.*}} to <4 x float> +// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <4 x i64> %{{.*}} to <4 x float> +// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <4 x i64> %{{.*}} to <4 x float> +// CHECK: [[CONV2:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <4 x i64> %{{.*}} to <4 x float> // CHECK: [[LERP:%.*]] = call {{.*}} <4 x float> @llvm.[[TARGET]].lerp.v4f32(<4 x float> [[CONV0]], <4 x float> [[CONV1]], <4 x float> 
[[CONV2]]) // CHECK: ret <4 x float> [[LERP]] float4 test_lerp_int64_t4(int64_t4 p0) { return lerp(p0, p0, p0); } diff --git a/clang/test/CodeGenHLSL/builtins/pow-overloads.hlsl b/clang/test/CodeGenHLSL/builtins/pow-overloads.hlsl index 0d1f3d3546a33..bcf8997196d2e 100644 --- a/clang/test/CodeGenHLSL/builtins/pow-overloads.hlsl +++ b/clang/test/CodeGenHLSL/builtins/pow-overloads.hlsl @@ -28,26 +28,26 @@ float3 test_pow_double3(double3 p0, double3 p1) { return pow(p0, p1); } float4 test_pow_double4(double4 p0, double4 p1) { return pow(p0, p1); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) float {{.*}}test_pow_int -// CHECK: [[CONV0:%.*]] = sitofp i32 %{{.*}} to float -// CHECK: [[CONV1:%.*]] = sitofp i32 %{{.*}} to float +// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 %{{.*}} to float +// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i32 %{{.*}} to float // CHECK: [[POW:%.*]] = call [[FLOATATTRS]] noundef float @llvm.pow.f32(float [[CONV0]], float [[CONV1]]) // CHECK: ret float [[POW]] float test_pow_int(int p0, int p1) { return pow(p0, p1); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <2 x float> {{.*}}test_pow_int2 -// CHECK: [[CONV0:%.*]] = sitofp <2 x i32> %{{.*}} to <2 x float> -// CHECK: [[CONV1:%.*]] = sitofp <2 x i32> %{{.*}} to <2 x float> +// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <2 x i32> %{{.*}} to <2 x float> +// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <2 x i32> %{{.*}} to <2 x float> // CHECK: [[POW:%.*]] = call [[FLOATATTRS]] noundef <2 x float> @llvm.pow.v2f32(<2 x float> [[CONV0]], <2 x float> [[CONV1]]) // CHECK: ret <2 x float> [[POW]] float2 test_pow_int2(int2 p0, int2 p1) { return pow(p0, p1); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <3 x float> {{.*}}test_pow_int3 -// CHECK: [[CONV0:%.*]] = sitofp <3 x i32> %{{.*}} to <3 x float> -// CHECK: [[CONV1:%.*]] = sitofp <3 x i32> %{{.*}} to <3 x float> +// CHECK: [[CONV0:%.*]] = 
sitofp reassoc nnan ninf nsz arcp afn <3 x i32> %{{.*}} to <3 x float> +// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i32> %{{.*}} to <3 x float> // CHECK: [[POW:%.*]] = call [[FLOATATTRS]] noundef <3 x float> @llvm.pow.v3f32(<3 x float> [[CONV0]], <3 x float> [[CONV1]]) // CHECK: ret <3 x float> [[POW]] float3 test_pow_int3(int3 p0, int3 p1) { return pow(p0, p1); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> {{.*}}test_pow_int4 -// CHECK: [[CONV0:%.*]] = sitofp <4 x i32> %{{.*}} to <4 x float> -// CHECK: [[CONV1:%.*]] = sitofp <4 x i32> %{{.*}} to <4 x float> +// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <4 x i32> %{{.*}} to <4 x float> +// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <4 x i32> %{{.*}} to <4 x float> // CHECK: [[POW:%.*]] = call [[FLOATATTRS]] noundef <4 x float> @llvm.pow.v4f32(<4 x float> [[CONV0]], <4 x float> [[CONV1]]) // CHECK: ret <4 x float> [[POW]] float4 test_pow_int4(int4 p0, int4 p1) { return pow(p0, p1); } @@ -78,26 +78,26 @@ float3 test_pow_uint3(uint3 p0, uint3 p1) { return pow(p0, p1); } float4 test_pow_uint4(uint4 p0, uint4 p1) { return pow(p0, p1); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) float {{.*}}test_pow_int64_t -// CHECK: [[CONV0:%.*]] = sitofp i64 %{{.*}} to float -// CHECK: [[CONV1:%.*]] = sitofp i64 %{{.*}} to float +// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 %{{.*}} to float +// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn i64 %{{.*}} to float // CHECK: [[POW:%.*]] = call [[FLOATATTRS]] noundef float @llvm.pow.f32(float [[CONV0]], float [[CONV1]]) // CHECK: ret float [[POW]] float test_pow_int64_t(int64_t p0, int64_t p1) { return pow(p0, p1); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <2 x float> {{.*}}test_pow_int64_t2 -// CHECK: [[CONV0:%.*]] = sitofp <2 x i64> %{{.*}} to <2 x float> -// CHECK: [[CONV1:%.*]] = sitofp <2 x i64> %{{.*}} to <2 x float> +// CHECK: 
[[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <2 x i64> %{{.*}} to <2 x float> +// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <2 x i64> %{{.*}} to <2 x float> // CHECK: [[POW:%.*]] = call [[FLOATATTRS]] noundef <2 x float> @llvm.pow.v2f32(<2 x float> [[CONV0]], <2 x float> [[CONV1]]) // CHECK: ret <2 x float> [[POW]] float2 test_pow_int64_t2(int64_t2 p0, int64_t2 p1) { return pow(p0, p1); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <3 x float> {{.*}}test_pow_int64_t3 -// CHECK: [[CONV0:%.*]] = sitofp <3 x i64> %{{.*}} to <3 x float> -// CHECK: [[CONV1:%.*]] = sitofp <3 x i64> %{{.*}} to <3 x float> +// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i64> %{{.*}} to <3 x float> +// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <3 x i64> %{{.*}} to <3 x float> // CHECK: [[POW:%.*]] = call [[FLOATATTRS]] noundef <3 x float> @llvm.pow.v3f32(<3 x float> [[CONV0]], <3 x float> [[CONV1]]) // CHECK: ret <3 x float> [[POW]] float3 test_pow_int64_t3(int64_t3 p0, int64_t3 p1) { return pow(p0, p1); } // CHECK-LABEL: define hidden noundef nofpclass(nan inf) <4 x float> {{.*}}test_pow_int64_t4 -// CHECK: [[CONV0:%.*]] = sitofp <4 x i64> %{{.*}} to <4 x float> -// CHECK: [[CONV1:%.*]] = sitofp <4 x i64> %{{.*}} to <4 x float> +// CHECK: [[CONV0:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <4 x i64> %{{.*}} to <4 x float> +// CHECK: [[CONV1:%.*]] = sitofp reassoc nnan ninf nsz arcp afn <4 x i64> %{{.*}} to <4 x float> // CHECK: [[POW:%.*]] = call [[FLOATATTRS]] noundef <4 x float> @llvm.pow.v4f32(<4 x float> [[CONV0]], <4 x float> [[CONV1]]) // CHECK: ret <4 x float> [[POW]] float4 test_pow_int64_t4(int64_t4 p0, int64_t4 p1) { return pow(p0, p1); } diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip index b88aa3cc18207..39c24f1c0e606 100644 --- a/clang/test/Headers/__clang_hip_math.hip +++ b/clang/test/Headers/__clang_hip_math.hip @@ -3518,35 +3518,35 @@ extern "C" 
__device__ double test_lgamma(double x) { // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X]]) -// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 +// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64 // DEFAULT-NEXT: ret i64 [[CONV_I]] // // FINITEONLY-LABEL: define dso_local i64 @test_llrintf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.rint.f32(float nofpclass(nan inf) [[X]]) -// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 +// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi nnan ninf contract float [[TMP0]] to i64 // FINITEONLY-NEXT: ret i64 [[CONV_I]] // // APPROX-LABEL: define dso_local i64 @test_llrintf( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X]]) -// APPROX-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 +// APPROX-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64 // APPROX-NEXT: ret i64 [[CONV_I]] // // NCRDIV-LABEL: define dso_local i64 @test_llrintf( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X]]) -// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 +// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64 // NCRDIV-NEXT: ret i64 [[CONV_I]] // // AMDGCNSPIRV-LABEL: define spir_func i64 @test_llrintf( // AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { // AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float 
@llvm.rint.f32(float [[X]]) -// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 +// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64 // AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]] // extern "C" __device__ long long int test_llrintf(float x) { @@ -3557,35 +3557,35 @@ extern "C" __device__ long long int test_llrintf(float x) { // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X]]) -// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 +// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64 // DEFAULT-NEXT: ret i64 [[CONV_I]] // // FINITEONLY-LABEL: define dso_local i64 @test_llrint( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract double @llvm.rint.f64(double nofpclass(nan inf) [[X]]) -// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 +// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi nnan ninf contract double [[TMP0]] to i64 // FINITEONLY-NEXT: ret i64 [[CONV_I]] // // APPROX-LABEL: define dso_local i64 @test_llrint( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X]]) -// APPROX-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 +// APPROX-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64 // APPROX-NEXT: ret i64 [[CONV_I]] // // NCRDIV-LABEL: define dso_local i64 @test_llrint( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X]]) -// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 +// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi 
contract double [[TMP0]] to i64 // NCRDIV-NEXT: ret i64 [[CONV_I]] // // AMDGCNSPIRV-LABEL: define spir_func i64 @test_llrint( // AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { // AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.rint.f64(double [[X]]) -// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 +// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64 // AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]] // extern "C" __device__ long long int test_llrint(double x) { @@ -3596,35 +3596,35 @@ extern "C" __device__ long long int test_llrint(double x) { // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X]]) -// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 +// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64 // DEFAULT-NEXT: ret i64 [[CONV_I]] // // FINITEONLY-LABEL: define dso_local i64 @test_llroundf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.round.f32(float nofpclass(nan inf) [[X]]) -// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 +// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi nnan ninf contract float [[TMP0]] to i64 // FINITEONLY-NEXT: ret i64 [[CONV_I]] // // APPROX-LABEL: define dso_local i64 @test_llroundf( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X]]) -// APPROX-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 +// APPROX-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64 // APPROX-NEXT: ret i64 [[CONV_I]] // // NCRDIV-LABEL: define 
dso_local i64 @test_llroundf( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X]]) -// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 +// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64 // NCRDIV-NEXT: ret i64 [[CONV_I]] // // AMDGCNSPIRV-LABEL: define spir_func i64 @test_llroundf( // AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { // AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.round.f32(float [[X]]) -// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 +// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64 // AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]] // extern "C" __device__ long long int test_llroundf(float x) { @@ -3635,35 +3635,35 @@ extern "C" __device__ long long int test_llroundf(float x) { // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X]]) -// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 +// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64 // DEFAULT-NEXT: ret i64 [[CONV_I]] // // FINITEONLY-LABEL: define dso_local i64 @test_llround( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract double @llvm.round.f64(double nofpclass(nan inf) [[X]]) -// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 +// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi nnan ninf contract double [[TMP0]] to i64 // FINITEONLY-NEXT: ret i64 [[CONV_I]] // // APPROX-LABEL: define dso_local i64 @test_llround( // APPROX-SAME: double noundef [[X:%.*]]) 
local_unnamed_addr #[[ATTR3]] { // APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X]]) -// APPROX-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 +// APPROX-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64 // APPROX-NEXT: ret i64 [[CONV_I]] // // NCRDIV-LABEL: define dso_local i64 @test_llround( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X]]) -// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 +// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64 // NCRDIV-NEXT: ret i64 [[CONV_I]] // // AMDGCNSPIRV-LABEL: define spir_func i64 @test_llround( // AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { // AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.round.f64(double [[X]]) -// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 +// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64 // AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]] // extern "C" __device__ long long int test_llround(double x) { @@ -3980,35 +3980,35 @@ extern "C" __device__ float test_logf(float x) { // DEFAULT-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X]]) -// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 +// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64 // DEFAULT-NEXT: ret i64 [[CONV_I]] // // FINITEONLY-LABEL: define dso_local i64 @test_lrintf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.rint.f32(float 
nofpclass(nan inf) [[X]]) -// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 +// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi nnan ninf contract float [[TMP0]] to i64 // FINITEONLY-NEXT: ret i64 [[CONV_I]] // // APPROX-LABEL: define dso_local i64 @test_lrintf( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X]]) -// APPROX-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 +// APPROX-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64 // APPROX-NEXT: ret i64 [[CONV_I]] // // NCRDIV-LABEL: define dso_local i64 @test_lrintf( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.rint.f32(float [[X]]) -// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 +// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64 // NCRDIV-NEXT: ret i64 [[CONV_I]] // // AMDGCNSPIRV-LABEL: define spir_func i64 @test_lrintf( // AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { // AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.rint.f32(float [[X]]) -// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 +// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64 // AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]] // extern "C" __device__ long int test_lrintf(float x) { @@ -4019,35 +4019,35 @@ extern "C" __device__ long int test_lrintf(float x) { // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X]]) -// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 +// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64 // DEFAULT-NEXT: 
ret i64 [[CONV_I]] // // FINITEONLY-LABEL: define dso_local i64 @test_lrint( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract double @llvm.rint.f64(double nofpclass(nan inf) [[X]]) -// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 +// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi nnan ninf contract double [[TMP0]] to i64 // FINITEONLY-NEXT: ret i64 [[CONV_I]] // // APPROX-LABEL: define dso_local i64 @test_lrint( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X]]) -// APPROX-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 +// APPROX-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64 // APPROX-NEXT: ret i64 [[CONV_I]] // // NCRDIV-LABEL: define dso_local i64 @test_lrint( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.rint.f64(double [[X]]) -// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 +// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64 // NCRDIV-NEXT: ret i64 [[CONV_I]] // // AMDGCNSPIRV-LABEL: define spir_func i64 @test_lrint( // AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { // AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.rint.f64(double [[X]]) -// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 +// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64 // AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]] // extern "C" __device__ long int test_lrint(double x) { @@ -4058,35 +4058,35 @@ extern "C" __device__ long int test_lrint(double x) { // DEFAULT-SAME: float noundef 
[[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X]]) -// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 +// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64 // DEFAULT-NEXT: ret i64 [[CONV_I]] // // FINITEONLY-LABEL: define dso_local i64 @test_lroundf( // FINITEONLY-SAME: float noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.round.f32(float nofpclass(nan inf) [[X]]) -// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 +// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi nnan ninf contract float [[TMP0]] to i64 // FINITEONLY-NEXT: ret i64 [[CONV_I]] // // APPROX-LABEL: define dso_local i64 @test_lroundf( // APPROX-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X]]) -// APPROX-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 +// APPROX-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64 // APPROX-NEXT: ret i64 [[CONV_I]] // // NCRDIV-LABEL: define dso_local i64 @test_lroundf( // NCRDIV-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract float @llvm.round.f32(float [[X]]) -// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi float [[TMP0]] to i64 +// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64 // NCRDIV-NEXT: ret i64 [[CONV_I]] // // AMDGCNSPIRV-LABEL: define spir_func i64 @test_lroundf( // AMDGCNSPIRV-SAME: float noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { // AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) float @llvm.round.f32(float [[X]]) -// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi 
float [[TMP0]] to i64 +// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi contract float [[TMP0]] to i64 // AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]] // extern "C" __device__ long int test_lroundf(float x) { @@ -4097,35 +4097,35 @@ extern "C" __device__ long int test_lroundf(float x) { // DEFAULT-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // DEFAULT-NEXT: [[ENTRY:.*:]] // DEFAULT-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X]]) -// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 +// DEFAULT-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64 // DEFAULT-NEXT: ret i64 [[CONV_I]] // // FINITEONLY-LABEL: define dso_local i64 @test_lround( // FINITEONLY-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // FINITEONLY-NEXT: [[ENTRY:.*:]] // FINITEONLY-NEXT: [[TMP0:%.*]] = tail call nnan ninf contract double @llvm.round.f64(double nofpclass(nan inf) [[X]]) -// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 +// FINITEONLY-NEXT: [[CONV_I:%.*]] = fptosi nnan ninf contract double [[TMP0]] to i64 // FINITEONLY-NEXT: ret i64 [[CONV_I]] // // APPROX-LABEL: define dso_local i64 @test_lround( // APPROX-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // APPROX-NEXT: [[ENTRY:.*:]] // APPROX-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X]]) -// APPROX-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 +// APPROX-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64 // APPROX-NEXT: ret i64 [[CONV_I]] // // NCRDIV-LABEL: define dso_local i64 @test_lround( // NCRDIV-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR3]] { // NCRDIV-NEXT: [[ENTRY:.*:]] // NCRDIV-NEXT: [[TMP0:%.*]] = tail call contract double @llvm.round.f64(double [[X]]) -// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 +// NCRDIV-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64 // NCRDIV-NEXT: ret i64 [[CONV_I]] // // 
AMDGCNSPIRV-LABEL: define spir_func i64 @test_lround( // AMDGCNSPIRV-SAME: double noundef [[X:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR3]] { // AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] // AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = tail call contract addrspace(4) double @llvm.round.f64(double [[X]]) -// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi double [[TMP0]] to i64 +// AMDGCNSPIRV-NEXT: [[CONV_I:%.*]] = fptosi contract double [[TMP0]] to i64 // AMDGCNSPIRV-NEXT: ret i64 [[CONV_I]] // extern "C" __device__ long int test_lround(double x) { diff --git a/clang/test/Headers/openmp_device_math_isnan.cpp b/clang/test/Headers/openmp_device_math_isnan.cpp index 3fd98813f2480..bc885982e84f5 100644 --- a/clang/test/Headers/openmp_device_math_isnan.cpp +++ b/clang/test/Headers/openmp_device_math_isnan.cpp @@ -29,7 +29,7 @@ double math(float f, double d) { double r = 0; // INT_RETURN: call noundef i32 @__nv_isnanf(float // AMD_INT_RETURN_SAFE: call i1 @llvm.is.fpclass.f32(float{{.*}}, i32 3) - // AMD_INT_RETURN_FAST: sitofp i32 {{.*}} to double + // AMD_INT_RETURN_FAST: sitofp fast i32 {{.*}} to double // SPIRV_INT_RETURN: call spir_func noundef i32 @_Z5isnanf(float // BOOL_RETURN: call noundef i32 @__nv_isnanf(float // SPIRV_BOOL_RETURN: call spir_func noundef zeroext i1 @_Z5isnanf(float @@ -39,7 +39,7 @@ double math(float f, double d) { // INT_RETURN: call noundef i32 @__nv_isnand(double // SPIRV_INT_RETURN: call spir_func noundef i32 @_Z5isnand(double // AMD_INT_RETURN_SAFE: call i1 @llvm.is.fpclass.f64(double{{.*}}, i32 3) - // AMD_INT_RETURN_FAST: sitofp i32 {{.*}} to double + // AMD_INT_RETURN_FAST: sitofp fast i32 {{.*}} to double // BOOL_RETURN: call noundef i32 @__nv_isnand(double // SPIRV_BOOL_RETURN: call spir_func noundef zeroext i1 @_Z5isnand(double // AMD_BOOL_RETURN_SAFE: call i1 @llvm.is.fpclass.f64(double{{.*}}, i32 3) diff --git a/clang/test/SemaHLSL/VectorOverloadResolution.hlsl b/clang/test/SemaHLSL/VectorOverloadResolution.hlsl index b320abdd81182..0201e6807038d 100644 
--- a/clang/test/SemaHLSL/VectorOverloadResolution.hlsl +++ b/clang/test/SemaHLSL/VectorOverloadResolution.hlsl @@ -40,7 +40,7 @@ void Fn3( int64_t2 p0); // CHECK-NEXT: ImplicitCastExpr {{.*}} 'half2':'vector' // CHECK-NEXT: DeclRefExpr {{.*}} 'half2':'vector' lvalue ParmVar {{.*}} 'p0' 'half2':'vector' // CHECKIR-LABEL: Call3 -// CHECKIR: {{.*}} = fptosi <2 x half> {{.*}} to <2 x i64> +// CHECKIR: {{.*}} = fptosi reassoc nnan ninf nsz arcp afn <2 x half> {{.*}} to <2 x i64> void Call3(half2 p0) { Fn3(p0); } @@ -53,7 +53,7 @@ void Call3(half2 p0) { // CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2':'vector' // CHECK-NEXT: DeclRefExpr {{.*}} 'float2':'vector' lvalue ParmVar {{.*}} 'p0' 'float2':'vector' // CHECKIR-LABEL: Call4 -// CHECKIR: {{.*}} = fptosi <2 x float> {{.*}} to <2 x i64> +// CHECKIR: {{.*}} = fptosi reassoc nnan ninf nsz arcp afn <2 x float> {{.*}} to <2 x i64> void Call4(float2 p0) { Fn3(p0); } @@ -68,7 +68,7 @@ void Fn4( float2 p0); // CHECK-NEXT: ImplicitCastExpr {{.*}} 'int64_t2':'vector' // CHECK-NEXT: DeclRefExpr {{.*}} 'int64_t2':'vector' lvalue ParmVar {{.*}} 'p0' 'int64_t2':'vector' // CHECKIR-LABEL: Call5 -// CHECKIR: {{.*}} = sitofp <2 x i64> {{.*}} to <2 x float> +// CHECKIR: {{.*}} = sitofp reassoc nnan ninf nsz arcp afn <2 x i64> {{.*}} to <2 x float> void Call5(int64_t2 p0) { Fn4(p0); } diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 77928d63913d7..c6c20a25b8ea4 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -4060,7 +4060,8 @@ Fast-Math Flags LLVM IR floating-point operations (:ref:`fneg `, :ref:`fadd `, :ref:`fsub `, :ref:`fmul `, :ref:`fdiv `, :ref:`frem `, :ref:`fcmp `, :ref:`fptrunc `, -:ref:`fpext `), and :ref:`phi `, :ref:`select `, or +:ref:`fpext `), :ref:`fptoui `, :ref:`fptosi `, +:ref:`sitofp `, and :ref:`phi `, :ref:`select `, or :ref:`call ` instructions that return floating-point types may use the following flags to enable otherwise unsafe floating-point transformations. 
@@ -12389,6 +12390,8 @@ Example: %X = fpext float 3.125 to double ; yields double:3.125000e+00 %Y = fpext double %X to fp128 ; yields fp128:0xL00000000000000004000900000000000 +.. _i_fptoui: + '``fptoui .. to``' Instruction ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -12431,6 +12434,8 @@ Example: %Y = fptoui float 1.0E+300 to i1 ; yields undefined:1 %Z = fptoui float 1.04E+17 to i8 ; yields undefined:1 +.. _i_fptosi: + '``fptosi .. to``' Instruction ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -12525,6 +12530,8 @@ Example: %a = uitofp nneg i32 256 to i32 ; yields float:256.0 %b = uitofp nneg i32 -256 to i32 ; yields i32 poison +.. _i_sitofp: + '``sitofp .. to``' Instruction ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h index 10816c0e62c29..b725e01462e70 100644 --- a/llvm/include/llvm/IR/Operator.h +++ b/llvm/include/llvm/IR/Operator.h @@ -362,6 +362,9 @@ class FPMathOperator : public Operator { case Instruction::FRem: case Instruction::FPTrunc: case Instruction::FPExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::SIToFP: // FIXME: To clean up and correct the semantics of fast-math-flags, FCmp // should not be treated as a math op, but the other opcodes should. 
// This would make things consistent with Select/PHI (FP value type diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 897e679095906..c2f856aa65bf6 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -7319,13 +7319,13 @@ int LLParser::parseInstruction(Instruction *&Inst, BasicBlock *BB, case lltok::kw_sext: case lltok::kw_bitcast: case lltok::kw_addrspacecast: - case lltok::kw_sitofp: - case lltok::kw_fptoui: - case lltok::kw_fptosi: case lltok::kw_inttoptr: case lltok::kw_ptrtoaddr: case lltok::kw_ptrtoint: return parseCast(Inst, PFS, KeywordVal); + case lltok::kw_sitofp: + case lltok::kw_fptoui: + case lltok::kw_fptosi: case lltok::kw_fptrunc: case lltok::kw_fpext: { FastMathFlags FMF = EatFastMathFlagsIfPresent(); diff --git a/llvm/test/Assembler/fast-math-flags.ll b/llvm/test/Assembler/fast-math-flags.ll index 9c08e9da1d19e..2a725cd445182 100644 --- a/llvm/test/Assembler/fast-math-flags.ll +++ b/llvm/test/Assembler/fast-math-flags.ll @@ -56,6 +56,24 @@ entry: %h_vec = fptrunc <3 x float> %vec to <3 x half> ; CHECK: %h_scalable = fptrunc %scalable to %h_scalable = fptrunc %scalable to +; CHECK: %i = fptoui float %x to i32 + %i = fptoui float %x to i32 +; CHECK: %i_vec = fptoui <3 x float> %vec to <3 x i32> + %i_vec = fptoui <3 x float> %vec to <3 x i32> +; CHECK: %i_scalable = fptoui %scalable to + %i_scalable = fptoui %scalable to +; CHECK: %j = fptosi float %x to i32 + %j = fptosi float %x to i32 +; CHECK: %j_vec = fptosi <3 x float> %vec to <3 x i32> + %j_vec = fptosi <3 x float> %vec to <3 x i32> +; CHECK: %j_scalable = fptosi %scalable to + %j_scalable = fptosi %scalable to +; CHECK: %k = sitofp i32 %j to float + %k = sitofp i32 %j to float +; CHECK: %k_vec = sitofp <3 x i32> %j_vec to <3 x float> + %k_vec = sitofp <3 x i32> %j_vec to <3 x float> +; CHECK: %k_scalable = sitofp %j_scalable to + %k_scalable = sitofp %j_scalable to ; CHECK: ret float %f ret float %f } @@ -108,6 +126,24 @@ 
entry: %h_vec = fptrunc nnan <3 x float> %vec to <3 x half> ; CHECK: %h_scalable = fptrunc nnan %scalable to %h_scalable = fptrunc nnan %scalable to +; CHECK: %i = fptoui nnan float %x to i32 + %i = fptoui nnan float %x to i32 +; CHECK: %i_vec = fptoui nnan <3 x float> %vec to <3 x i32> + %i_vec = fptoui nnan <3 x float> %vec to <3 x i32> +; CHECK: %i_scalable = fptoui nnan %scalable to + %i_scalable = fptoui nnan %scalable to +; CHECK: %j = fptosi nnan float %x to i32 + %j = fptosi nnan float %x to i32 +; CHECK: %j_vec = fptosi nnan <3 x float> %vec to <3 x i32> + %j_vec = fptosi nnan <3 x float> %vec to <3 x i32> +; CHECK: %j_scalable = fptosi nnan %scalable to + %j_scalable = fptosi nnan %scalable to +; CHECK: %k = sitofp nnan i32 %j to float + %k = sitofp nnan i32 %j to float +; CHECK: %k_vec = sitofp nnan <3 x i32> %j_vec to <3 x float> + %k_vec = sitofp nnan <3 x i32> %j_vec to <3 x float> +; CHECK: %k_scalable = sitofp nnan %j_scalable to + %k_scalable = sitofp nnan %j_scalable to ; CHECK: ret float %f ret float %f } @@ -125,6 +161,12 @@ entry: %d = fpext contract float %x to double ; CHECK: %e = fptrunc contract float %x to half %e = fptrunc contract float %x to half +; CHECK: %f = fptoui contract float %x to i32 + %f = fptoui contract float %x to i32 +; CHECK: %g = fptosi contract float %x to i32 + %g = fptosi contract float %x to i32 +; CHECK: %h = sitofp contract i32 %g to float + %h = sitofp contract i32 %g to float ret float %c } @@ -140,6 +182,12 @@ define float @reassoc(float %x, float %y) { %d = fpext reassoc float %x to double ; CHECK: %e = fptrunc reassoc float %x to half %e = fptrunc reassoc float %x to half +; CHECK: %f = fptoui reassoc float %x to i32 + %f = fptoui reassoc float %x to i32 +; CHECK: %g = fptosi reassoc float %x to i32 + %g = fptosi reassoc float %x to i32 +; CHECK: %h = sitofp reassoc i32 %g to float + %h = sitofp reassoc i32 %g to float ret float %c } @@ -198,6 +246,24 @@ entry: %g_vec = fptrunc ninf nnan <3 x float> %vec to <3 
x half> ; CHECK: %g_scalable = fptrunc nnan ninf <vscale x 3 x float> %scalable to <vscale x 3 x half> %g_scalable = fptrunc ninf nnan <vscale x 3 x float> %scalable to <vscale x 3 x half> +; CHECK: %i = fptoui nnan ninf float %x to i32 + %i = fptoui ninf nnan float %x to i32 +; CHECK: %i_vec = fptoui nnan ninf <3 x float> %vec to <3 x i32> + %i_vec = fptoui ninf nnan <3 x float> %vec to <3 x i32> +; CHECK: %i_scalable = fptoui nnan ninf <vscale x 3 x float> %scalable to <vscale x 3 x i32> + %i_scalable = fptoui ninf nnan <vscale x 3 x float> %scalable to <vscale x 3 x i32> +; CHECK: %j = fptosi nnan ninf float %x to i32 + %j = fptosi ninf nnan float %x to i32 +; CHECK: %j_vec = fptosi nnan ninf <3 x float> %vec to <3 x i32> + %j_vec = fptosi ninf nnan <3 x float> %vec to <3 x i32> +; CHECK: %j_scalable = fptosi nnan ninf <vscale x 3 x float> %scalable to <vscale x 3 x i32> + %j_scalable = fptosi ninf nnan <vscale x 3 x float> %scalable to <vscale x 3 x i32> +; CHECK: %k = sitofp nnan ninf i32 %j to float + %k = sitofp ninf nnan i32 %j to float +; CHECK: %k_vec = sitofp nnan ninf <3 x i32> %j_vec to <3 x float> + %k_vec = sitofp ninf nnan <3 x i32> %j_vec to <3 x float> +; CHECK: %k_scalable = sitofp nnan ninf <vscale x 3 x i32> %j_scalable to <vscale x 3 x float> + %k_scalable = sitofp ninf nnan <vscale x 3 x i32> %j_scalable to <vscale x 3 x float> ; CHECK: ret float %e ret float %e } diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll index 0b5ce08c00a23..85e1efb81b6cb 100644 --- a/llvm/test/Bitcode/compatibility.ll +++ b/llvm/test/Bitcode/compatibility.ll @@ -1242,6 +1242,69 @@ define void @fastmathflags_fptrunc(float %op1) { ret void } +; CHECK-LABEL: fastmathflags_fptosi( +define void @fastmathflags_fptosi(float %op1) { + %i32.nnan = fptosi nnan float %op1 to i32 + ; CHECK: %i32.nnan = fptosi nnan float %op1 to i32 + %i32.ninf = fptosi ninf float %op1 to i32 + ; CHECK: %i32.ninf = fptosi ninf float %op1 to i32 + %i32.nsz = fptosi nsz float %op1 to i32 + ; CHECK: %i32.nsz = fptosi nsz float %op1 to i32 + %i32.arcp = fptosi arcp float %op1 to i32 + ; CHECK: %i32.arcp = fptosi arcp float %op1 to i32 + %i32.contract = fptosi contract float %op1 to i32 + ; CHECK: %i32.contract = fptosi contract float %op1 to i32 + %i32.afn = fptosi afn float %op1 
to i32 + ; CHECK: %i32.afn = fptosi afn float %op1 to i32 + %i32.reassoc = fptosi reassoc float %op1 to i32 + ; CHECK: %i32.reassoc = fptosi reassoc float %op1 to i32 + %i32.fast = fptosi fast float %op1 to i32 + ; CHECK: %i32.fast = fptosi fast float %op1 to i32 + ret void +} + +; CHECK-LABEL: fastmathflags_fptoui( +define void @fastmathflags_fptoui(float %op1) { + %i32.nnan = fptoui nnan float %op1 to i32 + ; CHECK: %i32.nnan = fptoui nnan float %op1 to i32 + %i32.ninf = fptoui ninf float %op1 to i32 + ; CHECK: %i32.ninf = fptoui ninf float %op1 to i32 + %i32.nsz = fptoui nsz float %op1 to i32 + ; CHECK: %i32.nsz = fptoui nsz float %op1 to i32 + %i32.arcp = fptoui arcp float %op1 to i32 + ; CHECK: %i32.arcp = fptoui arcp float %op1 to i32 + %i32.contract = fptoui contract float %op1 to i32 + ; CHECK: %i32.contract = fptoui contract float %op1 to i32 + %i32.afn = fptoui afn float %op1 to i32 + ; CHECK: %i32.afn = fptoui afn float %op1 to i32 + %i32.reassoc = fptoui reassoc float %op1 to i32 + ; CHECK: %i32.reassoc = fptoui reassoc float %op1 to i32 + %i32.fast = fptoui fast float %op1 to i32 + ; CHECK: %i32.fast = fptoui fast float %op1 to i32 + ret void +} + +; CHECK-LABEL: fastmathflags_sitofp( +define void @fastmathflags_sitofp(i32 %op1) { + %float.nnan = sitofp nnan i32 %op1 to float + ; CHECK: %float.nnan = sitofp nnan i32 %op1 to float + %float.ninf = sitofp ninf i32 %op1 to float + ; CHECK: %float.ninf = sitofp ninf i32 %op1 to float + %float.nsz = sitofp nsz i32 %op1 to float + ; CHECK: %float.nsz = sitofp nsz i32 %op1 to float + %float.arcp = sitofp arcp i32 %op1 to float + ; CHECK: %float.arcp = sitofp arcp i32 %op1 to float + %float.contract = sitofp contract i32 %op1 to float + ; CHECK: %float.contract = sitofp contract i32 %op1 to float + %float.afn = sitofp afn i32 %op1 to float + ; CHECK: %float.afn = sitofp afn i32 %op1 to float + %float.reassoc = sitofp reassoc i32 %op1 to float + ; CHECK: %float.reassoc = sitofp reassoc i32 %op1 to float + 
%float.fast = sitofp fast i32 %op1 to float + ; CHECK: %float.fast = sitofp fast i32 %op1 to float + ret void +} + ;; Type System %opaquety = type opaque define void @typesystem() { diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll index e71bf15384727..152b5b37c8308 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll @@ -94,7 +94,7 @@ define i32 @select_sdiv_lhs_opaque_const0_i32(i1 %cond) { ; IR-NEXT: [[TMP5:%.*]] = uitofp i32 [[TMP4]] to float ; IR-NEXT: [[TMP6:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP5]]) ; IR-NEXT: [[TMP7:%.*]] = fmul fast float [[TMP6]], 0x41EFFFFFC0000000 -; IR-NEXT: [[TMP8:%.*]] = fptoui float [[TMP7]] to i32 +; IR-NEXT: [[TMP8:%.*]] = fptoui fast float [[TMP7]] to i32 ; IR-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP4]] ; IR-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP8]] ; IR-NEXT: [[TMP11:%.*]] = zext i32 [[TMP8]] to i64 @@ -176,7 +176,7 @@ define i32 @select_sdiv_lhs_opaque_const1_i32(i1 %cond) { ; IR-NEXT: [[TMP5:%.*]] = uitofp i32 [[TMP4]] to float ; IR-NEXT: [[TMP6:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP5]]) ; IR-NEXT: [[TMP7:%.*]] = fmul fast float [[TMP6]], 0x41EFFFFFC0000000 -; IR-NEXT: [[TMP8:%.*]] = fptoui float [[TMP7]] to i32 +; IR-NEXT: [[TMP8:%.*]] = fptoui fast float [[TMP7]] to i32 ; IR-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP4]] ; IR-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], [[TMP8]] ; IR-NEXT: [[TMP11:%.*]] = zext i32 [[TMP8]] to i64 diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll index b2dcd77274989..0b447c59eb858 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll @@ -8,7 +8,7 @@ define amdgpu_kernel void @udiv_i32(ptr addrspace(1) %out, i32 %x, 
i32 %y) { ; CHECK-NEXT: [[TMP1:%.*]] = uitofp i32 [[Y:%.*]] to float ; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP1]]) ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP2]], 0x41EFFFFFC0000000 -; CHECK-NEXT: [[TMP4:%.*]] = fptoui float [[TMP3]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = fptoui fast float [[TMP3]] to i32 ; CHECK-NEXT: [[TMP5:%.*]] = sub i32 0, [[Y]] ; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP4]] to i64 @@ -108,7 +108,7 @@ define amdgpu_kernel void @urem_i32(ptr addrspace(1) %out, i32 %x, i32 %y) { ; CHECK-NEXT: [[TMP1:%.*]] = uitofp i32 [[Y:%.*]] to float ; CHECK-NEXT: [[TMP2:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP1]]) ; CHECK-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP2]], 0x41EFFFFFC0000000 -; CHECK-NEXT: [[TMP4:%.*]] = fptoui float [[TMP3]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = fptoui fast float [[TMP3]] to i32 ; CHECK-NEXT: [[TMP5:%.*]] = sub i32 0, [[Y]] ; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP4]] to i64 @@ -208,7 +208,7 @@ define amdgpu_kernel void @sdiv_i32(ptr addrspace(1) %out, i32 %x, i32 %y) { ; CHECK-NEXT: [[TMP8:%.*]] = uitofp i32 [[TMP7]] to float ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP8]]) ; CHECK-NEXT: [[TMP10:%.*]] = fmul fast float [[TMP9]], 0x41EFFFFFC0000000 -; CHECK-NEXT: [[TMP11:%.*]] = fptoui float [[TMP10]] to i32 +; CHECK-NEXT: [[TMP11:%.*]] = fptoui fast float [[TMP10]] to i32 ; CHECK-NEXT: [[TMP12:%.*]] = sub i32 0, [[TMP7]] ; CHECK-NEXT: [[TMP13:%.*]] = mul i32 [[TMP12]], [[TMP11]] ; CHECK-NEXT: [[TMP14:%.*]] = zext i32 [[TMP11]] to i64 @@ -328,7 +328,7 @@ define amdgpu_kernel void @srem_i32(ptr addrspace(1) %out, i32 %x, i32 %y) { ; CHECK-NEXT: [[TMP7:%.*]] = uitofp i32 [[TMP6]] to float ; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP7]]) ; CHECK-NEXT: [[TMP9:%.*]] = fmul fast float [[TMP8]], 
0x41EFFFFFC0000000 -; CHECK-NEXT: [[TMP10:%.*]] = fptoui float [[TMP9]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = fptoui fast float [[TMP9]] to i32 ; CHECK-NEXT: [[TMP11:%.*]] = sub i32 0, [[TMP6]] ; CHECK-NEXT: [[TMP12:%.*]] = mul i32 [[TMP11]], [[TMP10]] ; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP10]] to i64 @@ -439,7 +439,7 @@ define amdgpu_kernel void @udiv_i16(ptr addrspace(1) %out, i16 %x, i16 %y) { ; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]]) ; CHECK-NEXT: [[TMP8:%.*]] = fneg fast float [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]]) -; CHECK-NEXT: [[TMP10:%.*]] = fptoui float [[TMP7]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = fptoui fast float [[TMP7]] to i32 ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]]) ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.fabs.f32(float [[TMP4]]) ; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast oge float [[TMP11]], [[TMP12]] @@ -508,7 +508,7 @@ define amdgpu_kernel void @urem_i16(ptr addrspace(1) %out, i16 %x, i16 %y) { ; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]]) ; CHECK-NEXT: [[TMP8:%.*]] = fneg fast float [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]]) -; CHECK-NEXT: [[TMP10:%.*]] = fptoui float [[TMP7]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = fptoui fast float [[TMP7]] to i32 ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]]) ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.fabs.f32(float [[TMP4]]) ; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast oge float [[TMP11]], [[TMP12]] @@ -579,14 +579,14 @@ define amdgpu_kernel void @sdiv_i16(ptr addrspace(1) %out, i16 %x, i16 %y) { ; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = ashr i32 [[TMP3]], 30 ; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = sitofp i32 
[[TMP1]] to float -; CHECK-NEXT: [[TMP7:%.*]] = sitofp i32 [[TMP2]] to float +; CHECK-NEXT: [[TMP6:%.*]] = sitofp fast i32 [[TMP1]] to float +; CHECK-NEXT: [[TMP7:%.*]] = sitofp fast i32 [[TMP2]] to float ; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP7]]) ; CHECK-NEXT: [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]]) ; CHECK-NEXT: [[TMP11:%.*]] = fneg fast float [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]]) -; CHECK-NEXT: [[TMP13:%.*]] = fptosi float [[TMP10]] to i32 +; CHECK-NEXT: [[TMP13:%.*]] = fptosi fast float [[TMP10]] to i32 ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]]) ; CHECK-NEXT: [[TMP15:%.*]] = call fast float @llvm.fabs.f32(float [[TMP7]]) ; CHECK-NEXT: [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]] @@ -660,14 +660,14 @@ define amdgpu_kernel void @srem_i16(ptr addrspace(1) %out, i16 %x, i16 %y) { ; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = ashr i32 [[TMP3]], 30 ; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = sitofp i32 [[TMP1]] to float -; CHECK-NEXT: [[TMP7:%.*]] = sitofp i32 [[TMP2]] to float +; CHECK-NEXT: [[TMP6:%.*]] = sitofp fast i32 [[TMP1]] to float +; CHECK-NEXT: [[TMP7:%.*]] = sitofp fast i32 [[TMP2]] to float ; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP7]]) ; CHECK-NEXT: [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]]) ; CHECK-NEXT: [[TMP11:%.*]] = fneg fast float [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]]) -; CHECK-NEXT: [[TMP13:%.*]] = fptosi float [[TMP10]] to i32 +; CHECK-NEXT: [[TMP13:%.*]] = fptosi fast float [[TMP10]] to i32 ; 
CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]]) ; CHECK-NEXT: [[TMP15:%.*]] = call fast float @llvm.fabs.f32(float [[TMP7]]) ; CHECK-NEXT: [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]] @@ -751,7 +751,7 @@ define amdgpu_kernel void @udiv_i8(ptr addrspace(1) %out, i8 %x, i8 %y) { ; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]]) ; CHECK-NEXT: [[TMP8:%.*]] = fneg fast float [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]]) -; CHECK-NEXT: [[TMP10:%.*]] = fptoui float [[TMP7]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = fptoui fast float [[TMP7]] to i32 ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]]) ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.fabs.f32(float [[TMP4]]) ; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast oge float [[TMP11]], [[TMP12]] @@ -814,7 +814,7 @@ define amdgpu_kernel void @urem_i8(ptr addrspace(1) %out, i8 %x, i8 %y) { ; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]]) ; CHECK-NEXT: [[TMP8:%.*]] = fneg fast float [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]]) -; CHECK-NEXT: [[TMP10:%.*]] = fptoui float [[TMP7]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = fptoui fast float [[TMP7]] to i32 ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]]) ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.fabs.f32(float [[TMP4]]) ; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast oge float [[TMP11]], [[TMP12]] @@ -881,14 +881,14 @@ define amdgpu_kernel void @sdiv_i8(ptr addrspace(1) %out, i8 %x, i8 %y) { ; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = ashr i32 [[TMP3]], 30 ; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = sitofp i32 [[TMP1]] to float -; CHECK-NEXT: [[TMP7:%.*]] = sitofp i32 [[TMP2]] to float +; CHECK-NEXT: 
[[TMP6:%.*]] = sitofp fast i32 [[TMP1]] to float +; CHECK-NEXT: [[TMP7:%.*]] = sitofp fast i32 [[TMP2]] to float ; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP7]]) ; CHECK-NEXT: [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]]) ; CHECK-NEXT: [[TMP11:%.*]] = fneg fast float [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]]) -; CHECK-NEXT: [[TMP13:%.*]] = fptosi float [[TMP10]] to i32 +; CHECK-NEXT: [[TMP13:%.*]] = fptosi fast float [[TMP10]] to i32 ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]]) ; CHECK-NEXT: [[TMP15:%.*]] = call fast float @llvm.fabs.f32(float [[TMP7]]) ; CHECK-NEXT: [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]] @@ -962,14 +962,14 @@ define amdgpu_kernel void @srem_i8(ptr addrspace(1) %out, i8 %x, i8 %y) { ; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = ashr i32 [[TMP3]], 30 ; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = sitofp i32 [[TMP1]] to float -; CHECK-NEXT: [[TMP7:%.*]] = sitofp i32 [[TMP2]] to float +; CHECK-NEXT: [[TMP6:%.*]] = sitofp fast i32 [[TMP1]] to float +; CHECK-NEXT: [[TMP7:%.*]] = sitofp fast i32 [[TMP2]] to float ; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP7]]) ; CHECK-NEXT: [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]]) ; CHECK-NEXT: [[TMP11:%.*]] = fneg fast float [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]]) -; CHECK-NEXT: [[TMP13:%.*]] = fptosi float [[TMP10]] to i32 +; CHECK-NEXT: [[TMP13:%.*]] = fptosi fast float [[TMP10]] to i32 ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]]) ; CHECK-NEXT: [[TMP15:%.*]] = 
call fast float @llvm.fabs.f32(float [[TMP7]]) ; CHECK-NEXT: [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]] @@ -1051,7 +1051,7 @@ define amdgpu_kernel void @udiv_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x ; CHECK-NEXT: [[TMP3:%.*]] = uitofp i32 [[TMP2]] to float ; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP3]]) ; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP4]], 0x41EFFFFFC0000000 -; CHECK-NEXT: [[TMP6:%.*]] = fptoui float [[TMP5]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = fptoui fast float [[TMP5]] to i32 ; CHECK-NEXT: [[TMP7:%.*]] = sub i32 0, [[TMP2]] ; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[TMP7]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP6]] to i64 @@ -1083,7 +1083,7 @@ define amdgpu_kernel void @udiv_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x ; CHECK-NEXT: [[TMP35:%.*]] = uitofp i32 [[TMP34]] to float ; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP35]]) ; CHECK-NEXT: [[TMP37:%.*]] = fmul fast float [[TMP36]], 0x41EFFFFFC0000000 -; CHECK-NEXT: [[TMP38:%.*]] = fptoui float [[TMP37]] to i32 +; CHECK-NEXT: [[TMP38:%.*]] = fptoui fast float [[TMP37]] to i32 ; CHECK-NEXT: [[TMP39:%.*]] = sub i32 0, [[TMP34]] ; CHECK-NEXT: [[TMP40:%.*]] = mul i32 [[TMP39]], [[TMP38]] ; CHECK-NEXT: [[TMP41:%.*]] = zext i32 [[TMP38]] to i64 @@ -1115,7 +1115,7 @@ define amdgpu_kernel void @udiv_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x ; CHECK-NEXT: [[TMP67:%.*]] = uitofp i32 [[TMP66]] to float ; CHECK-NEXT: [[TMP68:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP67]]) ; CHECK-NEXT: [[TMP69:%.*]] = fmul fast float [[TMP68]], 0x41EFFFFFC0000000 -; CHECK-NEXT: [[TMP70:%.*]] = fptoui float [[TMP69]] to i32 +; CHECK-NEXT: [[TMP70:%.*]] = fptoui fast float [[TMP69]] to i32 ; CHECK-NEXT: [[TMP71:%.*]] = sub i32 0, [[TMP66]] ; CHECK-NEXT: [[TMP72:%.*]] = mul i32 [[TMP71]], [[TMP70]] ; CHECK-NEXT: [[TMP73:%.*]] = zext i32 [[TMP70]] to i64 @@ -1147,7 +1147,7 @@ define amdgpu_kernel void 
@udiv_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x ; CHECK-NEXT: [[TMP99:%.*]] = uitofp i32 [[TMP98]] to float ; CHECK-NEXT: [[TMP100:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP99]]) ; CHECK-NEXT: [[TMP101:%.*]] = fmul fast float [[TMP100]], 0x41EFFFFFC0000000 -; CHECK-NEXT: [[TMP102:%.*]] = fptoui float [[TMP101]] to i32 +; CHECK-NEXT: [[TMP102:%.*]] = fptoui fast float [[TMP101]] to i32 ; CHECK-NEXT: [[TMP103:%.*]] = sub i32 0, [[TMP98]] ; CHECK-NEXT: [[TMP104:%.*]] = mul i32 [[TMP103]], [[TMP102]] ; CHECK-NEXT: [[TMP105:%.*]] = zext i32 [[TMP102]] to i64 @@ -1379,7 +1379,7 @@ define amdgpu_kernel void @urem_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x ; CHECK-NEXT: [[TMP3:%.*]] = uitofp i32 [[TMP2]] to float ; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP3]]) ; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP4]], 0x41EFFFFFC0000000 -; CHECK-NEXT: [[TMP6:%.*]] = fptoui float [[TMP5]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = fptoui fast float [[TMP5]] to i32 ; CHECK-NEXT: [[TMP7:%.*]] = sub i32 0, [[TMP2]] ; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[TMP7]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP6]] to i64 @@ -1409,7 +1409,7 @@ define amdgpu_kernel void @urem_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x ; CHECK-NEXT: [[TMP33:%.*]] = uitofp i32 [[TMP32]] to float ; CHECK-NEXT: [[TMP34:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP33]]) ; CHECK-NEXT: [[TMP35:%.*]] = fmul fast float [[TMP34]], 0x41EFFFFFC0000000 -; CHECK-NEXT: [[TMP36:%.*]] = fptoui float [[TMP35]] to i32 +; CHECK-NEXT: [[TMP36:%.*]] = fptoui fast float [[TMP35]] to i32 ; CHECK-NEXT: [[TMP37:%.*]] = sub i32 0, [[TMP32]] ; CHECK-NEXT: [[TMP38:%.*]] = mul i32 [[TMP37]], [[TMP36]] ; CHECK-NEXT: [[TMP39:%.*]] = zext i32 [[TMP36]] to i64 @@ -1439,7 +1439,7 @@ define amdgpu_kernel void @urem_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x ; CHECK-NEXT: [[TMP63:%.*]] = uitofp i32 [[TMP62]] to float ; CHECK-NEXT: [[TMP64:%.*]] = call fast 
float @llvm.amdgcn.rcp.f32(float [[TMP63]]) ; CHECK-NEXT: [[TMP65:%.*]] = fmul fast float [[TMP64]], 0x41EFFFFFC0000000 -; CHECK-NEXT: [[TMP66:%.*]] = fptoui float [[TMP65]] to i32 +; CHECK-NEXT: [[TMP66:%.*]] = fptoui fast float [[TMP65]] to i32 ; CHECK-NEXT: [[TMP67:%.*]] = sub i32 0, [[TMP62]] ; CHECK-NEXT: [[TMP68:%.*]] = mul i32 [[TMP67]], [[TMP66]] ; CHECK-NEXT: [[TMP69:%.*]] = zext i32 [[TMP66]] to i64 @@ -1469,7 +1469,7 @@ define amdgpu_kernel void @urem_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x ; CHECK-NEXT: [[TMP93:%.*]] = uitofp i32 [[TMP92]] to float ; CHECK-NEXT: [[TMP94:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP93]]) ; CHECK-NEXT: [[TMP95:%.*]] = fmul fast float [[TMP94]], 0x41EFFFFFC0000000 -; CHECK-NEXT: [[TMP96:%.*]] = fptoui float [[TMP95]] to i32 +; CHECK-NEXT: [[TMP96:%.*]] = fptoui fast float [[TMP95]] to i32 ; CHECK-NEXT: [[TMP97:%.*]] = sub i32 0, [[TMP92]] ; CHECK-NEXT: [[TMP98:%.*]] = mul i32 [[TMP97]], [[TMP96]] ; CHECK-NEXT: [[TMP99:%.*]] = zext i32 [[TMP96]] to i64 @@ -1687,7 +1687,7 @@ define amdgpu_kernel void @sdiv_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x ; CHECK-NEXT: [[TMP10:%.*]] = uitofp i32 [[TMP9]] to float ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP10]]) ; CHECK-NEXT: [[TMP12:%.*]] = fmul fast float [[TMP11]], 0x41EFFFFFC0000000 -; CHECK-NEXT: [[TMP13:%.*]] = fptoui float [[TMP12]] to i32 +; CHECK-NEXT: [[TMP13:%.*]] = fptoui fast float [[TMP12]] to i32 ; CHECK-NEXT: [[TMP14:%.*]] = sub i32 0, [[TMP9]] ; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], [[TMP13]] ; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP13]] to i64 @@ -1728,7 +1728,7 @@ define amdgpu_kernel void @sdiv_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x ; CHECK-NEXT: [[TMP51:%.*]] = uitofp i32 [[TMP50]] to float ; CHECK-NEXT: [[TMP52:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP51]]) ; CHECK-NEXT: [[TMP53:%.*]] = fmul fast float [[TMP52]], 0x41EFFFFFC0000000 -; CHECK-NEXT: [[TMP54:%.*]] = 
fptoui float [[TMP53]] to i32 +; CHECK-NEXT: [[TMP54:%.*]] = fptoui fast float [[TMP53]] to i32 ; CHECK-NEXT: [[TMP55:%.*]] = sub i32 0, [[TMP50]] ; CHECK-NEXT: [[TMP56:%.*]] = mul i32 [[TMP55]], [[TMP54]] ; CHECK-NEXT: [[TMP57:%.*]] = zext i32 [[TMP54]] to i64 @@ -1769,7 +1769,7 @@ define amdgpu_kernel void @sdiv_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x ; CHECK-NEXT: [[TMP92:%.*]] = uitofp i32 [[TMP91]] to float ; CHECK-NEXT: [[TMP93:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP92]]) ; CHECK-NEXT: [[TMP94:%.*]] = fmul fast float [[TMP93]], 0x41EFFFFFC0000000 -; CHECK-NEXT: [[TMP95:%.*]] = fptoui float [[TMP94]] to i32 +; CHECK-NEXT: [[TMP95:%.*]] = fptoui fast float [[TMP94]] to i32 ; CHECK-NEXT: [[TMP96:%.*]] = sub i32 0, [[TMP91]] ; CHECK-NEXT: [[TMP97:%.*]] = mul i32 [[TMP96]], [[TMP95]] ; CHECK-NEXT: [[TMP98:%.*]] = zext i32 [[TMP95]] to i64 @@ -1810,7 +1810,7 @@ define amdgpu_kernel void @sdiv_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x ; CHECK-NEXT: [[TMP133:%.*]] = uitofp i32 [[TMP132]] to float ; CHECK-NEXT: [[TMP134:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP133]]) ; CHECK-NEXT: [[TMP135:%.*]] = fmul fast float [[TMP134]], 0x41EFFFFFC0000000 -; CHECK-NEXT: [[TMP136:%.*]] = fptoui float [[TMP135]] to i32 +; CHECK-NEXT: [[TMP136:%.*]] = fptoui fast float [[TMP135]] to i32 ; CHECK-NEXT: [[TMP137:%.*]] = sub i32 0, [[TMP132]] ; CHECK-NEXT: [[TMP138:%.*]] = mul i32 [[TMP137]], [[TMP136]] ; CHECK-NEXT: [[TMP139:%.*]] = zext i32 [[TMP136]] to i64 @@ -2099,7 +2099,7 @@ define amdgpu_kernel void @srem_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x ; CHECK-NEXT: [[TMP9:%.*]] = uitofp i32 [[TMP8]] to float ; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP9]]) ; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP10]], 0x41EFFFFFC0000000 -; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP11]] to i32 +; CHECK-NEXT: [[TMP12:%.*]] = fptoui fast float [[TMP11]] to i32 ; CHECK-NEXT: [[TMP13:%.*]] = sub i32 0, 
[[TMP8]] ; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], [[TMP12]] ; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP12]] to i64 @@ -2137,7 +2137,7 @@ define amdgpu_kernel void @srem_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x ; CHECK-NEXT: [[TMP47:%.*]] = uitofp i32 [[TMP46]] to float ; CHECK-NEXT: [[TMP48:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP47]]) ; CHECK-NEXT: [[TMP49:%.*]] = fmul fast float [[TMP48]], 0x41EFFFFFC0000000 -; CHECK-NEXT: [[TMP50:%.*]] = fptoui float [[TMP49]] to i32 +; CHECK-NEXT: [[TMP50:%.*]] = fptoui fast float [[TMP49]] to i32 ; CHECK-NEXT: [[TMP51:%.*]] = sub i32 0, [[TMP46]] ; CHECK-NEXT: [[TMP52:%.*]] = mul i32 [[TMP51]], [[TMP50]] ; CHECK-NEXT: [[TMP53:%.*]] = zext i32 [[TMP50]] to i64 @@ -2175,7 +2175,7 @@ define amdgpu_kernel void @srem_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x ; CHECK-NEXT: [[TMP85:%.*]] = uitofp i32 [[TMP84]] to float ; CHECK-NEXT: [[TMP86:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP85]]) ; CHECK-NEXT: [[TMP87:%.*]] = fmul fast float [[TMP86]], 0x41EFFFFFC0000000 -; CHECK-NEXT: [[TMP88:%.*]] = fptoui float [[TMP87]] to i32 +; CHECK-NEXT: [[TMP88:%.*]] = fptoui fast float [[TMP87]] to i32 ; CHECK-NEXT: [[TMP89:%.*]] = sub i32 0, [[TMP84]] ; CHECK-NEXT: [[TMP90:%.*]] = mul i32 [[TMP89]], [[TMP88]] ; CHECK-NEXT: [[TMP91:%.*]] = zext i32 [[TMP88]] to i64 @@ -2213,7 +2213,7 @@ define amdgpu_kernel void @srem_v4i32(ptr addrspace(1) %out, <4 x i32> %x, <4 x ; CHECK-NEXT: [[TMP123:%.*]] = uitofp i32 [[TMP122]] to float ; CHECK-NEXT: [[TMP124:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP123]]) ; CHECK-NEXT: [[TMP125:%.*]] = fmul fast float [[TMP124]], 0x41EFFFFFC0000000 -; CHECK-NEXT: [[TMP126:%.*]] = fptoui float [[TMP125]] to i32 +; CHECK-NEXT: [[TMP126:%.*]] = fptoui fast float [[TMP125]] to i32 ; CHECK-NEXT: [[TMP127:%.*]] = sub i32 0, [[TMP122]] ; CHECK-NEXT: [[TMP128:%.*]] = mul i32 [[TMP127]], [[TMP126]] ; CHECK-NEXT: [[TMP129:%.*]] = zext i32 [[TMP126]] to i64 @@ -2473,7 
+2473,7 @@ define amdgpu_kernel void @udiv_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]]) ; CHECK-NEXT: [[TMP10:%.*]] = fneg fast float [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]]) -; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP9]] to i32 +; CHECK-NEXT: [[TMP12:%.*]] = fptoui fast float [[TMP9]] to i32 ; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]]) ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP6]]) ; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast oge float [[TMP13]], [[TMP14]] @@ -2493,7 +2493,7 @@ define amdgpu_kernel void @udiv_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x ; CHECK-NEXT: [[TMP29:%.*]] = call fast float @llvm.trunc.f32(float [[TMP28]]) ; CHECK-NEXT: [[TMP30:%.*]] = fneg fast float [[TMP29]] ; CHECK-NEXT: [[TMP31:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP30]], float [[TMP26]], float [[TMP25]]) -; CHECK-NEXT: [[TMP32:%.*]] = fptoui float [[TMP29]] to i32 +; CHECK-NEXT: [[TMP32:%.*]] = fptoui fast float [[TMP29]] to i32 ; CHECK-NEXT: [[TMP33:%.*]] = call fast float @llvm.fabs.f32(float [[TMP31]]) ; CHECK-NEXT: [[TMP34:%.*]] = call fast float @llvm.fabs.f32(float [[TMP26]]) ; CHECK-NEXT: [[TMP35:%.*]] = fcmp fast oge float [[TMP33]], [[TMP34]] @@ -2513,7 +2513,7 @@ define amdgpu_kernel void @udiv_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x ; CHECK-NEXT: [[TMP49:%.*]] = call fast float @llvm.trunc.f32(float [[TMP48]]) ; CHECK-NEXT: [[TMP50:%.*]] = fneg fast float [[TMP49]] ; CHECK-NEXT: [[TMP51:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP50]], float [[TMP46]], float [[TMP45]]) -; CHECK-NEXT: [[TMP52:%.*]] = fptoui float [[TMP49]] to i32 +; CHECK-NEXT: [[TMP52:%.*]] = fptoui fast float [[TMP49]] to i32 ; CHECK-NEXT: [[TMP53:%.*]] = call fast float @llvm.fabs.f32(float [[TMP51]]) ; CHECK-NEXT: 
[[TMP54:%.*]] = call fast float @llvm.fabs.f32(float [[TMP46]]) ; CHECK-NEXT: [[TMP55:%.*]] = fcmp fast oge float [[TMP53]], [[TMP54]] @@ -2533,7 +2533,7 @@ define amdgpu_kernel void @udiv_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x ; CHECK-NEXT: [[TMP69:%.*]] = call fast float @llvm.trunc.f32(float [[TMP68]]) ; CHECK-NEXT: [[TMP70:%.*]] = fneg fast float [[TMP69]] ; CHECK-NEXT: [[TMP71:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP70]], float [[TMP66]], float [[TMP65]]) -; CHECK-NEXT: [[TMP72:%.*]] = fptoui float [[TMP69]] to i32 +; CHECK-NEXT: [[TMP72:%.*]] = fptoui fast float [[TMP69]] to i32 ; CHECK-NEXT: [[TMP73:%.*]] = call fast float @llvm.fabs.f32(float [[TMP71]]) ; CHECK-NEXT: [[TMP74:%.*]] = call fast float @llvm.fabs.f32(float [[TMP66]]) ; CHECK-NEXT: [[TMP75:%.*]] = fcmp fast oge float [[TMP73]], [[TMP74]] @@ -2680,7 +2680,7 @@ define amdgpu_kernel void @urem_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]]) ; CHECK-NEXT: [[TMP10:%.*]] = fneg fast float [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]]) -; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP9]] to i32 +; CHECK-NEXT: [[TMP12:%.*]] = fptoui fast float [[TMP9]] to i32 ; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]]) ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP6]]) ; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast oge float [[TMP13]], [[TMP14]] @@ -2702,7 +2702,7 @@ define amdgpu_kernel void @urem_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x ; CHECK-NEXT: [[TMP31:%.*]] = call fast float @llvm.trunc.f32(float [[TMP30]]) ; CHECK-NEXT: [[TMP32:%.*]] = fneg fast float [[TMP31]] ; CHECK-NEXT: [[TMP33:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP32]], float [[TMP28]], float [[TMP27]]) -; CHECK-NEXT: [[TMP34:%.*]] = fptoui float [[TMP31]] to i32 +; CHECK-NEXT: 
[[TMP34:%.*]] = fptoui fast float [[TMP31]] to i32 ; CHECK-NEXT: [[TMP35:%.*]] = call fast float @llvm.fabs.f32(float [[TMP33]]) ; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.fabs.f32(float [[TMP28]]) ; CHECK-NEXT: [[TMP37:%.*]] = fcmp fast oge float [[TMP35]], [[TMP36]] @@ -2724,7 +2724,7 @@ define amdgpu_kernel void @urem_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x ; CHECK-NEXT: [[TMP53:%.*]] = call fast float @llvm.trunc.f32(float [[TMP52]]) ; CHECK-NEXT: [[TMP54:%.*]] = fneg fast float [[TMP53]] ; CHECK-NEXT: [[TMP55:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP54]], float [[TMP50]], float [[TMP49]]) -; CHECK-NEXT: [[TMP56:%.*]] = fptoui float [[TMP53]] to i32 +; CHECK-NEXT: [[TMP56:%.*]] = fptoui fast float [[TMP53]] to i32 ; CHECK-NEXT: [[TMP57:%.*]] = call fast float @llvm.fabs.f32(float [[TMP55]]) ; CHECK-NEXT: [[TMP58:%.*]] = call fast float @llvm.fabs.f32(float [[TMP50]]) ; CHECK-NEXT: [[TMP59:%.*]] = fcmp fast oge float [[TMP57]], [[TMP58]] @@ -2746,7 +2746,7 @@ define amdgpu_kernel void @urem_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x ; CHECK-NEXT: [[TMP75:%.*]] = call fast float @llvm.trunc.f32(float [[TMP74]]) ; CHECK-NEXT: [[TMP76:%.*]] = fneg fast float [[TMP75]] ; CHECK-NEXT: [[TMP77:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP76]], float [[TMP72]], float [[TMP71]]) -; CHECK-NEXT: [[TMP78:%.*]] = fptoui float [[TMP75]] to i32 +; CHECK-NEXT: [[TMP78:%.*]] = fptoui fast float [[TMP75]] to i32 ; CHECK-NEXT: [[TMP79:%.*]] = call fast float @llvm.fabs.f32(float [[TMP77]]) ; CHECK-NEXT: [[TMP80:%.*]] = call fast float @llvm.fabs.f32(float [[TMP72]]) ; CHECK-NEXT: [[TMP81:%.*]] = fcmp fast oge float [[TMP79]], [[TMP80]] @@ -2906,14 +2906,14 @@ define amdgpu_kernel void @sdiv_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x ; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = ashr i32 [[TMP5]], 30 ; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = 
sitofp i32 [[TMP3]] to float -; CHECK-NEXT: [[TMP9:%.*]] = sitofp i32 [[TMP4]] to float +; CHECK-NEXT: [[TMP8:%.*]] = sitofp fast i32 [[TMP3]] to float +; CHECK-NEXT: [[TMP9:%.*]] = sitofp fast i32 [[TMP4]] to float ; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP9]]) ; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]]) ; CHECK-NEXT: [[TMP13:%.*]] = fneg fast float [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]]) -; CHECK-NEXT: [[TMP15:%.*]] = fptosi float [[TMP12]] to i32 +; CHECK-NEXT: [[TMP15:%.*]] = fptosi fast float [[TMP12]] to i32 ; CHECK-NEXT: [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]]) ; CHECK-NEXT: [[TMP17:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]]) ; CHECK-NEXT: [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]] @@ -2930,14 +2930,14 @@ define amdgpu_kernel void @sdiv_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x ; CHECK-NEXT: [[TMP29:%.*]] = xor i32 [[TMP27]], [[TMP28]] ; CHECK-NEXT: [[TMP30:%.*]] = ashr i32 [[TMP29]], 30 ; CHECK-NEXT: [[TMP31:%.*]] = or i32 [[TMP30]], 1 -; CHECK-NEXT: [[TMP32:%.*]] = sitofp i32 [[TMP27]] to float -; CHECK-NEXT: [[TMP33:%.*]] = sitofp i32 [[TMP28]] to float +; CHECK-NEXT: [[TMP32:%.*]] = sitofp fast i32 [[TMP27]] to float +; CHECK-NEXT: [[TMP33:%.*]] = sitofp fast i32 [[TMP28]] to float ; CHECK-NEXT: [[TMP34:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP33]]) ; CHECK-NEXT: [[TMP35:%.*]] = fmul fast float [[TMP32]], [[TMP34]] ; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.trunc.f32(float [[TMP35]]) ; CHECK-NEXT: [[TMP37:%.*]] = fneg fast float [[TMP36]] ; CHECK-NEXT: [[TMP38:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP37]], float [[TMP33]], float [[TMP32]]) -; CHECK-NEXT: [[TMP39:%.*]] = fptosi float [[TMP36]] to i32 +; CHECK-NEXT: [[TMP39:%.*]] = 
fptosi fast float [[TMP36]] to i32 ; CHECK-NEXT: [[TMP40:%.*]] = call fast float @llvm.fabs.f32(float [[TMP38]]) ; CHECK-NEXT: [[TMP41:%.*]] = call fast float @llvm.fabs.f32(float [[TMP33]]) ; CHECK-NEXT: [[TMP42:%.*]] = fcmp fast oge float [[TMP40]], [[TMP41]] @@ -2954,14 +2954,14 @@ define amdgpu_kernel void @sdiv_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x ; CHECK-NEXT: [[TMP53:%.*]] = xor i32 [[TMP51]], [[TMP52]] ; CHECK-NEXT: [[TMP54:%.*]] = ashr i32 [[TMP53]], 30 ; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP54]], 1 -; CHECK-NEXT: [[TMP56:%.*]] = sitofp i32 [[TMP51]] to float -; CHECK-NEXT: [[TMP57:%.*]] = sitofp i32 [[TMP52]] to float +; CHECK-NEXT: [[TMP56:%.*]] = sitofp fast i32 [[TMP51]] to float +; CHECK-NEXT: [[TMP57:%.*]] = sitofp fast i32 [[TMP52]] to float ; CHECK-NEXT: [[TMP58:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP57]]) ; CHECK-NEXT: [[TMP59:%.*]] = fmul fast float [[TMP56]], [[TMP58]] ; CHECK-NEXT: [[TMP60:%.*]] = call fast float @llvm.trunc.f32(float [[TMP59]]) ; CHECK-NEXT: [[TMP61:%.*]] = fneg fast float [[TMP60]] ; CHECK-NEXT: [[TMP62:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP61]], float [[TMP57]], float [[TMP56]]) -; CHECK-NEXT: [[TMP63:%.*]] = fptosi float [[TMP60]] to i32 +; CHECK-NEXT: [[TMP63:%.*]] = fptosi fast float [[TMP60]] to i32 ; CHECK-NEXT: [[TMP64:%.*]] = call fast float @llvm.fabs.f32(float [[TMP62]]) ; CHECK-NEXT: [[TMP65:%.*]] = call fast float @llvm.fabs.f32(float [[TMP57]]) ; CHECK-NEXT: [[TMP66:%.*]] = fcmp fast oge float [[TMP64]], [[TMP65]] @@ -2978,14 +2978,14 @@ define amdgpu_kernel void @sdiv_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x ; CHECK-NEXT: [[TMP77:%.*]] = xor i32 [[TMP75]], [[TMP76]] ; CHECK-NEXT: [[TMP78:%.*]] = ashr i32 [[TMP77]], 30 ; CHECK-NEXT: [[TMP79:%.*]] = or i32 [[TMP78]], 1 -; CHECK-NEXT: [[TMP80:%.*]] = sitofp i32 [[TMP75]] to float -; CHECK-NEXT: [[TMP81:%.*]] = sitofp i32 [[TMP76]] to float +; CHECK-NEXT: [[TMP80:%.*]] = sitofp fast i32 [[TMP75]] to float 
+; CHECK-NEXT: [[TMP81:%.*]] = sitofp fast i32 [[TMP76]] to float ; CHECK-NEXT: [[TMP82:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP81]]) ; CHECK-NEXT: [[TMP83:%.*]] = fmul fast float [[TMP80]], [[TMP82]] ; CHECK-NEXT: [[TMP84:%.*]] = call fast float @llvm.trunc.f32(float [[TMP83]]) ; CHECK-NEXT: [[TMP85:%.*]] = fneg fast float [[TMP84]] ; CHECK-NEXT: [[TMP86:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP85]], float [[TMP81]], float [[TMP80]]) -; CHECK-NEXT: [[TMP87:%.*]] = fptosi float [[TMP84]] to i32 +; CHECK-NEXT: [[TMP87:%.*]] = fptosi fast float [[TMP84]] to i32 ; CHECK-NEXT: [[TMP88:%.*]] = call fast float @llvm.fabs.f32(float [[TMP86]]) ; CHECK-NEXT: [[TMP89:%.*]] = call fast float @llvm.fabs.f32(float [[TMP81]]) ; CHECK-NEXT: [[TMP90:%.*]] = fcmp fast oge float [[TMP88]], [[TMP89]] @@ -3168,14 +3168,14 @@ define amdgpu_kernel void @srem_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x ; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = ashr i32 [[TMP5]], 30 ; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = sitofp i32 [[TMP3]] to float -; CHECK-NEXT: [[TMP9:%.*]] = sitofp i32 [[TMP4]] to float +; CHECK-NEXT: [[TMP8:%.*]] = sitofp fast i32 [[TMP3]] to float +; CHECK-NEXT: [[TMP9:%.*]] = sitofp fast i32 [[TMP4]] to float ; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP9]]) ; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]]) ; CHECK-NEXT: [[TMP13:%.*]] = fneg fast float [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]]) -; CHECK-NEXT: [[TMP15:%.*]] = fptosi float [[TMP12]] to i32 +; CHECK-NEXT: [[TMP15:%.*]] = fptosi fast float [[TMP12]] to i32 ; CHECK-NEXT: [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]]) ; CHECK-NEXT: [[TMP17:%.*]] = call fast float 
@llvm.fabs.f32(float [[TMP9]]) ; CHECK-NEXT: [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]] @@ -3194,14 +3194,14 @@ define amdgpu_kernel void @srem_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x ; CHECK-NEXT: [[TMP31:%.*]] = xor i32 [[TMP29]], [[TMP30]] ; CHECK-NEXT: [[TMP32:%.*]] = ashr i32 [[TMP31]], 30 ; CHECK-NEXT: [[TMP33:%.*]] = or i32 [[TMP32]], 1 -; CHECK-NEXT: [[TMP34:%.*]] = sitofp i32 [[TMP29]] to float -; CHECK-NEXT: [[TMP35:%.*]] = sitofp i32 [[TMP30]] to float +; CHECK-NEXT: [[TMP34:%.*]] = sitofp fast i32 [[TMP29]] to float +; CHECK-NEXT: [[TMP35:%.*]] = sitofp fast i32 [[TMP30]] to float ; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP35]]) ; CHECK-NEXT: [[TMP37:%.*]] = fmul fast float [[TMP34]], [[TMP36]] ; CHECK-NEXT: [[TMP38:%.*]] = call fast float @llvm.trunc.f32(float [[TMP37]]) ; CHECK-NEXT: [[TMP39:%.*]] = fneg fast float [[TMP38]] ; CHECK-NEXT: [[TMP40:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP39]], float [[TMP35]], float [[TMP34]]) -; CHECK-NEXT: [[TMP41:%.*]] = fptosi float [[TMP38]] to i32 +; CHECK-NEXT: [[TMP41:%.*]] = fptosi fast float [[TMP38]] to i32 ; CHECK-NEXT: [[TMP42:%.*]] = call fast float @llvm.fabs.f32(float [[TMP40]]) ; CHECK-NEXT: [[TMP43:%.*]] = call fast float @llvm.fabs.f32(float [[TMP35]]) ; CHECK-NEXT: [[TMP44:%.*]] = fcmp fast oge float [[TMP42]], [[TMP43]] @@ -3220,14 +3220,14 @@ define amdgpu_kernel void @srem_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x ; CHECK-NEXT: [[TMP57:%.*]] = xor i32 [[TMP55]], [[TMP56]] ; CHECK-NEXT: [[TMP58:%.*]] = ashr i32 [[TMP57]], 30 ; CHECK-NEXT: [[TMP59:%.*]] = or i32 [[TMP58]], 1 -; CHECK-NEXT: [[TMP60:%.*]] = sitofp i32 [[TMP55]] to float -; CHECK-NEXT: [[TMP61:%.*]] = sitofp i32 [[TMP56]] to float +; CHECK-NEXT: [[TMP60:%.*]] = sitofp fast i32 [[TMP55]] to float +; CHECK-NEXT: [[TMP61:%.*]] = sitofp fast i32 [[TMP56]] to float ; CHECK-NEXT: [[TMP62:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP61]]) ; 
CHECK-NEXT: [[TMP63:%.*]] = fmul fast float [[TMP60]], [[TMP62]] ; CHECK-NEXT: [[TMP64:%.*]] = call fast float @llvm.trunc.f32(float [[TMP63]]) ; CHECK-NEXT: [[TMP65:%.*]] = fneg fast float [[TMP64]] ; CHECK-NEXT: [[TMP66:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP65]], float [[TMP61]], float [[TMP60]]) -; CHECK-NEXT: [[TMP67:%.*]] = fptosi float [[TMP64]] to i32 +; CHECK-NEXT: [[TMP67:%.*]] = fptosi fast float [[TMP64]] to i32 ; CHECK-NEXT: [[TMP68:%.*]] = call fast float @llvm.fabs.f32(float [[TMP66]]) ; CHECK-NEXT: [[TMP69:%.*]] = call fast float @llvm.fabs.f32(float [[TMP61]]) ; CHECK-NEXT: [[TMP70:%.*]] = fcmp fast oge float [[TMP68]], [[TMP69]] @@ -3246,14 +3246,14 @@ define amdgpu_kernel void @srem_v4i16(ptr addrspace(1) %out, <4 x i16> %x, <4 x ; CHECK-NEXT: [[TMP83:%.*]] = xor i32 [[TMP81]], [[TMP82]] ; CHECK-NEXT: [[TMP84:%.*]] = ashr i32 [[TMP83]], 30 ; CHECK-NEXT: [[TMP85:%.*]] = or i32 [[TMP84]], 1 -; CHECK-NEXT: [[TMP86:%.*]] = sitofp i32 [[TMP81]] to float -; CHECK-NEXT: [[TMP87:%.*]] = sitofp i32 [[TMP82]] to float +; CHECK-NEXT: [[TMP86:%.*]] = sitofp fast i32 [[TMP81]] to float +; CHECK-NEXT: [[TMP87:%.*]] = sitofp fast i32 [[TMP82]] to float ; CHECK-NEXT: [[TMP88:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP87]]) ; CHECK-NEXT: [[TMP89:%.*]] = fmul fast float [[TMP86]], [[TMP88]] ; CHECK-NEXT: [[TMP90:%.*]] = call fast float @llvm.trunc.f32(float [[TMP89]]) ; CHECK-NEXT: [[TMP91:%.*]] = fneg fast float [[TMP90]] ; CHECK-NEXT: [[TMP92:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP91]], float [[TMP87]], float [[TMP86]]) -; CHECK-NEXT: [[TMP93:%.*]] = fptosi float [[TMP90]] to i32 +; CHECK-NEXT: [[TMP93:%.*]] = fptosi fast float [[TMP90]] to i32 ; CHECK-NEXT: [[TMP94:%.*]] = call fast float @llvm.fabs.f32(float [[TMP92]]) ; CHECK-NEXT: [[TMP95:%.*]] = call fast float @llvm.fabs.f32(float [[TMP87]]) ; CHECK-NEXT: [[TMP96:%.*]] = fcmp fast oge float [[TMP94]], [[TMP95]] @@ -3460,7 +3460,7 @@ define 
amdgpu_kernel void @udiv_i3(ptr addrspace(1) %out, i3 %x, i3 %y) { ; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]]) ; CHECK-NEXT: [[TMP8:%.*]] = fneg fast float [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]]) -; CHECK-NEXT: [[TMP10:%.*]] = fptoui float [[TMP7]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = fptoui fast float [[TMP7]] to i32 ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]]) ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.fabs.f32(float [[TMP4]]) ; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast oge float [[TMP11]], [[TMP12]] @@ -3529,7 +3529,7 @@ define amdgpu_kernel void @urem_i3(ptr addrspace(1) %out, i3 %x, i3 %y) { ; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.trunc.f32(float [[TMP6]]) ; CHECK-NEXT: [[TMP8:%.*]] = fneg fast float [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP8]], float [[TMP4]], float [[TMP3]]) -; CHECK-NEXT: [[TMP10:%.*]] = fptoui float [[TMP7]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = fptoui fast float [[TMP7]] to i32 ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]]) ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.fabs.f32(float [[TMP4]]) ; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast oge float [[TMP11]], [[TMP12]] @@ -3603,14 +3603,14 @@ define amdgpu_kernel void @sdiv_i3(ptr addrspace(1) %out, i3 %x, i3 %y) { ; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = ashr i32 [[TMP3]], 30 ; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = sitofp i32 [[TMP1]] to float -; CHECK-NEXT: [[TMP7:%.*]] = sitofp i32 [[TMP2]] to float +; CHECK-NEXT: [[TMP6:%.*]] = sitofp fast i32 [[TMP1]] to float +; CHECK-NEXT: [[TMP7:%.*]] = sitofp fast i32 [[TMP2]] to float ; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP7]]) ; CHECK-NEXT: [[TMP9:%.*]] = fmul fast float [[TMP6]], 
[[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]]) ; CHECK-NEXT: [[TMP11:%.*]] = fneg fast float [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]]) -; CHECK-NEXT: [[TMP13:%.*]] = fptosi float [[TMP10]] to i32 +; CHECK-NEXT: [[TMP13:%.*]] = fptosi fast float [[TMP10]] to i32 ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]]) ; CHECK-NEXT: [[TMP15:%.*]] = call fast float @llvm.fabs.f32(float [[TMP7]]) ; CHECK-NEXT: [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]] @@ -3686,14 +3686,14 @@ define amdgpu_kernel void @srem_i3(ptr addrspace(1) %out, i3 %x, i3 %y) { ; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[TMP4:%.*]] = ashr i32 [[TMP3]], 30 ; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = sitofp i32 [[TMP1]] to float -; CHECK-NEXT: [[TMP7:%.*]] = sitofp i32 [[TMP2]] to float +; CHECK-NEXT: [[TMP6:%.*]] = sitofp fast i32 [[TMP1]] to float +; CHECK-NEXT: [[TMP7:%.*]] = sitofp fast i32 [[TMP2]] to float ; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP7]]) ; CHECK-NEXT: [[TMP9:%.*]] = fmul fast float [[TMP6]], [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.trunc.f32(float [[TMP9]]) ; CHECK-NEXT: [[TMP11:%.*]] = fneg fast float [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP11]], float [[TMP7]], float [[TMP6]]) -; CHECK-NEXT: [[TMP13:%.*]] = fptosi float [[TMP10]] to i32 +; CHECK-NEXT: [[TMP13:%.*]] = fptosi fast float [[TMP10]] to i32 ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP12]]) ; CHECK-NEXT: [[TMP15:%.*]] = call fast float @llvm.fabs.f32(float [[TMP7]]) ; CHECK-NEXT: [[TMP16:%.*]] = fcmp fast oge float [[TMP14]], [[TMP15]] @@ -3784,7 +3784,7 @@ define amdgpu_kernel void @udiv_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x ; CHECK-NEXT: [[TMP9:%.*]] 
= call fast float @llvm.trunc.f32(float [[TMP8]]) ; CHECK-NEXT: [[TMP10:%.*]] = fneg fast float [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]]) -; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP9]] to i32 +; CHECK-NEXT: [[TMP12:%.*]] = fptoui fast float [[TMP9]] to i32 ; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]]) ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP6]]) ; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast oge float [[TMP13]], [[TMP14]] @@ -3804,7 +3804,7 @@ define amdgpu_kernel void @udiv_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x ; CHECK-NEXT: [[TMP29:%.*]] = call fast float @llvm.trunc.f32(float [[TMP28]]) ; CHECK-NEXT: [[TMP30:%.*]] = fneg fast float [[TMP29]] ; CHECK-NEXT: [[TMP31:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP30]], float [[TMP26]], float [[TMP25]]) -; CHECK-NEXT: [[TMP32:%.*]] = fptoui float [[TMP29]] to i32 +; CHECK-NEXT: [[TMP32:%.*]] = fptoui fast float [[TMP29]] to i32 ; CHECK-NEXT: [[TMP33:%.*]] = call fast float @llvm.fabs.f32(float [[TMP31]]) ; CHECK-NEXT: [[TMP34:%.*]] = call fast float @llvm.fabs.f32(float [[TMP26]]) ; CHECK-NEXT: [[TMP35:%.*]] = fcmp fast oge float [[TMP33]], [[TMP34]] @@ -3824,7 +3824,7 @@ define amdgpu_kernel void @udiv_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x ; CHECK-NEXT: [[TMP49:%.*]] = call fast float @llvm.trunc.f32(float [[TMP48]]) ; CHECK-NEXT: [[TMP50:%.*]] = fneg fast float [[TMP49]] ; CHECK-NEXT: [[TMP51:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP50]], float [[TMP46]], float [[TMP45]]) -; CHECK-NEXT: [[TMP52:%.*]] = fptoui float [[TMP49]] to i32 +; CHECK-NEXT: [[TMP52:%.*]] = fptoui fast float [[TMP49]] to i32 ; CHECK-NEXT: [[TMP53:%.*]] = call fast float @llvm.fabs.f32(float [[TMP51]]) ; CHECK-NEXT: [[TMP54:%.*]] = call fast float @llvm.fabs.f32(float [[TMP46]]) ; CHECK-NEXT: [[TMP55:%.*]] = fcmp fast oge float [[TMP53]], 
[[TMP54]] @@ -3946,7 +3946,7 @@ define amdgpu_kernel void @urem_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]]) ; CHECK-NEXT: [[TMP10:%.*]] = fneg fast float [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]]) -; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP9]] to i32 +; CHECK-NEXT: [[TMP12:%.*]] = fptoui fast float [[TMP9]] to i32 ; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]]) ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP6]]) ; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast oge float [[TMP13]], [[TMP14]] @@ -3968,7 +3968,7 @@ define amdgpu_kernel void @urem_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x ; CHECK-NEXT: [[TMP31:%.*]] = call fast float @llvm.trunc.f32(float [[TMP30]]) ; CHECK-NEXT: [[TMP32:%.*]] = fneg fast float [[TMP31]] ; CHECK-NEXT: [[TMP33:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP32]], float [[TMP28]], float [[TMP27]]) -; CHECK-NEXT: [[TMP34:%.*]] = fptoui float [[TMP31]] to i32 +; CHECK-NEXT: [[TMP34:%.*]] = fptoui fast float [[TMP31]] to i32 ; CHECK-NEXT: [[TMP35:%.*]] = call fast float @llvm.fabs.f32(float [[TMP33]]) ; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.fabs.f32(float [[TMP28]]) ; CHECK-NEXT: [[TMP37:%.*]] = fcmp fast oge float [[TMP35]], [[TMP36]] @@ -3990,7 +3990,7 @@ define amdgpu_kernel void @urem_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x ; CHECK-NEXT: [[TMP53:%.*]] = call fast float @llvm.trunc.f32(float [[TMP52]]) ; CHECK-NEXT: [[TMP54:%.*]] = fneg fast float [[TMP53]] ; CHECK-NEXT: [[TMP55:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP54]], float [[TMP50]], float [[TMP49]]) -; CHECK-NEXT: [[TMP56:%.*]] = fptoui float [[TMP53]] to i32 +; CHECK-NEXT: [[TMP56:%.*]] = fptoui fast float [[TMP53]] to i32 ; CHECK-NEXT: [[TMP57:%.*]] = call fast float @llvm.fabs.f32(float [[TMP55]]) ; 
CHECK-NEXT: [[TMP58:%.*]] = call fast float @llvm.fabs.f32(float [[TMP50]]) ; CHECK-NEXT: [[TMP59:%.*]] = fcmp fast oge float [[TMP57]], [[TMP58]] @@ -4121,14 +4121,14 @@ define amdgpu_kernel void @sdiv_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x ; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = ashr i32 [[TMP5]], 30 ; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = sitofp i32 [[TMP3]] to float -; CHECK-NEXT: [[TMP9:%.*]] = sitofp i32 [[TMP4]] to float +; CHECK-NEXT: [[TMP8:%.*]] = sitofp fast i32 [[TMP3]] to float +; CHECK-NEXT: [[TMP9:%.*]] = sitofp fast i32 [[TMP4]] to float ; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP9]]) ; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]]) ; CHECK-NEXT: [[TMP13:%.*]] = fneg fast float [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]]) -; CHECK-NEXT: [[TMP15:%.*]] = fptosi float [[TMP12]] to i32 +; CHECK-NEXT: [[TMP15:%.*]] = fptosi fast float [[TMP12]] to i32 ; CHECK-NEXT: [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]]) ; CHECK-NEXT: [[TMP17:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]]) ; CHECK-NEXT: [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]] @@ -4145,14 +4145,14 @@ define amdgpu_kernel void @sdiv_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x ; CHECK-NEXT: [[TMP29:%.*]] = xor i32 [[TMP27]], [[TMP28]] ; CHECK-NEXT: [[TMP30:%.*]] = ashr i32 [[TMP29]], 30 ; CHECK-NEXT: [[TMP31:%.*]] = or i32 [[TMP30]], 1 -; CHECK-NEXT: [[TMP32:%.*]] = sitofp i32 [[TMP27]] to float -; CHECK-NEXT: [[TMP33:%.*]] = sitofp i32 [[TMP28]] to float +; CHECK-NEXT: [[TMP32:%.*]] = sitofp fast i32 [[TMP27]] to float +; CHECK-NEXT: [[TMP33:%.*]] = sitofp fast i32 [[TMP28]] to float ; CHECK-NEXT: [[TMP34:%.*]] = call fast float 
@llvm.amdgcn.rcp.f32(float [[TMP33]]) ; CHECK-NEXT: [[TMP35:%.*]] = fmul fast float [[TMP32]], [[TMP34]] ; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.trunc.f32(float [[TMP35]]) ; CHECK-NEXT: [[TMP37:%.*]] = fneg fast float [[TMP36]] ; CHECK-NEXT: [[TMP38:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP37]], float [[TMP33]], float [[TMP32]]) -; CHECK-NEXT: [[TMP39:%.*]] = fptosi float [[TMP36]] to i32 +; CHECK-NEXT: [[TMP39:%.*]] = fptosi fast float [[TMP36]] to i32 ; CHECK-NEXT: [[TMP40:%.*]] = call fast float @llvm.fabs.f32(float [[TMP38]]) ; CHECK-NEXT: [[TMP41:%.*]] = call fast float @llvm.fabs.f32(float [[TMP33]]) ; CHECK-NEXT: [[TMP42:%.*]] = fcmp fast oge float [[TMP40]], [[TMP41]] @@ -4169,14 +4169,14 @@ define amdgpu_kernel void @sdiv_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x ; CHECK-NEXT: [[TMP53:%.*]] = xor i32 [[TMP51]], [[TMP52]] ; CHECK-NEXT: [[TMP54:%.*]] = ashr i32 [[TMP53]], 30 ; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP54]], 1 -; CHECK-NEXT: [[TMP56:%.*]] = sitofp i32 [[TMP51]] to float -; CHECK-NEXT: [[TMP57:%.*]] = sitofp i32 [[TMP52]] to float +; CHECK-NEXT: [[TMP56:%.*]] = sitofp fast i32 [[TMP51]] to float +; CHECK-NEXT: [[TMP57:%.*]] = sitofp fast i32 [[TMP52]] to float ; CHECK-NEXT: [[TMP58:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP57]]) ; CHECK-NEXT: [[TMP59:%.*]] = fmul fast float [[TMP56]], [[TMP58]] ; CHECK-NEXT: [[TMP60:%.*]] = call fast float @llvm.trunc.f32(float [[TMP59]]) ; CHECK-NEXT: [[TMP61:%.*]] = fneg fast float [[TMP60]] ; CHECK-NEXT: [[TMP62:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP61]], float [[TMP57]], float [[TMP56]]) -; CHECK-NEXT: [[TMP63:%.*]] = fptosi float [[TMP60]] to i32 +; CHECK-NEXT: [[TMP63:%.*]] = fptosi fast float [[TMP60]] to i32 ; CHECK-NEXT: [[TMP64:%.*]] = call fast float @llvm.fabs.f32(float [[TMP62]]) ; CHECK-NEXT: [[TMP65:%.*]] = call fast float @llvm.fabs.f32(float [[TMP57]]) ; CHECK-NEXT: [[TMP66:%.*]] = fcmp fast oge float [[TMP64]], [[TMP65]] 
@@ -4324,14 +4324,14 @@ define amdgpu_kernel void @srem_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x ; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = ashr i32 [[TMP5]], 30 ; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = sitofp i32 [[TMP3]] to float -; CHECK-NEXT: [[TMP9:%.*]] = sitofp i32 [[TMP4]] to float +; CHECK-NEXT: [[TMP8:%.*]] = sitofp fast i32 [[TMP3]] to float +; CHECK-NEXT: [[TMP9:%.*]] = sitofp fast i32 [[TMP4]] to float ; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP9]]) ; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]]) ; CHECK-NEXT: [[TMP13:%.*]] = fneg fast float [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]]) -; CHECK-NEXT: [[TMP15:%.*]] = fptosi float [[TMP12]] to i32 +; CHECK-NEXT: [[TMP15:%.*]] = fptosi fast float [[TMP12]] to i32 ; CHECK-NEXT: [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]]) ; CHECK-NEXT: [[TMP17:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]]) ; CHECK-NEXT: [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]] @@ -4350,14 +4350,14 @@ define amdgpu_kernel void @srem_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x ; CHECK-NEXT: [[TMP31:%.*]] = xor i32 [[TMP29]], [[TMP30]] ; CHECK-NEXT: [[TMP32:%.*]] = ashr i32 [[TMP31]], 30 ; CHECK-NEXT: [[TMP33:%.*]] = or i32 [[TMP32]], 1 -; CHECK-NEXT: [[TMP34:%.*]] = sitofp i32 [[TMP29]] to float -; CHECK-NEXT: [[TMP35:%.*]] = sitofp i32 [[TMP30]] to float +; CHECK-NEXT: [[TMP34:%.*]] = sitofp fast i32 [[TMP29]] to float +; CHECK-NEXT: [[TMP35:%.*]] = sitofp fast i32 [[TMP30]] to float ; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP35]]) ; CHECK-NEXT: [[TMP37:%.*]] = fmul fast float [[TMP34]], [[TMP36]] ; CHECK-NEXT: [[TMP38:%.*]] = call fast float 
@llvm.trunc.f32(float [[TMP37]]) ; CHECK-NEXT: [[TMP39:%.*]] = fneg fast float [[TMP38]] ; CHECK-NEXT: [[TMP40:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP39]], float [[TMP35]], float [[TMP34]]) -; CHECK-NEXT: [[TMP41:%.*]] = fptosi float [[TMP38]] to i32 +; CHECK-NEXT: [[TMP41:%.*]] = fptosi fast float [[TMP38]] to i32 ; CHECK-NEXT: [[TMP42:%.*]] = call fast float @llvm.fabs.f32(float [[TMP40]]) ; CHECK-NEXT: [[TMP43:%.*]] = call fast float @llvm.fabs.f32(float [[TMP35]]) ; CHECK-NEXT: [[TMP44:%.*]] = fcmp fast oge float [[TMP42]], [[TMP43]] @@ -4376,14 +4376,14 @@ define amdgpu_kernel void @srem_v3i16(ptr addrspace(1) %out, <3 x i16> %x, <3 x ; CHECK-NEXT: [[TMP57:%.*]] = xor i32 [[TMP55]], [[TMP56]] ; CHECK-NEXT: [[TMP58:%.*]] = ashr i32 [[TMP57]], 30 ; CHECK-NEXT: [[TMP59:%.*]] = or i32 [[TMP58]], 1 -; CHECK-NEXT: [[TMP60:%.*]] = sitofp i32 [[TMP55]] to float -; CHECK-NEXT: [[TMP61:%.*]] = sitofp i32 [[TMP56]] to float +; CHECK-NEXT: [[TMP60:%.*]] = sitofp fast i32 [[TMP55]] to float +; CHECK-NEXT: [[TMP61:%.*]] = sitofp fast i32 [[TMP56]] to float ; CHECK-NEXT: [[TMP62:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP61]]) ; CHECK-NEXT: [[TMP63:%.*]] = fmul fast float [[TMP60]], [[TMP62]] ; CHECK-NEXT: [[TMP64:%.*]] = call fast float @llvm.trunc.f32(float [[TMP63]]) ; CHECK-NEXT: [[TMP65:%.*]] = fneg fast float [[TMP64]] ; CHECK-NEXT: [[TMP66:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP65]], float [[TMP61]], float [[TMP60]]) -; CHECK-NEXT: [[TMP67:%.*]] = fptosi float [[TMP64]] to i32 +; CHECK-NEXT: [[TMP67:%.*]] = fptosi fast float [[TMP64]] to i32 ; CHECK-NEXT: [[TMP68:%.*]] = call fast float @llvm.fabs.f32(float [[TMP66]]) ; CHECK-NEXT: [[TMP69:%.*]] = call fast float @llvm.fabs.f32(float [[TMP61]]) ; CHECK-NEXT: [[TMP70:%.*]] = fcmp fast oge float [[TMP68]], [[TMP69]] @@ -4551,7 +4551,7 @@ define amdgpu_kernel void @udiv_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x ; CHECK-NEXT: [[TMP9:%.*]] = call fast float 
@llvm.trunc.f32(float [[TMP8]]) ; CHECK-NEXT: [[TMP10:%.*]] = fneg fast float [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]]) -; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP9]] to i32 +; CHECK-NEXT: [[TMP12:%.*]] = fptoui fast float [[TMP9]] to i32 ; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]]) ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP6]]) ; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast oge float [[TMP13]], [[TMP14]] @@ -4571,7 +4571,7 @@ define amdgpu_kernel void @udiv_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x ; CHECK-NEXT: [[TMP29:%.*]] = call fast float @llvm.trunc.f32(float [[TMP28]]) ; CHECK-NEXT: [[TMP30:%.*]] = fneg fast float [[TMP29]] ; CHECK-NEXT: [[TMP31:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP30]], float [[TMP26]], float [[TMP25]]) -; CHECK-NEXT: [[TMP32:%.*]] = fptoui float [[TMP29]] to i32 +; CHECK-NEXT: [[TMP32:%.*]] = fptoui fast float [[TMP29]] to i32 ; CHECK-NEXT: [[TMP33:%.*]] = call fast float @llvm.fabs.f32(float [[TMP31]]) ; CHECK-NEXT: [[TMP34:%.*]] = call fast float @llvm.fabs.f32(float [[TMP26]]) ; CHECK-NEXT: [[TMP35:%.*]] = fcmp fast oge float [[TMP33]], [[TMP34]] @@ -4591,7 +4591,7 @@ define amdgpu_kernel void @udiv_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x ; CHECK-NEXT: [[TMP49:%.*]] = call fast float @llvm.trunc.f32(float [[TMP48]]) ; CHECK-NEXT: [[TMP50:%.*]] = fneg fast float [[TMP49]] ; CHECK-NEXT: [[TMP51:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP50]], float [[TMP46]], float [[TMP45]]) -; CHECK-NEXT: [[TMP52:%.*]] = fptoui float [[TMP49]] to i32 +; CHECK-NEXT: [[TMP52:%.*]] = fptoui fast float [[TMP49]] to i32 ; CHECK-NEXT: [[TMP53:%.*]] = call fast float @llvm.fabs.f32(float [[TMP51]]) ; CHECK-NEXT: [[TMP54:%.*]] = call fast float @llvm.fabs.f32(float [[TMP46]]) ; CHECK-NEXT: [[TMP55:%.*]] = fcmp fast oge float [[TMP53]], [[TMP54]] @@ -4732,7 
+4732,7 @@ define amdgpu_kernel void @urem_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.trunc.f32(float [[TMP8]]) ; CHECK-NEXT: [[TMP10:%.*]] = fneg fast float [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP10]], float [[TMP6]], float [[TMP5]]) -; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP9]] to i32 +; CHECK-NEXT: [[TMP12:%.*]] = fptoui fast float [[TMP9]] to i32 ; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.fabs.f32(float [[TMP11]]) ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.fabs.f32(float [[TMP6]]) ; CHECK-NEXT: [[TMP15:%.*]] = fcmp fast oge float [[TMP13]], [[TMP14]] @@ -4754,7 +4754,7 @@ define amdgpu_kernel void @urem_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x ; CHECK-NEXT: [[TMP31:%.*]] = call fast float @llvm.trunc.f32(float [[TMP30]]) ; CHECK-NEXT: [[TMP32:%.*]] = fneg fast float [[TMP31]] ; CHECK-NEXT: [[TMP33:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP32]], float [[TMP28]], float [[TMP27]]) -; CHECK-NEXT: [[TMP34:%.*]] = fptoui float [[TMP31]] to i32 +; CHECK-NEXT: [[TMP34:%.*]] = fptoui fast float [[TMP31]] to i32 ; CHECK-NEXT: [[TMP35:%.*]] = call fast float @llvm.fabs.f32(float [[TMP33]]) ; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.fabs.f32(float [[TMP28]]) ; CHECK-NEXT: [[TMP37:%.*]] = fcmp fast oge float [[TMP35]], [[TMP36]] @@ -4776,7 +4776,7 @@ define amdgpu_kernel void @urem_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x ; CHECK-NEXT: [[TMP53:%.*]] = call fast float @llvm.trunc.f32(float [[TMP52]]) ; CHECK-NEXT: [[TMP54:%.*]] = fneg fast float [[TMP53]] ; CHECK-NEXT: [[TMP55:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP54]], float [[TMP50]], float [[TMP49]]) -; CHECK-NEXT: [[TMP56:%.*]] = fptoui float [[TMP53]] to i32 +; CHECK-NEXT: [[TMP56:%.*]] = fptoui fast float [[TMP53]] to i32 ; CHECK-NEXT: [[TMP57:%.*]] = call fast float @llvm.fabs.f32(float [[TMP55]]) ; CHECK-NEXT: 
[[TMP58:%.*]] = call fast float @llvm.fabs.f32(float [[TMP50]]) ; CHECK-NEXT: [[TMP59:%.*]] = fcmp fast oge float [[TMP57]], [[TMP58]] @@ -4931,14 +4931,14 @@ define amdgpu_kernel void @sdiv_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x ; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = ashr i32 [[TMP5]], 30 ; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = sitofp i32 [[TMP3]] to float -; CHECK-NEXT: [[TMP9:%.*]] = sitofp i32 [[TMP4]] to float +; CHECK-NEXT: [[TMP8:%.*]] = sitofp fast i32 [[TMP3]] to float +; CHECK-NEXT: [[TMP9:%.*]] = sitofp fast i32 [[TMP4]] to float ; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP9]]) ; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]]) ; CHECK-NEXT: [[TMP13:%.*]] = fneg fast float [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]]) -; CHECK-NEXT: [[TMP15:%.*]] = fptosi float [[TMP12]] to i32 +; CHECK-NEXT: [[TMP15:%.*]] = fptosi fast float [[TMP12]] to i32 ; CHECK-NEXT: [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]]) ; CHECK-NEXT: [[TMP17:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]]) ; CHECK-NEXT: [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]] @@ -4955,14 +4955,14 @@ define amdgpu_kernel void @sdiv_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x ; CHECK-NEXT: [[TMP29:%.*]] = xor i32 [[TMP27]], [[TMP28]] ; CHECK-NEXT: [[TMP30:%.*]] = ashr i32 [[TMP29]], 30 ; CHECK-NEXT: [[TMP31:%.*]] = or i32 [[TMP30]], 1 -; CHECK-NEXT: [[TMP32:%.*]] = sitofp i32 [[TMP27]] to float -; CHECK-NEXT: [[TMP33:%.*]] = sitofp i32 [[TMP28]] to float +; CHECK-NEXT: [[TMP32:%.*]] = sitofp fast i32 [[TMP27]] to float +; CHECK-NEXT: [[TMP33:%.*]] = sitofp fast i32 [[TMP28]] to float ; CHECK-NEXT: [[TMP34:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float 
[[TMP33]]) ; CHECK-NEXT: [[TMP35:%.*]] = fmul fast float [[TMP32]], [[TMP34]] ; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.trunc.f32(float [[TMP35]]) ; CHECK-NEXT: [[TMP37:%.*]] = fneg fast float [[TMP36]] ; CHECK-NEXT: [[TMP38:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP37]], float [[TMP33]], float [[TMP32]]) -; CHECK-NEXT: [[TMP39:%.*]] = fptosi float [[TMP36]] to i32 +; CHECK-NEXT: [[TMP39:%.*]] = fptosi fast float [[TMP36]] to i32 ; CHECK-NEXT: [[TMP40:%.*]] = call fast float @llvm.fabs.f32(float [[TMP38]]) ; CHECK-NEXT: [[TMP41:%.*]] = call fast float @llvm.fabs.f32(float [[TMP33]]) ; CHECK-NEXT: [[TMP42:%.*]] = fcmp fast oge float [[TMP40]], [[TMP41]] @@ -4979,14 +4979,14 @@ define amdgpu_kernel void @sdiv_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x ; CHECK-NEXT: [[TMP53:%.*]] = xor i32 [[TMP51]], [[TMP52]] ; CHECK-NEXT: [[TMP54:%.*]] = ashr i32 [[TMP53]], 30 ; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP54]], 1 -; CHECK-NEXT: [[TMP56:%.*]] = sitofp i32 [[TMP51]] to float -; CHECK-NEXT: [[TMP57:%.*]] = sitofp i32 [[TMP52]] to float +; CHECK-NEXT: [[TMP56:%.*]] = sitofp fast i32 [[TMP51]] to float +; CHECK-NEXT: [[TMP57:%.*]] = sitofp fast i32 [[TMP52]] to float ; CHECK-NEXT: [[TMP58:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP57]]) ; CHECK-NEXT: [[TMP59:%.*]] = fmul fast float [[TMP56]], [[TMP58]] ; CHECK-NEXT: [[TMP60:%.*]] = call fast float @llvm.trunc.f32(float [[TMP59]]) ; CHECK-NEXT: [[TMP61:%.*]] = fneg fast float [[TMP60]] ; CHECK-NEXT: [[TMP62:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP61]], float [[TMP57]], float [[TMP56]]) -; CHECK-NEXT: [[TMP63:%.*]] = fptosi float [[TMP60]] to i32 +; CHECK-NEXT: [[TMP63:%.*]] = fptosi fast float [[TMP60]] to i32 ; CHECK-NEXT: [[TMP64:%.*]] = call fast float @llvm.fabs.f32(float [[TMP62]]) ; CHECK-NEXT: [[TMP65:%.*]] = call fast float @llvm.fabs.f32(float [[TMP57]]) ; CHECK-NEXT: [[TMP66:%.*]] = fcmp fast oge float [[TMP64]], [[TMP65]] @@ -5152,14 +5152,14 @@ 
define amdgpu_kernel void @srem_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x ; CHECK-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = ashr i32 [[TMP5]], 30 ; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = sitofp i32 [[TMP3]] to float -; CHECK-NEXT: [[TMP9:%.*]] = sitofp i32 [[TMP4]] to float +; CHECK-NEXT: [[TMP8:%.*]] = sitofp fast i32 [[TMP3]] to float +; CHECK-NEXT: [[TMP9:%.*]] = sitofp fast i32 [[TMP4]] to float ; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP9]]) ; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP8]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.trunc.f32(float [[TMP11]]) ; CHECK-NEXT: [[TMP13:%.*]] = fneg fast float [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP13]], float [[TMP9]], float [[TMP8]]) -; CHECK-NEXT: [[TMP15:%.*]] = fptosi float [[TMP12]] to i32 +; CHECK-NEXT: [[TMP15:%.*]] = fptosi fast float [[TMP12]] to i32 ; CHECK-NEXT: [[TMP16:%.*]] = call fast float @llvm.fabs.f32(float [[TMP14]]) ; CHECK-NEXT: [[TMP17:%.*]] = call fast float @llvm.fabs.f32(float [[TMP9]]) ; CHECK-NEXT: [[TMP18:%.*]] = fcmp fast oge float [[TMP16]], [[TMP17]] @@ -5178,14 +5178,14 @@ define amdgpu_kernel void @srem_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x ; CHECK-NEXT: [[TMP31:%.*]] = xor i32 [[TMP29]], [[TMP30]] ; CHECK-NEXT: [[TMP32:%.*]] = ashr i32 [[TMP31]], 30 ; CHECK-NEXT: [[TMP33:%.*]] = or i32 [[TMP32]], 1 -; CHECK-NEXT: [[TMP34:%.*]] = sitofp i32 [[TMP29]] to float -; CHECK-NEXT: [[TMP35:%.*]] = sitofp i32 [[TMP30]] to float +; CHECK-NEXT: [[TMP34:%.*]] = sitofp fast i32 [[TMP29]] to float +; CHECK-NEXT: [[TMP35:%.*]] = sitofp fast i32 [[TMP30]] to float ; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP35]]) ; CHECK-NEXT: [[TMP37:%.*]] = fmul fast float [[TMP34]], [[TMP36]] ; CHECK-NEXT: [[TMP38:%.*]] = call fast float @llvm.trunc.f32(float [[TMP37]]) ; 
CHECK-NEXT: [[TMP39:%.*]] = fneg fast float [[TMP38]] ; CHECK-NEXT: [[TMP40:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP39]], float [[TMP35]], float [[TMP34]]) -; CHECK-NEXT: [[TMP41:%.*]] = fptosi float [[TMP38]] to i32 +; CHECK-NEXT: [[TMP41:%.*]] = fptosi fast float [[TMP38]] to i32 ; CHECK-NEXT: [[TMP42:%.*]] = call fast float @llvm.fabs.f32(float [[TMP40]]) ; CHECK-NEXT: [[TMP43:%.*]] = call fast float @llvm.fabs.f32(float [[TMP35]]) ; CHECK-NEXT: [[TMP44:%.*]] = fcmp fast oge float [[TMP42]], [[TMP43]] @@ -5204,14 +5204,14 @@ define amdgpu_kernel void @srem_v3i15(ptr addrspace(1) %out, <3 x i15> %x, <3 x ; CHECK-NEXT: [[TMP57:%.*]] = xor i32 [[TMP55]], [[TMP56]] ; CHECK-NEXT: [[TMP58:%.*]] = ashr i32 [[TMP57]], 30 ; CHECK-NEXT: [[TMP59:%.*]] = or i32 [[TMP58]], 1 -; CHECK-NEXT: [[TMP60:%.*]] = sitofp i32 [[TMP55]] to float -; CHECK-NEXT: [[TMP61:%.*]] = sitofp i32 [[TMP56]] to float +; CHECK-NEXT: [[TMP60:%.*]] = sitofp fast i32 [[TMP55]] to float +; CHECK-NEXT: [[TMP61:%.*]] = sitofp fast i32 [[TMP56]] to float ; CHECK-NEXT: [[TMP62:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP61]]) ; CHECK-NEXT: [[TMP63:%.*]] = fmul fast float [[TMP60]], [[TMP62]] ; CHECK-NEXT: [[TMP64:%.*]] = call fast float @llvm.trunc.f32(float [[TMP63]]) ; CHECK-NEXT: [[TMP65:%.*]] = fneg fast float [[TMP64]] ; CHECK-NEXT: [[TMP66:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP65]], float [[TMP61]], float [[TMP60]]) -; CHECK-NEXT: [[TMP67:%.*]] = fptosi float [[TMP64]] to i32 +; CHECK-NEXT: [[TMP67:%.*]] = fptosi fast float [[TMP64]] to i32 ; CHECK-NEXT: [[TMP68:%.*]] = call fast float @llvm.fabs.f32(float [[TMP66]]) ; CHECK-NEXT: [[TMP69:%.*]] = call fast float @llvm.fabs.f32(float [[TMP61]]) ; CHECK-NEXT: [[TMP70:%.*]] = fcmp fast oge float [[TMP68]], [[TMP69]] @@ -5601,7 +5601,7 @@ define amdgpu_kernel void @udiv_v2i32_pow2_shl_denom(ptr addrspace(1) %out, <2 x ; CHECK-NEXT: [[TMP3:%.*]] = uitofp i32 [[TMP2]] to float ; CHECK-NEXT: 
[[TMP4:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP3]]) ; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP4]], 0x41EFFFFFC0000000 -; CHECK-NEXT: [[TMP6:%.*]] = fptoui float [[TMP5]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = fptoui fast float [[TMP5]] to i32 ; CHECK-NEXT: [[TMP7:%.*]] = sub i32 0, [[TMP2]] ; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[TMP7]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP6]] to i64 @@ -5633,7 +5633,7 @@ define amdgpu_kernel void @udiv_v2i32_pow2_shl_denom(ptr addrspace(1) %out, <2 x ; CHECK-NEXT: [[TMP35:%.*]] = uitofp i32 [[TMP34]] to float ; CHECK-NEXT: [[TMP36:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP35]]) ; CHECK-NEXT: [[TMP37:%.*]] = fmul fast float [[TMP36]], 0x41EFFFFFC0000000 -; CHECK-NEXT: [[TMP38:%.*]] = fptoui float [[TMP37]] to i32 +; CHECK-NEXT: [[TMP38:%.*]] = fptoui fast float [[TMP37]] to i32 ; CHECK-NEXT: [[TMP39:%.*]] = sub i32 0, [[TMP34]] ; CHECK-NEXT: [[TMP40:%.*]] = mul i32 [[TMP39]], [[TMP38]] ; CHECK-NEXT: [[TMP41:%.*]] = zext i32 [[TMP38]] to i64 @@ -5946,7 +5946,7 @@ define amdgpu_kernel void @urem_v2i32_pow2_shl_denom(ptr addrspace(1) %out, <2 x ; CHECK-NEXT: [[TMP3:%.*]] = uitofp i32 [[TMP2]] to float ; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP3]]) ; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP4]], 0x41EFFFFFC0000000 -; CHECK-NEXT: [[TMP6:%.*]] = fptoui float [[TMP5]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = fptoui fast float [[TMP5]] to i32 ; CHECK-NEXT: [[TMP7:%.*]] = sub i32 0, [[TMP2]] ; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[TMP7]], [[TMP6]] ; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP6]] to i64 @@ -5976,7 +5976,7 @@ define amdgpu_kernel void @urem_v2i32_pow2_shl_denom(ptr addrspace(1) %out, <2 x ; CHECK-NEXT: [[TMP33:%.*]] = uitofp i32 [[TMP32]] to float ; CHECK-NEXT: [[TMP34:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP33]]) ; CHECK-NEXT: [[TMP35:%.*]] = fmul fast float [[TMP34]], 0x41EFFFFFC0000000 -; CHECK-NEXT: [[TMP36:%.*]] = fptoui 
float [[TMP35]] to i32 +; CHECK-NEXT: [[TMP36:%.*]] = fptoui fast float [[TMP35]] to i32 ; CHECK-NEXT: [[TMP37:%.*]] = sub i32 0, [[TMP32]] ; CHECK-NEXT: [[TMP38:%.*]] = mul i32 [[TMP37]], [[TMP36]] ; CHECK-NEXT: [[TMP39:%.*]] = zext i32 [[TMP36]] to i64 @@ -6408,7 +6408,7 @@ define amdgpu_kernel void @sdiv_v2i32_pow2_shl_denom(ptr addrspace(1) %out, <2 x ; CHECK-NEXT: [[TMP10:%.*]] = uitofp i32 [[TMP9]] to float ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP10]]) ; CHECK-NEXT: [[TMP12:%.*]] = fmul fast float [[TMP11]], 0x41EFFFFFC0000000 -; CHECK-NEXT: [[TMP13:%.*]] = fptoui float [[TMP12]] to i32 +; CHECK-NEXT: [[TMP13:%.*]] = fptoui fast float [[TMP12]] to i32 ; CHECK-NEXT: [[TMP14:%.*]] = sub i32 0, [[TMP9]] ; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], [[TMP13]] ; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP13]] to i64 @@ -6449,7 +6449,7 @@ define amdgpu_kernel void @sdiv_v2i32_pow2_shl_denom(ptr addrspace(1) %out, <2 x ; CHECK-NEXT: [[TMP51:%.*]] = uitofp i32 [[TMP50]] to float ; CHECK-NEXT: [[TMP52:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP51]]) ; CHECK-NEXT: [[TMP53:%.*]] = fmul fast float [[TMP52]], 0x41EFFFFFC0000000 -; CHECK-NEXT: [[TMP54:%.*]] = fptoui float [[TMP53]] to i32 +; CHECK-NEXT: [[TMP54:%.*]] = fptoui fast float [[TMP53]] to i32 ; CHECK-NEXT: [[TMP55:%.*]] = sub i32 0, [[TMP50]] ; CHECK-NEXT: [[TMP56:%.*]] = mul i32 [[TMP55]], [[TMP54]] ; CHECK-NEXT: [[TMP57:%.*]] = zext i32 [[TMP54]] to i64 @@ -6865,7 +6865,7 @@ define amdgpu_kernel void @srem_v2i32_pow2_shl_denom(ptr addrspace(1) %out, <2 x ; CHECK-NEXT: [[TMP9:%.*]] = uitofp i32 [[TMP8]] to float ; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP9]]) ; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float [[TMP10]], 0x41EFFFFFC0000000 -; CHECK-NEXT: [[TMP12:%.*]] = fptoui float [[TMP11]] to i32 +; CHECK-NEXT: [[TMP12:%.*]] = fptoui fast float [[TMP11]] to i32 ; CHECK-NEXT: [[TMP13:%.*]] = sub i32 0, [[TMP8]] ; CHECK-NEXT: 
[[TMP14:%.*]] = mul i32 [[TMP13]], [[TMP12]] ; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP12]] to i64 @@ -6903,7 +6903,7 @@ define amdgpu_kernel void @srem_v2i32_pow2_shl_denom(ptr addrspace(1) %out, <2 x ; CHECK-NEXT: [[TMP47:%.*]] = uitofp i32 [[TMP46]] to float ; CHECK-NEXT: [[TMP48:%.*]] = call fast float @llvm.amdgcn.rcp.f32(float [[TMP47]]) ; CHECK-NEXT: [[TMP49:%.*]] = fmul fast float [[TMP48]], 0x41EFFFFFC0000000 -; CHECK-NEXT: [[TMP50:%.*]] = fptoui float [[TMP49]] to i32 +; CHECK-NEXT: [[TMP50:%.*]] = fptoui fast float [[TMP49]] to i32 ; CHECK-NEXT: [[TMP51:%.*]] = sub i32 0, [[TMP46]] ; CHECK-NEXT: [[TMP52:%.*]] = mul i32 [[TMP51]], [[TMP50]] ; CHECK-NEXT: [[TMP53:%.*]] = zext i32 [[TMP50]] to i64 diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll index 091e5a67799a9..84977cede80bd 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll @@ -2191,7 +2191,7 @@ define float @test_pow_afn_f32_known_integral_sitofp(float %x, i32 %y) { ; CHECK-LABEL: define float @test_pow_afn_f32_known_integral_sitofp ; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) { ; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float -; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = fptosi afn float [[Y_CAST]] to i32 ; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 [[TMP1]]) ; CHECK-NEXT: ret float [[POW]] ; @@ -2204,10 +2204,10 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp(float %x, i32 %y) ; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp ; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) { ; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float -; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = fptosi nnan ninf afn float [[Y_CAST]] to i32 ; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn 
float @llvm.fabs.f32(float [[X]]) ; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]]) -; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[TMP1]] to float +; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp nnan ninf afn i32 [[TMP1]] to float ; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]] ; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]]) ; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31 @@ -2227,7 +2227,7 @@ define float @test_pow_afn_nnan_f32_known_integral_sitofp(float %x, i32 %y) { ; CHECK-LABEL: define float @test_pow_afn_nnan_f32_known_integral_sitofp ; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) { ; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float -; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = fptosi nnan afn float [[Y_CAST]] to i32 ; CHECK-NEXT: [[POW:%.*]] = tail call nnan afn float @_Z4pownfi(float [[X]], i32 [[TMP1]]) ; CHECK-NEXT: ret float [[POW]] ; @@ -2240,7 +2240,7 @@ define float @test_pow_afn_ninf_f32_known_integral_sitofp(float %x, i32 %y) { ; CHECK-LABEL: define float @test_pow_afn_ninf_f32_known_integral_sitofp ; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) { ; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float -; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = fptosi ninf afn float [[Y_CAST]] to i32 ; CHECK-NEXT: [[POW:%.*]] = tail call ninf afn float @_Z4pownfi(float [[X]], i32 [[TMP1]]) ; CHECK-NEXT: ret float [[POW]] ; @@ -2279,7 +2279,7 @@ define float @test_pow_afn_f32_known_integral_uitofp(float %x, i32 %y) { ; CHECK-LABEL: define float @test_pow_afn_f32_known_integral_uitofp ; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) { ; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp i32 [[Y]] to float -; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = fptosi afn float [[Y_CAST]] to i32 ; CHECK-NEXT: [[POW:%.*]] = tail call afn float 
@_Z4pownfi(float [[X]], i32 [[TMP1]]) ; CHECK-NEXT: ret float [[POW]] ; @@ -2292,10 +2292,10 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp(float %x, i32 %y) ; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp ; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) { ; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp i32 [[Y]] to float -; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = fptosi nnan ninf afn float [[Y_CAST]] to i32 ; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]]) ; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]]) -; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[TMP1]] to float +; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp nnan ninf afn i32 [[TMP1]] to float ; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]] ; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]]) ; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31 @@ -2341,10 +2341,10 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp_i256(float %x, i2 ; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_uitofp_i256 ; CHECK-SAME: (float [[X:%.*]], i256 [[Y:%.*]]) { ; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp i256 [[Y]] to float -; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = fptosi nnan ninf afn float [[Y_CAST]] to i32 ; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]]) ; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]]) -; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[TMP1]] to float +; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp nnan ninf afn i32 [[TMP1]] to float ; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]] ; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]]) ; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31 
@@ -2364,10 +2364,10 @@ define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp_i256(float %x, i2 ; CHECK-LABEL: define float @test_pow_afn_nnan_ninf_f32_known_integral_sitofp_i256 ; CHECK-SAME: (float [[X:%.*]], i256 [[Y:%.*]]) { ; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i256 [[Y]] to float -; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y_CAST]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = fptosi nnan ninf afn float [[Y_CAST]] to i32 ; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]]) ; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]]) -; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[TMP1]] to float +; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp nnan ninf afn i32 [[TMP1]] to float ; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]] ; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]]) ; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31 @@ -2387,10 +2387,10 @@ define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_sitofp(<2 x floa ; CHECK-LABEL: define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_sitofp ; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) { ; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp <2 x i32> [[Y]] to <2 x float> -; CHECK-NEXT: [[TMP1:%.*]] = fptosi <2 x float> [[Y_CAST]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fptosi nnan ninf afn <2 x float> [[Y_CAST]] to <2 x i32> ; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]]) ; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @llvm.log2.v2f32(<2 x float> [[__FABS]]) -; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float> +; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp nnan ninf afn <2 x i32> [[TMP1]] to <2 x float> ; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[POWNI2F]] ; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> 
[[__YLOGX]]) ; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 31) @@ -2423,7 +2423,7 @@ define <2 x float> @test_pow_afn_v2f32_known_integral_uitofp(<2 x float> %x, <2 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_known_integral_uitofp ; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) { ; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp <2 x i32> [[Y]] to <2 x float> -; CHECK-NEXT: [[TMP1:%.*]] = fptosi <2 x float> [[Y_CAST]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fptosi afn <2 x float> [[Y_CAST]] to <2 x i32> ; CHECK-NEXT: [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> [[TMP1]]) ; CHECK-NEXT: ret <2 x float> [[POW]] ; @@ -2436,10 +2436,10 @@ define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_uitofp(<2 x floa ; CHECK-LABEL: define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_uitofp ; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) { ; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp <2 x i32> [[Y]] to <2 x float> -; CHECK-NEXT: [[TMP1:%.*]] = fptosi <2 x float> [[Y_CAST]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fptosi nnan ninf afn <2 x float> [[Y_CAST]] to <2 x i32> ; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]]) ; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @llvm.log2.v2f32(<2 x float> [[__FABS]]) -; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float> +; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp nnan ninf afn <2 x i32> [[TMP1]] to <2 x float> ; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[POWNI2F]] ; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]]) ; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 31) @@ -2548,10 +2548,10 @@ define float @test_pow_afn_f32_nnan_ninf__y_known_integral_trunc(float %x, float ; CHECK-LABEL: define float @test_pow_afn_f32_nnan_ninf__y_known_integral_trunc ; CHECK-SAME: 
(float [[X:%.*]], float [[Y_ARG:%.*]]) { ; CHECK-NEXT: [[Y:%.*]] = call float @llvm.trunc.f32(float [[Y_ARG]]) -; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = fptosi nnan ninf afn float [[Y]] to i32 ; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]]) ; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]]) -; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[TMP1]] to float +; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp nnan ninf afn i32 [[TMP1]] to float ; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]] ; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]]) ; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[TMP1]], 31 @@ -2571,7 +2571,7 @@ define float @test_pow_afn_f32__y_known_integral_trunc(float %x, float nofpclass ; CHECK-LABEL: define float @test_pow_afn_f32__y_known_integral_trunc ; CHECK-SAME: (float [[X:%.*]], float nofpclass(nan inf) [[Y_ARG:%.*]]) { ; CHECK-NEXT: [[Y:%.*]] = call float @llvm.trunc.f32(float [[Y_ARG]]) -; CHECK-NEXT: [[TMP1:%.*]] = fptosi float [[Y]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = fptosi afn float [[Y]] to i32 ; CHECK-NEXT: [[POW:%.*]] = tail call afn float @_Z4pownfi(float [[X]], i32 [[TMP1]]) ; CHECK-NEXT: ret float [[POW]] ; diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll index 27d204c1a253f..b7e6272761c1b 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll @@ -671,7 +671,7 @@ define float @test_pown_afn_nnan_ninf_f32(float %x, i32 %y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]]) ; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[__FABS]]) -; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[Y]] to float +; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp 
nnan ninf afn i32 [[Y]] to float ; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]] ; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]]) ; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[Y]], 31 @@ -693,7 +693,7 @@ define <2 x float> @test_pown_afn_nnan_ninf_v2f32(<2 x float> %x, <2 x i32> %y) ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]]) ; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @llvm.log2.v2f32(<2 x float> [[__FABS]]) -; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp <2 x i32> [[Y]] to <2 x float> +; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp nnan ninf afn <2 x i32> [[Y]] to <2 x float> ; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[POWNI2F]] ; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]]) ; CHECK-NEXT: [[__YEVEN:%.*]] = shl <2 x i32> [[Y]], splat (i32 31) @@ -715,7 +715,7 @@ define double @test_pown_afn_nnan_ninf_f64(double %x, i32 %y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn double @llvm.fabs.f64(double [[X]]) ; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn double @_Z4log2d(double [[__FABS]]) -; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[Y]] to double +; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp nnan ninf afn i32 [[Y]] to double ; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn double [[__LOG2]], [[POWNI2F]] ; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn double @_Z4exp2d(double [[__YLOGX]]) ; CHECK-NEXT: [[__YTOU:%.*]] = zext i32 [[Y]] to i64 @@ -738,7 +738,7 @@ define <2 x double> @test_pown_afn_nnan_ninf_v2f64(<2 x double> %x, <2 x i32> %y ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn <2 x double> @llvm.fabs.v2f64(<2 x double> [[X]]) ; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x double> @_Z4log2Dv2_d(<2 x double> [[__FABS]]) -; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp <2 x i32> 
[[Y]] to <2 x double> +; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp nnan ninf afn <2 x i32> [[Y]] to <2 x double> ; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x double> [[__LOG2]], [[POWNI2F]] ; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x double> @_Z4exp2Dv2_d(<2 x double> [[__YLOGX]]) ; CHECK-NEXT: [[__YTOU:%.*]] = zext <2 x i32> [[Y]] to <2 x i64> @@ -761,7 +761,7 @@ define half @test_pown_afn_nnan_ninf_f16(half %x, i32 %y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn half @llvm.fabs.f16(half [[X]]) ; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn half @llvm.log2.f16(half [[__FABS]]) -; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[Y]] to half +; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp nnan ninf afn i32 [[Y]] to half ; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn half [[__LOG2]], [[POWNI2F]] ; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn half @llvm.exp2.f16(half [[__YLOGX]]) ; CHECK-NEXT: [[__YTOU:%.*]] = trunc i32 [[Y]] to i16 @@ -784,7 +784,7 @@ define <2 x half> @test_pown_afn_nnan_ninf_v2f16(<2 x half> %x, <2 x i32> %y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn <2 x half> @llvm.fabs.v2f16(<2 x half> [[X]]) ; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x half> @llvm.log2.v2f16(<2 x half> [[__FABS]]) -; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp <2 x i32> [[Y]] to <2 x half> +; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp nnan ninf afn <2 x i32> [[Y]] to <2 x half> ; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x half> [[__LOG2]], [[POWNI2F]] ; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x half> @llvm.exp2.v2f16(<2 x half> [[__YLOGX]]) ; CHECK-NEXT: [[__YTOU:%.*]] = trunc <2 x i32> [[Y]] to <2 x i16> @@ -1065,7 +1065,7 @@ define float @test_pown_afn_ninf_nnan_f32__x_known_positive(float nofpclass(ninf ; CHECK-NEXT: entry: ; CHECK-NEXT: [[__FABS:%.*]] = call nnan ninf afn float @llvm.fabs.f32(float [[X]]) ; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float 
[[__FABS]]) -; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[Y]] to float +; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp nnan ninf afn i32 [[Y]] to float ; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[POWNI2F]] ; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]]) ; CHECK-NEXT: [[__YEVEN:%.*]] = shl i32 [[Y]], 31 @@ -1126,7 +1126,7 @@ define float @test_fast_pown_f32_y_known_even(float %x, i32 %y.arg) { ; CHECK-NEXT: [[Y:%.*]] = shl i32 [[Y_ARG]], 1 ; CHECK-NEXT: [[__FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X]]) ; CHECK-NEXT: [[__LOG2:%.*]] = call fast float @llvm.log2.f32(float [[__FABS]]) -; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[Y]] to float +; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp fast i32 [[Y]] to float ; CHECK-NEXT: [[__YLOGX:%.*]] = fmul fast float [[__LOG2]], [[POWNI2F]] ; CHECK-NEXT: [[__EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[__YLOGX]]) ; CHECK-NEXT: ret float [[__EXP2]] @@ -1144,7 +1144,7 @@ define float @test_fast_pown_f32_known_positive_y_known_even(float nofpclass(nin ; CHECK-NEXT: [[Y:%.*]] = shl i32 [[Y_ARG]], 1 ; CHECK-NEXT: [[__FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X]]) ; CHECK-NEXT: [[__LOG2:%.*]] = call fast float @llvm.log2.f32(float [[__FABS]]) -; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp i32 [[Y]] to float +; CHECK-NEXT: [[POWNI2F:%.*]] = sitofp fast i32 [[Y]] to float ; CHECK-NEXT: [[__YLOGX:%.*]] = fmul fast float [[__LOG2]], [[POWNI2F]] ; CHECK-NEXT: [[__EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[__YLOGX]]) ; CHECK-NEXT: ret float [[__EXP2]] diff --git a/llvm/test/CodeGen/AMDGPU/divrem24-assume.ll b/llvm/test/CodeGen/AMDGPU/divrem24-assume.ll index dc79385d9eaca..46ff0f61b31a8 100644 --- a/llvm/test/CodeGen/AMDGPU/divrem24-assume.ll +++ b/llvm/test/CodeGen/AMDGPU/divrem24-assume.ll @@ -4,7 +4,7 @@ define amdgpu_kernel void @divrem24_assume(ptr addrspace(1) %arg, i32 %arg1) { ; CHECK-LABEL: @divrem24_assume( ; CHECK-NEXT: bb: -; CHECK-NEXT: 
[[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !0 +; CHECK-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x(), !range [[RNG0:![0-9]+]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[ARG1:%.*]], 42 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP2]]) ; CHECK-NEXT: [[TMP0:%.*]] = uitofp i32 [[TMP]] to float @@ -14,7 +14,7 @@ define amdgpu_kernel void @divrem24_assume(ptr addrspace(1) %arg, i32 %arg1) { ; CHECK-NEXT: [[TMP4:%.*]] = call fast float @llvm.trunc.f32(float [[TMP3]]) ; CHECK-NEXT: [[TMP5:%.*]] = fneg fast float [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = call fast float @llvm.amdgcn.fmad.ftz.f32(float [[TMP5]], float [[TMP1]], float [[TMP0]]) -; CHECK-NEXT: [[TMP7:%.*]] = fptoui float [[TMP4]] to i32 +; CHECK-NEXT: [[TMP7:%.*]] = fptoui fast float [[TMP4]] to i32 ; CHECK-NEXT: [[TMP8:%.*]] = call fast float @llvm.fabs.f32(float [[TMP6]]) ; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.fabs.f32(float [[TMP1]]) ; CHECK-NEXT: [[TMP10:%.*]] = fcmp fast oge float [[TMP8]], [[TMP9]] diff --git a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll index 6a88be6e55859..02d42f1774c26 100644 --- a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll +++ b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll @@ -352,7 +352,7 @@ declare half @_Z4pownDhi(half, i32) ; GCN-LABEL: {{^}}define half @test_pown_f16( ; GCN-NATIVE: %__fabs = tail call fast half @llvm.fabs.f16(half %x) ; GCN-NATIVE: %__log2 = tail call fast half @llvm.log2.f16(half %__fabs) -; GCN-NATIVE: %pownI2F = sitofp i32 %y to half +; GCN-NATIVE: %pownI2F = sitofp fast i32 %y to half ; GCN-NATIVE: %__ylogx = fmul fast half %__log2, %pownI2F ; GCN-NATIVE: %__exp2 = tail call fast half @llvm.exp2.f16(half %__ylogx) ; GCN-NATIVE: %__ytou = trunc i32 %y to i16 @@ -404,7 +404,7 @@ entry: ; GCN: %conv = fptosi float %tmp1 to i32 ; GCN: %__fabs = tail call fast float @llvm.fabs.f32(float %tmp) ; GCN: %__log2 = tail call fast float @llvm.log2.f32(float 
%__fabs) -; GCN: %pownI2F = sitofp i32 %conv to float +; GCN: %pownI2F = sitofp fast i32 %conv to float ; GCN: %__ylogx = fmul fast float %__log2, %pownI2F ; GCN: %__exp2 = tail call fast float @llvm.exp2.f32(float %__ylogx) ; GCN: %__yeven = shl i32 %conv, 31 diff --git a/llvm/test/Transforms/InstCombine/log-pow.ll b/llvm/test/Transforms/InstCombine/log-pow.ll index 374115953145d..f5091c4a21b7b 100644 --- a/llvm/test/Transforms/InstCombine/log-pow.ll +++ b/llvm/test/Transforms/InstCombine/log-pow.ll @@ -26,7 +26,7 @@ define double @log_powi_const(double %x) { define double @log_powi_nonconst(double %x, i32 %y) { ; CHECK-LABEL: @log_powi_nonconst( ; CHECK-NEXT: [[LOG1:%.*]] = call fast double @llvm.log.f64(double [[X:%.*]]) -; CHECK-NEXT: [[CAST:%.*]] = sitofp i32 [[Y:%.*]] to double +; CHECK-NEXT: [[CAST:%.*]] = sitofp fast i32 [[Y:%.*]] to double ; CHECK-NEXT: [[MUL:%.*]] = fmul fast double [[LOG1]], [[CAST]] ; CHECK-NEXT: ret double [[MUL]] ; @@ -38,7 +38,7 @@ define double @log_powi_nonconst(double %x, i32 %y) { define double @logf64_powi_nonconst(double %x, i32 %y) { ; CHECK-LABEL: @logf64_powi_nonconst( ; CHECK-NEXT: [[LOG1:%.*]] = call fast double @llvm.log.f64(double [[X:%.*]]) -; CHECK-NEXT: [[CAST:%.*]] = sitofp i32 [[Y:%.*]] to double +; CHECK-NEXT: [[CAST:%.*]] = sitofp fast i32 [[Y:%.*]] to double ; CHECK-NEXT: [[MUL:%.*]] = fmul fast double [[LOG1]], [[CAST]] ; CHECK-NEXT: ret double [[MUL]] ; @@ -61,7 +61,7 @@ define float @logf_powfi_const(float %x) { define float @logf_powfi_nonconst(float %x, i32 %y) { ; CHECK-LABEL: @logf_powfi_nonconst( ; CHECK-NEXT: [[LOG1:%.*]] = call fast float @llvm.log.f32(float [[X:%.*]]) -; CHECK-NEXT: [[CAST:%.*]] = sitofp i32 [[Y:%.*]] to float +; CHECK-NEXT: [[CAST:%.*]] = sitofp fast i32 [[Y:%.*]] to float ; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[LOG1]], [[CAST]] ; CHECK-NEXT: ret float [[MUL]] ; diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-scalarivsext-crash.ll 
b/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-scalarivsext-crash.ll index 34b8deaa8de03..11f4a7bc81b4c 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-scalarivsext-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-scalarivsext-crash.ll @@ -31,13 +31,13 @@ define void @test_iv_trunc_crash(ptr %a, ptr %b, i32 %n) { ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 8, i32 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP3]], [[TMP11]] -; CHECK-NEXT: [[DOTCAST:%.*]] = sitofp i32 [[N_VEC]] to double +; CHECK-NEXT: [[DOTCAST:%.*]] = sitofp reassoc i32 [[N_VEC]] to double ; CHECK-NEXT: [[TMP12:%.*]] = fmul reassoc double [[X]], [[DOTCAST]] ; CHECK-NEXT: [[TMP13:%.*]] = fadd reassoc double [[SUM_0]], [[TMP12]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[DOTCAST2:%.*]] = sitofp i32 [[INDEX]] to double +; CHECK-NEXT: [[DOTCAST2:%.*]] = sitofp reassoc i32 [[INDEX]] to double ; CHECK-NEXT: [[TMP14:%.*]] = fmul reassoc double [[X]], [[DOTCAST2]] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = fadd reassoc double [[SUM_0]], [[TMP14]] ; CHECK-NEXT: [[TMP15:%.*]] = fmul reassoc double 7.000000e+00, [[X]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll index c0ff8816c2543..2e1698a3940c8 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll @@ -23,14 +23,14 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 { ; AUTO_VEC: [[ITER_CHECK]]: ; AUTO_VEC-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64 ; AUTO_VEC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 -; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY:.*]], label 
%[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] +; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] ; AUTO_VEC: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: ; AUTO_VEC-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 32 ; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] ; AUTO_VEC: [[VECTOR_PH]]: ; AUTO_VEC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 32 ; AUTO_VEC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] -; AUTO_VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; AUTO_VEC-NEXT: [[DOTCAST:%.*]] = sitofp fast i64 [[N_VEC]] to float ; AUTO_VEC-NEXT: [[TMP6:%.*]] = fmul fast float 5.000000e-01, [[DOTCAST]] ; AUTO_VEC-NEXT: [[IND_END:%.*]] = fadd fast float 1.000000e+00, [[TMP6]] ; AUTO_VEC-NEXT: br label %[[VECTOR_BODY:.*]] @@ -56,18 +56,18 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 { ; AUTO_VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; AUTO_VEC-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; AUTO_VEC: [[VEC_EPILOG_ITER_CHECK]]: -; AUTO_VEC-NEXT: [[DOTCAST12:%.*]] = sitofp i64 [[N_VEC]] to float +; AUTO_VEC-NEXT: [[DOTCAST12:%.*]] = sitofp fast i64 [[N_VEC]] to float ; AUTO_VEC-NEXT: [[TMP11:%.*]] = fmul fast float 5.000000e-01, [[DOTCAST12]] ; AUTO_VEC-NEXT: [[IND_END1:%.*]] = fadd fast float 1.000000e+00, [[TMP11]] ; AUTO_VEC-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP0]], [[N_VEC]] ; AUTO_VEC-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4 -; AUTO_VEC-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[FOR_BODY]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] +; AUTO_VEC-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; AUTO_VEC: [[VEC_EPILOG_PH]]: ; AUTO_VEC-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, 
%[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; AUTO_VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi float [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; AUTO_VEC-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP0]], 4 ; AUTO_VEC-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]] -; AUTO_VEC-NEXT: [[DOTCAST4:%.*]] = sitofp i64 [[N_VEC3]] to float +; AUTO_VEC-NEXT: [[DOTCAST4:%.*]] = sitofp fast i64 [[N_VEC3]] to float ; AUTO_VEC-NEXT: [[TMP12:%.*]] = fmul fast float 5.000000e-01, [[DOTCAST4]] ; AUTO_VEC-NEXT: [[TMP10:%.*]] = fadd fast float 1.000000e+00, [[TMP12]] ; AUTO_VEC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[BC_RESUME_VAL]], i64 0 @@ -85,14 +85,14 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 { ; AUTO_VEC-NEXT: br i1 [[TMP9]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; AUTO_VEC: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; AUTO_VEC-NEXT: [[CMP_N9:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] -; AUTO_VEC-NEXT: br i1 [[CMP_N9]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]] -; AUTO_VEC: [[FOR_BODY]]: +; AUTO_VEC-NEXT: br i1 [[CMP_N9]], label %[[FOR_END_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]] +; AUTO_VEC: [[VEC_EPILOG_SCALAR_PH]]: ; AUTO_VEC-NEXT: [[BC_RESUME_VAL10:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] ; AUTO_VEC-NEXT: [[BC_RESUME_VAL11:%.*]] = phi float [ [[TMP10]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END1]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, %[[ITER_CHECK]] ] ; AUTO_VEC-NEXT: br label %[[LOOP:.*]] ; AUTO_VEC: [[LOOP]]: -; AUTO_VEC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL10]], %[[FOR_BODY]] ] -; AUTO_VEC-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL11]], %[[FOR_BODY]] ] +; AUTO_VEC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], 
%[[LOOP]] ], [ [[BC_RESUME_VAL10]], %[[VEC_EPILOG_SCALAR_PH]] ] +; AUTO_VEC-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL11]], %[[VEC_EPILOG_SCALAR_PH]] ] ; AUTO_VEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] ; AUTO_VEC-NEXT: store float [[X_06]], ptr [[ARRAYIDX]], align 4 ; AUTO_VEC-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 @@ -145,19 +145,19 @@ define void @fp_iv_loop2(ptr noalias nocapture %A, i32 %N) { ; AUTO_VEC-SAME: ptr noalias captures(none) [[A:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { ; AUTO_VEC-NEXT: [[ENTRY:.*:]] ; AUTO_VEC-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N]], 0 -; AUTO_VEC-NEXT: br i1 [[CMP4]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]] -; AUTO_VEC: [[FOR_BODY_PREHEADER]]: -; AUTO_VEC-NEXT: br label %[[FOR_BODY:.*]] -; AUTO_VEC: [[FOR_BODY]]: -; AUTO_VEC-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ] -; AUTO_VEC-NEXT: [[X_06_EPIL:%.*]] = phi float [ [[CONV1_EPIL:%.*]], %[[FOR_BODY]] ], [ 1.000000e+00, %[[FOR_BODY_PREHEADER]] ] +; AUTO_VEC-NEXT: br i1 [[CMP4]], label %[[LOOP_PREHEADER:.*]], label %[[FOR_END:.*]] +; AUTO_VEC: [[LOOP_PREHEADER]]: +; AUTO_VEC-NEXT: br label %[[LOOP:.*]] +; AUTO_VEC: [[LOOP]]: +; AUTO_VEC-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ] +; AUTO_VEC-NEXT: [[X_06_EPIL:%.*]] = phi float [ [[CONV1_EPIL:%.*]], %[[LOOP]] ], [ 1.000000e+00, %[[LOOP_PREHEADER]] ] ; AUTO_VEC-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV_EPIL]] ; AUTO_VEC-NEXT: store float [[X_06_EPIL]], ptr [[ARRAYIDX_EPIL]], align 4 ; AUTO_VEC-NEXT: [[CONV1_EPIL]] = fadd float [[X_06_EPIL]], 5.000000e-01 ; AUTO_VEC-NEXT: [[INDVARS_IV_NEXT_EPIL]] = add nuw nsw i64 [[INDVARS_IV_EPIL]], 1 ; AUTO_VEC-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT_EPIL]] to i32 ; AUTO_VEC-NEXT: 
[[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; AUTO_VEC-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]] +; AUTO_VEC-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[LOOP]] ; AUTO_VEC: [[FOR_END_LOOPEXIT]]: ; AUTO_VEC-NEXT: br label %[[FOR_END]] ; AUTO_VEC: [[FOR_END]]: @@ -194,11 +194,11 @@ define double @external_use_with_fast_math(ptr %a, i64 %n) { ; AUTO_VEC-NEXT: [[ENTRY:.*]]: ; AUTO_VEC-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1) ; AUTO_VEC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 16 -; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY:.*]], label %[[VECTOR_PH:.*]] +; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; AUTO_VEC: [[VECTOR_PH]]: ; AUTO_VEC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 16 ; AUTO_VEC-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]] -; AUTO_VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to double +; AUTO_VEC-NEXT: [[DOTCAST:%.*]] = sitofp fast i64 [[N_VEC]] to double ; AUTO_VEC-NEXT: [[TMP0:%.*]] = fmul fast double 3.000000e+00, [[DOTCAST]] ; AUTO_VEC-NEXT: [[TMP6:%.*]] = fadd fast double 0.000000e+00, [[TMP0]] ; AUTO_VEC-NEXT: br label %[[VECTOR_BODY:.*]] @@ -223,14 +223,14 @@ define double @external_use_with_fast_math(ptr %a, i64 %n) { ; AUTO_VEC: [[MIDDLE_BLOCK]]: ; AUTO_VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] ; AUTO_VEC-NEXT: [[TMP7:%.*]] = fsub fast double [[TMP6]], 3.000000e+00 -; AUTO_VEC-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[FOR_BODY]] -; AUTO_VEC: [[FOR_BODY]]: +; AUTO_VEC-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; AUTO_VEC: [[SCALAR_PH]]: ; AUTO_VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; AUTO_VEC-NEXT: [[BC_RESUME_VAL1:%.*]] = phi double [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ] ; AUTO_VEC-NEXT: br label %[[LOOP:.*]] ; AUTO_VEC: [[LOOP]]: -; 
AUTO_VEC-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[FOR_BODY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ] -; AUTO_VEC-NEXT: [[J:%.*]] = phi double [ [[BC_RESUME_VAL1]], %[[FOR_BODY]] ], [ [[J_NEXT:%.*]], %[[LOOP]] ] +; AUTO_VEC-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ] +; AUTO_VEC-NEXT: [[J:%.*]] = phi double [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[J_NEXT:%.*]], %[[LOOP]] ] ; AUTO_VEC-NEXT: [[T0:%.*]] = getelementptr double, ptr [[A]], i64 [[I]] ; AUTO_VEC-NEXT: store double [[J]], ptr [[T0]], align 8 ; AUTO_VEC-NEXT: [[I_NEXT]] = add i64 [[I]], 1 @@ -262,19 +262,19 @@ for.end: define double @external_use_without_fast_math(ptr %a, i64 %n) { ; AUTO_VEC-LABEL: define double @external_use_without_fast_math( ; AUTO_VEC-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; AUTO_VEC-NEXT: [[ENTRY_NEW:.*]]: -; AUTO_VEC-NEXT: br label %[[FOR_BODY:.*]] -; AUTO_VEC: [[FOR_BODY]]: -; AUTO_VEC-NEXT: [[I:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[I_NEXT_7:%.*]], %[[FOR_BODY]] ] -; AUTO_VEC-NEXT: [[J:%.*]] = phi double [ 0.000000e+00, %[[ENTRY_NEW]] ], [ [[J_NEXT_7:%.*]], %[[FOR_BODY]] ] +; AUTO_VEC-NEXT: [[ENTRY:.*]]: +; AUTO_VEC-NEXT: br label %[[LOOP:.*]] +; AUTO_VEC: [[LOOP]]: +; AUTO_VEC-NEXT: [[I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[I_NEXT_7:%.*]], %[[LOOP]] ] +; AUTO_VEC-NEXT: [[J:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[J_NEXT_7:%.*]], %[[LOOP]] ] ; AUTO_VEC-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[A]], i64 [[I]] ; AUTO_VEC-NEXT: store double [[J]], ptr [[TMP7]], align 8 ; AUTO_VEC-NEXT: [[I_NEXT_7]] = add i64 [[I]], 1 ; AUTO_VEC-NEXT: [[J_NEXT_7]] = fadd double [[J]], 3.000000e+00 ; AUTO_VEC-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT_7]], [[N]] -; AUTO_VEC-NEXT: br i1 [[COND]], label %[[FOR_BODY]], label %[[FOR_END:.*]] +; AUTO_VEC-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[FOR_END:.*]] ; AUTO_VEC: [[FOR_END]]: -; AUTO_VEC-NEXT: [[J_LCSSA:%.*]] = phi double [ [[J]], %[[FOR_BODY]] ] +; 
AUTO_VEC-NEXT: [[J_LCSSA:%.*]] = phi double [ [[J]], %[[LOOP]] ] ; AUTO_VEC-NEXT: ret double [[J_LCSSA]] ; entry: @@ -309,14 +309,14 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) { ; AUTO_VEC-NEXT: [[ITER_CHECK:.*]]: ; AUTO_VEC-NEXT: [[TMP0:%.*]] = zext i32 [[N]] to i64 ; AUTO_VEC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 -; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] +; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] ; AUTO_VEC: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: ; AUTO_VEC-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 32 ; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] ; AUTO_VEC: [[VECTOR_PH]]: ; AUTO_VEC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 32 ; AUTO_VEC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] -; AUTO_VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; AUTO_VEC-NEXT: [[DOTCAST:%.*]] = sitofp reassoc i64 [[N_VEC]] to float ; AUTO_VEC-NEXT: [[TMP1:%.*]] = fmul reassoc float 4.200000e+01, [[DOTCAST]] ; AUTO_VEC-NEXT: [[IND_END:%.*]] = fadd reassoc float 1.000000e+00, [[TMP1]] ; AUTO_VEC-NEXT: br label %[[VECTOR_BODY:.*]] @@ -350,18 +350,18 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) { ; AUTO_VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; AUTO_VEC-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; AUTO_VEC: [[VEC_EPILOG_ITER_CHECK]]: -; AUTO_VEC-NEXT: [[DOTCAST16:%.*]] = sitofp i64 [[N_VEC]] to float +; AUTO_VEC-NEXT: [[DOTCAST16:%.*]] = sitofp reassoc i64 [[N_VEC]] to float ; AUTO_VEC-NEXT: [[TMP12:%.*]] = fmul reassoc float 4.200000e+01, [[DOTCAST16]] ; AUTO_VEC-NEXT: [[IND_END1:%.*]] = fadd reassoc float 1.000000e+00, [[TMP12]] ; AUTO_VEC-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP0]], [[N_VEC]] ; AUTO_VEC-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 
[[N_VEC_REMAINING]], 4 -; AUTO_VEC-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[FOR_BODY]], label %[[VEC_EPILOG_PH]], !prof [[PROF3]] +; AUTO_VEC-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3]] ; AUTO_VEC: [[VEC_EPILOG_PH]]: ; AUTO_VEC-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; AUTO_VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi float [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; AUTO_VEC-NEXT: [[N_MOD_VF5:%.*]] = urem i64 [[TMP0]], 4 ; AUTO_VEC-NEXT: [[N_VEC6:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF5]] -; AUTO_VEC-NEXT: [[DOTCAST7:%.*]] = sitofp i64 [[N_VEC6]] to float +; AUTO_VEC-NEXT: [[DOTCAST7:%.*]] = sitofp reassoc i64 [[N_VEC6]] to float ; AUTO_VEC-NEXT: [[TMP17:%.*]] = fmul reassoc float 4.200000e+01, [[DOTCAST7]] ; AUTO_VEC-NEXT: [[TMP18:%.*]] = fadd reassoc float 1.000000e+00, [[TMP17]] ; AUTO_VEC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[BC_RESUME_VAL]], i64 0 @@ -381,14 +381,14 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) { ; AUTO_VEC-NEXT: br i1 [[TMP15]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; AUTO_VEC: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; AUTO_VEC-NEXT: [[CMP_N18:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC6]] -; AUTO_VEC-NEXT: br i1 [[CMP_N18]], label %[[EXIT]], label %[[FOR_BODY]] -; AUTO_VEC: [[FOR_BODY]]: +; AUTO_VEC-NEXT: br i1 [[CMP_N18]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] +; AUTO_VEC: [[VEC_EPILOG_SCALAR_PH]]: ; AUTO_VEC-NEXT: [[BC_RESUME_VAL14:%.*]] = phi i64 [ [[N_VEC6]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] ; AUTO_VEC-NEXT: [[BC_RESUME_VAL15:%.*]] = phi float [ [[TMP18]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END1]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, 
%[[ITER_CHECK]] ] ; AUTO_VEC-NEXT: br label %[[LOOP:.*]] ; AUTO_VEC: [[LOOP]]: -; AUTO_VEC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL14]], %[[FOR_BODY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ] -; AUTO_VEC-NEXT: [[X_012:%.*]] = phi float [ [[BC_RESUME_VAL15]], %[[FOR_BODY]] ], [ [[ADD3:%.*]], %[[LOOP]] ] +; AUTO_VEC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL14]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[LOOP]] ] +; AUTO_VEC-NEXT: [[X_012:%.*]] = phi float [ [[BC_RESUME_VAL15]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD3:%.*]], %[[LOOP]] ] ; AUTO_VEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[P]], i64 [[INDVARS_IV]] ; AUTO_VEC-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4 ; AUTO_VEC-NEXT: [[ADD:%.*]] = fadd reassoc float [[X_012]], [[TMP16]] diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll index 2b15aae628274..f000fceaf6e50 100644 --- a/llvm/test/Transforms/LoopVectorize/float-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll @@ -148,7 +148,7 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N) ; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC1_INTERL2: vector.body: ; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VEC1_INTERL2-NEXT: [[DOTCAST2:%.*]] = sitofp i64 [[INDEX]] to float +; VEC1_INTERL2-NEXT: [[DOTCAST2:%.*]] = sitofp fast i64 [[INDEX]] to float ; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], [[DOTCAST2]] ; VEC1_INTERL2-NEXT: [[OFFSET_IDX:%.*]] = fsub fast float [[INIT]], [[TMP3]] ; VEC1_INTERL2-NEXT: [[TMP4:%.*]] = fsub fast float [[OFFSET_IDX]], [[FPINC]] @@ -395,7 +395,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32 ; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC1_INTERL2: vector.body: ; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, 
[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VEC1_INTERL2-NEXT: [[DOTCAST2:%.*]] = sitofp i64 [[INDEX]] to float +; VEC1_INTERL2-NEXT: [[DOTCAST2:%.*]] = sitofp reassoc i64 [[INDEX]] to float ; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], [[DOTCAST2]] ; VEC1_INTERL2-NEXT: [[OFFSET_IDX:%.*]] = fsub reassoc float [[INIT]], [[TMP3]] ; VEC1_INTERL2-NEXT: [[TMP6:%.*]] = fsub reassoc float [[OFFSET_IDX]], [[FPINC]] @@ -629,7 +629,7 @@ define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 %N) #0 { ; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC1_INTERL2: vector.body: ; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VEC1_INTERL2-NEXT: [[DOTCAST2:%.*]] = sitofp i64 [[INDEX]] to float +; VEC1_INTERL2-NEXT: [[DOTCAST2:%.*]] = sitofp fast i64 [[INDEX]] to float ; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[DOTCAST2]], 5.000000e-01 ; VEC1_INTERL2-NEXT: [[OFFSET_IDX:%.*]] = fadd fast float [[INIT]], [[TMP3]] ; VEC1_INTERL2-NEXT: [[TMP4:%.*]] = fadd fast float [[OFFSET_IDX]], 5.000000e-01 @@ -930,9 +930,9 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC1_INTERL2: vector.body: ; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VEC1_INTERL2-NEXT: [[TMP4:%.*]] = or disjoint i64 [[INDEX]], 1 -; VEC1_INTERL2-NEXT: [[DOTCAST5:%.*]] = sitofp i64 [[INDEX]] to float +; VEC1_INTERL2-NEXT: [[DOTCAST5:%.*]] = sitofp fast i64 [[INDEX]] to float ; VEC1_INTERL2-NEXT: [[TMP5:%.*]] = fmul fast float [[DOTCAST5]], -5.000000e-01 -; VEC1_INTERL2-NEXT: [[DOTCAST6:%.*]] = sitofp i64 [[INDEX]] to float +; VEC1_INTERL2-NEXT: [[DOTCAST6:%.*]] = sitofp fast i64 [[INDEX]] to float ; VEC1_INTERL2-NEXT: [[TMP6:%.*]] = fmul fast float [[TMP0]], [[DOTCAST6]] ; VEC1_INTERL2-NEXT: [[OFFSET_IDX7:%.*]] = fadd fast float [[INIT]], [[TMP6]] ; VEC1_INTERL2-NEXT: [[TMP7:%.*]] = fadd fast float 
[[OFFSET_IDX7]], [[TMP0]] @@ -1210,7 +1210,7 @@ define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) { ; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC1_INTERL2: vector.body: ; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VEC1_INTERL2-NEXT: [[DOTCAST2:%.*]] = sitofp i64 [[INDEX]] to float +; VEC1_INTERL2-NEXT: [[DOTCAST2:%.*]] = sitofp fast i64 [[INDEX]] to float ; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[DOTCAST2]], 5.000000e-01 ; VEC1_INTERL2-NEXT: [[OFFSET_IDX:%.*]] = fadd fast float [[TMP3]], 1.000000e+00 ; VEC1_INTERL2-NEXT: [[TMP4:%.*]] = fadd fast float [[TMP3]], 1.500000e+00 @@ -1321,7 +1321,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) { ; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC4_INTERL1: vector.body: ; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE7:%.*]] ] -; VEC4_INTERL1-NEXT: [[DOTCAST2:%.*]] = sitofp i64 [[INDEX]] to float +; VEC4_INTERL1-NEXT: [[DOTCAST2:%.*]] = sitofp fast i64 [[INDEX]] to float ; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; VEC4_INTERL1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 ; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], zeroinitializer @@ -1398,7 +1398,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) { ; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC4_INTERL2: vector.body: ; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE16:%.*]] ] -; VEC4_INTERL2-NEXT: [[DOTCAST2:%.*]] = sitofp i64 [[INDEX]] to float +; VEC4_INTERL2-NEXT: [[DOTCAST2:%.*]] = sitofp fast i64 [[INDEX]] to float ; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16 ; 
VEC4_INTERL2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 @@ -1514,7 +1514,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) { ; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC1_INTERL2: vector.body: ; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ] -; VEC1_INTERL2-NEXT: [[DOTCAST1:%.*]] = sitofp i64 [[INDEX]] to float +; VEC1_INTERL2-NEXT: [[DOTCAST1:%.*]] = sitofp fast i64 [[INDEX]] to float ; VEC1_INTERL2-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; VEC1_INTERL2-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]] ; VEC1_INTERL2-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 4 @@ -1572,7 +1572,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) { ; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC2_INTERL1_PRED_STORE: vector.body: ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ] -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST2:%.*]] = sitofp i64 [[INDEX]] to float +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST2:%.*]] = sitofp fast i64 [[INDEX]] to float ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP0]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = fcmp fast oeq <2 x float> [[WIDE_LOAD]], zeroinitializer @@ -1693,7 +1693,7 @@ define i32 @float_induction_with_dbg_on_fadd(ptr %dst) { ; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC1_INTERL2: vector.body: ; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[INDEX]] to float +; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = sitofp reassoc i64 [[INDEX]] to float ; VEC1_INTERL2-NEXT: [[TMP0:%.*]] = call 
reassoc float @llvm.copysign.f32(float 0.000000e+00, float [[DOTCAST]]) ; VEC1_INTERL2-NEXT: [[OFFSET_IDX:%.*]] = fadd reassoc float [[TMP0]], 0.000000e+00 ; VEC1_INTERL2-NEXT: [[TMP1:%.*]] = getelementptr float, ptr null, i64 [[INDEX]] diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll index 3f91baa117b7f..3d0feb12f68b0 100644 --- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll @@ -734,7 +734,7 @@ define float @fp_postinc_use_fadd(float %init, ptr noalias nocapture %A, i64 %N, ; VEC: [[VECTOR_PH]]: ; VEC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 ; VEC-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC-NEXT: [[DOTCAST:%.*]] = sitofp fast i64 [[N_VEC]] to float ; VEC-NEXT: [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] ; VEC-NEXT: [[TMP1:%.*]] = fadd fast float [[INIT]], [[TMP0]] ; VEC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 @@ -784,14 +784,14 @@ define float @fp_postinc_use_fadd(float %init, ptr noalias nocapture %A, i64 %N, ; INTERLEAVE: [[VECTOR_PH]]: ; INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 ; INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; INTERLEAVE-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; INTERLEAVE-NEXT: [[DOTCAST:%.*]] = sitofp fast i64 [[N_VEC]] to float ; INTERLEAVE-NEXT: [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] ; INTERLEAVE-NEXT: [[TMP1:%.*]] = fadd fast float [[INIT]], [[TMP0]] ; INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] ; INTERLEAVE: [[VECTOR_BODY]]: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 1 -; INTERLEAVE-NEXT: [[DOTCAST1:%.*]] = sitofp i64 [[INDEX]] to float +; INTERLEAVE-NEXT: [[DOTCAST1:%.*]] = sitofp fast i64 
[[INDEX]] to float ; INTERLEAVE-NEXT: [[TMP4:%.*]] = fmul fast float [[FPINC]], [[DOTCAST1]] ; INTERLEAVE-NEXT: [[OFFSET_IDX:%.*]] = fadd fast float [[INIT]], [[TMP4]] ; INTERLEAVE-NEXT: [[TMP7:%.*]] = fmul fast float 1.000000e+00, [[FPINC]] @@ -849,7 +849,7 @@ define float @fp_postinc_use_fadd_ops_swapped(float %init, ptr noalias nocapture ; VEC: [[VECTOR_PH]]: ; VEC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 ; VEC-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC-NEXT: [[DOTCAST:%.*]] = sitofp fast i64 [[N_VEC]] to float ; VEC-NEXT: [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] ; VEC-NEXT: [[TMP1:%.*]] = fadd fast float [[INIT]], [[TMP0]] ; VEC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 @@ -899,14 +899,14 @@ define float @fp_postinc_use_fadd_ops_swapped(float %init, ptr noalias nocapture ; INTERLEAVE: [[VECTOR_PH]]: ; INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 ; INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; INTERLEAVE-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; INTERLEAVE-NEXT: [[DOTCAST:%.*]] = sitofp fast i64 [[N_VEC]] to float ; INTERLEAVE-NEXT: [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] ; INTERLEAVE-NEXT: [[TMP1:%.*]] = fadd fast float [[INIT]], [[TMP0]] ; INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] ; INTERLEAVE: [[VECTOR_BODY]]: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 1 -; INTERLEAVE-NEXT: [[DOTCAST1:%.*]] = sitofp i64 [[INDEX]] to float +; INTERLEAVE-NEXT: [[DOTCAST1:%.*]] = sitofp fast i64 [[INDEX]] to float ; INTERLEAVE-NEXT: [[TMP4:%.*]] = fmul fast float [[FPINC]], [[DOTCAST1]] ; INTERLEAVE-NEXT: [[OFFSET_IDX:%.*]] = fadd fast float [[INIT]], [[TMP4]] ; INTERLEAVE-NEXT: [[TMP7:%.*]] = fmul fast float 1.000000e+00, [[FPINC]] @@ -964,7 +964,7 @@ define float 
@fp_postinc_use_fsub(float %init, ptr noalias nocapture %A, i64 %N, ; VEC: [[VECTOR_PH]]: ; VEC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 ; VEC-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC-NEXT: [[DOTCAST:%.*]] = sitofp fast i64 [[N_VEC]] to float ; VEC-NEXT: [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] ; VEC-NEXT: [[TMP1:%.*]] = fsub fast float [[INIT]], [[TMP0]] ; VEC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 @@ -1014,14 +1014,14 @@ define float @fp_postinc_use_fsub(float %init, ptr noalias nocapture %A, i64 %N, ; INTERLEAVE: [[VECTOR_PH]]: ; INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 ; INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; INTERLEAVE-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; INTERLEAVE-NEXT: [[DOTCAST:%.*]] = sitofp fast i64 [[N_VEC]] to float ; INTERLEAVE-NEXT: [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] ; INTERLEAVE-NEXT: [[TMP1:%.*]] = fsub fast float [[INIT]], [[TMP0]] ; INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] ; INTERLEAVE: [[VECTOR_BODY]]: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 1 -; INTERLEAVE-NEXT: [[DOTCAST1:%.*]] = sitofp i64 [[INDEX]] to float +; INTERLEAVE-NEXT: [[DOTCAST1:%.*]] = sitofp fast i64 [[INDEX]] to float ; INTERLEAVE-NEXT: [[TMP4:%.*]] = fmul fast float [[FPINC]], [[DOTCAST1]] ; INTERLEAVE-NEXT: [[OFFSET_IDX:%.*]] = fsub fast float [[INIT]], [[TMP4]] ; INTERLEAVE-NEXT: [[TMP7:%.*]] = fmul fast float 1.000000e+00, [[FPINC]] diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll index 54408b24db114..c0767ac153af4 100644 --- a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll +++ 
b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll @@ -273,7 +273,7 @@ define float @same_exit_block_pre_inc_use1_iv64_endf32() { ; CHECK: vector.early.exit: ; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], [[FIRST_ACTIVE_LANE]] -; CHECK-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[TMP10]] to float +; CHECK-NEXT: [[DOTCAST:%.*]] = sitofp fast i64 [[TMP10]] to float ; CHECK-NEXT: [[TMP11:%.*]] = fmul fast float 1.000000e+00, [[DOTCAST]] ; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = fadd fast float 9.000000e+00, [[TMP11]] ; CHECK-NEXT: br label [[LOOP_END]]