Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AMDGPU: Don't fold rootn(x, 1) to input for strictfp functions #92595

Merged
merged 1 commit into from
May 20, 2024

Conversation

arsenm
Copy link
Contributor

@arsenm arsenm commented May 17, 2024

We need to insert a constrained canonicalize.

Depends #92594

@llvmbot
Copy link

llvmbot commented May 17, 2024

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

We need to insert a constrained canonicalize.

Depends #92594


Patch is 21.03 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/92595.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp (+11-10)
  • (modified) llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll (+52-59)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index faf04c3c7e709..47de1791dae31 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -1156,25 +1156,26 @@ bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B,
 
 bool AMDGPULibCalls::fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B,
                                 const FuncInfo &FInfo) {
-  // skip vector function
-  if (getVecSize(FInfo) != 1)
-    return false;
-
   Value *opr0 = FPOp->getOperand(0);
   Value *opr1 = FPOp->getOperand(1);
 
-  ConstantInt *CINT = dyn_cast<ConstantInt>(opr1);
-  if (!CINT) {
+  const APInt *CINT = nullptr;
+  if (!match(opr1, m_APIntAllowPoison(CINT)))
     return false;
-  }
+
+  Function *Parent = B.GetInsertBlock()->getParent();
+
   int ci_opr1 = (int)CINT->getSExtValue();
-  if (ci_opr1 == 1) {  // rootn(x, 1) = x
-    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << "\n");
+  if (ci_opr1 == 1 && !Parent->hasFnAttribute(Attribute::StrictFP)) {
+    // rootn(x, 1) = x
+    //
+    // TODO: Insert constrained canonicalize for strictfp case.
+    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << '\n');
     replaceCall(FPOp, opr0);
     return true;
   }
 
-  Module *M = B.GetInsertBlock()->getModule();
+  Module *M = Parent->getParent();
   if (ci_opr1 == 2) { // rootn(x, 2) = sqrt(x)
     if (FunctionCallee FPExpr =
             getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll
index 2e64a3456c242..d75517cb26875 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll
@@ -342,8 +342,7 @@ define <2 x half> @test_rootn_v2f16_0(<2 x half> %x) {
 define <2 x half> @test_rootn_v2f16_1(<2 x half> %x) {
 ; CHECK-LABEL: define <2 x half> @test_rootn_v2f16_1(
 ; CHECK-SAME: <2 x half> [[X:%.*]]) {
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x half> @_Z5rootnDv2_DhDv2_i(<2 x half> [[X]], <2 x i32> <i32 1, i32 1>)
-; CHECK-NEXT:    ret <2 x half> [[CALL]]
+; CHECK-NEXT:    ret <2 x half> [[X]]
 ;
   %call = tail call <2 x half> @_Z5rootnDv2_DhDv2_i(<2 x half> %x, <2 x i32> <i32 1, i32 1>)
   ret <2 x half> %call
@@ -352,8 +351,8 @@ define <2 x half> @test_rootn_v2f16_1(<2 x half> %x) {
 define <2 x half> @test_rootn_v2f16_2(<2 x half> %x) {
 ; CHECK-LABEL: define <2 x half> @test_rootn_v2f16_2(
 ; CHECK-SAME: <2 x half> [[X:%.*]]) {
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x half> @_Z5rootnDv2_DhDv2_i(<2 x half> [[X]], <2 x i32> <i32 2, i32 2>)
-; CHECK-NEXT:    ret <2 x half> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2SQRT:%.*]] = call <2 x half> @_Z4sqrtDv2_Dh(<2 x half> [[X]])
+; CHECK-NEXT:    ret <2 x half> [[__ROOTN2SQRT]]
 ;
   %call = tail call <2 x half> @_Z5rootnDv2_DhDv2_i(<2 x half> %x, <2 x i32> <i32 2, i32 2>)
   ret <2 x half> %call
@@ -362,8 +361,8 @@ define <2 x half> @test_rootn_v2f16_2(<2 x half> %x) {
 define <2 x half> @test_rootn_v2f16_neg1(<2 x half> %x) {
 ; CHECK-LABEL: define <2 x half> @test_rootn_v2f16_neg1(
 ; CHECK-SAME: <2 x half> [[X:%.*]]) {
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x half> @_Z5rootnDv2_DhDv2_i(<2 x half> [[X]], <2 x i32> <i32 -1, i32 -1>)
-; CHECK-NEXT:    ret <2 x half> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2DIV:%.*]] = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, [[X]]
+; CHECK-NEXT:    ret <2 x half> [[__ROOTN2DIV]]
 ;
   %call = tail call <2 x half> @_Z5rootnDv2_DhDv2_i(<2 x half> %x, <2 x i32> <i32 -1, i32 -1>)
   ret <2 x half> %call
@@ -372,8 +371,8 @@ define <2 x half> @test_rootn_v2f16_neg1(<2 x half> %x) {
 define <2 x half> @test_rootn_v2f16_neg2(<2 x half> %x) {
 ; CHECK-LABEL: define <2 x half> @test_rootn_v2f16_neg2(
 ; CHECK-SAME: <2 x half> [[X:%.*]]) {
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x half> @_Z5rootnDv2_DhDv2_i(<2 x half> [[X]], <2 x i32> <i32 -2, i32 -2>)
-; CHECK-NEXT:    ret <2 x half> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2RSQRT:%.*]] = call <2 x half> @_Z5rsqrtDv2_Dh(<2 x half> [[X]])
+; CHECK-NEXT:    ret <2 x half> [[__ROOTN2RSQRT]]
 ;
   %call = tail call <2 x half> @_Z5rootnDv2_DhDv2_i(<2 x half> %x, <2 x i32> <i32 -2, i32 -2>)
   ret <2 x half> %call
@@ -512,7 +511,8 @@ define float @test_rootn_f32__y_1__strictfp(float %x) #1 {
 ; CHECK-LABEL: define float @test_rootn_f32__y_1__strictfp(
 ; CHECK-SAME: float [[X:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    ret float [[X]]
+; CHECK-NEXT:    [[CALL:%.*]] = tail call float @_Z5rootnfi(float [[X]], i32 1) #[[ATTR0]]
+; CHECK-NEXT:    ret float [[CALL]]
 ;
 entry:
   %call = tail call float @_Z5rootnfi(float %x, i32 1) #1
@@ -523,8 +523,7 @@ define <2 x float> @test_rootn_v2f32__y_1(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_1(
 ; CHECK-SAME: <2 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 1, i32 1>)
-; CHECK-NEXT:    ret <2 x float> [[CALL]]
+; CHECK-NEXT:    ret <2 x float> [[X]]
 ;
 entry:
   %call = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> %x, <2 x i32> <i32 1, i32 1>)
@@ -547,8 +546,7 @@ define <2 x float> @test_rootn_v2f32__y_1_undef(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_1_undef(
 ; CHECK-SAME: <2 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 1, i32 poison>)
-; CHECK-NEXT:    ret <2 x float> [[CALL]]
+; CHECK-NEXT:    ret <2 x float> [[X]]
 ;
 entry:
   %call = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> %x, <2 x i32> <i32 1, i32 poison>)
@@ -559,8 +557,7 @@ define <3 x float> @test_rootn_v3f32__y_1(<3 x float> %x) {
 ; CHECK-LABEL: define <3 x float> @test_rootn_v3f32__y_1(
 ; CHECK-SAME: <3 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <3 x float> @_Z5rootnDv3_fDv3_i(<3 x float> [[X]], <3 x i32> <i32 1, i32 1, i32 1>)
-; CHECK-NEXT:    ret <3 x float> [[CALL]]
+; CHECK-NEXT:    ret <3 x float> [[X]]
 ;
 entry:
   %call = tail call <3 x float> @_Z5rootnDv3_fDv3_i(<3 x float> %x, <3 x i32> <i32 1, i32 1, i32 1>)
@@ -571,8 +568,7 @@ define <3 x float> @test_rootn_v3f32__y_1_undef(<3 x float> %x) {
 ; CHECK-LABEL: define <3 x float> @test_rootn_v3f32__y_1_undef(
 ; CHECK-SAME: <3 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <3 x float> @_Z5rootnDv3_fDv3_i(<3 x float> [[X]], <3 x i32> <i32 1, i32 1, i32 poison>)
-; CHECK-NEXT:    ret <3 x float> [[CALL]]
+; CHECK-NEXT:    ret <3 x float> [[X]]
 ;
 entry:
   %call = tail call <3 x float> @_Z5rootnDv3_fDv3_i(<3 x float> %x, <3 x i32> <i32 1, i32 1, i32 poison>)
@@ -583,8 +579,7 @@ define <4 x float> @test_rootn_v4f32__y_1(<4 x float> %x) {
 ; CHECK-LABEL: define <4 x float> @test_rootn_v4f32__y_1(
 ; CHECK-SAME: <4 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <4 x float> @_Z5rootnDv4_fDv4_i(<4 x float> [[X]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
-; CHECK-NEXT:    ret <4 x float> [[CALL]]
+; CHECK-NEXT:    ret <4 x float> [[X]]
 ;
 entry:
   %call = tail call <4 x float> @_Z5rootnDv4_fDv4_i(<4 x float> %x, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
@@ -595,8 +590,7 @@ define <8 x float> @test_rootn_v8f32__y_1(<8 x float> %x) {
 ; CHECK-LABEL: define <8 x float> @test_rootn_v8f32__y_1(
 ; CHECK-SAME: <8 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <8 x float> @_Z5rootnDv8_fDv8_i(<8 x float> [[X]], <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>)
-; CHECK-NEXT:    ret <8 x float> [[CALL]]
+; CHECK-NEXT:    ret <8 x float> [[X]]
 ;
 entry:
   %call = tail call <8 x float> @_Z5rootnDv8_fDv8_i(<8 x float> %x, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>)
@@ -607,8 +601,7 @@ define <16 x float> @test_rootn_v16f32__y_1(<16 x float> %x) {
 ; CHECK-LABEL: define <16 x float> @test_rootn_v16f32__y_1(
 ; CHECK-SAME: <16 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <16 x float> @_Z5rootnDv16_fDv16_i(<16 x float> [[X]], <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>)
-; CHECK-NEXT:    ret <16 x float> [[CALL]]
+; CHECK-NEXT:    ret <16 x float> [[X]]
 ;
 entry:
   %call = tail call <16 x float> @_Z5rootnDv16_fDv16_i(<16 x float> %x, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>)
@@ -656,8 +649,8 @@ define <2 x float> @test_rootn_v2f32__y_2_flags(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_2_flags(
 ; CHECK-SAME: <2 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call nnan nsz <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 2, i32 2>)
-; CHECK-NEXT:    ret <2 x float> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2SQRT:%.*]] = call nnan nsz <2 x float> @_Z4sqrtDv2_f(<2 x float> [[X]])
+; CHECK-NEXT:    ret <2 x float> [[__ROOTN2SQRT]]
 ;
 entry:
   %call = tail call nnan nsz <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> %x, <2 x i32> <i32 2, i32 2>)
@@ -668,8 +661,8 @@ define <3 x float> @test_rootn_v3f32__y_2(<3 x float> %x) {
 ; CHECK-LABEL: define <3 x float> @test_rootn_v3f32__y_2(
 ; CHECK-SAME: <3 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <3 x float> @_Z5rootnDv3_fDv3_i(<3 x float> [[X]], <3 x i32> <i32 2, i32 2, i32 2>)
-; CHECK-NEXT:    ret <3 x float> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2SQRT:%.*]] = call <3 x float> @_Z4sqrtDv3_f(<3 x float> [[X]])
+; CHECK-NEXT:    ret <3 x float> [[__ROOTN2SQRT]]
 ;
 entry:
   %call = tail call <3 x float> @_Z5rootnDv3_fDv3_i(<3 x float> %x, <3 x i32> <i32 2, i32 2, i32 2>)
@@ -680,8 +673,8 @@ define <3 x float> @test_rootn_v3f32__y_2_undef(<3 x float> %x) {
 ; CHECK-LABEL: define <3 x float> @test_rootn_v3f32__y_2_undef(
 ; CHECK-SAME: <3 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <3 x float> @_Z5rootnDv3_fDv3_i(<3 x float> [[X]], <3 x i32> <i32 2, i32 poison, i32 2>)
-; CHECK-NEXT:    ret <3 x float> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2SQRT:%.*]] = call <3 x float> @_Z4sqrtDv3_f(<3 x float> [[X]])
+; CHECK-NEXT:    ret <3 x float> [[__ROOTN2SQRT]]
 ;
 entry:
   %call = tail call <3 x float> @_Z5rootnDv3_fDv3_i(<3 x float> %x, <3 x i32> <i32 2, i32 poison, i32 2>)
@@ -692,8 +685,8 @@ define <4 x float> @test_rootn_v4f32__y_2(<4 x float> %x) {
 ; CHECK-LABEL: define <4 x float> @test_rootn_v4f32__y_2(
 ; CHECK-SAME: <4 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <4 x float> @_Z5rootnDv4_fDv4_i(<4 x float> [[X]], <4 x i32> <i32 2, i32 2, i32 2, i32 2>)
-; CHECK-NEXT:    ret <4 x float> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2SQRT:%.*]] = call <4 x float> @_Z4sqrtDv4_f(<4 x float> [[X]])
+; CHECK-NEXT:    ret <4 x float> [[__ROOTN2SQRT]]
 ;
 entry:
   %call = tail call <4 x float> @_Z5rootnDv4_fDv4_i(<4 x float> %x, <4 x i32> <i32 2, i32 2, i32 2, i32 2>)
@@ -704,8 +697,8 @@ define <8 x float> @test_rootn_v8f32__y_2(<8 x float> %x) {
 ; CHECK-LABEL: define <8 x float> @test_rootn_v8f32__y_2(
 ; CHECK-SAME: <8 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <8 x float> @_Z5rootnDv8_fDv8_i(<8 x float> [[X]], <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>)
-; CHECK-NEXT:    ret <8 x float> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2SQRT:%.*]] = call <8 x float> @_Z4sqrtDv8_f(<8 x float> [[X]])
+; CHECK-NEXT:    ret <8 x float> [[__ROOTN2SQRT]]
 ;
 entry:
   %call = tail call <8 x float> @_Z5rootnDv8_fDv8_i(<8 x float> %x, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>)
@@ -716,8 +709,8 @@ define <16 x float> @test_rootn_v16f32__y_2(<16 x float> %x) {
 ; CHECK-LABEL: define <16 x float> @test_rootn_v16f32__y_2(
 ; CHECK-SAME: <16 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <16 x float> @_Z5rootnDv16_fDv16_i(<16 x float> [[X]], <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>)
-; CHECK-NEXT:    ret <16 x float> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2SQRT:%.*]] = call <16 x float> @_Z4sqrtDv16_f(<16 x float> [[X]])
+; CHECK-NEXT:    ret <16 x float> [[__ROOTN2SQRT]]
 ;
 entry:
   %call = tail call <16 x float> @_Z5rootnDv16_fDv16_i(<16 x float> %x, <16 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>)
@@ -740,8 +733,8 @@ define <2 x float> @test_rootn_v2f32__y_3(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_3(
 ; CHECK-SAME: <2 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 3, i32 3>)
-; CHECK-NEXT:    ret <2 x float> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2CBRT:%.*]] = call <2 x float> @_Z4cbrtDv2_f(<2 x float> [[X]])
+; CHECK-NEXT:    ret <2 x float> [[__ROOTN2CBRT]]
 ;
 entry:
   %call = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> %x, <2 x i32> <i32 3, i32 3>)
@@ -764,8 +757,8 @@ define <2 x float> @test_rootn_v2f32__y_nonsplat_2_poison(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_nonsplat_2_poison(
 ; CHECK-SAME: <2 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 2, i32 poison>)
-; CHECK-NEXT:    ret <2 x float> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2SQRT:%.*]] = call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[X]])
+; CHECK-NEXT:    ret <2 x float> [[__ROOTN2SQRT]]
 ;
 entry:
   %call = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> %x, <2 x i32> <i32 2, i32 poison>)
@@ -800,8 +793,8 @@ define <2 x float> @test_rootn_v2f32__y_neg1(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_neg1(
 ; CHECK-SAME: <2 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 -1, i32 -1>)
-; CHECK-NEXT:    ret <2 x float> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2DIV:%.*]] = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    ret <2 x float> [[__ROOTN2DIV]]
 ;
 entry:
   %call = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> %x, <2 x i32> <i32 -1, i32 -1>)
@@ -812,8 +805,8 @@ define <3 x float> @test_rootn_v3f32__y_neg1(<3 x float> %x) {
 ; CHECK-LABEL: define <3 x float> @test_rootn_v3f32__y_neg1(
 ; CHECK-SAME: <3 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <3 x float> @_Z5rootnDv3_fDv3_i(<3 x float> [[X]], <3 x i32> <i32 -1, i32 -1, i32 -1>)
-; CHECK-NEXT:    ret <3 x float> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2DIV:%.*]] = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    ret <3 x float> [[__ROOTN2DIV]]
 ;
 entry:
   %call = tail call <3 x float> @_Z5rootnDv3_fDv3_i(<3 x float> %x, <3 x i32> <i32 -1, i32 -1, i32 -1>)
@@ -824,8 +817,8 @@ define <3 x float> @test_rootn_v3f32__y_neg1_undef(<3 x float> %x) {
 ; CHECK-LABEL: define <3 x float> @test_rootn_v3f32__y_neg1_undef(
 ; CHECK-SAME: <3 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <3 x float> @_Z5rootnDv3_fDv3_i(<3 x float> [[X]], <3 x i32> <i32 -1, i32 -1, i32 poison>)
-; CHECK-NEXT:    ret <3 x float> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2DIV:%.*]] = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    ret <3 x float> [[__ROOTN2DIV]]
 ;
 entry:
   %call = tail call <3 x float> @_Z5rootnDv3_fDv3_i(<3 x float> %x, <3 x i32> <i32 -1, i32 -1, i32 poison>)
@@ -836,8 +829,8 @@ define <4 x float> @test_rootn_v4f32__y_neg1(<4 x float> %x) {
 ; CHECK-LABEL: define <4 x float> @test_rootn_v4f32__y_neg1(
 ; CHECK-SAME: <4 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <4 x float> @_Z5rootnDv4_fDv4_i(<4 x float> [[X]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
-; CHECK-NEXT:    ret <4 x float> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2DIV:%.*]] = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    ret <4 x float> [[__ROOTN2DIV]]
 ;
 entry:
   %call = tail call <4 x float> @_Z5rootnDv4_fDv4_i(<4 x float> %x, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
@@ -848,8 +841,8 @@ define <8 x float> @test_rootn_v8f32__y_neg1(<8 x float> %x) {
 ; CHECK-LABEL: define <8 x float> @test_rootn_v8f32__y_neg1(
 ; CHECK-SAME: <8 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <8 x float> @_Z5rootnDv8_fDv8_i(<8 x float> [[X]], <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>)
-; CHECK-NEXT:    ret <8 x float> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2DIV:%.*]] = fdiv <8 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    ret <8 x float> [[__ROOTN2DIV]]
 ;
 entry:
   %call = tail call <8 x float> @_Z5rootnDv8_fDv8_i(<8 x float> %x, <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>)
@@ -860,8 +853,8 @@ define <16 x float> @test_rootn_v16f32__y_neg1(<16 x float> %x) {
 ; CHECK-LABEL: define <16 x float> @test_rootn_v16f32__y_neg1(
 ; CHECK-SAME: <16 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <16 x float> @_Z5rootnDv16_fDv16_i(<16 x float> [[X]], <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>)
-; CHECK-NEXT:    ret <16 x float> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2DIV:%.*]] = fdiv <16 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    ret <16 x float> [[__ROOTN2DIV]]
 ;
 entry:
   %call = tail call <16 x float> @_Z5rootnDv16_fDv16_i(<16 x float> %x, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>)
@@ -932,8 +925,8 @@ define <2 x float> @test_rootn_v2f32__y_neg2(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_neg2(
 ; CHECK-SAME: <2 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 -2, i32 -2>)
-; CHECK-NEXT:    ret <2 x float> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2RSQRT:%.*]] = call <2 x float> @_Z5rsqrtDv2_f(<2 x float> [[X]])
+; CHECK-NEXT:    ret <2 x float> [[__ROOTN2RSQRT]]
 ;
 entry:
   %call = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> %x, <2 x i32> <i32 -2, i32 -2>)
@@ -944,8 +937,8 @@ define <2 x float> @test_rootn_v2f32__y_neg2__flags(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_neg2__flags(
 ; CHECK-SAME: <2 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call nnan nsz <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 -2, i32 -2>)
-; CHECK-NEXT:    ret <2 x float> [[CALL]]
+; CHECK-NEXT:    [[__ROOTN2RSQRT:%.*]] = call nnan nsz <2 x float> @_Z5rsqrtDv2_f(<2 x float> [[X]])
+; CHECK-NEXT:    ret <2 x float> [[__ROOTN2RSQRT]]
 ;
 entry:
   %call = tail call nsz nnan <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> %x, <2 x i32> <i32 -2, i32 -2>)
@@ -956,8 +949,8 @@ define <2 x float> @test_rootn_v2f32__y_neg2__strictfp(<2 x float> %x) #1 {
 ; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_neg2__strictfp(
 ; CHECK-SAME: <2 x float> [[X:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x float> @_Z5rootnDv2_fDv2_...
[truncated]

We need to insert a constrained canonicalize.
@arsenm arsenm force-pushed the amdgpu-no-fold-rootn-strictfp branch from b1cf24a to 92b6815 Compare May 20, 2024 16:39
@arsenm arsenm merged commit 3cb1fe6 into llvm:main May 20, 2024
3 of 4 checks passed
@arsenm arsenm deleted the amdgpu-no-fold-rootn-strictfp branch May 20, 2024 20:23
arsenm added a commit that referenced this pull request May 21, 2024
This avoids depending on pre/post link runs.

Depends #92595
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants