Skip to content

Conversation

@RKSimon
Copy link
Collaborator

@RKSimon RKSimon commented Dec 1, 2025

Limited to 128-bit -> 256-bit concatenations: widening to 512 bits would require the AVX512 RCP14/RSQRT14 variants, and we can't safely convert RCP/RSQRT to those.

…ntrinsics together

Limited to 128->256 cases as we can't safely convert to the RCP14/RSQRT14 variants
@llvmbot
Copy link
Member

llvmbot commented Dec 1, 2025

@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

Changes

Limited to 128->256 cases as we can't safely convert to the RCP14/RSQRT14 variants


Full diff: https://github.com/llvm/llvm-project/pull/170148.diff

3 Files Affected:

  • (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+6)
  • (modified) llvm/test/CodeGen/X86/combine-rcp.ll (+12-12)
  • (modified) llvm/test/CodeGen/X86/combine-rsqrt.ll (+12-12)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9e20807ddf304..74a02711bd98a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -59464,6 +59464,12 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
         return DAG.getNode(Opcode, DL, VT, ConcatSubOperand(VT, Ops, 0));
       }
       break;
+    case X86ISD::FRCP:
+    case X86ISD::FRSQRT:
+      if (!IsSplat && VT.is256BitVector()) {
+        return DAG.getNode(Opcode, DL, VT, ConcatSubOperand(VT, Ops, 0));
+      }
+      break;
     case X86ISD::HADD:
     case X86ISD::HSUB:
     case X86ISD::FHADD:
diff --git a/llvm/test/CodeGen/X86/combine-rcp.ll b/llvm/test/CodeGen/X86/combine-rcp.ll
index 7de3e96d592db..4647516528bf3 100644
--- a/llvm/test/CodeGen/X86/combine-rcp.ll
+++ b/llvm/test/CodeGen/X86/combine-rcp.ll
@@ -14,9 +14,9 @@ define <8 x float> @concat_rcp_v8f32_v4f32(<4 x float> %a0, <4 x float> %a1) {
 ;
 ; AVX-LABEL: concat_rcp_v8f32_v4f32:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vrcpps %xmm0, %xmm0
-; AVX-NEXT:    vrcpps %xmm1, %xmm1
+; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
 ; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT:    vrcpps %ymm0, %ymm0
 ; AVX-NEXT:    retq
   %v0 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
   %v1 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a1)
@@ -36,23 +36,23 @@ define <16 x float> @concat_rcp_v16f32_v4f32(<4 x float> %a0, <4 x float> %a1, <
 ;
 ; AVX1OR2-LABEL: concat_rcp_v16f32_v4f32:
 ; AVX1OR2:       # %bb.0:
-; AVX1OR2-NEXT:    vrcpps %xmm0, %xmm0
-; AVX1OR2-NEXT:    vrcpps %xmm1, %xmm1
-; AVX1OR2-NEXT:    vrcpps %xmm2, %xmm2
-; AVX1OR2-NEXT:    vrcpps %xmm3, %xmm3
+; AVX1OR2-NEXT:    # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX1OR2-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
 ; AVX1OR2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1OR2-NEXT:    vrcpps %ymm0, %ymm0
 ; AVX1OR2-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm1
+; AVX1OR2-NEXT:    vrcpps %ymm1, %ymm1
 ; AVX1OR2-NEXT:    retq
 ;
 ; AVX512-LABEL: concat_rcp_v16f32_v4f32:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vrcpps %xmm0, %xmm0
-; AVX512-NEXT:    vrcpps %xmm1, %xmm1
-; AVX512-NEXT:    vrcpps %xmm2, %xmm2
-; AVX512-NEXT:    vrcpps %xmm3, %xmm3
-; AVX512-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; AVX512-NEXT:    # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
 ; AVX512-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX512-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512-NEXT:    vrcpps %ymm0, %ymm0
+; AVX512-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm1
+; AVX512-NEXT:    vrcpps %ymm1, %ymm1
+; AVX512-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
   %v0 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
   %v1 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a1)
diff --git a/llvm/test/CodeGen/X86/combine-rsqrt.ll b/llvm/test/CodeGen/X86/combine-rsqrt.ll
index 78688701f8cd3..b373458654419 100644
--- a/llvm/test/CodeGen/X86/combine-rsqrt.ll
+++ b/llvm/test/CodeGen/X86/combine-rsqrt.ll
@@ -14,9 +14,9 @@ define <8 x float> @concat_rsqrt_v8f32_v4f32(<4 x float> %a0, <4 x float> %a1) {
 ;
 ; AVX-LABEL: concat_rsqrt_v8f32_v4f32:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vrsqrtps %xmm0, %xmm0
-; AVX-NEXT:    vrsqrtps %xmm1, %xmm1
+; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
 ; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT:    vrsqrtps %ymm0, %ymm0
 ; AVX-NEXT:    retq
   %v0 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
   %v1 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a1)
@@ -36,23 +36,23 @@ define <16 x float> @concat_rsqrt_v16f32_v4f32(<4 x float> %a0, <4 x float> %a1,
 ;
 ; AVX1OR2-LABEL: concat_rsqrt_v16f32_v4f32:
 ; AVX1OR2:       # %bb.0:
-; AVX1OR2-NEXT:    vrsqrtps %xmm0, %xmm0
-; AVX1OR2-NEXT:    vrsqrtps %xmm1, %xmm1
-; AVX1OR2-NEXT:    vrsqrtps %xmm2, %xmm2
-; AVX1OR2-NEXT:    vrsqrtps %xmm3, %xmm3
+; AVX1OR2-NEXT:    # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX1OR2-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
 ; AVX1OR2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1OR2-NEXT:    vrsqrtps %ymm0, %ymm0
 ; AVX1OR2-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm1
+; AVX1OR2-NEXT:    vrsqrtps %ymm1, %ymm1
 ; AVX1OR2-NEXT:    retq
 ;
 ; AVX512-LABEL: concat_rsqrt_v16f32_v4f32:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vrsqrtps %xmm0, %xmm0
-; AVX512-NEXT:    vrsqrtps %xmm1, %xmm1
-; AVX512-NEXT:    vrsqrtps %xmm2, %xmm2
-; AVX512-NEXT:    vrsqrtps %xmm3, %xmm3
-; AVX512-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; AVX512-NEXT:    # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
 ; AVX512-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX512-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512-NEXT:    vrsqrtps %ymm0, %ymm0
+; AVX512-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm1
+; AVX512-NEXT:    vrsqrtps %ymm1, %ymm1
+; AVX512-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
   %v0 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
   %v1 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a1)

@RKSimon RKSimon enabled auto-merge (squash) December 1, 2025 14:55
@RKSimon RKSimon merged commit b76cada into llvm:main Dec 1, 2025
11 of 12 checks passed
@RKSimon RKSimon deleted the x86-concat-recip branch December 1, 2025 15:41
augusto2112 pushed a commit to augusto2112/llvm-project that referenced this pull request Dec 3, 2025
…ntrinsics together (llvm#170148)

Limited to 128->256 cases as we can't safely convert to the RCP14/RSQRT14 variants
kcloudy0717 pushed a commit to kcloudy0717/llvm-project that referenced this pull request Dec 4, 2025
…ntrinsics together (llvm#170148)

Limited to 128->256 cases as we can't safely convert to the RCP14/RSQRT14 variants
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants