Conversation

@RKSimon (Collaborator) commented Dec 1, 2025

No description provided.

@RKSimon enabled auto-merge (squash) December 1, 2025 15:55
@llvmbot (Member) commented Dec 1, 2025

@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

Changes

Patch is 45.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/170160.diff

7 Files Affected:

  • (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+16)
  • (modified) llvm/test/CodeGen/X86/combine-fceil.ll (+54-36)
  • (modified) llvm/test/CodeGen/X86/combine-fnearbyint.ll (+54-36)
  • (modified) llvm/test/CodeGen/X86/combine-frint.ll (+54-36)
  • (modified) llvm/test/CodeGen/X86/combine-froundeven.ll (+54-36)
  • (modified) llvm/test/CodeGen/X86/combine-ftrunc.ll (+54-36)
  • (modified) llvm/test/CodeGen/X86/combine-rndscale.ll (+54-36)
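
The new ISD cases fold a concat of identically rounded subvectors into a single wider rounding node. As a minimal IR sketch of the pattern the combine-fceil.ll tests exercise (the trailing shufflevector concat is an assumption here, since the diff below truncates the test bodies after the @llvm.ceil calls):

; Sketch of the folded pattern: two v2f64 ceils concatenated to v4f64.
; Per the AVX checks below, llc -mcpu=sandybridge now emits vinsertf128
; followed by a single 256-bit vroundpd $10 instead of two 128-bit rounds.
define <4 x double> @concat_ceil(<2 x double> %a0, <2 x double> %a1) {
  %v0 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a0)
  %v1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a1)
  %r = shufflevector <2 x double> %v0, <2 x double> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x double> %r
}
declare <2 x double> @llvm.ceil.v2f64(<2 x double>)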
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 74a02711bd98a..539b238d5043f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -59459,6 +59459,11 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
       }
       break;
     case ISD::FSQRT:
+    case ISD::FCEIL:
+    case ISD::FTRUNC:
+    case ISD::FRINT:
+    case ISD::FNEARBYINT:
+    case ISD::FROUNDEVEN:
       if (!IsSplat && (VT.is256BitVector() ||
                        (VT.is512BitVector() && Subtarget.useAVX512Regs()))) {
         return DAG.getNode(Opcode, DL, VT, ConcatSubOperand(VT, Ops, 0));
@@ -59470,6 +59475,17 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
         return DAG.getNode(Opcode, DL, VT, ConcatSubOperand(VT, Ops, 0));
       }
       break;
+    case X86ISD::VRNDSCALE:
+      if (!IsSplat &&
+          (VT.is256BitVector() ||
+           (VT.is512BitVector() && Subtarget.useAVX512Regs())) &&
+          llvm::all_of(Ops, [Op0](SDValue Op) {
+            return Op0.getOperand(1) == Op.getOperand(1);
+          })) {
+        return DAG.getNode(Opcode, DL, VT, ConcatSubOperand(VT, Ops, 0),
+                           Op0.getOperand(1));
+      }
+      break;
     case X86ISD::HADD:
     case X86ISD::HSUB:
     case X86ISD::FHADD:
diff --git a/llvm/test/CodeGen/X86/combine-fceil.ll b/llvm/test/CodeGen/X86/combine-fceil.ll
index 78f1476a49152..a3f55e8f64b80 100644
--- a/llvm/test/CodeGen/X86/combine-fceil.ll
+++ b/llvm/test/CodeGen/X86/combine-fceil.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefixes=AVX,AVX1OR2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX,AVX1OR2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX,AVX512
 
 define <4 x double> @concat_ceil_v4f64_v2f64(<2 x double> %a0, <2 x double> %a1) {
@@ -13,9 +13,9 @@ define <4 x double> @concat_ceil_v4f64_v2f64(<2 x double> %a0, <2 x double> %a1)
 ;
 ; AVX-LABEL: concat_ceil_v4f64_v2f64:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vroundpd $10, %xmm0, %xmm0
-; AVX-NEXT:    vroundpd $10, %xmm1, %xmm1
+; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
 ; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT:    vroundpd $10, %ymm0, %ymm0
 ; AVX-NEXT:    retq
   %v0 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a0)
   %v1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a1)
@@ -32,9 +32,9 @@ define <8 x float> @concat_ceil_v8f32_v4f32(<4 x float> %a0, <4 x float> %a1) {
 ;
 ; AVX-LABEL: concat_ceil_v8f32_v4f32:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vroundps $10, %xmm0, %xmm0
-; AVX-NEXT:    vroundps $10, %xmm1, %xmm1
+; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
 ; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT:    vroundps $10, %ymm0, %ymm0
 ; AVX-NEXT:    retq
   %v0 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a0)
   %v1 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a1)
@@ -51,25 +51,34 @@ define <8 x double> @concat_ceil_v8f64_v2f64(<2 x double> %a0, <2 x double> %a1,
 ; SSE-NEXT:    roundpd $10, %xmm3, %xmm3
 ; SSE-NEXT:    retq
 ;
-; AVX1OR2-LABEL: concat_ceil_v8f64_v2f64:
-; AVX1OR2:       # %bb.0:
-; AVX1OR2-NEXT:    vroundpd $10, %xmm0, %xmm0
-; AVX1OR2-NEXT:    vroundpd $10, %xmm1, %xmm1
-; AVX1OR2-NEXT:    vroundpd $10, %xmm2, %xmm2
-; AVX1OR2-NEXT:    vroundpd $10, %xmm3, %xmm3
-; AVX1OR2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1OR2-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm1
-; AVX1OR2-NEXT:    retq
+; AVX1-LABEL: concat_ceil_v8f64_v2f64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX1-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vroundpd $10, %ymm0, %ymm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm1
+; AVX1-NEXT:    vroundpd $10, %ymm1, %ymm1
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: concat_ceil_v8f64_v2f64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT:    vroundpd $10, %ymm0, %ymm0
+; AVX2-NEXT:    # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX2-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm1
+; AVX2-NEXT:    vroundpd $10, %ymm1, %ymm1
+; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: concat_ceil_v8f64_v2f64:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vroundpd $10, %xmm0, %xmm0
-; AVX512-NEXT:    vroundpd $10, %xmm1, %xmm1
-; AVX512-NEXT:    vroundpd $10, %xmm2, %xmm2
-; AVX512-NEXT:    vroundpd $10, %xmm3, %xmm3
+; AVX512-NEXT:    # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
 ; AVX512-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
 ; AVX512-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512-NEXT:    vrndscalepd $10, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
   %v0 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a0)
   %v1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a1)
@@ -90,25 +99,34 @@ define <16 x float> @concat_ceil_v16f32_v4f32(<4 x float> %a0, <4 x float> %a1,
 ; SSE-NEXT:    roundps $10, %xmm3, %xmm3
 ; SSE-NEXT:    retq
 ;
-; AVX1OR2-LABEL: concat_ceil_v16f32_v4f32:
-; AVX1OR2:       # %bb.0:
-; AVX1OR2-NEXT:    vroundps $10, %xmm0, %xmm0
-; AVX1OR2-NEXT:    vroundps $10, %xmm1, %xmm1
-; AVX1OR2-NEXT:    vroundps $10, %xmm2, %xmm2
-; AVX1OR2-NEXT:    vroundps $10, %xmm3, %xmm3
-; AVX1OR2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1OR2-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm1
-; AVX1OR2-NEXT:    retq
+; AVX1-LABEL: concat_ceil_v16f32_v4f32:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX1-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vroundps $10, %ymm0, %ymm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm1
+; AVX1-NEXT:    vroundps $10, %ymm1, %ymm1
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: concat_ceil_v16f32_v4f32:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT:    vroundps $10, %ymm0, %ymm0
+; AVX2-NEXT:    # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX2-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm1
+; AVX2-NEXT:    vroundps $10, %ymm1, %ymm1
+; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: concat_ceil_v16f32_v4f32:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vroundps $10, %xmm0, %xmm0
-; AVX512-NEXT:    vroundps $10, %xmm1, %xmm1
-; AVX512-NEXT:    vroundps $10, %xmm2, %xmm2
-; AVX512-NEXT:    vroundps $10, %xmm3, %xmm3
+; AVX512-NEXT:    # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
 ; AVX512-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
 ; AVX512-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512-NEXT:    vrndscaleps $10, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
   %v0 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a0)
   %v1 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a1)
@@ -137,9 +155,9 @@ define <8 x double> @concat_ceil_v8f64_v4f64(<4 x double> %a0, <4 x double> %a1)
 ;
 ; AVX512-LABEL: concat_ceil_v8f64_v4f64:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vroundpd $10, %ymm0, %ymm0
-; AVX512-NEXT:    vroundpd $10, %ymm1, %ymm1
+; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512-NEXT:    vrndscalepd $10, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
   %v0 = call <4 x double> @llvm.ceil.v4f64(<4 x double> %a0)
   %v1 = call <4 x double> @llvm.ceil.v4f64(<4 x double> %a1)
@@ -164,9 +182,9 @@ define <16 x float> @concat_ceil_v16f32_v8f32(<8 x float> %a0, <8 x float> %a1)
 ;
 ; AVX512-LABEL: concat_ceil_v16f32_v8f32:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vroundps $10, %ymm0, %ymm0
-; AVX512-NEXT:    vroundps $10, %ymm1, %ymm1
+; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512-NEXT:    vrndscaleps $10, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
   %v0 = call <8 x float> @llvm.ceil.v8f32(<8 x float> %a0)
   %v1 = call <8 x float> @llvm.ceil.v8f32(<8 x float> %a1)
diff --git a/llvm/test/CodeGen/X86/combine-fnearbyint.ll b/llvm/test/CodeGen/X86/combine-fnearbyint.ll
index 14d1017aec630..fde136af7c4c2 100644
--- a/llvm/test/CodeGen/X86/combine-fnearbyint.ll
+++ b/llvm/test/CodeGen/X86/combine-fnearbyint.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefixes=AVX,AVX1OR2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX,AVX1OR2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX,AVX512
 
 define <4 x double> @concat_nearbyint_v4f64_v2f64(<2 x double> %a0, <2 x double> %a1) {
@@ -13,9 +13,9 @@ define <4 x double> @concat_nearbyint_v4f64_v2f64(<2 x double> %a0, <2 x double>
 ;
 ; AVX-LABEL: concat_nearbyint_v4f64_v2f64:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vroundpd $12, %xmm0, %xmm0
-; AVX-NEXT:    vroundpd $12, %xmm1, %xmm1
+; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
 ; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT:    vroundpd $12, %ymm0, %ymm0
 ; AVX-NEXT:    retq
   %v0 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a0)
   %v1 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a1)
@@ -32,9 +32,9 @@ define <8 x float> @concat_nearbyint_v8f32_v4f32(<4 x float> %a0, <4 x float> %a
 ;
 ; AVX-LABEL: concat_nearbyint_v8f32_v4f32:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vroundps $12, %xmm0, %xmm0
-; AVX-NEXT:    vroundps $12, %xmm1, %xmm1
+; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
 ; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT:    vroundps $12, %ymm0, %ymm0
 ; AVX-NEXT:    retq
   %v0 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a0)
   %v1 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a1)
@@ -51,25 +51,34 @@ define <8 x double> @concat_nearbyint_v8f64_v2f64(<2 x double> %a0, <2 x double>
 ; SSE-NEXT:    roundpd $12, %xmm3, %xmm3
 ; SSE-NEXT:    retq
 ;
-; AVX1OR2-LABEL: concat_nearbyint_v8f64_v2f64:
-; AVX1OR2:       # %bb.0:
-; AVX1OR2-NEXT:    vroundpd $12, %xmm0, %xmm0
-; AVX1OR2-NEXT:    vroundpd $12, %xmm1, %xmm1
-; AVX1OR2-NEXT:    vroundpd $12, %xmm2, %xmm2
-; AVX1OR2-NEXT:    vroundpd $12, %xmm3, %xmm3
-; AVX1OR2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1OR2-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm1
-; AVX1OR2-NEXT:    retq
+; AVX1-LABEL: concat_nearbyint_v8f64_v2f64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX1-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vroundpd $12, %ymm0, %ymm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm1
+; AVX1-NEXT:    vroundpd $12, %ymm1, %ymm1
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: concat_nearbyint_v8f64_v2f64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT:    vroundpd $12, %ymm0, %ymm0
+; AVX2-NEXT:    # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX2-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm1
+; AVX2-NEXT:    vroundpd $12, %ymm1, %ymm1
+; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: concat_nearbyint_v8f64_v2f64:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vroundpd $12, %xmm0, %xmm0
-; AVX512-NEXT:    vroundpd $12, %xmm1, %xmm1
-; AVX512-NEXT:    vroundpd $12, %xmm2, %xmm2
-; AVX512-NEXT:    vroundpd $12, %xmm3, %xmm3
+; AVX512-NEXT:    # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
 ; AVX512-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
 ; AVX512-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512-NEXT:    vrndscalepd $12, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
   %v0 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a0)
   %v1 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a1)
@@ -90,25 +99,34 @@ define <16 x float> @concat_nearbyint_v16f32_v4f32(<4 x float> %a0, <4 x float>
 ; SSE-NEXT:    roundps $12, %xmm3, %xmm3
 ; SSE-NEXT:    retq
 ;
-; AVX1OR2-LABEL: concat_nearbyint_v16f32_v4f32:
-; AVX1OR2:       # %bb.0:
-; AVX1OR2-NEXT:    vroundps $12, %xmm0, %xmm0
-; AVX1OR2-NEXT:    vroundps $12, %xmm1, %xmm1
-; AVX1OR2-NEXT:    vroundps $12, %xmm2, %xmm2
-; AVX1OR2-NEXT:    vroundps $12, %xmm3, %xmm3
-; AVX1OR2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1OR2-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm1
-; AVX1OR2-NEXT:    retq
+; AVX1-LABEL: concat_nearbyint_v16f32_v4f32:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX1-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vroundps $12, %ymm0, %ymm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm1
+; AVX1-NEXT:    vroundps $12, %ymm1, %ymm1
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: concat_nearbyint_v16f32_v4f32:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT:    vroundps $12, %ymm0, %ymm0
+; AVX2-NEXT:    # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX2-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm1
+; AVX2-NEXT:    vroundps $12, %ymm1, %ymm1
+; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: concat_nearbyint_v16f32_v4f32:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vroundps $12, %xmm0, %xmm0
-; AVX512-NEXT:    vroundps $12, %xmm1, %xmm1
-; AVX512-NEXT:    vroundps $12, %xmm2, %xmm2
-; AVX512-NEXT:    vroundps $12, %xmm3, %xmm3
+; AVX512-NEXT:    # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
 ; AVX512-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
 ; AVX512-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512-NEXT:    vrndscaleps $12, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
   %v0 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a0)
   %v1 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a1)
@@ -137,9 +155,9 @@ define <8 x double> @concat_nearbyint_v8f64_v4f64(<4 x double> %a0, <4 x double>
 ;
 ; AVX512-LABEL: concat_nearbyint_v8f64_v4f64:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vroundpd $12, %ymm0, %ymm0
-; AVX512-NEXT:    vroundpd $12, %ymm1, %ymm1
+; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512-NEXT:    vrndscalepd $12, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
   %v0 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %a0)
   %v1 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %a1)
@@ -164,9 +182,9 @@ define <16 x float> @concat_nearbyint_v16f32_v8f32(<8 x float> %a0, <8 x float>
 ;
 ; AVX512-LABEL: concat_nearbyint_v16f32_v8f32:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vroundps $12, %ymm0, %ymm0
-; AVX512-NEXT:    vroundps $12, %ymm1, %ymm1
+; AVX512-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512-NEXT:    vrndscaleps $12, %zmm0, %zmm0
 ; AVX512-NEXT:    retq
   %v0 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %a0)
   %v1 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %a1)
diff --git a/llvm/test/CodeGen/X86/combine-frint.ll b/llvm/test/CodeGen/X86/combine-frint.ll
index 901ce2c1f0d82..1c52529e8386c 100644
--- a/llvm/test/CodeGen/X86/combine-frint.ll
+++ b/llvm/test/CodeGen/X86/combine-frint.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefixes=AVX,AVX1OR2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX,AVX1OR2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX,AVX512
 
 define <4 x double> @concat_rint_v4f64_v2f64(<2 x double> %a0, <2 x double> %a1) {
@@ -13,9 +13,9 @@ define <4 x double> @concat_rint_v4f64_v2f64(<2 x double> %a0, <2 x double> %a1)
 ;
 ; AVX-LABEL: concat_rint_v4f64_v2f64:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vroundpd $4, %xmm0, %xmm0
-; AVX-NEXT:    vroundpd $4, %xmm1, %xmm1
+; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
 ; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT:    vroundpd $4, %ymm0, %ymm0
 ; AVX-NEXT:    retq
   %v0 = call <2 x double> @llvm.rint.v2f64(<2 x double> %a0)
   %v1 = call <2 x double> @llvm.rint.v2f64(<2 x double> %a1)
@@ -32,9 +32,9 @@ define <8 x float> @concat_rint_v8f32_v4f32(<4 x float> %a0, <4 x float> %a1) {
 ;
 ; AVX-LABEL: concat_rint_v8f32_v4f32:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vroundps $4, %xmm0, %xmm0
-; AVX-NEXT:    vroundps $4, %xmm1, %xmm1
+; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
 ; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT:    vroundps $4, %ymm0, %ymm0
 ; AVX-NEXT:    retq
   %v0 = call <4 x float> @llvm.rint.v4f32(<4 x float> %a0)
   %v1 = call <4 x float> @llvm.rint.v4f32(<4 x float> %a1)
@@ -51,25 +51,34 @@ define <8 x double> @concat_rint_v8f64_v2f64(<2 x double> %a0, <2 x double> %a1,
 ; SSE-NEXT:    roundpd $4, %xmm3, %xmm3
 ; SSE-NEXT:    retq
 ;
-; AVX1OR2-LABEL: concat_rint_v8f64_v2f64:
-; AVX1OR2:       # %bb.0:
-; AVX1OR2-NEXT:    vroundpd $4, %xmm0, %xmm0
-; AVX1OR2-NEXT:    vroundpd $4, %xmm1, %xmm1
-; AVX1OR2-NEXT:    vroundpd $4, %xmm2, %xmm2
-; AVX1OR2-NEXT:    vroundpd $4, %xmm3, %xmm3
-; AVX1OR2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1OR2-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm1
-; AVX1OR2-NEXT:    retq
+; AVX1-LABEL: concat_rint_v8f64_v2f64:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX1-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vroundpd $4, %ymm0, %ymm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm1
+; AVX1-NEXT:    vroundpd $4, %ymm1, %ymm1
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: concat_rint_v8f64_v2f64:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT:    vroundpd $4, %ymm0, %ymm0
+; AVX2-NEXT:    # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX2-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm1
+; AVX2-NEXT:    vroundpd $4, %ymm1, %ymm1
+; AVX2-NEXT:    retq
 ;
 ; AVX512-LABEL: concat_rint_v8f64_v2f64:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vroundpd $4, %xmm0, %xmm0
-; AVX512-NEXT:    vroundpd $4, %xmm1, %xmm1
-; AVX512-NEXT:    vroundpd $4, %xmm2, %xmm2
-; AVX512-NEXT:    vroundpd $4, %xmm3, %xmm3
+; AVX512-NEXT:    # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
 ; AVX512-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
 ; AVX512-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; A...
[truncated]
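
For the AVX512 path, a matching sketch of the 512-bit fold shown in concat_ceil_v8f64_v4f64 above (again assuming the shufflevector concat that the truncated bodies elide); per the AVX512 checks, llc -mcpu=x86-64-v4 now emits vinsertf64x4 followed by a single vrndscalepd $10 on zmm0:

; Sketch of the 512-bit pattern: two v4f64 ceils concatenated to v8f64.
define <8 x double> @concat_ceil_512(<4 x double> %a0, <4 x double> %a1) {
  %v0 = call <4 x double> @llvm.ceil.v4f64(<4 x double> %a0)
  %v1 = call <4 x double> @llvm.ceil.v4f64(<4 x double> %a1)
  %r = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %r
}
declare <4 x double> @llvm.ceil.v4f64(<4 x double>)

Note that the X86ISD::VRNDSCALE case only concatenates when every operand carries the same rounding-control immediate (operand 1), so mixed rounding modes are left alone.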

@RKSimon merged commit 318d932 into llvm:main Dec 1, 2025
9 of 11 checks passed
@RKSimon deleted the x86-concat-round branch December 1, 2025 16:31
RKSimon added 6 commits (3 to RKSimon/llvm-project, 3 to llvm/llvm-project) that referenced this pull request Dec 1, 2025
augusto2112 pushed 4 commits to augusto2112/llvm-project that referenced this pull request Dec 3, 2025