-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[X86] combineConcatVectorOps - add handling to concat fp rounding intrinsics together #170160
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Member
|
@llvm/pr-subscribers-backend-x86 Author: Simon Pilgrim (RKSimon). Changes: Patch is 45.68 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/170160.diff — 7 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 74a02711bd98a..539b238d5043f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -59459,6 +59459,11 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
}
break;
case ISD::FSQRT:
+ case ISD::FCEIL:
+ case ISD::FTRUNC:
+ case ISD::FRINT:
+ case ISD::FNEARBYINT:
+ case ISD::FROUNDEVEN:
if (!IsSplat && (VT.is256BitVector() ||
(VT.is512BitVector() && Subtarget.useAVX512Regs()))) {
return DAG.getNode(Opcode, DL, VT, ConcatSubOperand(VT, Ops, 0));
@@ -59470,6 +59475,17 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
return DAG.getNode(Opcode, DL, VT, ConcatSubOperand(VT, Ops, 0));
}
break;
+ case X86ISD::VRNDSCALE:
+ if (!IsSplat &&
+ (VT.is256BitVector() ||
+ (VT.is512BitVector() && Subtarget.useAVX512Regs())) &&
+ llvm::all_of(Ops, [Op0](SDValue Op) {
+ return Op0.getOperand(1) == Op.getOperand(1);
+ })) {
+ return DAG.getNode(Opcode, DL, VT, ConcatSubOperand(VT, Ops, 0),
+ Op0.getOperand(1));
+ }
+ break;
case X86ISD::HADD:
case X86ISD::HSUB:
case X86ISD::FHADD:
diff --git a/llvm/test/CodeGen/X86/combine-fceil.ll b/llvm/test/CodeGen/X86/combine-fceil.ll
index 78f1476a49152..a3f55e8f64b80 100644
--- a/llvm/test/CodeGen/X86/combine-fceil.ll
+++ b/llvm/test/CodeGen/X86/combine-fceil.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefixes=AVX,AVX1OR2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX,AVX1OR2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX,AVX512
define <4 x double> @concat_ceil_v4f64_v2f64(<2 x double> %a0, <2 x double> %a1) {
@@ -13,9 +13,9 @@ define <4 x double> @concat_ceil_v4f64_v2f64(<2 x double> %a0, <2 x double> %a1)
;
; AVX-LABEL: concat_ceil_v4f64_v2f64:
; AVX: # %bb.0:
-; AVX-NEXT: vroundpd $10, %xmm0, %xmm0
-; AVX-NEXT: vroundpd $10, %xmm1, %xmm1
+; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT: vroundpd $10, %ymm0, %ymm0
; AVX-NEXT: retq
%v0 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a0)
%v1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a1)
@@ -32,9 +32,9 @@ define <8 x float> @concat_ceil_v8f32_v4f32(<4 x float> %a0, <4 x float> %a1) {
;
; AVX-LABEL: concat_ceil_v8f32_v4f32:
; AVX: # %bb.0:
-; AVX-NEXT: vroundps $10, %xmm0, %xmm0
-; AVX-NEXT: vroundps $10, %xmm1, %xmm1
+; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT: vroundps $10, %ymm0, %ymm0
; AVX-NEXT: retq
%v0 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a0)
%v1 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a1)
@@ -51,25 +51,34 @@ define <8 x double> @concat_ceil_v8f64_v2f64(<2 x double> %a0, <2 x double> %a1,
; SSE-NEXT: roundpd $10, %xmm3, %xmm3
; SSE-NEXT: retq
;
-; AVX1OR2-LABEL: concat_ceil_v8f64_v2f64:
-; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vroundpd $10, %xmm0, %xmm0
-; AVX1OR2-NEXT: vroundpd $10, %xmm1, %xmm1
-; AVX1OR2-NEXT: vroundpd $10, %xmm2, %xmm2
-; AVX1OR2-NEXT: vroundpd $10, %xmm3, %xmm3
-; AVX1OR2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1OR2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
-; AVX1OR2-NEXT: retq
+; AVX1-LABEL: concat_ceil_v8f64_v2f64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vroundpd $10, %ymm0, %ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
+; AVX1-NEXT: vroundpd $10, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: concat_ceil_v8f64_v2f64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vroundpd $10, %ymm0, %ymm0
+; AVX2-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
+; AVX2-NEXT: vroundpd $10, %ymm1, %ymm1
+; AVX2-NEXT: retq
;
; AVX512-LABEL: concat_ceil_v8f64_v2f64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vroundpd $10, %xmm0, %xmm0
-; AVX512-NEXT: vroundpd $10, %xmm1, %xmm1
-; AVX512-NEXT: vroundpd $10, %xmm2, %xmm2
-; AVX512-NEXT: vroundpd $10, %xmm3, %xmm3
+; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512-NEXT: vrndscalepd $10, %zmm0, %zmm0
; AVX512-NEXT: retq
%v0 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a0)
%v1 = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a1)
@@ -90,25 +99,34 @@ define <16 x float> @concat_ceil_v16f32_v4f32(<4 x float> %a0, <4 x float> %a1,
; SSE-NEXT: roundps $10, %xmm3, %xmm3
; SSE-NEXT: retq
;
-; AVX1OR2-LABEL: concat_ceil_v16f32_v4f32:
-; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vroundps $10, %xmm0, %xmm0
-; AVX1OR2-NEXT: vroundps $10, %xmm1, %xmm1
-; AVX1OR2-NEXT: vroundps $10, %xmm2, %xmm2
-; AVX1OR2-NEXT: vroundps $10, %xmm3, %xmm3
-; AVX1OR2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1OR2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
-; AVX1OR2-NEXT: retq
+; AVX1-LABEL: concat_ceil_v16f32_v4f32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vroundps $10, %ymm0, %ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
+; AVX1-NEXT: vroundps $10, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: concat_ceil_v16f32_v4f32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vroundps $10, %ymm0, %ymm0
+; AVX2-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
+; AVX2-NEXT: vroundps $10, %ymm1, %ymm1
+; AVX2-NEXT: retq
;
; AVX512-LABEL: concat_ceil_v16f32_v4f32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vroundps $10, %xmm0, %xmm0
-; AVX512-NEXT: vroundps $10, %xmm1, %xmm1
-; AVX512-NEXT: vroundps $10, %xmm2, %xmm2
-; AVX512-NEXT: vroundps $10, %xmm3, %xmm3
+; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512-NEXT: vrndscaleps $10, %zmm0, %zmm0
; AVX512-NEXT: retq
%v0 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a0)
%v1 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a1)
@@ -137,9 +155,9 @@ define <8 x double> @concat_ceil_v8f64_v4f64(<4 x double> %a0, <4 x double> %a1)
;
; AVX512-LABEL: concat_ceil_v8f64_v4f64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vroundpd $10, %ymm0, %ymm0
-; AVX512-NEXT: vroundpd $10, %ymm1, %ymm1
+; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512-NEXT: vrndscalepd $10, %zmm0, %zmm0
; AVX512-NEXT: retq
%v0 = call <4 x double> @llvm.ceil.v4f64(<4 x double> %a0)
%v1 = call <4 x double> @llvm.ceil.v4f64(<4 x double> %a1)
@@ -164,9 +182,9 @@ define <16 x float> @concat_ceil_v16f32_v8f32(<8 x float> %a0, <8 x float> %a1)
;
; AVX512-LABEL: concat_ceil_v16f32_v8f32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vroundps $10, %ymm0, %ymm0
-; AVX512-NEXT: vroundps $10, %ymm1, %ymm1
+; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512-NEXT: vrndscaleps $10, %zmm0, %zmm0
; AVX512-NEXT: retq
%v0 = call <8 x float> @llvm.ceil.v8f32(<8 x float> %a0)
%v1 = call <8 x float> @llvm.ceil.v8f32(<8 x float> %a1)
diff --git a/llvm/test/CodeGen/X86/combine-fnearbyint.ll b/llvm/test/CodeGen/X86/combine-fnearbyint.ll
index 14d1017aec630..fde136af7c4c2 100644
--- a/llvm/test/CodeGen/X86/combine-fnearbyint.ll
+++ b/llvm/test/CodeGen/X86/combine-fnearbyint.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefixes=AVX,AVX1OR2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX,AVX1OR2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX,AVX512
define <4 x double> @concat_nearbyint_v4f64_v2f64(<2 x double> %a0, <2 x double> %a1) {
@@ -13,9 +13,9 @@ define <4 x double> @concat_nearbyint_v4f64_v2f64(<2 x double> %a0, <2 x double>
;
; AVX-LABEL: concat_nearbyint_v4f64_v2f64:
; AVX: # %bb.0:
-; AVX-NEXT: vroundpd $12, %xmm0, %xmm0
-; AVX-NEXT: vroundpd $12, %xmm1, %xmm1
+; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT: vroundpd $12, %ymm0, %ymm0
; AVX-NEXT: retq
%v0 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a0)
%v1 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a1)
@@ -32,9 +32,9 @@ define <8 x float> @concat_nearbyint_v8f32_v4f32(<4 x float> %a0, <4 x float> %a
;
; AVX-LABEL: concat_nearbyint_v8f32_v4f32:
; AVX: # %bb.0:
-; AVX-NEXT: vroundps $12, %xmm0, %xmm0
-; AVX-NEXT: vroundps $12, %xmm1, %xmm1
+; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT: vroundps $12, %ymm0, %ymm0
; AVX-NEXT: retq
%v0 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a0)
%v1 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a1)
@@ -51,25 +51,34 @@ define <8 x double> @concat_nearbyint_v8f64_v2f64(<2 x double> %a0, <2 x double>
; SSE-NEXT: roundpd $12, %xmm3, %xmm3
; SSE-NEXT: retq
;
-; AVX1OR2-LABEL: concat_nearbyint_v8f64_v2f64:
-; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vroundpd $12, %xmm0, %xmm0
-; AVX1OR2-NEXT: vroundpd $12, %xmm1, %xmm1
-; AVX1OR2-NEXT: vroundpd $12, %xmm2, %xmm2
-; AVX1OR2-NEXT: vroundpd $12, %xmm3, %xmm3
-; AVX1OR2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1OR2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
-; AVX1OR2-NEXT: retq
+; AVX1-LABEL: concat_nearbyint_v8f64_v2f64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vroundpd $12, %ymm0, %ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
+; AVX1-NEXT: vroundpd $12, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: concat_nearbyint_v8f64_v2f64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vroundpd $12, %ymm0, %ymm0
+; AVX2-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
+; AVX2-NEXT: vroundpd $12, %ymm1, %ymm1
+; AVX2-NEXT: retq
;
; AVX512-LABEL: concat_nearbyint_v8f64_v2f64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vroundpd $12, %xmm0, %xmm0
-; AVX512-NEXT: vroundpd $12, %xmm1, %xmm1
-; AVX512-NEXT: vroundpd $12, %xmm2, %xmm2
-; AVX512-NEXT: vroundpd $12, %xmm3, %xmm3
+; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512-NEXT: vrndscalepd $12, %zmm0, %zmm0
; AVX512-NEXT: retq
%v0 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a0)
%v1 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a1)
@@ -90,25 +99,34 @@ define <16 x float> @concat_nearbyint_v16f32_v4f32(<4 x float> %a0, <4 x float>
; SSE-NEXT: roundps $12, %xmm3, %xmm3
; SSE-NEXT: retq
;
-; AVX1OR2-LABEL: concat_nearbyint_v16f32_v4f32:
-; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vroundps $12, %xmm0, %xmm0
-; AVX1OR2-NEXT: vroundps $12, %xmm1, %xmm1
-; AVX1OR2-NEXT: vroundps $12, %xmm2, %xmm2
-; AVX1OR2-NEXT: vroundps $12, %xmm3, %xmm3
-; AVX1OR2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1OR2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
-; AVX1OR2-NEXT: retq
+; AVX1-LABEL: concat_nearbyint_v16f32_v4f32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vroundps $12, %ymm0, %ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
+; AVX1-NEXT: vroundps $12, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: concat_nearbyint_v16f32_v4f32:
+; AVX2: # %bb.0:
+; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vroundps $12, %ymm0, %ymm0
+; AVX2-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
+; AVX2-NEXT: vroundps $12, %ymm1, %ymm1
+; AVX2-NEXT: retq
;
; AVX512-LABEL: concat_nearbyint_v16f32_v4f32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vroundps $12, %xmm0, %xmm0
-; AVX512-NEXT: vroundps $12, %xmm1, %xmm1
-; AVX512-NEXT: vroundps $12, %xmm2, %xmm2
-; AVX512-NEXT: vroundps $12, %xmm3, %xmm3
+; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512-NEXT: vrndscaleps $12, %zmm0, %zmm0
; AVX512-NEXT: retq
%v0 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a0)
%v1 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a1)
@@ -137,9 +155,9 @@ define <8 x double> @concat_nearbyint_v8f64_v4f64(<4 x double> %a0, <4 x double>
;
; AVX512-LABEL: concat_nearbyint_v8f64_v4f64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vroundpd $12, %ymm0, %ymm0
-; AVX512-NEXT: vroundpd $12, %ymm1, %ymm1
+; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512-NEXT: vrndscalepd $12, %zmm0, %zmm0
; AVX512-NEXT: retq
%v0 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %a0)
%v1 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %a1)
@@ -164,9 +182,9 @@ define <16 x float> @concat_nearbyint_v16f32_v8f32(<8 x float> %a0, <8 x float>
;
; AVX512-LABEL: concat_nearbyint_v16f32_v8f32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vroundps $12, %ymm0, %ymm0
-; AVX512-NEXT: vroundps $12, %ymm1, %ymm1
+; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512-NEXT: vrndscaleps $12, %zmm0, %zmm0
; AVX512-NEXT: retq
%v0 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %a0)
%v1 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %a1)
diff --git a/llvm/test/CodeGen/X86/combine-frint.ll b/llvm/test/CodeGen/X86/combine-frint.ll
index 901ce2c1f0d82..1c52529e8386c 100644
--- a/llvm/test/CodeGen/X86/combine-frint.ll
+++ b/llvm/test/CodeGen/X86/combine-frint.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefixes=AVX,AVX1OR2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX,AVX1OR2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX,AVX1OR2,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX,AVX512
define <4 x double> @concat_rint_v4f64_v2f64(<2 x double> %a0, <2 x double> %a1) {
@@ -13,9 +13,9 @@ define <4 x double> @concat_rint_v4f64_v2f64(<2 x double> %a0, <2 x double> %a1)
;
; AVX-LABEL: concat_rint_v4f64_v2f64:
; AVX: # %bb.0:
-; AVX-NEXT: vroundpd $4, %xmm0, %xmm0
-; AVX-NEXT: vroundpd $4, %xmm1, %xmm1
+; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT: vroundpd $4, %ymm0, %ymm0
; AVX-NEXT: retq
%v0 = call <2 x double> @llvm.rint.v2f64(<2 x double> %a0)
%v1 = call <2 x double> @llvm.rint.v2f64(<2 x double> %a1)
@@ -32,9 +32,9 @@ define <8 x float> @concat_rint_v8f32_v4f32(<4 x float> %a0, <4 x float> %a1) {
;
; AVX-LABEL: concat_rint_v8f32_v4f32:
; AVX: # %bb.0:
-; AVX-NEXT: vroundps $4, %xmm0, %xmm0
-; AVX-NEXT: vroundps $4, %xmm1, %xmm1
+; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT: vroundps $4, %ymm0, %ymm0
; AVX-NEXT: retq
%v0 = call <4 x float> @llvm.rint.v4f32(<4 x float> %a0)
%v1 = call <4 x float> @llvm.rint.v4f32(<4 x float> %a1)
@@ -51,25 +51,34 @@ define <8 x double> @concat_rint_v8f64_v2f64(<2 x double> %a0, <2 x double> %a1,
; SSE-NEXT: roundpd $4, %xmm3, %xmm3
; SSE-NEXT: retq
;
-; AVX1OR2-LABEL: concat_rint_v8f64_v2f64:
-; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vroundpd $4, %xmm0, %xmm0
-; AVX1OR2-NEXT: vroundpd $4, %xmm1, %xmm1
-; AVX1OR2-NEXT: vroundpd $4, %xmm2, %xmm2
-; AVX1OR2-NEXT: vroundpd $4, %xmm3, %xmm3
-; AVX1OR2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1OR2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
-; AVX1OR2-NEXT: retq
+; AVX1-LABEL: concat_rint_v8f64_v2f64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vroundpd $4, %ymm0, %ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
+; AVX1-NEXT: vroundpd $4, %ymm1, %ymm1
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: concat_rint_v8f64_v2f64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
+; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT: vroundpd $4, %ymm0, %ymm0
+; AVX2-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
+; AVX2-NEXT: vroundpd $4, %ymm1, %ymm1
+; AVX2-NEXT: retq
;
; AVX512-LABEL: concat_rint_v8f64_v2f64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vroundpd $4, %xmm0, %xmm0
-; AVX512-NEXT: vroundpd $4, %xmm1, %xmm1
-; AVX512-NEXT: vroundpd $4, %xmm2, %xmm2
-; AVX512-NEXT: vroundpd $4, %xmm3, %xmm3
+; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; A...
[truncated]
|
RKSimon
added a commit
to RKSimon/llvm-project
that referenced
this pull request
Dec 1, 2025
These were missed in llvm#170160
RKSimon
added a commit
to RKSimon/llvm-project
that referenced
this pull request
Dec 1, 2025
These were missed in llvm#170160
RKSimon
added a commit
to RKSimon/llvm-project
that referenced
this pull request
Dec 1, 2025
…OOR intrinsics together. These were missed in llvm#170160
augusto2112
pushed a commit
to augusto2112/llvm-project
that referenced
this pull request
Dec 3, 2025
…rinsics together (llvm#170160)
augusto2112
pushed a commit
to augusto2112/llvm-project
that referenced
this pull request
Dec 3, 2025
…cs (llvm#170168). These were missed in llvm#170160
augusto2112
pushed a commit
to augusto2112/llvm-project
that referenced
this pull request
Dec 3, 2025
…cs (llvm#170166). These were missed in llvm#170160
augusto2112
pushed a commit
to augusto2112/llvm-project
that referenced
this pull request
Dec 3, 2025
…OOR intrinsics together (llvm#170176). These were missed in llvm#170160
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
No description provided.