[X86] combineConcatVectorOps - add handling for FADD/FMUL etc. with a repeated vector op #172682
Conversation
… repeated vector op

Similar to what we already do for integer binops - concatenate the repeated operand instead of the results.
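For context, the rewrite is justified by a lanewise identity: when both operands of every narrow sub-op are the same value, concat(op(a,a), op(b,b)) equals op(concat(a,b), concat(a,b)); each lane sees exactly the same floating-point operation either way, so the identity is exact even for fadd/fmul. A minimal standalone sketch of that identity (plain C++ with illustrative mul/concat helpers, not LLVM APIs):

#include <array>
#include <cassert>
#include <cstddef>

// Lanewise multiply on a fixed-size "vector".
template <std::size_t N>
std::array<double, N> mul(const std::array<double, N> &X,
                          const std::array<double, N> &Y) {
  std::array<double, N> R{};
  for (std::size_t I = 0; I != N; ++I)
    R[I] = X[I] * Y[I];
  return R;
}

// Concatenate two N-lane vectors into one 2N-lane vector.
template <std::size_t N>
std::array<double, 2 * N> concat(const std::array<double, N> &Lo,
                                 const std::array<double, N> &Hi) {
  std::array<double, 2 * N> R{};
  for (std::size_t I = 0; I != N; ++I) {
    R[I] = Lo[I];
    R[N + I] = Hi[I];
  }
  return R;
}

int main() {
  std::array<double, 2> A{1.5, -2.0}, B{3.0, 0.25};
  // Narrow form: square each half, then concatenate the results
  // (two narrow multiplies, one insert).
  auto Narrow = concat(mul(A, A), mul(B, B));
  // Wide form: concatenate the repeated operand once, then square
  // the wide vector (one insert, one wide multiply). This is the
  // shape the combine prefers.
  auto Wide = mul(concat(A, B), concat(A, B));
  assert(Narrow == Wide);
}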
@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

Changes: Similar to what we already do for integer binops - concatenate the repeated operand instead of the results.

Full diff: https://github.com/llvm/llvm-project/pull/172682.diff

2 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index aec11ad97ab5c..82cde9c6127fa 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -59662,7 +59662,9 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
(VT.is512BitVector() && Subtarget.useAVX512Regs()))) {
SDValue Concat0 = CombineSubOperand(VT, Ops, 0);
SDValue Concat1 = CombineSubOperand(VT, Ops, 1);
- if (Concat0 || Concat1)
+ if (Concat0 || Concat1 || llvm::all_of(Ops, [](SDValue Op) {
+ return Op.getOperand(0) == Op.getOperand(1);
+ }))
return DAG.getNode(Opcode, DL, VT,
Concat0 ? Concat0 : ConcatSubOperand(VT, Ops, 0),
Concat1 ? Concat1 : ConcatSubOperand(VT, Ops, 1));
diff --git a/llvm/test/CodeGen/X86/combine-fmul.ll b/llvm/test/CodeGen/X86/combine-fmul.ll
index 258f782897716..c5966a81147e9 100644
--- a/llvm/test/CodeGen/X86/combine-fmul.ll
+++ b/llvm/test/CodeGen/X86/combine-fmul.ll
@@ -13,9 +13,9 @@ define <4 x double> @concat_fmul_self_v4f64_v2f64(<2 x double> %a0, <2 x double>
;
; AVX-LABEL: concat_fmul_self_v4f64_v2f64:
; AVX: # %bb.0:
-; AVX-NEXT: vmulpd %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vmulpd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT: vmulpd %ymm0, %ymm0, %ymm0
; AVX-NEXT: retq
%v0 = fmul <2 x double> %a0, %a0
%v1 = fmul <2 x double> %a1, %a1
@@ -32,9 +32,9 @@ define <8 x float> @concat_fmul_self_v8f32_v4f32(<4 x float> %a0, <4 x float> %a
;
; AVX-LABEL: concat_fmul_self_v8f32_v4f32:
; AVX: # %bb.0:
-; AVX-NEXT: vmulps %xmm0, %xmm0, %xmm0
-; AVX-NEXT: vmulps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT: vmulps %ymm0, %ymm0, %ymm0
; AVX-NEXT: retq
%v0 = fmul <4 x float> %a0, %a0
%v1 = fmul <4 x float> %a1, %a1
@@ -53,23 +53,22 @@ define <8 x double> @concat_fmul_self_v8f64_v2f64(<2 x double> %a0, <2 x double>
;
; AVX1OR2-LABEL: concat_fmul_self_v8f64_v2f64:
; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vmulpd %xmm0, %xmm0, %xmm0
-; AVX1OR2-NEXT: vmulpd %xmm1, %xmm1, %xmm1
-; AVX1OR2-NEXT: vmulpd %xmm2, %xmm2, %xmm2
-; AVX1OR2-NEXT: vmulpd %xmm3, %xmm3, %xmm3
+; AVX1OR2-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX1OR2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX1OR2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1OR2-NEXT: vmulpd %ymm0, %ymm0, %ymm0
; AVX1OR2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
+; AVX1OR2-NEXT: vmulpd %ymm1, %ymm1, %ymm1
; AVX1OR2-NEXT: retq
;
; AVX512-LABEL: concat_fmul_self_v8f64_v2f64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmulpd %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmulpd %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vmulpd %xmm2, %xmm2, %xmm2
-; AVX512-NEXT: vmulpd %xmm3, %xmm3, %xmm3
+; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512-NEXT: vmulpd %zmm0, %zmm0, %zmm0
; AVX512-NEXT: retq
%v0 = fmul <2 x double> %a0, %a0
%v1 = fmul <2 x double> %a1, %a1
@@ -92,23 +91,22 @@ define <16 x float> @concat_fmul_self_v16f32_v4f32(<4 x float> %a0, <4 x float>
;
; AVX1OR2-LABEL: concat_fmul_self_v16f32_v4f32:
; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vmulps %xmm0, %xmm0, %xmm0
-; AVX1OR2-NEXT: vmulps %xmm1, %xmm1, %xmm1
-; AVX1OR2-NEXT: vmulps %xmm2, %xmm2, %xmm2
-; AVX1OR2-NEXT: vmulps %xmm3, %xmm3, %xmm3
+; AVX1OR2-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX1OR2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX1OR2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1OR2-NEXT: vmulps %ymm0, %ymm0, %ymm0
; AVX1OR2-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
+; AVX1OR2-NEXT: vmulps %ymm1, %ymm1, %ymm1
; AVX1OR2-NEXT: retq
;
; AVX512-LABEL: concat_fmul_self_v16f32_v4f32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmulps %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmulps %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vmulps %xmm2, %xmm2, %xmm2
-; AVX512-NEXT: vmulps %xmm3, %xmm3, %xmm3
+; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512-NEXT: vmulps %zmm0, %zmm0, %zmm0
; AVX512-NEXT: retq
%v0 = fmul <4 x float> %a0, %a0
%v1 = fmul <4 x float> %a1, %a1
@@ -137,9 +135,9 @@ define <8 x double> @concat_fmul_self_v8f64_v4f64(<4 x double> %a0, <4 x double>
;
; AVX512-LABEL: concat_fmul_self_v8f64_v4f64:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmulpd %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: vmulpd %ymm1, %ymm1, %ymm1
+; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512-NEXT: vmulpd %zmm0, %zmm0, %zmm0
; AVX512-NEXT: retq
%v0 = fmul <4 x double> %a0, %a0
%v1 = fmul <4 x double> %a1, %a1
@@ -164,9 +162,9 @@ define <16 x float> @concat_fmul_self_v16f32_v8f32(<8 x float> %a0, <8 x float>
;
; AVX512-LABEL: concat_fmul_self_v16f32_v8f32:
; AVX512: # %bb.0:
-; AVX512-NEXT: vmulps %ymm0, %ymm0, %ymm0
-; AVX512-NEXT: vmulps %ymm1, %ymm1, %ymm1
+; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512-NEXT: vmulps %zmm0, %zmm0, %zmm0
; AVX512-NEXT: retq
%v0 = fmul <8 x float> %a0, %a0
%v1 = fmul <8 x float> %a1, %a1
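In the updated checks above, the narrow per-half multiplies (vmulpd/vmulps on xmm or ymm registers) collapse into a single wide multiply issued after the vinsertf128/vinsertf64x4 concatenation. The new `# kill` lines are register-liveness annotations rather than instructions: they record that the incoming xmm/ymm value is now read as the low part of a wider ymm/zmm register.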
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/129/builds/35164. Here is the relevant piece of the build log for reference: