-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[DAGCombiner][X86][WIP] Combine (build_vector (load X))->(vecty (load X)) #88753
base: main
Are you sure you want to change the base?
Conversation
… X)) This is an alternative to llvm#88261. This gets the mmx cases from that patch without affecting RISC-V. The code here is largely lifted from the (bitcast (load))->(load) code.
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-x86 Author: Craig Topper (topperc) Changes: This is an alternative to #88261. This gets the mmx cases from that patch without affecting RISC-V. The code here is largely lifted from the (bitcast (load))->(load) code. Patch is 67.95 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/88753.diff 4 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index dbbe8d9193c25c..beab5a052a048b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -23501,6 +23501,32 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
}
+ if (N->getNumOperands() == 1) {
+ SDValue N0 = N->getOperand(0);
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+ // Do not remove the cast if the types differ in endian layout.
+ TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
+ TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
+ // If the load is volatile, we only want to change the load type if the
+ // resulting load is legal. Otherwise we might increase the number of
+ // memory accesses. We don't care if the original type was legal or not
+ // as we assume software couldn't rely on the number of accesses of an
+ // illegal type.
+ ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
+ TLI.isOperationLegal(ISD::LOAD, VT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+
+ if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
+ *LN0->getMemOperand())) {
+ SDValue Load =
+ DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
+ LN0->getMemOperand());
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
+ return Load;
+ }
+ }
+ }
+
return SDValue();
}
diff --git a/llvm/test/CodeGen/X86/2007-05-15-maskmovq.ll b/llvm/test/CodeGen/X86/2007-05-15-maskmovq.ll
index 69f733461efc77..3ced6c9483a6c8 100644
--- a/llvm/test/CodeGen/X86/2007-05-15-maskmovq.ll
+++ b/llvm/test/CodeGen/X86/2007-05-15-maskmovq.ll
@@ -6,22 +6,11 @@ define void @test(<1 x i64> %c64, <1 x i64> %mask1, ptr %P) {
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: subl $16, %esp
-; CHECK-NEXT: .cfi_def_cfa_offset 24
; CHECK-NEXT: .cfi_offset %edi, -8
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; CHECK-NEXT: movl %eax, (%esp)
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
-; CHECK-NEXT: movq (%esp), %mm0
+; CHECK-NEXT: movq {{[0-9]+}}(%esp), %mm0
; CHECK-NEXT: movq {{[0-9]+}}(%esp), %mm1
-; CHECK-NEXT: maskmovq %mm0, %mm1
-; CHECK-NEXT: addl $16, %esp
+; CHECK-NEXT: maskmovq %mm1, %mm0
; CHECK-NEXT: popl %edi
; CHECK-NEXT: retl
entry:
diff --git a/llvm/test/CodeGen/X86/mmx-intrinsics.ll b/llvm/test/CodeGen/X86/mmx-intrinsics.ll
index a43d9400cde6c8..a7917d58cde5d7 100644
--- a/llvm/test/CodeGen/X86/mmx-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/mmx-intrinsics.ll
@@ -12,17 +12,9 @@ define i64 @test1(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: phaddw {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: phaddw 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -57,17 +49,9 @@ define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: pcmpgtd {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: pcmpgtd 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -102,17 +86,9 @@ define i64 @test87(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: pcmpgtw {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: pcmpgtw 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -147,17 +123,9 @@ define i64 @test86(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: pcmpgtb {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: pcmpgtb 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -192,17 +160,9 @@ define i64 @test85(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: pcmpeqd {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: pcmpeqd 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -237,17 +197,9 @@ define i64 @test84(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: pcmpeqw {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: pcmpeqw 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -282,17 +234,9 @@ define i64 @test83(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: pcmpeqb {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: pcmpeqb 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -327,17 +271,9 @@ define i64 @test82(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: punpckldq {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[0],mem[0]
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: punpckldq 16(%ebp), %mm0 # mm0 = mm0[0],mem[0]
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -372,17 +308,9 @@ define i64 @test81(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: punpcklwd {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1]
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: punpcklwd 16(%ebp), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1]
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -417,17 +345,9 @@ define i64 @test80(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: punpcklbw {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3]
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: punpcklbw 16(%ebp), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3]
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -462,17 +382,9 @@ define i64 @test79(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: punpckhdq {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[1],mem[1]
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: punpckhdq 16(%ebp), %mm0 # mm0 = mm0[1],mem[1]
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -507,17 +419,9 @@ define i64 @test78(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: punpckhwd {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3]
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: punpckhwd 16(%ebp), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3]
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -552,17 +456,9 @@ define i64 @test77(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: punpckhbw {{[0-9]+}}(%esp), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7]
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: punpckhbw 16(%ebp), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7]
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -597,17 +493,9 @@ define i64 @test76(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: packuswb {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: packuswb 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -642,17 +530,9 @@ define i64 @test75(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: packssdw {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: packssdw 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -687,17 +567,9 @@ define i64 @test74(<1 x i64> %a, <1 x i64> %b) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $24, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 16(%ebp), %eax
-; X86-NEXT: movl 20(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
-; X86-NEXT: packsswb {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
+; X86-NEXT: packsswb 16(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -732,12 +604,8 @@ define i64 @test73(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
; X86-NEXT: psrad $3, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
@@ -770,12 +638,8 @@ define i64 @test72(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
; X86-NEXT: psraw $3, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
@@ -806,12 +670,8 @@ define i64 @test72_2(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -873,12 +733,8 @@ define i64 @test70(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
; X86-NEXT: psrld $3, %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
@@ -909,12 +765,8 @@ define i64 @test70_2(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movq {{[0-9]+}}(%esp), %mm0
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: movq 8(%ebp), %mm0
; X86-NEXT: movq %mm0, (%esp)
; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
@@ -944,12 +796,8 @@ define i64 @test69(<1 x i64> %a) nounwind readnone optsize ssp {
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl 12(%ebp), %ecx
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
-; X86-NEXT: m...
[truncated]
|
You can test this locally with the following command: git-clang-format --diff a06073f91e7bbbb532e68bbf6b903c2f5051f4c2 c1570e959de48e876202d8a6e679abd20a554176 -- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
View the diff from clang-format here.
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index beab5a052a..e758f25eec 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -23518,9 +23518,8 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
*LN0->getMemOperand())) {
- SDValue Load =
- DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
- LN0->getMemOperand());
+ SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(),
+ LN0->getBasePtr(), LN0->getMemOperand());
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
return Load;
}
|
Just to be clear - #88261 is not about improving MMX codegen (it was just a weird WTF moment). I'm happy to continue work on the riscv changes/regressions. |
Is it part of a larger series or should I look closer at the other changes in 88261? It just seemed like build_pair should be removed by type legalization and the opportunities should be visible post type legalization without the build_pair. |
This is an alternative to #88261.
This gets the mmx cases from that patch without affecting RISC-V.
The code here is largely lifted from the (bitcast (load))->(load) code.