-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[X86] Improve transform for add-like nodes to add
#83691
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-x86 Author: None (goldsteinn) Changes: We previously did this only in tablegen, after we would have already Patch is 337.50 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/83691.diff 115 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 5cbd9ab4dc2d6c..9f34a4e1870f71 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5294,11 +5294,26 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
return;
if (tryVPTERNLOG(Node))
return;
-
[[fallthrough]];
case ISD::ADD:
if (Opcode == ISD::ADD && matchBitExtract(Node))
return;
+
+ // Convert addlike to add before final selection. Do this before we drop
+ // flags like `disjoint`.
+ // NB: Conversion to add is preferable so we use `lea` in codegen.
+ if (Opcode != ISD::ADD && NVT.isScalarInteger() &&
+ (Opcode == ISD::OR ||
+ (NVT == MVT::i8 || NVT == MVT::i16 || NVT == MVT::i32)) &&
+ CurDAG->isADDLike(SDValue(Node, 0))
+ ) {
+ SDValue AsAdd = CurDAG->getNode(ISD::ADD, SDLoc(Node), NVT,
+ Node->getOperand(0), Node->getOperand(1));
+ ReplaceUses(SDValue(Node, 0), AsAdd);
+ CurDAG->RemoveDeadNode(Node);
+ Node = AsAdd.getNode();
+ Opcode = ISD::ADD;
+ }
[[fallthrough]];
case ISD::SUB: {
// Try to avoid folding immediates with multiple uses for optsize.
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index f393f86e64aadd..b31c5be87a5839 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1574,7 +1574,24 @@ def : Pat<(or (and GR64:$dst, -65536),
def : Pat<(or (and GR32:$dst, -65536),
(i32 (zextloadi16 addr:$src))),
- (INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;
+ (INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;
+
+// We convert or -> add when the or is disjoint so need to handle for add as well.
+def : Pat<(add (and GR64:$dst, -256),
+ (i64 (zextloadi8 addr:$src))),
+ (INSERT_SUBREG (i64 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>;
+
+def : Pat<(add (and GR32:$dst, -256),
+ (i32 (zextloadi8 addr:$src))),
+ (INSERT_SUBREG (i32 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>;
+
+def : Pat<(add (and GR64:$dst, -65536),
+ (i64 (zextloadi16 addr:$src))),
+ (INSERT_SUBREG (i64 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;
+
+def : Pat<(add (and GR32:$dst, -65536),
+ (i32 (zextloadi16 addr:$src))),
+ (INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;
// To avoid needing to materialize an immediate in a register, use a 32-bit and
// with implicit zero-extension instead of a 64-bit and if the immediate has at
diff --git a/llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll b/llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
index 609be3bb2e54f0..50e736ac68d29e 100644
--- a/llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
+++ b/llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s | FileCheck %s
; Check that the shr(shl X, 56), 48) is not mistakenly turned into
@@ -16,11 +17,13 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-unknown-linux-gnu"
define i64 @foo(i64 %b) nounwind readnone {
-entry:
; CHECK-LABEL: foo:
-; CHECK: movsbq %dil, %rax
-; CHECK: shlq $8, %rax
-; CHECK: orq $1, %rax
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movsbq %dil, %rax
+; CHECK-NEXT: shlq $8, %rax
+; CHECK-NEXT: incq %rax
+; CHECK-NEXT: retq
+entry:
%shl = shl i64 %b, 56 ; <i64> [#uses=1]
%shr = ashr i64 %shl, 48 ; <i64> [#uses=1]
%add5 = or i64 %shr, 1 ; <i64> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/3addr-or.ll b/llvm/test/CodeGen/X86/3addr-or.ll
index 65f6d2b4123e8e..1f466afcadc9ca 100644
--- a/llvm/test/CodeGen/X86/3addr-or.ll
+++ b/llvm/test/CodeGen/X86/3addr-or.ll
@@ -24,7 +24,7 @@ define i64 @test2(i8 %A, i8 %B) nounwind {
; CHECK-NEXT: andl $48, %edi
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: shrl $4, %eax
-; CHECK-NEXT: orl %edi, %eax
+; CHECK-NEXT: addl %edi, %eax
; CHECK-NEXT: retq
%C = zext i8 %A to i64
%D = shl i64 %C, 4
@@ -42,7 +42,7 @@ define void @test3(i32 %x, ptr %P) nounwind readnone ssp {
; CHECK-LABEL: test3:
; CHECK: # %bb.0:
; CHECK-NEXT: shll $5, %edi
-; CHECK-NEXT: orl $3, %edi
+; CHECK-NEXT: addl $3, %edi
; CHECK-NEXT: movl %edi, (%rsi)
; CHECK-NEXT: retq
%t0 = shl i32 %x, 5
@@ -71,7 +71,7 @@ define void @test5(i32 %a, i32 %b, ptr nocapture %P) nounwind ssp {
; CHECK: # %bb.0:
; CHECK-NEXT: andl $6, %edi
; CHECK-NEXT: andl $16, %esi
-; CHECK-NEXT: orl %edi, %esi
+; CHECK-NEXT: addl %edi, %esi
; CHECK-NEXT: movl %esi, (%rdx)
; CHECK-NEXT: retq
%and = and i32 %a, 6
diff --git a/llvm/test/CodeGen/X86/addcarry2.ll b/llvm/test/CodeGen/X86/addcarry2.ll
index 0338577dbddc2b..1a5d0f4fe45416 100644
--- a/llvm/test/CodeGen/X86/addcarry2.ll
+++ b/llvm/test/CodeGen/X86/addcarry2.ll
@@ -138,7 +138,7 @@ define void @adc_load_store_32_127(ptr inreg %x, ptr inreg %x2, i32 inreg %y) no
; X64-NEXT: movl (%rdi), %eax # encoding: [0x8b,0x07]
; X64-NEXT: shlq $32, %rax # encoding: [0x48,0xc1,0xe0,0x20]
; X64-NEXT: movl %edx, %ecx # encoding: [0x89,0xd1]
-; X64-NEXT: orq %rax, %rcx # encoding: [0x48,0x09,0xc1]
+; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
; X64-NEXT: movabsq $545460846593, %rax # encoding: [0x48,0xb8,0x01,0x00,0x00,0x00,0x7f,0x00,0x00,0x00]
; X64-NEXT: # imm = 0x7F00000001
; X64-NEXT: xorl %edx, %edx # encoding: [0x31,0xd2]
@@ -178,7 +178,7 @@ define void @adc_load_store_32_128(ptr inreg %x, ptr inreg %x2, i32 inreg %y) no
; X64-NEXT: movl (%rdi), %eax # encoding: [0x8b,0x07]
; X64-NEXT: shlq $32, %rax # encoding: [0x48,0xc1,0xe0,0x20]
; X64-NEXT: movl %edx, %ecx # encoding: [0x89,0xd1]
-; X64-NEXT: orq %rax, %rcx # encoding: [0x48,0x09,0xc1]
+; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
; X64-NEXT: movabsq $549755813889, %rax # encoding: [0x48,0xb8,0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00]
; X64-NEXT: # imm = 0x8000000001
; X64-NEXT: xorl %edx, %edx # encoding: [0x31,0xd2]
diff --git a/llvm/test/CodeGen/X86/and-or-fold.ll b/llvm/test/CodeGen/X86/and-or-fold.ll
index 1bb5fdeebac71c..4071b364a25c3b 100644
--- a/llvm/test/CodeGen/X86/and-or-fold.ll
+++ b/llvm/test/CodeGen/X86/and-or-fold.ll
@@ -45,7 +45,7 @@ define i32 @test1(i32 %x, i16 %y) {
; DARWIN-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; DARWIN-NEXT: movl {{[0-9]+}}(%esp), %eax
; DARWIN-NEXT: shll $16, %eax
-; DARWIN-NEXT: orl %ecx, %eax
+; DARWIN-NEXT: addl %ecx, %eax
; DARWIN-NEXT: andl $16711807, %eax ## imm = 0xFF007F
; DARWIN-NEXT: retl
;
@@ -54,7 +54,7 @@ define i32 @test1(i32 %x, i16 %y) {
; DARWIN-OPT-NEXT: andl $127, %esi
; DARWIN-OPT-NEXT: movzbl %dil, %eax
; DARWIN-OPT-NEXT: shll $16, %eax
-; DARWIN-OPT-NEXT: orl %esi, %eax
+; DARWIN-OPT-NEXT: addl %esi, %eax
; DARWIN-OPT-NEXT: retq
%tmp1 = zext i16 %y to i32
%tmp2 = and i32 %tmp1, 127
diff --git a/llvm/test/CodeGen/X86/andimm8.ll b/llvm/test/CodeGen/X86/andimm8.ll
index 6242d4f4c222bb..506e28300e71b0 100644
--- a/llvm/test/CodeGen/X86/andimm8.ll
+++ b/llvm/test/CodeGen/X86/andimm8.ll
@@ -29,7 +29,7 @@ define void @foo(i64 %zed, ptr %x) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x08]
; X86-NEXT: andl $-4, %ecx # encoding: [0x83,0xe1,0xfc]
-; X86-NEXT: orl $2, %ecx # encoding: [0x83,0xc9,0x02]
+; X86-NEXT: addl $2, %ecx # encoding: [0x83,0xc1,0x02]
; X86-NEXT: movl %edx, 4(%eax) # encoding: [0x89,0x50,0x04]
; X86-NEXT: movl %ecx, (%eax) # encoding: [0x89,0x08]
; X86-NEXT: retl # encoding: [0xc3]
@@ -37,7 +37,7 @@ define void @foo(i64 %zed, ptr %x) nounwind {
; X64-LABEL: foo:
; X64: # %bb.0:
; X64-NEXT: andq $-4, %rdi # encoding: [0x48,0x83,0xe7,0xfc]
-; X64-NEXT: orq $2, %rdi # encoding: [0x48,0x83,0xcf,0x02]
+; X64-NEXT: addq $2, %rdi # encoding: [0x48,0x83,0xc7,0x02]
; X64-NEXT: movq %rdi, (%rsi) # encoding: [0x48,0x89,0x3e]
; X64-NEXT: retq # encoding: [0xc3]
%t1 = and i64 %zed, -4
diff --git a/llvm/test/CodeGen/X86/atomic-unordered.ll b/llvm/test/CodeGen/X86/atomic-unordered.ll
index df123be53474f0..903951dd5a8cff 100644
--- a/llvm/test/CodeGen/X86/atomic-unordered.ll
+++ b/llvm/test/CodeGen/X86/atomic-unordered.ll
@@ -2359,7 +2359,7 @@ define i16 @load_combine(ptr %p) {
; CHECK-O3-NEXT: movzbl (%rdi), %ecx
; CHECK-O3-NEXT: movzbl 1(%rdi), %eax
; CHECK-O3-NEXT: shll $8, %eax
-; CHECK-O3-NEXT: orl %ecx, %eax
+; CHECK-O3-NEXT: addl %ecx, %eax
; CHECK-O3-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-O3-NEXT: retq
%v1 = load atomic i8, ptr %p unordered, align 2
diff --git a/llvm/test/CodeGen/X86/avx512-calling-conv.ll b/llvm/test/CodeGen/X86/avx512-calling-conv.ll
index b39b089faa2a5e..b4c37a2e34d95d 100644
--- a/llvm/test/CodeGen/X86/avx512-calling-conv.ll
+++ b/llvm/test/CodeGen/X86/avx512-calling-conv.ll
@@ -910,13 +910,13 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; KNL-NEXT: kandw %k2, %k0, %k0
; KNL-NEXT: kmovw %r10d, %k2
; KNL-NEXT: kandw %k1, %k2, %k1
-; KNL-NEXT: kmovw %k1, %edx
+; KNL-NEXT: kmovw %k1, %esi
; KNL-NEXT: kshiftrw $1, %k0, %k1
; KNL-NEXT: kmovw %k1, %r9d
; KNL-NEXT: kshiftrw $2, %k0, %k1
; KNL-NEXT: kmovw %k1, %r8d
; KNL-NEXT: kshiftrw $3, %k0, %k1
-; KNL-NEXT: kmovw %k1, %esi
+; KNL-NEXT: kmovw %k1, %edx
; KNL-NEXT: kshiftrw $4, %k0, %k1
; KNL-NEXT: kmovw %k1, %edi
; KNL-NEXT: kshiftrw $5, %k0, %k1
@@ -928,9 +928,9 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; KNL-NEXT: kshiftrw $8, %k0, %k1
; KNL-NEXT: kmovw %k1, %ebp
; KNL-NEXT: kshiftrw $9, %k0, %k1
-; KNL-NEXT: kmovw %k1, %r14d
-; KNL-NEXT: kshiftrw $10, %k0, %k1
; KNL-NEXT: kmovw %k1, %r11d
+; KNL-NEXT: kshiftrw $10, %k0, %k1
+; KNL-NEXT: kmovw %k1, %r14d
; KNL-NEXT: kshiftrw $11, %k0, %k1
; KNL-NEXT: kmovw %k1, %r15d
; KNL-NEXT: kshiftrw $12, %k0, %k1
@@ -938,25 +938,25 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; KNL-NEXT: kshiftrw $13, %k0, %k1
; KNL-NEXT: kmovw %k1, %r13d
; KNL-NEXT: kshiftrw $14, %k0, %k1
-; KNL-NEXT: andl $1, %edx
-; KNL-NEXT: movb %dl, 2(%rax)
-; KNL-NEXT: kmovw %k0, %edx
-; KNL-NEXT: andl $1, %edx
+; KNL-NEXT: andl $1, %esi
+; KNL-NEXT: movb %sil, 2(%rax)
+; KNL-NEXT: kmovw %k0, %esi
+; KNL-NEXT: andl $1, %esi
; KNL-NEXT: andl $1, %r9d
-; KNL-NEXT: leal (%rdx,%r9,2), %r9d
-; KNL-NEXT: kmovw %k1, %edx
+; KNL-NEXT: leal (%rsi,%r9,2), %r9d
+; KNL-NEXT: kmovw %k1, %esi
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: andl $1, %r8d
; KNL-NEXT: leal (%r9,%r8,4), %r9d
; KNL-NEXT: kmovw %k0, %r8d
-; KNL-NEXT: andl $1, %esi
-; KNL-NEXT: leal (%r9,%rsi,8), %esi
+; KNL-NEXT: andl $1, %edx
+; KNL-NEXT: leal (%r9,%rdx,8), %edx
; KNL-NEXT: andl $1, %edi
; KNL-NEXT: shll $4, %edi
-; KNL-NEXT: orl %esi, %edi
; KNL-NEXT: andl $1, %ecx
; KNL-NEXT: shll $5, %ecx
-; KNL-NEXT: orl %edi, %ecx
+; KNL-NEXT: addl %edi, %ecx
+; KNL-NEXT: addl %edx, %ecx
; KNL-NEXT: andl $1, %r10d
; KNL-NEXT: shll $6, %r10d
; KNL-NEXT: andl $1, %ebx
@@ -965,28 +965,28 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; KNL-NEXT: andl $1, %ebp
; KNL-NEXT: shll $8, %ebp
; KNL-NEXT: orl %ebx, %ebp
-; KNL-NEXT: andl $1, %r14d
-; KNL-NEXT: shll $9, %r14d
-; KNL-NEXT: orl %ebp, %r14d
; KNL-NEXT: andl $1, %r11d
-; KNL-NEXT: shll $10, %r11d
-; KNL-NEXT: orl %r14d, %r11d
+; KNL-NEXT: shll $9, %r11d
+; KNL-NEXT: orl %ebp, %r11d
; KNL-NEXT: orl %ecx, %r11d
+; KNL-NEXT: andl $1, %r14d
+; KNL-NEXT: shll $10, %r14d
; KNL-NEXT: andl $1, %r15d
; KNL-NEXT: shll $11, %r15d
+; KNL-NEXT: orl %r14d, %r15d
; KNL-NEXT: andl $1, %r12d
; KNL-NEXT: shll $12, %r12d
; KNL-NEXT: orl %r15d, %r12d
; KNL-NEXT: andl $1, %r13d
; KNL-NEXT: shll $13, %r13d
; KNL-NEXT: orl %r12d, %r13d
-; KNL-NEXT: andl $1, %edx
-; KNL-NEXT: shll $14, %edx
-; KNL-NEXT: orl %r13d, %edx
+; KNL-NEXT: andl $1, %esi
+; KNL-NEXT: shll $14, %esi
+; KNL-NEXT: orl %r13d, %esi
+; KNL-NEXT: orl %r11d, %esi
; KNL-NEXT: andl $1, %r8d
; KNL-NEXT: shll $15, %r8d
-; KNL-NEXT: orl %edx, %r8d
-; KNL-NEXT: orl %r11d, %r8d
+; KNL-NEXT: orl %esi, %r8d
; KNL-NEXT: movw %r8w, (%rax)
; KNL-NEXT: popq %rbx
; KNL-NEXT: popq %r12
@@ -1223,13 +1223,13 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload
; SKX-NEXT: kandd %k1, %k0, %k0
; SKX-NEXT: kshiftrd $16, %k0, %k1
-; SKX-NEXT: kmovd %k1, %edx
+; SKX-NEXT: kmovd %k1, %esi
; SKX-NEXT: kshiftrd $1, %k0, %k1
; SKX-NEXT: kmovd %k1, %r9d
; SKX-NEXT: kshiftrd $2, %k0, %k1
; SKX-NEXT: kmovd %k1, %r8d
; SKX-NEXT: kshiftrd $3, %k0, %k1
-; SKX-NEXT: kmovd %k1, %esi
+; SKX-NEXT: kmovd %k1, %edx
; SKX-NEXT: kshiftrd $4, %k0, %k1
; SKX-NEXT: kmovd %k1, %edi
; SKX-NEXT: kshiftrd $5, %k0, %k1
@@ -1241,9 +1241,9 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; SKX-NEXT: kshiftrd $8, %k0, %k1
; SKX-NEXT: kmovd %k1, %ebp
; SKX-NEXT: kshiftrd $9, %k0, %k1
-; SKX-NEXT: kmovd %k1, %r14d
-; SKX-NEXT: kshiftrd $10, %k0, %k1
; SKX-NEXT: kmovd %k1, %r11d
+; SKX-NEXT: kshiftrd $10, %k0, %k1
+; SKX-NEXT: kmovd %k1, %r14d
; SKX-NEXT: kshiftrd $11, %k0, %k1
; SKX-NEXT: kmovd %k1, %r15d
; SKX-NEXT: kshiftrd $12, %k0, %k1
@@ -1251,25 +1251,25 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; SKX-NEXT: kshiftrd $13, %k0, %k1
; SKX-NEXT: kmovd %k1, %r13d
; SKX-NEXT: kshiftrd $14, %k0, %k1
-; SKX-NEXT: andl $1, %edx
-; SKX-NEXT: movb %dl, 2(%rax)
-; SKX-NEXT: kmovd %k0, %edx
-; SKX-NEXT: andl $1, %edx
+; SKX-NEXT: andl $1, %esi
+; SKX-NEXT: movb %sil, 2(%rax)
+; SKX-NEXT: kmovd %k0, %esi
+; SKX-NEXT: andl $1, %esi
; SKX-NEXT: andl $1, %r9d
-; SKX-NEXT: leal (%rdx,%r9,2), %r9d
-; SKX-NEXT: kmovd %k1, %edx
+; SKX-NEXT: leal (%rsi,%r9,2), %r9d
+; SKX-NEXT: kmovd %k1, %esi
; SKX-NEXT: kshiftrd $15, %k0, %k0
; SKX-NEXT: andl $1, %r8d
; SKX-NEXT: leal (%r9,%r8,4), %r9d
; SKX-NEXT: kmovd %k0, %r8d
-; SKX-NEXT: andl $1, %esi
-; SKX-NEXT: leal (%r9,%rsi,8), %esi
+; SKX-NEXT: andl $1, %edx
+; SKX-NEXT: leal (%r9,%rdx,8), %edx
; SKX-NEXT: andl $1, %edi
; SKX-NEXT: shll $4, %edi
-; SKX-NEXT: orl %esi, %edi
; SKX-NEXT: andl $1, %ecx
; SKX-NEXT: shll $5, %ecx
-; SKX-NEXT: orl %edi, %ecx
+; SKX-NEXT: addl %edi, %ecx
+; SKX-NEXT: addl %edx, %ecx
; SKX-NEXT: andl $1, %r10d
; SKX-NEXT: shll $6, %r10d
; SKX-NEXT: andl $1, %ebx
@@ -1278,28 +1278,28 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; SKX-NEXT: andl $1, %ebp
; SKX-NEXT: shll $8, %ebp
; SKX-NEXT: orl %ebx, %ebp
-; SKX-NEXT: andl $1, %r14d
-; SKX-NEXT: shll $9, %r14d
-; SKX-NEXT: orl %ebp, %r14d
; SKX-NEXT: andl $1, %r11d
-; SKX-NEXT: shll $10, %r11d
-; SKX-NEXT: orl %r14d, %r11d
+; SKX-NEXT: shll $9, %r11d
+; SKX-NEXT: orl %ebp, %r11d
; SKX-NEXT: orl %ecx, %r11d
+; SKX-NEXT: andl $1, %r14d
+; SKX-NEXT: shll $10, %r14d
; SKX-NEXT: andl $1, %r15d
; SKX-NEXT: shll $11, %r15d
+; SKX-NEXT: orl %r14d, %r15d
; SKX-NEXT: andl $1, %r12d
; SKX-NEXT: shll $12, %r12d
; SKX-NEXT: orl %r15d, %r12d
; SKX-NEXT: andl $1, %r13d
; SKX-NEXT: shll $13, %r13d
; SKX-NEXT: orl %r12d, %r13d
-; SKX-NEXT: andl $1, %edx
-; SKX-NEXT: shll $14, %edx
-; SKX-NEXT: orl %r13d, %edx
+; SKX-NEXT: andl $1, %esi
+; SKX-NEXT: shll $14, %esi
+; SKX-NEXT: orl %r13d, %esi
+; SKX-NEXT: orl %r11d, %esi
; SKX-NEXT: andl $1, %r8d
; SKX-NEXT: shll $15, %r8d
-; SKX-NEXT: orl %edx, %r8d
-; SKX-NEXT: orl %r11d, %r8d
+; SKX-NEXT: orl %esi, %r8d
; SKX-NEXT: movw %r8w, (%rax)
; SKX-NEXT: popq %rbx
; SKX-NEXT: popq %r12
@@ -1556,9 +1556,9 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; KNL_X32-NEXT: kshiftrw $1, %k0, %k1
; KNL_X32-NEXT: kmovw %k1, %ebp
; KNL_X32-NEXT: kshiftrw $2, %k0, %k1
-; KNL_X32-NEXT: kmovw %k1, %esi
-; KNL_X32-NEXT: kshiftrw $3, %k0, %k1
; KNL_X32-NEXT: kmovw %k1, %edi
+; KNL_X32-NEXT: kshiftrw $3, %k0, %k1
+; KNL_X32-NEXT: kmovw %k1, %esi
; KNL_X32-NEXT: kshiftrw $4, %k0, %k1
; KNL_X32-NEXT: kmovw %k1, %edx
; KNL_X32-NEXT: kshiftrw $5, %k0, %k1
@@ -1569,67 +1569,67 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; KNL_X32-NEXT: kmovw %k0, %ebx
; KNL_X32-NEXT: andl $1, %ebx
; KNL_X32-NEXT: andl $1, %ebp
-; KNL_X32-NEXT: leal (%ebx,%ebp,2), %ebx
-; KNL_X32-NEXT: kmovw %k1, %ebp
+; KNL_X32-NEXT: leal (%ebx,%ebp,2), %ebp
+; KNL_X32-NEXT: kmovw %k1, %ebx
; KNL_X32-NEXT: kshiftrw $7, %k0, %k1
-; KNL_X32-NEXT: andl $1, %esi
-; KNL_X32-NEXT: leal (%ebx,%esi,4), %ebx
-; KNL_X32-NEXT: kmovw %k1, %esi
-; KNL_X32-NEXT: kshiftrw $8, %k0, %k1
; KNL_X32-NEXT: andl $1, %edi
-; KNL_X32-NEXT: leal (%ebx,%edi,8), %ebx
+; KNL_X32-NEXT: leal (%ebp,%edi,4), %ebp
; KNL_X32-NEXT: kmovw %k1, %edi
+; KNL_X32-NEXT: kshiftrw $8, %k0, %k1
+; KNL_X32-NEXT: andl $1, %esi
+; KNL_X32-NEXT: leal (%ebp,%esi,8), %ebp
+; KNL_X32-NEXT: kmovw %k1, %esi
; KNL_X32-NEXT: kshiftrw $9, %k0, %k1
; KNL_X32-NEXT: andl $1, %edx
; KNL_X32-NEXT: shll $4, %edx
-; KNL_X32-NEXT: orl %ebx, %edx
-; KNL_X32-NEXT: kmovw %k1, %ebx
-; KNL_X32-NEXT: kshiftrw $10, %k0, %k1
; KNL_X32-NEXT: andl $1, %ecx
; KNL_X32-NEXT: shll $5, %ecx
-; KNL_X32-NEXT: orl %edx, %ecx
+; KNL_X32-NEXT: addl %edx, %ecx
; KNL_X32-NEXT: kmovw %k1, %edx
-; KNL_X32-NEXT: kshiftrw $11, %k0, %k1
-; KNL_X32-NEXT: andl $1, %ebp
-; KNL_X32-NEXT: shll $6, %ebp
-; KNL_X32-NEXT: andl $1, %esi
-; KNL_X32-NEXT: shll $7, %esi
-; KNL_X32-NEXT: orl %ebp, %esi
+; KNL_X32-NEXT: kshiftrw $10, %k0, %k1
+; KNL_X32-NEXT: addl %ebp, %ecx
; KNL_X32-NEXT: kmovw %k1, %ebp
-; KNL_X32-NEXT: kshiftrw $12, %k0, %k1
-; KNL_X32-NEXT: andl $1, %edi
-; KNL_X32-NEXT: shll $8, %edi
-; KNL_X32-NEXT: orl %esi, %edi
-; KNL_X32-NEXT: kmovw %k1, %esi
-; KNL_X32-NEXT: kshiftrw $13, %k0, %k1
+; KNL_X32-NEXT: kshiftrw $11, %k0, %k1
; KNL_X32-NEXT: andl $1, %ebx
-; KNL_X32-NEXT: shll $9, %ebx
-; KNL_X32-NEXT: orl %edi, %ebx
+; KNL_X32-NEXT: shll $6, %ebx
+; KNL_X32-NEXT: andl $1, %edi
+; KNL_X32-NEXT: shll $7, %edi
+; KNL_X32-NEXT: orl %ebx, %edi
+; KNL_X32-NEXT: kmovw %k1, %ebx
+; KNL_X32-NEXT: kshiftrw $12, %k0, %k1
+; KNL_X32-NEXT: andl $1, %esi
+; KNL_X32-NEXT: shll $8, %esi
+; KNL_X32-NEXT: orl %edi, %esi
; KNL_X32-NEXT: kmovw %k1, %edi
-; KNL_X32-NEXT: kshiftrw $14, %k0, %k1
+; KNL_X32-NEXT: kshiftrw $13, %k0, %k1
; KNL_X32-NEXT: andl $1, %edx
-; KNL_X32-NEXT: shll $10, %edx
-; KNL_X32-NEXT: orl %ebx, %edx
-; KNL_X32-NEXT: kmovw %k1, %ebx
-; KNL_X32-NEXT: kshiftrw $15, %k0, %k0
+; KNL_X32-NEXT: shll $9, %edx
+; KNL_X32-NEXT: orl %esi, %edx
+; KNL_X32-NEXT: kmovw %k1, %esi
+; KNL_X32-NEXT: kshiftrw $14, %k0, %k1
; KNL_X32-NEXT: orl %ecx, %edx
-; KNL_X32-NEXT: kmovw %k0, %ecx
+; KNL_X32-NEXT: kmovw %k1, %ecx
+; KNL_X32-NEXT: kshiftrw $15, %k0, %k0
; KNL_X32-NEXT: andl $1, %ebp
-; KNL_X32-NEXT: shll $11, %ebp
-; KNL_X32-NEXT: andl $1, %esi
-; KNL_X32-NEXT: shll $12, %esi
-; KNL_X32-NEXT: orl %ebp, %esi
-; KNL_X32-NEXT: andl $1, %edi
-; KNL_X32-NEXT: shll $13, %edi
-; KNL_X32-NEXT: orl %esi, %edi
+; KNL_X32-NEXT: shll $10, %ebp
; KNL_X32-NEXT: andl $1, %ebx
-; KNL_X32-NEXT: shll $14, %ebx
-; KNL_X32-NEXT: orl %edi, %ebx
+; KNL_X32-NEXT: shll $11, %ebx
+; KNL_X...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
[[fallthrough]]; | ||
case ISD::ADD: | ||
if (Opcode == ISD::ADD && matchBitExtract(Node)) | ||
return; | ||
|
||
// Convert addlike to add before final selection. Do this before we drop | ||
// flags like `disjoint`. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Where does the flag get dropped?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
in SelectCode i assume, when we get to or_is_add
didn't have disjoint
or ISD::OR
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you have a specific test case I can look at?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You're right, I re-checked. I must have made a mistake earlier (edit: thought disjoint
would dump, but apparently not, but isADDLike
returns true and opcode is indeed ISD::OR
).
It doesn't seem to work though (I thought I had the reason, but now I'm stumped).
If I change it to your code 104 of the changes files revert back.
Why would that be the case?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We have special pseudo instruction we use for or_is_add. ADD_DB. It's convertible to LEA but it always converts back to OR if no LEA is needed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ahh, so this patch is essentially not useful, it should just be what you have.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ill update, thank you for explaining.
This seems like maybe the better patch
|
// Convert addlike to add before final selection. Do this before we drop | ||
// flags like `disjoint`. | ||
// NB: Conversion to add is preferable so we use `lea` in codegen. | ||
if (Opcode != ISD::ADD && NVT.isScalarInteger() && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should move it ahead since we never do for ISD::ADD
?
I tried that, but it didn't work. What seemed to be the case is when we handle |
518caa6
to
bdee43a
Compare
Did you mean |
@@ -1,3 +1,4 @@ | |||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
regenerate as a precommit first
(INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>; | ||
(INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>; | ||
|
||
// We convert or -> add when the or is disjoint so need to handle for add as well. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
but these are and patterns?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These are (or (and...), (zextload))
?
Edit: What im trying to say is we need to combine (add (and...), (zextload))
as well. Ill update the comment to make it more clear.
3a677d3
to
9fdab97
Compare
; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 | ||
; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It makes no sense that there are any amdgpu test diffs here
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You're right, those tests are just failing independently. Ill drop.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fixed.
9fdab97
to
3ffe549
Compare
3ffe549
to
093818c
Compare
rebased |
(INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>; | ||
(INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>; | ||
|
||
// Same pattern as above but supporting `add` as the join |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are these patterns tested? Wouldn't DAGCombine convert these to adds
to or
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can drop (they where needed w/ the old impl that converted all disjoint or -> add instead of just the ones that can become LEA).
KnownBits Known0 = CurDAG->computeKnownBits(N->getOperand(0), 0); | ||
KnownBits Known1 = CurDAG->computeKnownBits(N->getOperand(1), 0); | ||
return (~Known0.Zero & ~Known1.Zero) == 0; | ||
return N->getOpcode() == ISD::OR && CurDAG->isADDLike(SDValue(N, 0)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do you need to check the opcode? Doesn't (or node:$lhs, node:$rhs)
guarantee the opcode is OR?
Remove bespoke logic and use `isADDLike`.
093818c
to
b5a6ff4
Compare
Remove bespoke logic and use
isADDLike
.