-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[X86] Improve transform for add-like nodes to add
#83691
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-x86 Author: None (goldsteinn) Changes: We previously did this only in tablegen, after we would have already Patch is 337.50 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/83691.diff 115 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 5cbd9ab4dc2d6c..9f34a4e1870f71 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5294,11 +5294,26 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
return;
if (tryVPTERNLOG(Node))
return;
-
[[fallthrough]];
case ISD::ADD:
if (Opcode == ISD::ADD && matchBitExtract(Node))
return;
+
+ // Convert addlike to add before final selection. Do this before we drop
+ // flags like `disjoint`.
+ // NB: Conversion to add is preferable so we use `lea` in codegen.
+ if (Opcode != ISD::ADD && NVT.isScalarInteger() &&
+ (Opcode == ISD::OR ||
+ (NVT == MVT::i8 || NVT == MVT::i16 || NVT == MVT::i32)) &&
+ CurDAG->isADDLike(SDValue(Node, 0))
+ ) {
+ SDValue AsAdd = CurDAG->getNode(ISD::ADD, SDLoc(Node), NVT,
+ Node->getOperand(0), Node->getOperand(1));
+ ReplaceUses(SDValue(Node, 0), AsAdd);
+ CurDAG->RemoveDeadNode(Node);
+ Node = AsAdd.getNode();
+ Opcode = ISD::ADD;
+ }
[[fallthrough]];
case ISD::SUB: {
// Try to avoid folding immediates with multiple uses for optsize.
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index f393f86e64aadd..b31c5be87a5839 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1574,7 +1574,24 @@ def : Pat<(or (and GR64:$dst, -65536),
def : Pat<(or (and GR32:$dst, -65536),
(i32 (zextloadi16 addr:$src))),
- (INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;
+ (INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;
+
+// We convert or -> add when the or is disjoint so need to handle for add as well.
+def : Pat<(add (and GR64:$dst, -256),
+ (i64 (zextloadi8 addr:$src))),
+ (INSERT_SUBREG (i64 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>;
+
+def : Pat<(add (and GR32:$dst, -256),
+ (i32 (zextloadi8 addr:$src))),
+ (INSERT_SUBREG (i32 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>;
+
+def : Pat<(add (and GR64:$dst, -65536),
+ (i64 (zextloadi16 addr:$src))),
+ (INSERT_SUBREG (i64 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;
+
+def : Pat<(add (and GR32:$dst, -65536),
+ (i32 (zextloadi16 addr:$src))),
+ (INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;
// To avoid needing to materialize an immediate in a register, use a 32-bit and
// with implicit zero-extension instead of a 64-bit and if the immediate has at
diff --git a/llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll b/llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
index 609be3bb2e54f0..50e736ac68d29e 100644
--- a/llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
+++ b/llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s | FileCheck %s
; Check that the shr(shl X, 56), 48) is not mistakenly turned into
@@ -16,11 +17,13 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-unknown-linux-gnu"
define i64 @foo(i64 %b) nounwind readnone {
-entry:
; CHECK-LABEL: foo:
-; CHECK: movsbq %dil, %rax
-; CHECK: shlq $8, %rax
-; CHECK: orq $1, %rax
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movsbq %dil, %rax
+; CHECK-NEXT: shlq $8, %rax
+; CHECK-NEXT: incq %rax
+; CHECK-NEXT: retq
+entry:
%shl = shl i64 %b, 56 ; <i64> [#uses=1]
%shr = ashr i64 %shl, 48 ; <i64> [#uses=1]
%add5 = or i64 %shr, 1 ; <i64> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/3addr-or.ll b/llvm/test/CodeGen/X86/3addr-or.ll
index 65f6d2b4123e8e..1f466afcadc9ca 100644
--- a/llvm/test/CodeGen/X86/3addr-or.ll
+++ b/llvm/test/CodeGen/X86/3addr-or.ll
@@ -24,7 +24,7 @@ define i64 @test2(i8 %A, i8 %B) nounwind {
; CHECK-NEXT: andl $48, %edi
; CHECK-NEXT: movzbl %sil, %eax
; CHECK-NEXT: shrl $4, %eax
-; CHECK-NEXT: orl %edi, %eax
+; CHECK-NEXT: addl %edi, %eax
; CHECK-NEXT: retq
%C = zext i8 %A to i64
%D = shl i64 %C, 4
@@ -42,7 +42,7 @@ define void @test3(i32 %x, ptr %P) nounwind readnone ssp {
; CHECK-LABEL: test3:
; CHECK: # %bb.0:
; CHECK-NEXT: shll $5, %edi
-; CHECK-NEXT: orl $3, %edi
+; CHECK-NEXT: addl $3, %edi
; CHECK-NEXT: movl %edi, (%rsi)
; CHECK-NEXT: retq
%t0 = shl i32 %x, 5
@@ -71,7 +71,7 @@ define void @test5(i32 %a, i32 %b, ptr nocapture %P) nounwind ssp {
; CHECK: # %bb.0:
; CHECK-NEXT: andl $6, %edi
; CHECK-NEXT: andl $16, %esi
-; CHECK-NEXT: orl %edi, %esi
+; CHECK-NEXT: addl %edi, %esi
; CHECK-NEXT: movl %esi, (%rdx)
; CHECK-NEXT: retq
%and = and i32 %a, 6
diff --git a/llvm/test/CodeGen/X86/addcarry2.ll b/llvm/test/CodeGen/X86/addcarry2.ll
index 0338577dbddc2b..1a5d0f4fe45416 100644
--- a/llvm/test/CodeGen/X86/addcarry2.ll
+++ b/llvm/test/CodeGen/X86/addcarry2.ll
@@ -138,7 +138,7 @@ define void @adc_load_store_32_127(ptr inreg %x, ptr inreg %x2, i32 inreg %y) no
; X64-NEXT: movl (%rdi), %eax # encoding: [0x8b,0x07]
; X64-NEXT: shlq $32, %rax # encoding: [0x48,0xc1,0xe0,0x20]
; X64-NEXT: movl %edx, %ecx # encoding: [0x89,0xd1]
-; X64-NEXT: orq %rax, %rcx # encoding: [0x48,0x09,0xc1]
+; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
; X64-NEXT: movabsq $545460846593, %rax # encoding: [0x48,0xb8,0x01,0x00,0x00,0x00,0x7f,0x00,0x00,0x00]
; X64-NEXT: # imm = 0x7F00000001
; X64-NEXT: xorl %edx, %edx # encoding: [0x31,0xd2]
@@ -178,7 +178,7 @@ define void @adc_load_store_32_128(ptr inreg %x, ptr inreg %x2, i32 inreg %y) no
; X64-NEXT: movl (%rdi), %eax # encoding: [0x8b,0x07]
; X64-NEXT: shlq $32, %rax # encoding: [0x48,0xc1,0xe0,0x20]
; X64-NEXT: movl %edx, %ecx # encoding: [0x89,0xd1]
-; X64-NEXT: orq %rax, %rcx # encoding: [0x48,0x09,0xc1]
+; X64-NEXT: addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
; X64-NEXT: movabsq $549755813889, %rax # encoding: [0x48,0xb8,0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00]
; X64-NEXT: # imm = 0x8000000001
; X64-NEXT: xorl %edx, %edx # encoding: [0x31,0xd2]
diff --git a/llvm/test/CodeGen/X86/and-or-fold.ll b/llvm/test/CodeGen/X86/and-or-fold.ll
index 1bb5fdeebac71c..4071b364a25c3b 100644
--- a/llvm/test/CodeGen/X86/and-or-fold.ll
+++ b/llvm/test/CodeGen/X86/and-or-fold.ll
@@ -45,7 +45,7 @@ define i32 @test1(i32 %x, i16 %y) {
; DARWIN-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; DARWIN-NEXT: movl {{[0-9]+}}(%esp), %eax
; DARWIN-NEXT: shll $16, %eax
-; DARWIN-NEXT: orl %ecx, %eax
+; DARWIN-NEXT: addl %ecx, %eax
; DARWIN-NEXT: andl $16711807, %eax ## imm = 0xFF007F
; DARWIN-NEXT: retl
;
@@ -54,7 +54,7 @@ define i32 @test1(i32 %x, i16 %y) {
; DARWIN-OPT-NEXT: andl $127, %esi
; DARWIN-OPT-NEXT: movzbl %dil, %eax
; DARWIN-OPT-NEXT: shll $16, %eax
-; DARWIN-OPT-NEXT: orl %esi, %eax
+; DARWIN-OPT-NEXT: addl %esi, %eax
; DARWIN-OPT-NEXT: retq
%tmp1 = zext i16 %y to i32
%tmp2 = and i32 %tmp1, 127
diff --git a/llvm/test/CodeGen/X86/andimm8.ll b/llvm/test/CodeGen/X86/andimm8.ll
index 6242d4f4c222bb..506e28300e71b0 100644
--- a/llvm/test/CodeGen/X86/andimm8.ll
+++ b/llvm/test/CodeGen/X86/andimm8.ll
@@ -29,7 +29,7 @@ define void @foo(i64 %zed, ptr %x) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x08]
; X86-NEXT: andl $-4, %ecx # encoding: [0x83,0xe1,0xfc]
-; X86-NEXT: orl $2, %ecx # encoding: [0x83,0xc9,0x02]
+; X86-NEXT: addl $2, %ecx # encoding: [0x83,0xc1,0x02]
; X86-NEXT: movl %edx, 4(%eax) # encoding: [0x89,0x50,0x04]
; X86-NEXT: movl %ecx, (%eax) # encoding: [0x89,0x08]
; X86-NEXT: retl # encoding: [0xc3]
@@ -37,7 +37,7 @@ define void @foo(i64 %zed, ptr %x) nounwind {
; X64-LABEL: foo:
; X64: # %bb.0:
; X64-NEXT: andq $-4, %rdi # encoding: [0x48,0x83,0xe7,0xfc]
-; X64-NEXT: orq $2, %rdi # encoding: [0x48,0x83,0xcf,0x02]
+; X64-NEXT: addq $2, %rdi # encoding: [0x48,0x83,0xc7,0x02]
; X64-NEXT: movq %rdi, (%rsi) # encoding: [0x48,0x89,0x3e]
; X64-NEXT: retq # encoding: [0xc3]
%t1 = and i64 %zed, -4
diff --git a/llvm/test/CodeGen/X86/atomic-unordered.ll b/llvm/test/CodeGen/X86/atomic-unordered.ll
index df123be53474f0..903951dd5a8cff 100644
--- a/llvm/test/CodeGen/X86/atomic-unordered.ll
+++ b/llvm/test/CodeGen/X86/atomic-unordered.ll
@@ -2359,7 +2359,7 @@ define i16 @load_combine(ptr %p) {
; CHECK-O3-NEXT: movzbl (%rdi), %ecx
; CHECK-O3-NEXT: movzbl 1(%rdi), %eax
; CHECK-O3-NEXT: shll $8, %eax
-; CHECK-O3-NEXT: orl %ecx, %eax
+; CHECK-O3-NEXT: addl %ecx, %eax
; CHECK-O3-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-O3-NEXT: retq
%v1 = load atomic i8, ptr %p unordered, align 2
diff --git a/llvm/test/CodeGen/X86/avx512-calling-conv.ll b/llvm/test/CodeGen/X86/avx512-calling-conv.ll
index b39b089faa2a5e..b4c37a2e34d95d 100644
--- a/llvm/test/CodeGen/X86/avx512-calling-conv.ll
+++ b/llvm/test/CodeGen/X86/avx512-calling-conv.ll
@@ -910,13 +910,13 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; KNL-NEXT: kandw %k2, %k0, %k0
; KNL-NEXT: kmovw %r10d, %k2
; KNL-NEXT: kandw %k1, %k2, %k1
-; KNL-NEXT: kmovw %k1, %edx
+; KNL-NEXT: kmovw %k1, %esi
; KNL-NEXT: kshiftrw $1, %k0, %k1
; KNL-NEXT: kmovw %k1, %r9d
; KNL-NEXT: kshiftrw $2, %k0, %k1
; KNL-NEXT: kmovw %k1, %r8d
; KNL-NEXT: kshiftrw $3, %k0, %k1
-; KNL-NEXT: kmovw %k1, %esi
+; KNL-NEXT: kmovw %k1, %edx
; KNL-NEXT: kshiftrw $4, %k0, %k1
; KNL-NEXT: kmovw %k1, %edi
; KNL-NEXT: kshiftrw $5, %k0, %k1
@@ -928,9 +928,9 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; KNL-NEXT: kshiftrw $8, %k0, %k1
; KNL-NEXT: kmovw %k1, %ebp
; KNL-NEXT: kshiftrw $9, %k0, %k1
-; KNL-NEXT: kmovw %k1, %r14d
-; KNL-NEXT: kshiftrw $10, %k0, %k1
; KNL-NEXT: kmovw %k1, %r11d
+; KNL-NEXT: kshiftrw $10, %k0, %k1
+; KNL-NEXT: kmovw %k1, %r14d
; KNL-NEXT: kshiftrw $11, %k0, %k1
; KNL-NEXT: kmovw %k1, %r15d
; KNL-NEXT: kshiftrw $12, %k0, %k1
@@ -938,25 +938,25 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; KNL-NEXT: kshiftrw $13, %k0, %k1
; KNL-NEXT: kmovw %k1, %r13d
; KNL-NEXT: kshiftrw $14, %k0, %k1
-; KNL-NEXT: andl $1, %edx
-; KNL-NEXT: movb %dl, 2(%rax)
-; KNL-NEXT: kmovw %k0, %edx
-; KNL-NEXT: andl $1, %edx
+; KNL-NEXT: andl $1, %esi
+; KNL-NEXT: movb %sil, 2(%rax)
+; KNL-NEXT: kmovw %k0, %esi
+; KNL-NEXT: andl $1, %esi
; KNL-NEXT: andl $1, %r9d
-; KNL-NEXT: leal (%rdx,%r9,2), %r9d
-; KNL-NEXT: kmovw %k1, %edx
+; KNL-NEXT: leal (%rsi,%r9,2), %r9d
+; KNL-NEXT: kmovw %k1, %esi
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: andl $1, %r8d
; KNL-NEXT: leal (%r9,%r8,4), %r9d
; KNL-NEXT: kmovw %k0, %r8d
-; KNL-NEXT: andl $1, %esi
-; KNL-NEXT: leal (%r9,%rsi,8), %esi
+; KNL-NEXT: andl $1, %edx
+; KNL-NEXT: leal (%r9,%rdx,8), %edx
; KNL-NEXT: andl $1, %edi
; KNL-NEXT: shll $4, %edi
-; KNL-NEXT: orl %esi, %edi
; KNL-NEXT: andl $1, %ecx
; KNL-NEXT: shll $5, %ecx
-; KNL-NEXT: orl %edi, %ecx
+; KNL-NEXT: addl %edi, %ecx
+; KNL-NEXT: addl %edx, %ecx
; KNL-NEXT: andl $1, %r10d
; KNL-NEXT: shll $6, %r10d
; KNL-NEXT: andl $1, %ebx
@@ -965,28 +965,28 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; KNL-NEXT: andl $1, %ebp
; KNL-NEXT: shll $8, %ebp
; KNL-NEXT: orl %ebx, %ebp
-; KNL-NEXT: andl $1, %r14d
-; KNL-NEXT: shll $9, %r14d
-; KNL-NEXT: orl %ebp, %r14d
; KNL-NEXT: andl $1, %r11d
-; KNL-NEXT: shll $10, %r11d
-; KNL-NEXT: orl %r14d, %r11d
+; KNL-NEXT: shll $9, %r11d
+; KNL-NEXT: orl %ebp, %r11d
; KNL-NEXT: orl %ecx, %r11d
+; KNL-NEXT: andl $1, %r14d
+; KNL-NEXT: shll $10, %r14d
; KNL-NEXT: andl $1, %r15d
; KNL-NEXT: shll $11, %r15d
+; KNL-NEXT: orl %r14d, %r15d
; KNL-NEXT: andl $1, %r12d
; KNL-NEXT: shll $12, %r12d
; KNL-NEXT: orl %r15d, %r12d
; KNL-NEXT: andl $1, %r13d
; KNL-NEXT: shll $13, %r13d
; KNL-NEXT: orl %r12d, %r13d
-; KNL-NEXT: andl $1, %edx
-; KNL-NEXT: shll $14, %edx
-; KNL-NEXT: orl %r13d, %edx
+; KNL-NEXT: andl $1, %esi
+; KNL-NEXT: shll $14, %esi
+; KNL-NEXT: orl %r13d, %esi
+; KNL-NEXT: orl %r11d, %esi
; KNL-NEXT: andl $1, %r8d
; KNL-NEXT: shll $15, %r8d
-; KNL-NEXT: orl %edx, %r8d
-; KNL-NEXT: orl %r11d, %r8d
+; KNL-NEXT: orl %esi, %r8d
; KNL-NEXT: movw %r8w, (%rax)
; KNL-NEXT: popq %rbx
; KNL-NEXT: popq %r12
@@ -1223,13 +1223,13 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; SKX-NEXT: kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload
; SKX-NEXT: kandd %k1, %k0, %k0
; SKX-NEXT: kshiftrd $16, %k0, %k1
-; SKX-NEXT: kmovd %k1, %edx
+; SKX-NEXT: kmovd %k1, %esi
; SKX-NEXT: kshiftrd $1, %k0, %k1
; SKX-NEXT: kmovd %k1, %r9d
; SKX-NEXT: kshiftrd $2, %k0, %k1
; SKX-NEXT: kmovd %k1, %r8d
; SKX-NEXT: kshiftrd $3, %k0, %k1
-; SKX-NEXT: kmovd %k1, %esi
+; SKX-NEXT: kmovd %k1, %edx
; SKX-NEXT: kshiftrd $4, %k0, %k1
; SKX-NEXT: kmovd %k1, %edi
; SKX-NEXT: kshiftrd $5, %k0, %k1
@@ -1241,9 +1241,9 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; SKX-NEXT: kshiftrd $8, %k0, %k1
; SKX-NEXT: kmovd %k1, %ebp
; SKX-NEXT: kshiftrd $9, %k0, %k1
-; SKX-NEXT: kmovd %k1, %r14d
-; SKX-NEXT: kshiftrd $10, %k0, %k1
; SKX-NEXT: kmovd %k1, %r11d
+; SKX-NEXT: kshiftrd $10, %k0, %k1
+; SKX-NEXT: kmovd %k1, %r14d
; SKX-NEXT: kshiftrd $11, %k0, %k1
; SKX-NEXT: kmovd %k1, %r15d
; SKX-NEXT: kshiftrd $12, %k0, %k1
@@ -1251,25 +1251,25 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; SKX-NEXT: kshiftrd $13, %k0, %k1
; SKX-NEXT: kmovd %k1, %r13d
; SKX-NEXT: kshiftrd $14, %k0, %k1
-; SKX-NEXT: andl $1, %edx
-; SKX-NEXT: movb %dl, 2(%rax)
-; SKX-NEXT: kmovd %k0, %edx
-; SKX-NEXT: andl $1, %edx
+; SKX-NEXT: andl $1, %esi
+; SKX-NEXT: movb %sil, 2(%rax)
+; SKX-NEXT: kmovd %k0, %esi
+; SKX-NEXT: andl $1, %esi
; SKX-NEXT: andl $1, %r9d
-; SKX-NEXT: leal (%rdx,%r9,2), %r9d
-; SKX-NEXT: kmovd %k1, %edx
+; SKX-NEXT: leal (%rsi,%r9,2), %r9d
+; SKX-NEXT: kmovd %k1, %esi
; SKX-NEXT: kshiftrd $15, %k0, %k0
; SKX-NEXT: andl $1, %r8d
; SKX-NEXT: leal (%r9,%r8,4), %r9d
; SKX-NEXT: kmovd %k0, %r8d
-; SKX-NEXT: andl $1, %esi
-; SKX-NEXT: leal (%r9,%rsi,8), %esi
+; SKX-NEXT: andl $1, %edx
+; SKX-NEXT: leal (%r9,%rdx,8), %edx
; SKX-NEXT: andl $1, %edi
; SKX-NEXT: shll $4, %edi
-; SKX-NEXT: orl %esi, %edi
; SKX-NEXT: andl $1, %ecx
; SKX-NEXT: shll $5, %ecx
-; SKX-NEXT: orl %edi, %ecx
+; SKX-NEXT: addl %edi, %ecx
+; SKX-NEXT: addl %edx, %ecx
; SKX-NEXT: andl $1, %r10d
; SKX-NEXT: shll $6, %r10d
; SKX-NEXT: andl $1, %ebx
@@ -1278,28 +1278,28 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; SKX-NEXT: andl $1, %ebp
; SKX-NEXT: shll $8, %ebp
; SKX-NEXT: orl %ebx, %ebp
-; SKX-NEXT: andl $1, %r14d
-; SKX-NEXT: shll $9, %r14d
-; SKX-NEXT: orl %ebp, %r14d
; SKX-NEXT: andl $1, %r11d
-; SKX-NEXT: shll $10, %r11d
-; SKX-NEXT: orl %r14d, %r11d
+; SKX-NEXT: shll $9, %r11d
+; SKX-NEXT: orl %ebp, %r11d
; SKX-NEXT: orl %ecx, %r11d
+; SKX-NEXT: andl $1, %r14d
+; SKX-NEXT: shll $10, %r14d
; SKX-NEXT: andl $1, %r15d
; SKX-NEXT: shll $11, %r15d
+; SKX-NEXT: orl %r14d, %r15d
; SKX-NEXT: andl $1, %r12d
; SKX-NEXT: shll $12, %r12d
; SKX-NEXT: orl %r15d, %r12d
; SKX-NEXT: andl $1, %r13d
; SKX-NEXT: shll $13, %r13d
; SKX-NEXT: orl %r12d, %r13d
-; SKX-NEXT: andl $1, %edx
-; SKX-NEXT: shll $14, %edx
-; SKX-NEXT: orl %r13d, %edx
+; SKX-NEXT: andl $1, %esi
+; SKX-NEXT: shll $14, %esi
+; SKX-NEXT: orl %r13d, %esi
+; SKX-NEXT: orl %r11d, %esi
; SKX-NEXT: andl $1, %r8d
; SKX-NEXT: shll $15, %r8d
-; SKX-NEXT: orl %edx, %r8d
-; SKX-NEXT: orl %r11d, %r8d
+; SKX-NEXT: orl %esi, %r8d
; SKX-NEXT: movw %r8w, (%rax)
; SKX-NEXT: popq %rbx
; SKX-NEXT: popq %r12
@@ -1556,9 +1556,9 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; KNL_X32-NEXT: kshiftrw $1, %k0, %k1
; KNL_X32-NEXT: kmovw %k1, %ebp
; KNL_X32-NEXT: kshiftrw $2, %k0, %k1
-; KNL_X32-NEXT: kmovw %k1, %esi
-; KNL_X32-NEXT: kshiftrw $3, %k0, %k1
; KNL_X32-NEXT: kmovw %k1, %edi
+; KNL_X32-NEXT: kshiftrw $3, %k0, %k1
+; KNL_X32-NEXT: kmovw %k1, %esi
; KNL_X32-NEXT: kshiftrw $4, %k0, %k1
; KNL_X32-NEXT: kmovw %k1, %edx
; KNL_X32-NEXT: kshiftrw $5, %k0, %k1
@@ -1569,67 +1569,67 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; KNL_X32-NEXT: kmovw %k0, %ebx
; KNL_X32-NEXT: andl $1, %ebx
; KNL_X32-NEXT: andl $1, %ebp
-; KNL_X32-NEXT: leal (%ebx,%ebp,2), %ebx
-; KNL_X32-NEXT: kmovw %k1, %ebp
+; KNL_X32-NEXT: leal (%ebx,%ebp,2), %ebp
+; KNL_X32-NEXT: kmovw %k1, %ebx
; KNL_X32-NEXT: kshiftrw $7, %k0, %k1
-; KNL_X32-NEXT: andl $1, %esi
-; KNL_X32-NEXT: leal (%ebx,%esi,4), %ebx
-; KNL_X32-NEXT: kmovw %k1, %esi
-; KNL_X32-NEXT: kshiftrw $8, %k0, %k1
; KNL_X32-NEXT: andl $1, %edi
-; KNL_X32-NEXT: leal (%ebx,%edi,8), %ebx
+; KNL_X32-NEXT: leal (%ebp,%edi,4), %ebp
; KNL_X32-NEXT: kmovw %k1, %edi
+; KNL_X32-NEXT: kshiftrw $8, %k0, %k1
+; KNL_X32-NEXT: andl $1, %esi
+; KNL_X32-NEXT: leal (%ebp,%esi,8), %ebp
+; KNL_X32-NEXT: kmovw %k1, %esi
; KNL_X32-NEXT: kshiftrw $9, %k0, %k1
; KNL_X32-NEXT: andl $1, %edx
; KNL_X32-NEXT: shll $4, %edx
-; KNL_X32-NEXT: orl %ebx, %edx
-; KNL_X32-NEXT: kmovw %k1, %ebx
-; KNL_X32-NEXT: kshiftrw $10, %k0, %k1
; KNL_X32-NEXT: andl $1, %ecx
; KNL_X32-NEXT: shll $5, %ecx
-; KNL_X32-NEXT: orl %edx, %ecx
+; KNL_X32-NEXT: addl %edx, %ecx
; KNL_X32-NEXT: kmovw %k1, %edx
-; KNL_X32-NEXT: kshiftrw $11, %k0, %k1
-; KNL_X32-NEXT: andl $1, %ebp
-; KNL_X32-NEXT: shll $6, %ebp
-; KNL_X32-NEXT: andl $1, %esi
-; KNL_X32-NEXT: shll $7, %esi
-; KNL_X32-NEXT: orl %ebp, %esi
+; KNL_X32-NEXT: kshiftrw $10, %k0, %k1
+; KNL_X32-NEXT: addl %ebp, %ecx
; KNL_X32-NEXT: kmovw %k1, %ebp
-; KNL_X32-NEXT: kshiftrw $12, %k0, %k1
-; KNL_X32-NEXT: andl $1, %edi
-; KNL_X32-NEXT: shll $8, %edi
-; KNL_X32-NEXT: orl %esi, %edi
-; KNL_X32-NEXT: kmovw %k1, %esi
-; KNL_X32-NEXT: kshiftrw $13, %k0, %k1
+; KNL_X32-NEXT: kshiftrw $11, %k0, %k1
; KNL_X32-NEXT: andl $1, %ebx
-; KNL_X32-NEXT: shll $9, %ebx
-; KNL_X32-NEXT: orl %edi, %ebx
+; KNL_X32-NEXT: shll $6, %ebx
+; KNL_X32-NEXT: andl $1, %edi
+; KNL_X32-NEXT: shll $7, %edi
+; KNL_X32-NEXT: orl %ebx, %edi
+; KNL_X32-NEXT: kmovw %k1, %ebx
+; KNL_X32-NEXT: kshiftrw $12, %k0, %k1
+; KNL_X32-NEXT: andl $1, %esi
+; KNL_X32-NEXT: shll $8, %esi
+; KNL_X32-NEXT: orl %edi, %esi
; KNL_X32-NEXT: kmovw %k1, %edi
-; KNL_X32-NEXT: kshiftrw $14, %k0, %k1
+; KNL_X32-NEXT: kshiftrw $13, %k0, %k1
; KNL_X32-NEXT: andl $1, %edx
-; KNL_X32-NEXT: shll $10, %edx
-; KNL_X32-NEXT: orl %ebx, %edx
-; KNL_X32-NEXT: kmovw %k1, %ebx
-; KNL_X32-NEXT: kshiftrw $15, %k0, %k0
+; KNL_X32-NEXT: shll $9, %edx
+; KNL_X32-NEXT: orl %esi, %edx
+; KNL_X32-NEXT: kmovw %k1, %esi
+; KNL_X32-NEXT: kshiftrw $14, %k0, %k1
; KNL_X32-NEXT: orl %ecx, %edx
-; KNL_X32-NEXT: kmovw %k0, %ecx
+; KNL_X32-NEXT: kmovw %k1, %ecx
+; KNL_X32-NEXT: kshiftrw $15, %k0, %k0
; KNL_X32-NEXT: andl $1, %ebp
-; KNL_X32-NEXT: shll $11, %ebp
-; KNL_X32-NEXT: andl $1, %esi
-; KNL_X32-NEXT: shll $12, %esi
-; KNL_X32-NEXT: orl %ebp, %esi
-; KNL_X32-NEXT: andl $1, %edi
-; KNL_X32-NEXT: shll $13, %edi
-; KNL_X32-NEXT: orl %esi, %edi
+; KNL_X32-NEXT: shll $10, %ebp
; KNL_X32-NEXT: andl $1, %ebx
-; KNL_X32-NEXT: shll $14, %ebx
-; KNL_X32-NEXT: orl %edi, %ebx
+; KNL_X32-NEXT: shll $11, %ebx
+; KNL_X...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
[[fallthrough]]; | ||
case ISD::ADD: | ||
if (Opcode == ISD::ADD && matchBitExtract(Node)) | ||
return; | ||
|
||
// Convert addlike to add before final selection. Do this before we drop | ||
// flags like `disjoint`. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Where does the flag get dropped?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
in SelectCode i assume, when we get to or_is_add
didn't have disjoint
or ISD::OR
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you have a specific test case I can look at?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You're right, I re-checked. I must have made a mistake earlier (edit: thought disjoint
would dump, but apparently not, but isADDLike
returns true and opcode is indeed ISD::OR
).
It doesn't seem to work though (I thought I had the reason, but now I'm stumped).
If I change it to your code 104 of the changes files revert back.
Why would that be the case?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We have special pseudo instruction we use for or_is_add. ADD_DB. It's convertible to LEA but it always converts back to OR if no LEA is needed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ahh, so this patch is essentially not useful, it should just be what you have.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ill update, thank you for explaining.
This seems like maybe the better patch
|
// Convert addlike to add before final selection. Do this before we drop | ||
// flags like `disjoint`. | ||
// NB: Conversion to add is preferable so we use `lea` in codegen. | ||
if (Opcode != ISD::ADD && NVT.isScalarInteger() && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should move it ahead since we never do for ISD::ADD
?
I tried that, but it didn't work. What seemed to be the case is when we handle |
518caa6
to
bdee43a
Compare
Did you mean |
@@ -1,3 +1,4 @@ | |||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
regenerate as a precommit first
(INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>; | ||
(INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>; | ||
|
||
// We convert or -> add when the or is disjoint so need to handle for add as well. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
but these are and patterns?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These are (or (and...), (zextload))
?
Edit: What im trying to say is we need to combine (add (and...), (zextload))
as well. Ill update the comment to make it more clear.
3a677d3
to
9fdab97
Compare
; SI-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647 | ||
; SI-NEXT: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY [[COPY]].sub1 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It makes no sense that there are any amdgpu test diffs here
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You're right, those tests are just failing independently. Ill drop.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fixed.
9fdab97
to
3ffe549
Compare
3ffe549
to
093818c
Compare
rebased |
(INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>; | ||
(INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>; | ||
|
||
// Same pattern as above but supporting `add` as the join |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are these patterns tested? Wouldn't DAGCombine convert these to adds
to or
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can drop (they where needed w/ the old impl that converted all disjoint or -> add instead of just the ones that can become LEA).
KnownBits Known0 = CurDAG->computeKnownBits(N->getOperand(0), 0); | ||
KnownBits Known1 = CurDAG->computeKnownBits(N->getOperand(1), 0); | ||
return (~Known0.Zero & ~Known1.Zero) == 0; | ||
return N->getOpcode() == ISD::OR && CurDAG->isADDLike(SDValue(N, 0)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do you need to check the opcode? Doesn't (or node:$lhs, node:$rhs)
guarantee the opcode is OR?
Remove bespoke logic and use `isADDLike`.
093818c
to
b5a6ff4
Compare
Remove bespoke logic and use
isADDLike
.