Skip to content

Commit

Permalink
[DAG] reassociateOpsCommutative - peek through bitcasts to find constants
Browse files Browse the repository at this point in the history

Now that FoldConstantArithmetic can fold bitcasted constants, we should peek through bitcasts of binop operands to try to find foldable constants.
  • Loading branch information
RKSimon committed Nov 11, 2021
1 parent b72727a commit 82b7436
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 70 deletions.
4 changes: 2 additions & 2 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Expand Up @@ -1066,8 +1066,8 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);

if (DAG.isConstantIntBuildVectorOrConstantInt(N01)) {
if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) {
if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1))) {
// Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
return DAG.getNode(Opc, DL, VT, N00, OpNode);
Expand Down
58 changes: 22 additions & 36 deletions llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll
Expand Up @@ -46,9 +46,7 @@ define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_zext(<2 x i32> %x, <2 x i32> %b) {
; CHECK-LABEL: add_v2i32_v2i64_zext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s6
; CHECK-NEXT: vmov.i64 q2, #0xffffffff
; CHECK-NEXT: vmov r1, s4
; CHECK-NEXT: vand q0, q0, q2
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: cmp r0, #0
Expand All @@ -58,7 +56,8 @@ define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_zext(<2 x i32> %x, <2 x i32> %b) {
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: vmov.i64 q1, #0xffffffff
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: vmov r0, r1, d1
; CHECK-NEXT: vmov r2, r3, d0
Expand Down Expand Up @@ -427,7 +426,6 @@ define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_zext(<2 x i16> %x, <2 x i16> %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i64 q2, #0xffff
; CHECK-NEXT: vand q1, q1, q2
; CHECK-NEXT: vand q0, q0, q2
; CHECK-NEXT: vmov r0, s6
; CHECK-NEXT: vmov r1, s4
; CHECK-NEXT: cmp r0, #0
Expand All @@ -439,12 +437,11 @@ define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_zext(<2 x i16> %x, <2 x i16> %b) {
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: vmov r0, r1, d1
; CHECK-NEXT: vmov r2, r3, d0
; CHECK-NEXT: vand q0, q0, q2
; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: vmov r2, r1, d0
; CHECK-NEXT: add r0, r2
; CHECK-NEXT: orrs r1, r3
; CHECK-NEXT: bx lr
entry:
%c = icmp eq <2 x i16> %b, zeroinitializer
Expand Down Expand Up @@ -1386,7 +1383,6 @@ define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_zext(<2 x i8> %x, <2 x i8> %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i64 q2, #0xff
; CHECK-NEXT: vand q1, q1, q2
; CHECK-NEXT: vand q0, q0, q2
; CHECK-NEXT: vmov r0, s6
; CHECK-NEXT: vmov r1, s4
; CHECK-NEXT: cmp r0, #0
Expand All @@ -1398,12 +1394,11 @@ define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_zext(<2 x i8> %x, <2 x i8> %b) {
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: vmov r0, r1, d1
; CHECK-NEXT: vmov r2, r3, d0
; CHECK-NEXT: vand q0, q0, q2
; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: vmov r2, r1, d0
; CHECK-NEXT: add r0, r2
; CHECK-NEXT: orrs r1, r3
; CHECK-NEXT: bx lr
entry:
%c = icmp eq <2 x i8> %b, zeroinitializer
Expand Down Expand Up @@ -1530,9 +1525,7 @@ define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_acc_zext(<2 x i32> %x, <2 x i32> %b,
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: vmov r2, s6
; CHECK-NEXT: vmov.i64 q2, #0xffffffff
; CHECK-NEXT: vmov r3, s4
; CHECK-NEXT: vand q0, q0, q2
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: cset r2, eq
; CHECK-NEXT: cmp r2, #0
Expand All @@ -1542,7 +1535,8 @@ define arm_aapcs_vfpcc i64 @add_v2i32_v2i64_acc_zext(<2 x i32> %x, <2 x i32> %b,
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: csetm r3, ne
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: vmov.i64 q1, #0xffffffff
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: vmov lr, r12, d1
; CHECK-NEXT: vmov r3, r2, d0
Expand Down Expand Up @@ -1900,11 +1894,8 @@ entry:
define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_acc_zext(<2 x i16> %x, <2 x i16> %b, i64 %a) {
; CHECK-LABEL: add_v2i16_v2i64_acc_zext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: vmov.i64 q2, #0xffff
; CHECK-NEXT: vand q1, q1, q2
; CHECK-NEXT: vand q0, q0, q2
; CHECK-NEXT: vmov r2, s6
; CHECK-NEXT: vmov r3, s4
; CHECK-NEXT: cmp r2, #0
Expand All @@ -1916,15 +1907,14 @@ define arm_aapcs_vfpcc i64 @add_v2i16_v2i64_acc_zext(<2 x i16> %x, <2 x i16> %b,
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: csetm r3, ne
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: vmov r12, lr, d1
; CHECK-NEXT: vmov r2, r3, d0
; CHECK-NEXT: add r2, r12
; CHECK-NEXT: orr.w r3, r3, lr
; CHECK-NEXT: vand q0, q0, q2
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: vmov r3, r12, d0
; CHECK-NEXT: add r2, r3
; CHECK-NEXT: adds r0, r0, r2
; CHECK-NEXT: adcs r1, r3
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: adc.w r1, r1, r12
; CHECK-NEXT: bx lr
entry:
%c = icmp eq <2 x i16> %b, zeroinitializer
%xx = zext <2 x i16> %x to <2 x i64>
Expand Down Expand Up @@ -2600,11 +2590,8 @@ entry:
define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_acc_zext(<2 x i8> %x, <2 x i8> %b, i64 %a) {
; CHECK-LABEL: add_v2i8_v2i64_acc_zext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: vmov.i64 q2, #0xff
; CHECK-NEXT: vand q1, q1, q2
; CHECK-NEXT: vand q0, q0, q2
; CHECK-NEXT: vmov r2, s6
; CHECK-NEXT: vmov r3, s4
; CHECK-NEXT: cmp r2, #0
Expand All @@ -2616,15 +2603,14 @@ define arm_aapcs_vfpcc i64 @add_v2i8_v2i64_acc_zext(<2 x i8> %x, <2 x i8> %b, i6
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: csetm r3, ne
; CHECK-NEXT: vmov q1[2], q1[0], r3, r2
; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: vmov r12, lr, d1
; CHECK-NEXT: vmov r2, r3, d0
; CHECK-NEXT: add r2, r12
; CHECK-NEXT: orr.w r3, r3, lr
; CHECK-NEXT: vand q0, q0, q2
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: vmov r3, r12, d0
; CHECK-NEXT: add r2, r3
; CHECK-NEXT: adds r0, r0, r2
; CHECK-NEXT: adcs r1, r3
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: adc.w r1, r1, r12
; CHECK-NEXT: bx lr
entry:
%c = icmp eq <2 x i8> %b, zeroinitializer
%xx = zext <2 x i8> %x to <2 x i64>
Expand Down
60 changes: 28 additions & 32 deletions llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
Expand Up @@ -1937,9 +1937,8 @@ define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) nounwind {
define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) nounwind {
; X86-LABEL: test_mask_cmp_b_512:
; X86: # %bb.0:
; X86-NEXT: pushl %edi # encoding: [0x57]
; X86-NEXT: pushl %esi # encoding: [0x56]
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x74,0xc1]
; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
Expand All @@ -1958,26 +1957,25 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) nounwin
; X86-NEXT: adcl %eax, %ecx # encoding: [0x11,0xc1]
; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x04]
; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
; X86-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6]
; X86-NEXT: adcl %ecx, %eax # encoding: [0x11,0xc8]
; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT: addl %edx, %eax # encoding: [0x01,0xd0]
; X86-NEXT: kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2]
; X86-NEXT: adcl %ecx, %edx # encoding: [0x11,0xca]
; X86-NEXT: vpcmpnltb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x05]
; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
; X86-NEXT: addl %esi, %edi # encoding: [0x01,0xf7]
; X86-NEXT: adcl %eax, %ecx # encoding: [0x11,0xc1]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X86-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
; X86-NEXT: kmovd %k2, %esi # encoding: [0xc5,0xfb,0x93,0xf2]
; X86-NEXT: adcl %edx, %esi # encoding: [0x11,0xd6]
; X86-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x64,0xc1]
; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
; X86-NEXT: adcl %ecx, %edx # encoding: [0x11,0xca]
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax # encoding: [0x03,0x44,0x24,0x0c]
; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx # encoding: [0x13,0x54,0x24,0x10]
; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
; X86-NEXT: adcl %esi, %edx # encoding: [0x11,0xf2]
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax # encoding: [0x03,0x44,0x24,0x08]
; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx # encoding: [0x13,0x54,0x24,0x0c]
; X86-NEXT: popl %esi # encoding: [0x5e]
; X86-NEXT: popl %edi # encoding: [0x5f]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
Expand Down Expand Up @@ -2112,9 +2110,8 @@ define i64 @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) nounwind {
define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) nounwind {
; X86-LABEL: test_mask_x86_avx512_ucmp_b_512:
; X86: # %bb.0:
; X86-NEXT: pushl %edi # encoding: [0x57]
; X86-NEXT: pushl %esi # encoding: [0x56]
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x74,0xc1]
; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
Expand All @@ -2133,26 +2130,25 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m
; X86-NEXT: adcl %eax, %ecx # encoding: [0x11,0xc1]
; X86-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x04]
; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
; X86-NEXT: kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
; X86-NEXT: kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
; X86-NEXT: addl %edx, %esi # encoding: [0x01,0xd6]
; X86-NEXT: adcl %ecx, %eax # encoding: [0x11,0xc8]
; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT: addl %edx, %eax # encoding: [0x01,0xd0]
; X86-NEXT: kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2]
; X86-NEXT: adcl %ecx, %edx # encoding: [0x11,0xca]
; X86-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x05]
; X86-NEXT: kshiftrq $32, %k0, %k2 # encoding: [0xc4,0xe3,0xf9,0x31,0xd0,0x20]
; X86-NEXT: kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
; X86-NEXT: kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
; X86-NEXT: addl %esi, %edi # encoding: [0x01,0xf7]
; X86-NEXT: adcl %eax, %ecx # encoding: [0x11,0xc1]
; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
; X86-NEXT: addl %eax, %ecx # encoding: [0x01,0xc1]
; X86-NEXT: kmovd %k2, %esi # encoding: [0xc5,0xfb,0x93,0xf2]
; X86-NEXT: adcl %edx, %esi # encoding: [0x11,0xd6]
; X86-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x06]
; X86-NEXT: kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
; X86-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
; X86-NEXT: addl %edi, %eax # encoding: [0x01,0xf8]
; X86-NEXT: adcl %ecx, %edx # encoding: [0x11,0xca]
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax # encoding: [0x03,0x44,0x24,0x0c]
; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx # encoding: [0x13,0x54,0x24,0x10]
; X86-NEXT: addl %ecx, %eax # encoding: [0x01,0xc8]
; X86-NEXT: kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
; X86-NEXT: adcl %esi, %edx # encoding: [0x11,0xf2]
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax # encoding: [0x03,0x44,0x24,0x08]
; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx # encoding: [0x13,0x54,0x24,0x0c]
; X86-NEXT: popl %esi # encoding: [0x5e]
; X86-NEXT: popl %edi # encoding: [0x5f]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
Expand Down

0 comments on commit 82b7436

Please sign in to comment.