Skip to content

Commit

Permalink
[DAGCombiner] Reassociate the operands from (OR (OR(CMP1, CMP2)), CMP…
Browse files Browse the repository at this point in the history
…3) to (OR (OR(CMP1, CMP3)), CMP2)

This happens when CMP1 and CMP3 have the same predicate (or CMP2 and CMP3 have
the same predicate).

This helps optimizations such as the following ones:
CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D156215
  • Loading branch information
kmitropoulou committed Aug 9, 2023
1 parent 51202b8 commit 2c5d1b5
Show file tree
Hide file tree
Showing 5 changed files with 89 additions and 57 deletions.
24 changes: 24 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1334,6 +1334,30 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
}
}

// Reassociate the operands from (OR/AND (OR/AND(N00, N001)), N1) to (OR/AND
// (OR/AND(N00, N1)), N01) when N00 and N1 are comparisons with the same
// predicate or to (OR/AND (OR/AND(N1, N01)), N00) when N01 and N1 are
// comparisons with the same predicate. This enables optimizations as the
// following one:
// CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
// CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)
if (Opc == ISD::AND || Opc == ISD::OR) {
if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
N01->getOpcode() == ISD::SETCC) {
ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
if (CC1 == CC00 && CC1 != CC01) {
SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags);
return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags);
}
if (CC1 == CC01 && CC1 != CC00) {
SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags);
return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags);
}
}
}
}

return SDValue();
Expand Down
40 changes: 18 additions & 22 deletions llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1722,14 +1722,12 @@ define i1 @test103(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %a
; CHECK-LABEL: test103:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_max_u32_e32 v0, v0, v1
; CHECK-NEXT: v_max_u32_e32 v1, v2, v3
; CHECK-NEXT: v_max_u32_e32 v2, v4, v5
; CHECK-NEXT: v_cmp_lt_u32_e32 vcc_lo, v0, v6
; CHECK-NEXT: v_cmp_gt_u32_e64 s0, v1, v6
; CHECK-NEXT: v_cmp_lt_u32_e64 s1, v2, v6
; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0
; CHECK-NEXT: s_or_b32 s0, s0, s1
; CHECK-NEXT: v_max_u32_e32 v4, v4, v5
; CHECK-NEXT: v_max_u32_e32 v2, v2, v3
; CHECK-NEXT: v_maxmin_u32 v0, v0, v1, v4
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, v2, v6
; CHECK-NEXT: v_cmp_lt_u32_e64 s0, v0, v6
; CHECK-NEXT: s_or_b32 s0, s0, vcc_lo
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; CHECK-NEXT: s_setpc_b64 s[30:31]
%cmp1 = icmp ult i32 %arg1, %C
Expand All @@ -1751,20 +1749,18 @@ define i1 @test104(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %a
; CHECK-LABEL: test104:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_min_u32_e32 v0, v0, v1
; CHECK-NEXT: v_max_u32_e32 v1, v2, v3
; CHECK-NEXT: v_min_u32_e32 v2, v4, v5
; CHECK-NEXT: v_max_u32_e32 v3, v6, v7
; CHECK-NEXT: v_min_u32_e32 v4, v8, v9
; CHECK-NEXT: v_cmp_lt_u32_e32 vcc_lo, v0, v10
; CHECK-NEXT: v_cmp_gt_u32_e64 s0, v1, v10
; CHECK-NEXT: v_cmp_lt_u32_e64 s1, v2, v10
; CHECK-NEXT: v_cmp_gt_u32_e64 s2, v3, v10
; CHECK-NEXT: v_cmp_lt_u32_e64 s3, v4, v10
; CHECK-NEXT: s_or_b32 s0, vcc_lo, s0
; CHECK-NEXT: s_or_b32 s1, s1, s2
; CHECK-NEXT: s_or_b32 s0, s3, s0
; CHECK-NEXT: s_or_b32 s0, s1, s0
; CHECK-NEXT: v_min_u32_e32 v8, v8, v9
; CHECK-NEXT: v_max_u32_e32 v2, v2, v3
; CHECK-NEXT: v_min_u32_e32 v3, v4, v5
; CHECK-NEXT: v_max_u32_e32 v4, v6, v7
; CHECK-NEXT: v_min3_u32 v0, v0, v1, v8
; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, v2, v10
; CHECK-NEXT: v_cmp_lt_u32_e64 s0, v3, v10
; CHECK-NEXT: v_cmp_gt_u32_e64 s1, v4, v10
; CHECK-NEXT: v_cmp_lt_u32_e64 s2, v0, v10
; CHECK-NEXT: s_or_b32 s0, s0, s1
; CHECK-NEXT: s_or_b32 s1, s2, vcc_lo
; CHECK-NEXT: s_or_b32 s0, s0, s1
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
; CHECK-NEXT: s_setpc_b64 s[30:31]
%cmp1 = icmp ult i32 %arg1, %C
Expand Down
14 changes: 7 additions & 7 deletions llvm/test/CodeGen/AMDGPU/wave32.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1476,9 +1476,9 @@ define amdgpu_kernel void @test_preserve_condition_undef_flag(float %arg, i32 %a
; GFX1032-NEXT: s_load_dword s2, s[0:1], 0x2c
; GFX1032-NEXT: s_load_dword s3, s[0:1], 0x24
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032-NEXT: v_cmp_ngt_f32_e64 s0, s2, 0
; GFX1032-NEXT: v_cmp_nlt_f32_e64 s1, s2, 1.0
; GFX1032-NEXT: v_cmp_nlt_f32_e64 s2, s3, 1.0
; GFX1032-NEXT: v_cmp_nlt_f32_e64 s0, s2, 1.0
; GFX1032-NEXT: v_cmp_nlt_f32_e64 s1, s3, 1.0
; GFX1032-NEXT: v_cmp_ngt_f32_e64 s2, s2, 0
; GFX1032-NEXT: s_or_b32 s0, s0, s1
; GFX1032-NEXT: s_or_b32 s0, s0, s2
; GFX1032-NEXT: s_and_b32 vcc_lo, exec_lo, s0
Expand All @@ -1493,12 +1493,12 @@ define amdgpu_kernel void @test_preserve_condition_undef_flag(float %arg, i32 %a
; GFX1064-LABEL: test_preserve_condition_undef_flag:
; GFX1064: ; %bb.0: ; %bb0
; GFX1064-NEXT: s_clause 0x1
; GFX1064-NEXT: s_load_dword s2, s[0:1], 0x2c
; GFX1064-NEXT: s_load_dword s4, s[0:1], 0x24
; GFX1064-NEXT: s_load_dword s4, s[0:1], 0x2c
; GFX1064-NEXT: s_load_dword s2, s[0:1], 0x24
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064-NEXT: v_cmp_ngt_f32_e64 s[0:1], s2, 0
; GFX1064-NEXT: v_cmp_nlt_f32_e64 s[0:1], s4, 1.0
; GFX1064-NEXT: v_cmp_nlt_f32_e64 s[2:3], s2, 1.0
; GFX1064-NEXT: v_cmp_nlt_f32_e64 s[4:5], s4, 1.0
; GFX1064-NEXT: v_cmp_ngt_f32_e64 s[4:5], s4, 0
; GFX1064-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
; GFX1064-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
; GFX1064-NEXT: s_and_b64 vcc, exec, s[0:1]
Expand Down
36 changes: 24 additions & 12 deletions llvm/test/CodeGen/Hexagon/isel/logical.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1250,13 +1250,16 @@ define <4 x i16> @f35(<4 x i16> %a0, <4 x i16> %a1, <4 x i16> %a2) #1 {
; CHECK-NEXT: p0 = vcmph.eq(r1:0,r7:6)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: p1 = vcmph.eq(r3:2,r7:6)
; CHECK-NEXT: p1 = vcmph.eq(r5:4,r7:6)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: p2 = vcmph.eq(r5:4,r7:6)
; CHECK-NEXT: p2 = vcmph.eq(r3:2,r7:6)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: p0 = and(p2,and(p0,!p1))
; CHECK-NEXT: p0 = and(p0,p1)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: p0 = and(p0,!p2)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r1:0 = mask(p0)
Expand Down Expand Up @@ -1364,13 +1367,16 @@ define <4 x i16> @f38(<4 x i16> %a0, <4 x i16> %a1, <4 x i16> %a2) #1 {
; CHECK-NEXT: p0 = vcmph.eq(r1:0,r7:6)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: p1 = vcmph.eq(r3:2,r7:6)
; CHECK-NEXT: p1 = vcmph.eq(r5:4,r7:6)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: p2 = vcmph.eq(r5:4,r7:6)
; CHECK-NEXT: p2 = vcmph.eq(r3:2,r7:6)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: p0 = or(p2,or(p0,!p1))
; CHECK-NEXT: p0 = or(p0,p1)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: p0 = or(p0,!p2)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r1:0 = mask(p0)
Expand Down Expand Up @@ -1712,13 +1718,16 @@ define <8 x i8> @f48(<8 x i8> %a0, <8 x i8> %a1, <8 x i8> %a2) #1 {
; CHECK-NEXT: p0 = vcmpb.eq(r1:0,r7:6)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: p1 = vcmpb.eq(r3:2,r7:6)
; CHECK-NEXT: p1 = vcmpb.eq(r5:4,r7:6)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: p2 = vcmpb.eq(r5:4,r7:6)
; CHECK-NEXT: p2 = vcmpb.eq(r3:2,r7:6)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: p0 = and(p2,and(p0,!p1))
; CHECK-NEXT: p0 = and(p0,p1)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: p0 = and(p0,!p2)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r1:0 = mask(p0)
Expand Down Expand Up @@ -1826,13 +1835,16 @@ define <8 x i8> @f51(<8 x i8> %a0, <8 x i8> %a1, <8 x i8> %a2) #1 {
; CHECK-NEXT: p0 = vcmpb.eq(r1:0,r7:6)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: p1 = vcmpb.eq(r3:2,r7:6)
; CHECK-NEXT: p1 = vcmpb.eq(r5:4,r7:6)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: p2 = vcmpb.eq(r5:4,r7:6)
; CHECK-NEXT: p2 = vcmpb.eq(r3:2,r7:6)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: p0 = or(p2,or(p0,!p1))
; CHECK-NEXT: p0 = or(p0,p1)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: p0 = or(p0,!p2)
; CHECK-NEXT: }
; CHECK-NEXT: {
; CHECK-NEXT: r1:0 = mask(p0)
Expand Down
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/X86/v8i1-masks.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1402,9 +1402,9 @@ define <8 x i32> @six_or_and_xor(<8 x float> %x) {
; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm2
; X86-NEXT: vxorps %xmm3, %xmm3, %xmm3
; X86-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
; X86-NEXT: vandps %ymm3, %ymm2, %ymm2
; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm3
; X86-NEXT: vandps %ymm3, %ymm2, %ymm2
; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm4
; X86-NEXT: vandps %ymm4, %ymm3, %ymm3
; X86-NEXT: vandps %ymm2, %ymm3, %ymm2
; X86-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm3
; X86-NEXT: vxorps %ymm1, %ymm3, %ymm1
; X86-NEXT: vxorps %ymm2, %ymm1, %ymm1
Expand All @@ -1419,9 +1419,9 @@ define <8 x i32> @six_or_and_xor(<8 x float> %x) {
; X64-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2
; X64-NEXT: vxorps %xmm3, %xmm3, %xmm3
; X64-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
; X64-NEXT: vandps %ymm3, %ymm2, %ymm2
; X64-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm3
; X64-NEXT: vandps %ymm3, %ymm2, %ymm2
; X64-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm4
; X64-NEXT: vandps %ymm4, %ymm3, %ymm3
; X64-NEXT: vandps %ymm2, %ymm3, %ymm2
; X64-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm3
; X64-NEXT: vxorps %ymm1, %ymm3, %ymm1
; X64-NEXT: vxorps %ymm2, %ymm1, %ymm1
Expand All @@ -1437,10 +1437,10 @@ define <8 x i32> @six_or_and_xor(<8 x float> %x) {
; X86-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2
; X86-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3
; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
; X86-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2
; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
; X86-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2
; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
; X86-AVX2-NEXT: vcmpneqps %ymm4, %ymm0, %ymm4
; X86-AVX2-NEXT: vandps %ymm4, %ymm3, %ymm3
; X86-AVX2-NEXT: vandps %ymm2, %ymm3, %ymm2
; X86-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1]
; X86-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
; X86-AVX2-NEXT: vxorps %ymm1, %ymm3, %ymm1
Expand All @@ -1458,10 +1458,10 @@ define <8 x i32> @six_or_and_xor(<8 x float> %x) {
; X64-AVX2-NEXT: vcmpltps %ymm2, %ymm0, %ymm2
; X64-AVX2-NEXT: vxorps %xmm3, %xmm3, %xmm3
; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
; X64-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2
; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
; X64-AVX2-NEXT: vandps %ymm3, %ymm2, %ymm2
; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1,1.00000001E-1]
; X64-AVX2-NEXT: vcmpneqps %ymm4, %ymm0, %ymm4
; X64-AVX2-NEXT: vandps %ymm4, %ymm3, %ymm3
; X64-AVX2-NEXT: vandps %ymm2, %ymm3, %ymm2
; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1,2.00000003E-1]
; X64-AVX2-NEXT: vcmpneqps %ymm3, %ymm0, %ymm3
; X64-AVX2-NEXT: vxorps %ymm1, %ymm3, %ymm1
Expand All @@ -1476,8 +1476,8 @@ define <8 x i32> @six_or_and_xor(<8 x float> %x) {
; X86-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k0
; X86-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X86-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1
; X86-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
; X86-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k1 {%k1}
; X86-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %k2
; X86-AVX512-NEXT: kxorw %k0, %k2, %k0
; X86-AVX512-NEXT: kxorw %k1, %k0, %k0
Expand All @@ -1492,8 +1492,8 @@ define <8 x i32> @six_or_and_xor(<8 x float> %x) {
; X64-AVX512-NEXT: vcmpgeps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
; X64-AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64-AVX512-NEXT: vcmpneqps %ymm1, %ymm0, %k1
; X64-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
; X64-AVX512-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
; X64-AVX512-NEXT: vcmpneqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k2
; X64-AVX512-NEXT: kxorw %k0, %k2, %k0
; X64-AVX512-NEXT: kxorw %k1, %k0, %k0
Expand Down

0 comments on commit 2c5d1b5

Please sign in to comment.