Skip to content

Commit

Permalink
[SelectionDAG] Handle more opcodes in canCreateUndefOrPoison (#84921)
Browse files Browse the repository at this point in the history
[SelectionDAG] Handle more opcodes in canCreateUndefOrPoison

Handle SELECT_CC similarly as SETCC.

Handle these operations that only propagate poison/undef based on the
input operands:
  SADDSAT, UADDSAT, SSUBSAT, USUBSAT, MULHU, MULHS,
  SMIN, SMAX, UMIN, UMAX

These operations may create poison based on shift amount and exact
flag being violated:
  SRL, SRA

One goal here is to allow pushing freeze through these operations
when allowed, as well as letting analyses such as
isGuaranteedNotToBeUndefOrPoison to not break on such operations.

Since some problems have been observed with pushing freeze through
SRA/SRL we block that explicitly in DAGCombiner::visitFreeze now.
That way we can still model SRA/SRL properly in
SelectionDAG::canCreateUndefOrPoison, e.g. when used by
isGuaranteedNotToBeUndefOrPoison, even if we do not want to push
freeze through those instructions.
  • Loading branch information
bjope committed Apr 29, 2024
1 parent 6cd6bde commit b3c55b7
Show file tree
Hide file tree
Showing 7 changed files with 556 additions and 563 deletions.
6 changes: 6 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15459,6 +15459,12 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
return N0;

// We currently avoid folding freeze over SRA/SRL, due to the problems seen
// with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
// example https://reviews.llvm.org/D136529#4120959.
if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)
return SDValue();

// Fold freeze(op(x, ...)) -> op(freeze(x), ...).
// Try to push freeze through instructions that propagate but don't produce
// poison as far as possible. If an operand of freeze follows three
Expand Down
16 changes: 15 additions & 1 deletion llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5137,6 +5137,16 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
case ISD::FREEZE:
case ISD::CONCAT_VECTORS:
case ISD::INSERT_SUBVECTOR:
case ISD::SADDSAT:
case ISD::UADDSAT:
case ISD::SSUBSAT:
case ISD::USUBSAT:
case ISD::MULHU:
case ISD::MULHS:
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
case ISD::AND:
case ISD::XOR:
case ISD::ROTL:
Expand All @@ -5157,6 +5167,7 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
case ISD::BUILD_PAIR:
return false;

case ISD::SELECT_CC:
case ISD::SETCC: {
// Integer setcc cannot create undef or poison.
if (Op.getOperand(0).getValueType().isInteger())
Expand All @@ -5166,7 +5177,8 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
// based on options and flags. The options and flags also cause special
// nonan condition codes to be used. Those condition codes may be preserved
// even if the nonan flag is dropped somewhere.
ISD::CondCode CCCode = cast<CondCodeSDNode>(Op.getOperand(2))->get();
unsigned CCOp = Opcode == ISD::SETCC ? 2 : 4;
ISD::CondCode CCCode = cast<CondCodeSDNode>(Op.getOperand(CCOp))->get();
if (((unsigned)CCCode & 0x10U))
return true;

Expand All @@ -5183,6 +5195,8 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
return false;

case ISD::SHL:
case ISD::SRL:
case ISD::SRA:
// If the max shift amount isn't in range, then the shift can create poison.
return !getValidMaximumShiftAmountConstant(Op, DemandedElts);

Expand Down
96 changes: 53 additions & 43 deletions llvm/test/CodeGen/AMDGPU/div_i128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -282,21 +282,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v16, v1
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v15
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v16
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v16
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v13
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v14
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v14
; GFX9-O0-NEXT: v_sub_co_u32_e32 v9, vcc, v9, v4
; GFX9-O0-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v6, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v13, vcc, v10, v4, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v5, vcc, v5, v6, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v13, vcc, v10, v4, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v6, vcc
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v14, v5
; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v1
; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v14, v1
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12
; GFX9-O0-NEXT: v_xor_b32_e64 v1, v5, v1
Expand All @@ -312,21 +312,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v1
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v7
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v8
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v11
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 killed $vgpr7_vgpr8 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v11
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v12
; GFX9-O0-NEXT: v_sub_co_u32_e32 v1, vcc, v1, v3
; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v5, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v8, v3, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v5, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v8, vcc, v8, v5, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v7, v3, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v2, v5, vcc
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v2
; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v8
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v7
; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v7
; GFX9-O0-NEXT: v_xor_b32_e64 v5, v5, v6
; GFX9-O0-NEXT: v_xor_b32_e64 v3, v3, v4
; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
Expand All @@ -339,18 +339,26 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v14
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v9
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v12
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v2
; GFX9-O0-NEXT: v_or_b32_e64 v3, v8, v7
Expand Down Expand Up @@ -403,7 +411,8 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v5
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[12:13], v[11:12], s[6:7]
; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[12:13], v[11:12], s[12:13]
; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v10, s[12:13]
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v8
Expand Down Expand Up @@ -439,7 +448,8 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v5
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[13:14], s[6:7]
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[13:14], s[8:9]
; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v8, s[8:9]
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11
Expand Down Expand Up @@ -690,10 +700,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
Expand Down Expand Up @@ -903,14 +913,14 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_waitcnt vmcnt(9)
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
Expand Down Expand Up @@ -1028,10 +1038,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
Expand Down

0 comments on commit b3c55b7

Please sign in to comment.