Skip to content

Commit

Permalink
Revert "[SelectionDAG] Handle more opcodes in canCreateUndefOrPoison (#84921)" and more...
Browse files Browse the repository at this point in the history

This reverts:
b3c55b7 - "[SelectionDAG] Handle more opcodes in canCreateUndefOrPoison (#84921)"
(reverted as well because it updates a test case, and I don't know how to resolve the resulting conflict)
8e2f649 - "[DAGCombiner] Do not always fold FREEZE over BUILD_VECTOR (#85932)"
73472c5 - "[SelectionDAG] Treat CopyFromReg as freezing the value (#85932)"

These commits are reverted due to a test suite failure on AArch64 when compiling for SVE:
https://lab.llvm.org/buildbot/#/builders/197/builds/13955

clang: ../llvm/llvm/include/llvm/CodeGen/ValueTypes.h:307: MVT llvm::EVT::getSimpleVT() const: Assertion `isSimple() && "Expected a SimpleValueType!"' failed.
  • Loading branch information
DavidSpickett committed Apr 29, 2024
1 parent f029da5 commit 16bd10a
Show file tree
Hide file tree
Showing 49 changed files with 2,298 additions and 2,204 deletions.
1 change: 0 additions & 1 deletion llvm/include/llvm/CodeGen/ISDOpcodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,6 @@ enum NodeType {
/// CopyFromReg - This node indicates that the input value is a virtual or
/// physical register that is defined outside of the scope of this
/// SelectionDAG. The register is available from the RegisterSDNode object.
/// Note that CopyFromReg is considered as also freezing the value.
CopyFromReg,

/// UNDEF - An undefined node.
Expand Down
26 changes: 0 additions & 26 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15459,12 +15459,6 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
return N0;

// We currently avoid folding freeze over SRA/SRL, due to the problems seen
// with (freeze (assert ext)) blocking simplifications of SRA/SRL. See for
// example https://reviews.llvm.org/D136529#4120959.
if (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)
return SDValue();

// Fold freeze(op(x, ...)) -> op(freeze(x), ...).
// Try to push freeze through instructions that propagate but don't produce
// poison as far as possible. If an operand of freeze follows three
Expand All @@ -15481,26 +15475,6 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
N0.getOpcode() == ISD::BUILD_PAIR ||
N0.getOpcode() == ISD::CONCAT_VECTORS;

// Avoid turning a BUILD_VECTOR that can be recognized as "all zeros", "all
// ones" or "constant" into something that depends on FrozenUndef. We can
// instead pick undef values to keep those properties, while at the same time
// folding away the freeze.
// If we implement a more general solution for folding away freeze(undef) in
// the future, then this special handling can be removed.
if (N0.getOpcode() == ISD::BUILD_VECTOR) {
SDLoc DL(N0);
MVT VT = N0.getSimpleValueType();
if (llvm::ISD::isBuildVectorAllOnes(N0.getNode()))
return DAG.getAllOnesConstant(DL, VT);
if (llvm::ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
SmallVector<SDValue, 8> NewVecC;
for (const SDValue &Op : N0->op_values())
NewVecC.push_back(
Op.isUndef() ? DAG.getConstant(0, DL, Op.getValueType()) : Op);
return DAG.getBuildVector(VT, DL, NewVecC);
}
}

SmallSetVector<SDValue, 8> MaybePoisonOperands;
for (SDValue Op : N0->ops()) {
if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,
Expand Down
17 changes: 1 addition & 16 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5063,7 +5063,6 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
case ISD::VALUETYPE:
case ISD::FrameIndex:
case ISD::TargetFrameIndex:
case ISD::CopyFromReg:
return true;

case ISD::UNDEF:
Expand Down Expand Up @@ -5137,16 +5136,6 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
case ISD::FREEZE:
case ISD::CONCAT_VECTORS:
case ISD::INSERT_SUBVECTOR:
case ISD::SADDSAT:
case ISD::UADDSAT:
case ISD::SSUBSAT:
case ISD::USUBSAT:
case ISD::MULHU:
case ISD::MULHS:
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
case ISD::AND:
case ISD::XOR:
case ISD::ROTL:
Expand All @@ -5167,7 +5156,6 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
case ISD::BUILD_PAIR:
return false;

case ISD::SELECT_CC:
case ISD::SETCC: {
// Integer setcc cannot create undef or poison.
if (Op.getOperand(0).getValueType().isInteger())
Expand All @@ -5177,8 +5165,7 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
// based on options and flags. The options and flags also cause special
// nonan condition codes to be used. Those condition codes may be preserved
// even if the nonan flag is dropped somewhere.
unsigned CCOp = Opcode == ISD::SETCC ? 2 : 4;
ISD::CondCode CCCode = cast<CondCodeSDNode>(Op.getOperand(CCOp))->get();
ISD::CondCode CCCode = cast<CondCodeSDNode>(Op.getOperand(2))->get();
if (((unsigned)CCCode & 0x10U))
return true;

Expand All @@ -5195,8 +5182,6 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
return false;

case ISD::SHL:
case ISD::SRL:
case ISD::SRA:
// If the max shift amount isn't in range, then the shift can create poison.
return !getValidMaximumShiftAmountConstant(Op, DemandedElts);

Expand Down
7 changes: 4 additions & 3 deletions llvm/test/CodeGen/AArch64/combine-mul.ll
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ define <4 x i1> @PR48683_vec_undef(<4 x i32> %x) {
define i64 @combine_mul_self_demandedbits(i64 %x) {
; CHECK-LABEL: combine_mul_self_demandedbits:
; CHECK: // %bb.0:
; CHECK-NEXT: mul x0, x0, x0
; CHECK-NEXT: mul x8, x0, x0
; CHECK-NEXT: and x0, x8, #0xfffffffffffffffd
; CHECK-NEXT: ret
%1 = mul i64 %x, %x
%2 = and i64 %1, -3
Expand Down Expand Up @@ -76,7 +77,7 @@ define i8 @one_demanded_bit(i8 %x) {
define <2 x i64> @one_demanded_bit_splat(<2 x i64> %x) {
; CHECK-LABEL: one_demanded_bit_splat:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #32 // =0x20
; CHECK-NEXT: mov w8, #32
; CHECK-NEXT: shl v0.2d, v0.2d, #5
; CHECK-NEXT: dup v1.2d, x8
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
Expand Down Expand Up @@ -130,7 +131,7 @@ define i32 @squared_demanded_2_low_bits(i32 %x) {
define <2 x i64> @squared_demanded_2_low_bits_splat(<2 x i64> %x) {
; CHECK-LABEL: squared_demanded_2_low_bits_splat:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #-2 // =0xfffffffffffffffe
; CHECK-NEXT: mov x8, #-2
; CHECK-NEXT: dup v1.2d, x8
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
Expand Down
96 changes: 43 additions & 53 deletions llvm/test/CodeGen/AMDGPU/div_i128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -282,21 +282,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: ; kill: def $vgpr15 killed $vgpr15 def $vgpr15_vgpr16 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v16, v1
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v15
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v16
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v16
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v13
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v14
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v14
; GFX9-O0-NEXT: v_sub_co_u32_e32 v9, vcc, v9, v4
; GFX9-O0-NEXT: v_subb_co_u32_e32 v5, vcc, v5, v6, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v13, vcc, v10, v4, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v6, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v13, vcc, v10, v4, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v5, vcc, v5, v6, vcc
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v14, v5
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr13 killed $vgpr13 def $vgpr13_vgpr14 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v14, v1
; GFX9-O0-NEXT: ; kill: def $vgpr9 killed $vgpr9 def $vgpr9_vgpr10 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v10, v1
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12
; GFX9-O0-NEXT: v_xor_b32_e64 v1, v5, v1
Expand All @@ -312,21 +312,21 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v1
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v7
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 killed $vgpr7_vgpr8 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v11
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v8
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v11
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v12
; GFX9-O0-NEXT: v_sub_co_u32_e32 v1, vcc, v1, v3
; GFX9-O0-NEXT: v_subb_co_u32_e32 v8, vcc, v8, v5, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v7, v3, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v2, v5, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v7, vcc, v7, v5, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v11, vcc, v8, v3, vcc
; GFX9-O0-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v5, vcc
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v8
; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v2
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; implicit-def: $sgpr4
; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v7
; GFX9-O0-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v7
; GFX9-O0-NEXT: v_xor_b32_e64 v5, v5, v6
; GFX9-O0-NEXT: v_xor_b32_e64 v3, v3, v4
; GFX9-O0-NEXT: ; kill: def $vgpr3 killed $vgpr3 def $vgpr3_vgpr4 killed $exec
Expand All @@ -339,26 +339,18 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v11
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v12
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v2
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v13
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v14
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v9
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v12
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v2
; GFX9-O0-NEXT: v_or_b32_e64 v3, v8, v7
Expand Down Expand Up @@ -411,8 +403,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: ; kill: def $vgpr8 killed $vgpr8 def $vgpr8_vgpr9 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v9, v5
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v9
; GFX9-O0-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[12:13], v[11:12], s[12:13]
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[12:13], v[11:12], s[6:7]
; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v10, s[12:13]
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v8
Expand Down Expand Up @@ -448,8 +439,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: ; kill: def $vgpr11 killed $vgpr11 def $vgpr11_vgpr12 killed $exec
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v5
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v12
; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[13:14], s[8:9]
; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[8:9], v[13:14], s[6:7]
; GFX9-O0-NEXT: v_cndmask_b32_e64 v5, v5, v8, s[8:9]
; GFX9-O0-NEXT: v_mov_b32_e32 v7, v6
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v11
Expand Down Expand Up @@ -700,10 +690,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: buffer_load_dword v28, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v25, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v26, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
Expand Down Expand Up @@ -913,14 +903,14 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
; GFX9-O0-NEXT: buffer_load_dword v16, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_waitcnt vmcnt(9)
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v10
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
Expand Down Expand Up @@ -1038,10 +1028,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
; GFX9-O0-NEXT: s_or_saveexec_b64 s[18:19], -1
; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: s_mov_b64 exec, s[18:19]
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
Expand Down
Loading

0 comments on commit 16bd10a

Please sign in to comment.