diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 036486fbb93605..bbffd9ba7e6483 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1204,19 +1204,14 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG); dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n'); - // Replace all uses. If any nodes become isomorphic to other nodes and - // are deleted, make sure to remove them from our worklist. - WorklistRemover DeadNodes(*this); + // Replace all uses. DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New); // Push the new node and any (possibly new) users onto the worklist. AddToWorklistWithUsers(TLO.New.getNode()); - // Finally, if the node is now dead, remove it from the graph. The node - // may not be dead if the replacement process recursively simplified to - // something else needing this node. - if (TLO.Old->use_empty()) - deleteAndRecombine(TLO.Old.getNode()); + // Finally, if the node is now dead, remove it from the graph. + recursivelyDeleteUnusedNodes(TLO.Old.getNode()); } /// Check the specified integer node value to see if it can be simplified or if diff --git a/llvm/test/CodeGen/AMDGPU/idot4u.ll b/llvm/test/CodeGen/AMDGPU/idot4u.ll index 2710aa098722c0..13da0e16a529c0 100644 --- a/llvm/test/CodeGen/AMDGPU/idot4u.ll +++ b/llvm/test/CodeGen/AMDGPU/idot4u.ll @@ -1851,14 +1851,14 @@ define amdgpu_kernel void @udot4_acc16_vecMul(<4 x i8> addrspace(1)* %src1, ; GFX7-NEXT: buffer_load_ushort v1, off, s[0:3], 0 ; GFX7-NEXT: s_waitcnt vmcnt(2) ; GFX7-NEXT: v_and_b32_e32 v3, 0xff00, v2 -; GFX7-NEXT: v_bfe_u32 v4, v2, 16, 8 +; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v2 ; GFX7-NEXT: s_waitcnt vmcnt(1) ; GFX7-NEXT: v_and_b32_e32 v6, 0xff00, v0 -; GFX7-NEXT: v_lshrrev_b32_e32 v5, 24, v2 +; GFX7-NEXT: v_bfe_u32 v5, v2, 16, 8 ; GFX7-NEXT: v_and_b32_e32 v2, 0xff, v2 ; GFX7-NEXT: v_lshlrev_b32_e32 v3, 8, v3 -; GFX7-NEXT: v_bfe_u32 v7, v0, 16, 8 -; GFX7-NEXT: v_lshrrev_b32_e32 v8, 24, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v0 +; GFX7-NEXT: v_bfe_u32 v8, v0, 16, 8 ; GFX7-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX7-NEXT: v_lshlrev_b32_e32 v6, 8, v6 ; GFX7-NEXT: v_alignbit_b32 v3, s10, v3, 16 @@ -1866,8 +1866,8 @@ define amdgpu_kernel void @udot4_acc16_vecMul(<4 x i8> addrspace(1)* %src1, ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_mad_u32_u24 v0, v2, v0, v1 ; GFX7-NEXT: v_mad_u32_u24 v0, v3, v6, v0 -; GFX7-NEXT: v_mad_u32_u24 v0, v4, v7, v0 ; GFX7-NEXT: v_mad_u32_u24 v0, v5, v8, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, v4, v7, v0 ; GFX7-NEXT: buffer_store_short v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/idot8u.ll b/llvm/test/CodeGen/AMDGPU/idot8u.ll index 7021bcc803ff32..1a47a80d03e2c4 100644 --- a/llvm/test/CodeGen/AMDGPU/idot8u.ll +++ b/llvm/test/CodeGen/AMDGPU/idot8u.ll @@ -2471,35 +2471,35 @@ define amdgpu_kernel void @udot8_acc8_vecMul(<8 x i4> addrspace(1)* %src1, ; GFX7-NEXT: s_waitcnt vmcnt(1) ; GFX7-NEXT: v_and_b32_e32 v14, 15, v0 ; GFX7-NEXT: v_bfe_u32 v8, v2, 12, 4 -; GFX7-NEXT: v_lshrrev_b32_e32 v9, 28, v2 ; GFX7-NEXT: v_bfe_u32 v13, v0, 4, 4 ; GFX7-NEXT: v_bfe_u32 v15, v0, 12, 4 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_mad_u32_u24 v1, v7, v14, v1 -; GFX7-NEXT: v_bfe_u32 v3, v2, 20, 4 -; GFX7-NEXT: v_bfe_u32 v4, v2, 16, 4 ; GFX7-NEXT: v_bfe_u32 v5, v2, 8, 4 ; GFX7-NEXT: v_bfe_u32 v12, v0, 8, 4 -; GFX7-NEXT: v_alignbit_b32 v2, v9, v2, 24 ; GFX7-NEXT: v_lshlrev_b32_e32 v8, 24, v8 -; GFX7-NEXT: v_lshlrev_b32_e32 v9, 24, v15 +; GFX7-NEXT: v_lshlrev_b32_e32 v15, 24, v15 ; GFX7-NEXT: v_mad_u32_u24 v1, v6, v13, v1 ; GFX7-NEXT: v_alignbit_b32 v8, 0, v8, 24 -; GFX7-NEXT: v_alignbit_b32 v7, 0, v9, 24 +; GFX7-NEXT: v_alignbit_b32 v14, 0, v15, 24 ; GFX7-NEXT: v_mad_u32_u24 v1, v5, v12, v1 +; GFX7-NEXT: v_bfe_u32 v4, v2, 16, 4 +; GFX7-NEXT: v_lshrrev_b32_e32 v9, 28, v2 ; GFX7-NEXT: v_bfe_u32 v11, v0, 16, 4 ; GFX7-NEXT: v_lshrrev_b32_e32 v16, 28, v0 -; GFX7-NEXT: v_mad_u32_u24 v1, v8, v7, v1 +; GFX7-NEXT: v_mad_u32_u24 v1, v8, v14, v1 +; GFX7-NEXT: v_bfe_u32 v3, v2, 20, 4 ; GFX7-NEXT: v_bfe_u32 v10, v0, 20, 4 +; GFX7-NEXT: v_alignbit_b32 v2, v9, v2, 24 ; GFX7-NEXT: v_alignbit_b32 v0, v16, v0, 24 ; GFX7-NEXT: v_mad_u32_u24 v1, v4, v11, v1 -; GFX7-NEXT: v_lshrrev_b32_e32 v15, 8, v2 +; GFX7-NEXT: v_lshrrev_b32_e32 v9, 8, v2 ; GFX7-NEXT: v_and_b32_e32 v2, 15, v2 -; GFX7-NEXT: v_lshrrev_b32_e32 v9, 8, v0 +; GFX7-NEXT: v_lshrrev_b32_e32 v7, 8, v0 ; GFX7-NEXT: v_and_b32_e32 v0, 15, v0 ; GFX7-NEXT: v_mad_u32_u24 v1, v3, v10, v1 ; GFX7-NEXT: v_mad_u32_u24 v0, v2, v0, v1 -; GFX7-NEXT: v_mad_u32_u24 v0, v15, v9, v0 +; GFX7-NEXT: v_mad_u32_u24 v0, v9, v7, v0 ; GFX7-NEXT: buffer_store_byte v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/sdiv64.ll b/llvm/test/CodeGen/AMDGPU/sdiv64.ll index ac15a6a14a5b14..d760ea415e37a8 100644 --- a/llvm/test/CodeGen/AMDGPU/sdiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/sdiv64.ll @@ -473,7 +473,7 @@ define amdgpu_kernel void @s_test_sdiv24_64(i64 addrspace(1)* %out, i64 %x, i64 ; GCN-LABEL: s_test_sdiv24_64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; GCN-NEXT: s_load_dword s1, s[0:1], 0xe ; GCN-NEXT: s_mov_b32 s3, 0xf000 ; GCN-NEXT: s_mov_b32 s2, -1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) @@ -503,7 +503,7 @@ define amdgpu_kernel void @s_test_sdiv24_64(i64 addrspace(1)* %out, i64 %x, i64 ; GCN-IR-LABEL: s_test_sdiv24_64: ; GCN-IR: ; %bb.0: ; GCN-IR-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-IR-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; GCN-IR-NEXT: s_load_dword s1, s[0:1], 0xe ; GCN-IR-NEXT: s_mov_b32 s3, 0xf000 ; GCN-IR-NEXT: s_mov_b32 s2, -1 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) @@ -643,7 +643,7 @@ define amdgpu_kernel void @s_test_sdiv31_64(i64 addrspace(1)* %out, i64 %x, i64 ; GCN-LABEL: s_test_sdiv31_64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; GCN-NEXT: s_load_dword s1, s[0:1], 0xe ; GCN-NEXT: s_mov_b32 s3, 0xf000 ; GCN-NEXT: s_mov_b32 s2, -1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) @@ -673,7 +673,7 @@ define amdgpu_kernel void @s_test_sdiv31_64(i64 addrspace(1)* %out, i64 %x, i64 ; GCN-IR-LABEL: s_test_sdiv31_64: ; GCN-IR: ; %bb.0: ; GCN-IR-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-IR-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; GCN-IR-NEXT: s_load_dword s1, s[0:1], 0xe ; GCN-IR-NEXT: s_mov_b32 s3, 0xf000 ; GCN-IR-NEXT: s_mov_b32 s2, -1 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) @@ -710,7 +710,7 @@ define amdgpu_kernel void @s_test_sdiv23_64(i64 addrspace(1)* %out, i64 %x, i64 ; GCN-LABEL: s_test_sdiv23_64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; GCN-NEXT: s_load_dword s1, s[0:1], 0xe ; GCN-NEXT: s_mov_b32 s3, 0xf000 ; GCN-NEXT: s_mov_b32 s2, -1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) @@ -740,7 +740,7 @@ define amdgpu_kernel void @s_test_sdiv23_64(i64 addrspace(1)* %out, i64 %x, i64 ; GCN-IR-LABEL: s_test_sdiv23_64: ; GCN-IR: ; %bb.0: ; GCN-IR-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-IR-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; GCN-IR-NEXT: s_load_dword s1, s[0:1], 0xe ; GCN-IR-NEXT: s_mov_b32 s3, 0xf000 ; GCN-IR-NEXT: s_mov_b32 s2, -1 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) @@ -777,7 +777,7 @@ define amdgpu_kernel void @s_test_sdiv25_64(i64 addrspace(1)* %out, i64 %x, i64 ; GCN-LABEL: s_test_sdiv25_64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; GCN-NEXT: s_load_dword s1, s[0:1], 0xe ; GCN-NEXT: s_mov_b32 s3, 0xf000 ; GCN-NEXT: s_mov_b32 s2, -1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) @@ -807,7 +807,7 @@ define amdgpu_kernel void @s_test_sdiv25_64(i64 addrspace(1)* %out, i64 %x, i64 ; GCN-IR-LABEL: s_test_sdiv25_64: ; GCN-IR: ; %bb.0: ; GCN-IR-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; GCN-IR-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd +; GCN-IR-NEXT: s_load_dword s1, s[0:1], 0xe ; GCN-IR-NEXT: s_mov_b32 s3, 0xf000 ; GCN-IR-NEXT: s_mov_b32 s2, -1 ; GCN-IR-NEXT: s_waitcnt lgkmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/srem64.ll b/llvm/test/CodeGen/AMDGPU/srem64.ll index d5668881f96f9b..9e8a8d665c0312 100644 --- a/llvm/test/CodeGen/AMDGPU/srem64.ll +++ b/llvm/test/CodeGen/AMDGPU/srem64.ll @@ -452,7 +452,7 @@ define i64 @v_test_srem(i64 %x, i64 %y) { define amdgpu_kernel void @s_test_srem23_64(i64 addrspace(1)* %out, i64 %x, i64 %y) { ; GCN-LABEL: s_test_srem23_64: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd +; GCN-NEXT: s_load_dword s5, s[0:1], 0xe ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 ; GCN-NEXT: s_mov_b32 s7, 0xf000 ; GCN-NEXT: s_mov_b32 s6, -1 @@ -484,7 +484,7 @@ define amdgpu_kernel void @s_test_srem23_64(i64 addrspace(1)* %out, i64 %x, i64 ; ; GCN-IR-LABEL: s_test_srem23_64: ; GCN-IR: ; %bb.0: -; GCN-IR-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd +; GCN-IR-NEXT: s_load_dword s5, s[0:1], 0xe ; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 ; GCN-IR-NEXT: s_mov_b32 s7, 0xf000 ; GCN-IR-NEXT: s_mov_b32 s6, -1 @@ -523,7 +523,7 @@ define amdgpu_kernel void @s_test_srem23_64(i64 addrspace(1)* %out, i64 %x, i64 define amdgpu_kernel void @s_test_srem24_64(i64 addrspace(1)* %out, i64 %x, i64 %y) { ; GCN-LABEL: s_test_srem24_64: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd +; GCN-NEXT: s_load_dword s5, s[0:1], 0xe ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 ; GCN-NEXT: s_mov_b32 s7, 0xf000 ; GCN-NEXT: s_mov_b32 s6, -1 @@ -555,7 +555,7 @@ define amdgpu_kernel void @s_test_srem24_64(i64 addrspace(1)* %out, i64 %x, i64 ; ; GCN-IR-LABEL: s_test_srem24_64: ; GCN-IR: ; %bb.0: -; GCN-IR-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd +; GCN-IR-NEXT: s_load_dword s5, s[0:1], 0xe ; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 ; GCN-IR-NEXT: s_mov_b32 s7, 0xf000 ; GCN-IR-NEXT: s_mov_b32 s6, -1 @@ -648,7 +648,7 @@ define i64 @v_test_srem24_64(i64 %x, i64 %y) { define amdgpu_kernel void @s_test_srem25_64(i64 addrspace(1)* %out, i64 %x, i64 %y) { ; GCN-LABEL: s_test_srem25_64: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd +; GCN-NEXT: s_load_dword s5, s[0:1], 0xe ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 ; GCN-NEXT: s_mov_b32 s7, 0xf000 ; GCN-NEXT: s_mov_b32 s6, -1 @@ -680,7 +680,7 @@ define amdgpu_kernel void @s_test_srem25_64(i64 addrspace(1)* %out, i64 %x, i64 ; ; GCN-IR-LABEL: s_test_srem25_64: ; GCN-IR: ; %bb.0: -; GCN-IR-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd +; GCN-IR-NEXT: s_load_dword s5, s[0:1], 0xe ; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 ; GCN-IR-NEXT: s_mov_b32 s7, 0xf000 ; GCN-IR-NEXT: s_mov_b32 s6, -1 @@ -719,7 +719,7 @@ define amdgpu_kernel void @s_test_srem25_64(i64 addrspace(1)* %out, i64 %x, i64 define amdgpu_kernel void @s_test_srem31_64(i64 addrspace(1)* %out, i64 %x, i64 %y) { ; GCN-LABEL: s_test_srem31_64: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd +; GCN-NEXT: s_load_dword s5, s[0:1], 0xe ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 ; GCN-NEXT: s_mov_b32 s7, 0xf000 ; GCN-NEXT: s_mov_b32 s6, -1 @@ -751,7 +751,7 @@ define amdgpu_kernel void @s_test_srem31_64(i64 addrspace(1)* %out, i64 %x, i64 ; ; GCN-IR-LABEL: s_test_srem31_64: ; GCN-IR: ; %bb.0: -; GCN-IR-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd +; GCN-IR-NEXT: s_load_dword s5, s[0:1], 0xe ; GCN-IR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 ; GCN-IR-NEXT: s_mov_b32 s7, 0xf000 ; GCN-IR-NEXT: s_mov_b32 s6, -1