diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 398c99b3bd127..cbb68fa85ca80 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -6127,12 +6127,10 @@ bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI, const TargetRegisterClass *SuperRC = RI.getLargestLegalSuperClass(RC, *MF); if (!SuperRC) return false; - - DRC = RI.getMatchingSuperRegClass(SuperRC, DRC, MO.getSubReg()); - if (!DRC) - return false; + return RI.getMatchingSuperRegClass(SuperRC, DRC, MO.getSubReg()) != nullptr; } - return RC->hasSuperClassEq(DRC); + + return RI.getCommonSubClass(DRC, RC) != nullptr; } bool SIInstrInfo::isLegalRegOperand(const MachineInstr &MI, unsigned OpIdx, diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll index 9c2fabce4bcde..b33b8a7d8cd72 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll @@ -7,33 +7,33 @@ define amdgpu_kernel void @v3i8_liveout(ptr addrspace(1) %src1, ptr addrspace(1) ; GFX906-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX906-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 ; GFX906-NEXT: v_lshlrev_b32_e32 v2, 2, v0 -; GFX906-NEXT: v_mov_b32_e32 v4, 8 +; GFX906-NEXT: v_mov_b32_e32 v3, 8 ; GFX906-NEXT: v_mov_b32_e32 v5, 16 ; GFX906-NEXT: s_waitcnt lgkmcnt(0) -; GFX906-NEXT: global_load_dword v3, v2, s[0:1] +; GFX906-NEXT: global_load_dword v4, v2, s[0:1] ; GFX906-NEXT: v_mov_b32_e32 v1, 0xff ; GFX906-NEXT: v_cmp_gt_u32_e32 vcc, 15, v0 ; GFX906-NEXT: s_waitcnt vmcnt(0) -; GFX906-NEXT: v_and_b32_e32 v6, 0xff, v3 -; GFX906-NEXT: v_lshlrev_b32_sdwa v7, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 -; GFX906-NEXT: v_lshlrev_b32_sdwa v3, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 -; GFX906-NEXT: v_or3_b32 v3, v6, v7, v3 +; GFX906-NEXT: v_and_b32_e32 v6, 0xff, v4 +; GFX906-NEXT: v_lshlrev_b32_sdwa v7, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX906-NEXT: v_lshlrev_b32_sdwa v4, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 +; GFX906-NEXT: v_or3_b32 v4, v6, v7, v4 ; GFX906-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX906-NEXT: s_cbranch_execz .LBB0_2 ; GFX906-NEXT: ; %bb.1: ; %bb.1 ; GFX906-NEXT: global_load_dword v0, v2, s[2:3] ; GFX906-NEXT: s_waitcnt vmcnt(0) ; GFX906-NEXT: v_and_b32_e32 v2, 0xff, v0 -; GFX906-NEXT: v_lshlrev_b32_sdwa v3, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX906-NEXT: v_lshlrev_b32_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX906-NEXT: v_lshlrev_b32_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 -; GFX906-NEXT: v_or3_b32 v3, v2, v3, v0 +; GFX906-NEXT: v_or3_b32 v4, v2, v3, v0 ; GFX906-NEXT: .LBB0_2: ; %bb.2 ; GFX906-NEXT: s_or_b64 exec, exec, s[0:1] -; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v3 +; GFX906-NEXT: v_lshrrev_b32_e32 v0, 8, v4 ; GFX906-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX906-NEXT: v_lshlrev_b16_e32 v0, 8, v0 -; GFX906-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX906-NEXT: v_and_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX906-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX906-NEXT: v_and_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX906-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX906-NEXT: v_lshl_or_b32 v0, v1, 16, v0 diff --git a/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll index 7b33374453010..6b6eb43baf856 100644 --- a/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll +++ b/llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll @@ -969,37 +969,38 @@ define void @flat_atomic_xchg_i64_ret_av_av(ptr %ptr) #0 { ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: s_mov_b64 s[0:1], 0x50 -; GFX950-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX950-NEXT: v_lshl_add_u64 v[2:3], v[0:1], 0, s[0:1] ; GFX950-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX950-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 +; GFX950-NEXT: v_cmp_ne_u32_e32 vcc, s1, v3 ; GFX950-NEXT: ;;#ASMSTART -; GFX950-NEXT: ; def v[0:1] +; GFX950-NEXT: ; def v[4:5] ; GFX950-NEXT: ;;#ASMEND -; GFX950-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX950-NEXT: ; implicit-def: $vgpr0_vgpr1 ; GFX950-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX950-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX950-NEXT: s_cbranch_execz .LBB14_2 ; GFX950-NEXT: ; %bb.1: ; %atomicrmw.global ; GFX950-NEXT: buffer_wbl2 sc0 sc1 -; GFX950-NEXT: flat_atomic_swap_x2 v[2:3], v[4:5], v[0:1] sc0 sc1 +; GFX950-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[4:5] sc0 sc1 ; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX950-NEXT: buffer_inv sc0 sc1 +; GFX950-NEXT: ; implicit-def: $vgpr2_vgpr3 ; GFX950-NEXT: ; implicit-def: $vgpr4_vgpr5 ; GFX950-NEXT: .LBB14_2: ; %Flow ; GFX950-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] ; GFX950-NEXT: s_cbranch_execz .LBB14_4 ; GFX950-NEXT: ; %bb.3: ; %atomicrmw.private -; GFX950-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; GFX950-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v4, -1, v4, vcc -; GFX950-NEXT: scratch_load_dwordx2 v[2:3], v4, off +; GFX950-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc +; GFX950-NEXT: scratch_load_dwordx2 v[0:1], v2, off ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: scratch_store_dwordx2 v4, v[0:1], off +; GFX950-NEXT: scratch_store_dwordx2 v2, v[4:5], off ; GFX950-NEXT: .LBB14_4: ; %atomicrmw.phi ; GFX950-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX950-NEXT: s_waitcnt vmcnt(1) ; GFX950-NEXT: ;;#ASMSTART -; GFX950-NEXT: ; use v[2:3] +; GFX950-NEXT: ; use v[0:1] ; GFX950-NEXT: ;;#ASMEND ; GFX950-NEXT: s_waitcnt vmcnt(0) ; GFX950-NEXT: s_setpc_b64 s[30:31] @@ -1058,37 +1059,38 @@ define void @flat_atomic_xchg_i64_ret_av_v(ptr %ptr) #0 { ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: s_mov_b64 s[0:1], 0x50 -; GFX950-NEXT: v_lshl_add_u64 v[4:5], v[0:1], 0, s[0:1] +; GFX950-NEXT: v_lshl_add_u64 v[2:3], v[0:1], 0, s[0:1] ; GFX950-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX950-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5 +; GFX950-NEXT: v_cmp_ne_u32_e32 vcc, s1, v3 ; GFX950-NEXT: ;;#ASMSTART -; GFX950-NEXT: ; def v[0:1] +; GFX950-NEXT: ; def v[4:5] ; GFX950-NEXT: ;;#ASMEND -; GFX950-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX950-NEXT: ; implicit-def: $vgpr0_vgpr1 ; GFX950-NEXT: s_and_saveexec_b64 s[0:1], vcc ; GFX950-NEXT: s_xor_b64 s[0:1], exec, s[0:1] ; GFX950-NEXT: s_cbranch_execz .LBB15_2 ; GFX950-NEXT: ; %bb.1: ; %atomicrmw.global ; GFX950-NEXT: buffer_wbl2 sc0 sc1 -; GFX950-NEXT: flat_atomic_swap_x2 v[2:3], v[4:5], v[0:1] sc0 sc1 +; GFX950-NEXT: flat_atomic_swap_x2 v[0:1], v[2:3], v[4:5] sc0 sc1 ; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX950-NEXT: buffer_inv sc0 sc1 +; GFX950-NEXT: ; implicit-def: $vgpr2_vgpr3 ; GFX950-NEXT: ; implicit-def: $vgpr4_vgpr5 ; GFX950-NEXT: .LBB15_2: ; %Flow ; GFX950-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] ; GFX950-NEXT: s_cbranch_execz .LBB15_4 ; GFX950-NEXT: ; %bb.3: ; %atomicrmw.private -; GFX950-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] +; GFX950-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v4, -1, v4, vcc -; GFX950-NEXT: scratch_load_dwordx2 v[2:3], v4, off +; GFX950-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc +; GFX950-NEXT: scratch_load_dwordx2 v[0:1], v2, off ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: scratch_store_dwordx2 v4, v[0:1], off +; GFX950-NEXT: scratch_store_dwordx2 v2, v[4:5], off ; GFX950-NEXT: .LBB15_4: ; %atomicrmw.phi ; GFX950-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX950-NEXT: s_waitcnt vmcnt(1) ; GFX950-NEXT: ;;#ASMSTART -; GFX950-NEXT: ; use v[2:3] +; GFX950-NEXT: ; use v[0:1] ; GFX950-NEXT: ;;#ASMEND ; GFX950-NEXT: s_waitcnt vmcnt(0) ; GFX950-NEXT: s_setpc_b64 s[30:31] @@ -1149,11 +1151,11 @@ define void @flat_atomic_xchg_i64_ret_av_a(ptr %ptr) #0 { ; GFX950: ; %bb.0: ; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX950-NEXT: s_mov_b64 s[0:1], 0x50 -; GFX950-NEXT: v_lshl_add_u64 v[2:3], v[0:1], 0, s[0:1] +; GFX950-NEXT: v_lshl_add_u64 v[0:1], v[0:1], 0, s[0:1] ; GFX950-NEXT: s_mov_b64 s[0:1], src_private_base -; GFX950-NEXT: v_cmp_ne_u32_e32 vcc, s1, v3 +; GFX950-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1 ; GFX950-NEXT: ;;#ASMSTART -; GFX950-NEXT: ; def v[0:1] +; GFX950-NEXT: ; def v[2:3] ; GFX950-NEXT: ;;#ASMEND ; GFX950-NEXT: ; implicit-def: $agpr0_agpr1 ; GFX950-NEXT: s_and_saveexec_b64 s[0:1], vcc @@ -1161,22 +1163,23 @@ define void @flat_atomic_xchg_i64_ret_av_a(ptr %ptr) #0 { ; GFX950-NEXT: s_cbranch_execz .LBB16_2 ; GFX950-NEXT: ; %bb.1: ; %atomicrmw.global ; GFX950-NEXT: buffer_wbl2 sc0 sc1 -; GFX950-NEXT: flat_atomic_swap_x2 v[2:3], v[2:3], v[0:1] sc0 sc1 +; GFX950-NEXT: flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] sc0 sc1 ; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX950-NEXT: buffer_inv sc0 sc1 -; GFX950-NEXT: v_accvgpr_write_b32 a0, v2 -; GFX950-NEXT: v_accvgpr_write_b32 a1, v3 ; GFX950-NEXT: ; implicit-def: $vgpr2_vgpr3 +; GFX950-NEXT: v_accvgpr_write_b32 a0, v0 +; GFX950-NEXT: v_accvgpr_write_b32 a1, v1 +; GFX950-NEXT: ; implicit-def: $vgpr0_vgpr1 ; GFX950-NEXT: .LBB16_2: ; %Flow ; GFX950-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] ; GFX950-NEXT: s_cbranch_execz .LBB16_4 ; GFX950-NEXT: ; %bb.3: ; %atomicrmw.private -; GFX950-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3] +; GFX950-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] ; GFX950-NEXT: s_nop 1 -; GFX950-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc -; GFX950-NEXT: scratch_load_dwordx2 a[0:1], v2, off +; GFX950-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc +; GFX950-NEXT: scratch_load_dwordx2 a[0:1], v0, off ; GFX950-NEXT: s_nop 0 -; GFX950-NEXT: scratch_store_dwordx2 v2, v[0:1], off +; GFX950-NEXT: scratch_store_dwordx2 v0, v[2:3], off ; GFX950-NEXT: .LBB16_4: ; %atomicrmw.phi ; GFX950-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX950-NEXT: s_waitcnt vmcnt(1) diff --git a/llvm/test/CodeGen/AMDGPU/fold-sgpr-copy.mir b/llvm/test/CodeGen/AMDGPU/fold-sgpr-copy.mir index 103c3e3eb8bc6..e1295d4a09563 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-sgpr-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-sgpr-copy.mir @@ -17,9 +17,8 @@ body: | ... # GCN-LABEL: name: fold_sgpr_to_sgpr_copy_subreg -# GCN: %0:sreg_64 = IMPLICIT_DEF -# GCN-NEXT: %2:sgpr_32 = COPY %0.sub0 -# GCN-NEXT: S_STORE_DWORD_IMM %2, undef $sgpr10_sgpr11, 0, 0 +# GCN: %0:sreg_64_xexec = IMPLICIT_DEF +# GCN-NEXT: S_STORE_DWORD_IMM %0.sub0, undef $sgpr10_sgpr11, 0, 0 name: fold_sgpr_to_sgpr_copy_subreg body: | @@ -32,9 +31,8 @@ body: | ... # GCN-LABEL: name: fold_sgpr_to_sgpr_copy_subreg2 -# GCN: %0:sreg_64 = IMPLICIT_DEF -# GCN-NEXT: %3:sreg_32_xm0_xexec = COPY %0.sub0 -# GCN-NEXT: S_STORE_DWORD_IMM %3, undef $sgpr10_sgpr11, 0, 0 +# GCN: %0:sreg_64_xexec = IMPLICIT_DEF +# GCN-NEXT: S_STORE_DWORD_IMM %0.sub0, undef $sgpr10_sgpr11, 0, 0 name: fold_sgpr_to_sgpr_copy_subreg2 body: |