diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index b49c5a997af78..e204d6ba356b8 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -87,6 +87,8 @@ enum InstClassEnum { GLOBAL_STORE_SADDR, FLAT_LOAD, FLAT_STORE, + FLAT_LOAD_SADDR, + FLAT_STORE_SADDR, GLOBAL_LOAD, // GLOBAL_LOAD/GLOBAL_STORE are never used as the InstClass of GLOBAL_STORE // any CombineInfo, they are only ever returned by // getCommonInstClass. @@ -354,6 +356,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) { case AMDGPU::GLOBAL_STORE_DWORD_SADDR: case AMDGPU::FLAT_LOAD_DWORD: case AMDGPU::FLAT_STORE_DWORD: + case AMDGPU::FLAT_LOAD_DWORD_SADDR: + case AMDGPU::FLAT_STORE_DWORD_SADDR: return 1; case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM: case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM: @@ -367,6 +371,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) { case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR: case AMDGPU::FLAT_LOAD_DWORDX2: case AMDGPU::FLAT_STORE_DWORDX2: + case AMDGPU::FLAT_LOAD_DWORDX2_SADDR: + case AMDGPU::FLAT_STORE_DWORDX2_SADDR: return 2; case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM: case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM: @@ -380,6 +386,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) { case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR: case AMDGPU::FLAT_LOAD_DWORDX3: case AMDGPU::FLAT_STORE_DWORDX3: + case AMDGPU::FLAT_LOAD_DWORDX3_SADDR: + case AMDGPU::FLAT_STORE_DWORDX3_SADDR: return 3; case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM: case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM: @@ -393,6 +401,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) { case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR: case AMDGPU::FLAT_LOAD_DWORDX4: case AMDGPU::FLAT_STORE_DWORDX4: + case AMDGPU::FLAT_LOAD_DWORDX4_SADDR: + case AMDGPU::FLAT_STORE_DWORDX4_SADDR: return 4; case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM: case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM: @@ -575,6 +585,16 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) { case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR: case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR: return GLOBAL_STORE_SADDR; + case AMDGPU::FLAT_LOAD_DWORD_SADDR: + case AMDGPU::FLAT_LOAD_DWORDX2_SADDR: + case AMDGPU::FLAT_LOAD_DWORDX3_SADDR: + case AMDGPU::FLAT_LOAD_DWORDX4_SADDR: + return FLAT_LOAD_SADDR; + case AMDGPU::FLAT_STORE_DWORD_SADDR: + case AMDGPU::FLAT_STORE_DWORDX2_SADDR: + case AMDGPU::FLAT_STORE_DWORDX3_SADDR: + case AMDGPU::FLAT_STORE_DWORDX4_SADDR: + return FLAT_STORE_SADDR; } } @@ -661,6 +681,16 @@ static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) { case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR: case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR: return AMDGPU::GLOBAL_STORE_DWORD_SADDR; + case AMDGPU::FLAT_LOAD_DWORD_SADDR: + case AMDGPU::FLAT_LOAD_DWORDX2_SADDR: + case AMDGPU::FLAT_LOAD_DWORDX3_SADDR: + case AMDGPU::FLAT_LOAD_DWORDX4_SADDR: + return AMDGPU::FLAT_LOAD_DWORD_SADDR; + case AMDGPU::FLAT_STORE_DWORD_SADDR: + case AMDGPU::FLAT_STORE_DWORDX2_SADDR: + case AMDGPU::FLAT_STORE_DWORDX3_SADDR: + case AMDGPU::FLAT_STORE_DWORDX4_SADDR: + return AMDGPU::FLAT_STORE_DWORD_SADDR; } } @@ -776,6 +806,14 @@ static AddressRegs getRegs(unsigned Opc, const SIInstrInfo &TII) { case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR: case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR: case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR: + case AMDGPU::FLAT_LOAD_DWORD_SADDR: + case AMDGPU::FLAT_LOAD_DWORDX2_SADDR: + case AMDGPU::FLAT_LOAD_DWORDX3_SADDR: + case AMDGPU::FLAT_LOAD_DWORDX4_SADDR: + case AMDGPU::FLAT_STORE_DWORD_SADDR: + case AMDGPU::FLAT_STORE_DWORDX2_SADDR: + case AMDGPU::FLAT_STORE_DWORDX3_SADDR: + case AMDGPU::FLAT_STORE_DWORDX4_SADDR: Result.SAddr = true; [[fallthrough]]; case AMDGPU::GLOBAL_LOAD_DWORD: @@ -1875,6 +1913,28 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI, case 4: return AMDGPU::FLAT_STORE_DWORDX4; } + case FLAT_LOAD_SADDR: + switch (Width) { + default: + return 0; + case 2: + return AMDGPU::FLAT_LOAD_DWORDX2_SADDR; + case 3: + return AMDGPU::FLAT_LOAD_DWORDX3_SADDR; + case 4: + return AMDGPU::FLAT_LOAD_DWORDX4_SADDR; + } + case FLAT_STORE_SADDR: + switch (Width) { + default: + return 0; + case 2: + return AMDGPU::FLAT_STORE_DWORDX2_SADDR; + case 3: + return AMDGPU::FLAT_STORE_DWORDX3_SADDR; + case 4: + return AMDGPU::FLAT_STORE_DWORDX4_SADDR; + } case MIMG: assert(((unsigned)llvm::popcount(CI.DMask | Paired.DMask) == Width) && "No overlaps"); @@ -2508,12 +2568,14 @@ SILoadStoreOptimizer::optimizeInstsWithSameBaseAddr( OptimizeListAgain |= CI.Width + Paired.Width < 4; break; case FLAT_LOAD: + case FLAT_LOAD_SADDR: case GLOBAL_LOAD: case GLOBAL_LOAD_SADDR: NewMI = mergeFlatLoadPair(CI, Paired, Where->I); OptimizeListAgain |= CI.Width + Paired.Width < 4; break; case FLAT_STORE: + case FLAT_STORE_SADDR: case GLOBAL_STORE: case GLOBAL_STORE_SADDR: NewMI = mergeFlatStorePair(CI, Paired, Where->I); diff --git a/llvm/test/CodeGen/AMDGPU/merge-flat-saddr-load-store.mir b/llvm/test/CodeGen/AMDGPU/merge-flat-saddr-load-store.mir new file mode 100644 index 0000000000000..1c133c6114ec2 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/merge-flat-saddr-load-store.mir @@ -0,0 +1,338 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass=si-load-store-opt -o - %s | FileCheck -check-prefix=GCN %s + +--- +name: merge_flat_load_dword_saddr_2 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_flat_load_dword_saddr_2 + ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[FLAT_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64_align2 = FLAT_LOAD_DWORDX2_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from `ptr addrspace(1) undef`, align 4, addrspace 1) + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[FLAT_LOAD_DWORDX2_SADDR]].sub0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX2_SADDR]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]] + %0:sreg_64_xexec_xnull = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1) + %3:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1) + S_NOP 0, implicit %2, implicit %3 +... + +--- +name: merge_flat_load_dword_saddr_3 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_flat_load_dword_saddr_3 + ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[FLAT_LOAD_DWORDX3_SADDR:%[0-9]+]]:vreg_96_align2 = FLAT_LOAD_DWORDX3_SADDR [[DEF]], [[DEF1]], 0, 1, implicit $exec, implicit $flat_scr :: (load (s96) from `ptr addrspace(1) undef`, align 4, addrspace 1) + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY [[FLAT_LOAD_DWORDX3_SADDR]].sub0_sub1 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX3_SADDR]].sub2 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY1]] + %0:sreg_64_xexec_xnull = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1, 0, 1, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1) + %3:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1, 4, 1, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1) + %4:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1, 8, 1, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1) + S_NOP 0, implicit %2, implicit %3, implicit %4 +... + +--- +name: merge_flat_load_dword_saddr_4 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_flat_load_dword_saddr_4 + ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[FLAT_LOAD_DWORDX4_SADDR:%[0-9]+]]:vreg_128_align2 = FLAT_LOAD_DWORDX4_SADDR [[DEF]], [[DEF1]], 0, 2, implicit $exec, implicit $flat_scr :: (load (s128) from `ptr addrspace(1) undef`, align 4, addrspace 1) + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[FLAT_LOAD_DWORDX4_SADDR]].sub0_sub1_sub2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX4_SADDR]].sub3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY [[COPY]].sub0_sub1 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub2 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[COPY2]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY3]], implicit [[COPY1]] + %0:sreg_64_xexec_xnull = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1, 0, 2, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1) + %3:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1, 4, 2, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1) + %4:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1, 8, 2, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1) + %5:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1, 12, 2, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1) + S_NOP 0, implicit %2, implicit %3, implicit %4, implicit %5 +... + +--- +name: merge_flat_load_dword_saddr_6 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_flat_load_dword_saddr_6 + ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[FLAT_LOAD_DWORDX4_SADDR:%[0-9]+]]:vreg_128_align2 = FLAT_LOAD_DWORDX4_SADDR [[DEF]], [[DEF1]], 4, 3, implicit $exec, implicit $flat_scr :: (load (s128) from `ptr addrspace(1) undef`, align 4, addrspace 1) + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[FLAT_LOAD_DWORDX4_SADDR]].sub0_sub1_sub2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX4_SADDR]].sub3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY [[COPY]].sub0_sub1 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub2 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[COPY2]].sub1 + ; GCN-NEXT: [[FLAT_LOAD_DWORDX2_SADDR:%[0-9]+]]:vreg_64_align2 = FLAT_LOAD_DWORDX2_SADDR [[DEF]], [[DEF1]], 20, 3, implicit $exec, implicit $flat_scr :: (load (s64) from `ptr addrspace(1) undef`, align 4, addrspace 1) + ; GCN-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[FLAT_LOAD_DWORDX2_SADDR]].sub0 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX2_SADDR]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY3]], implicit [[COPY1]], implicit [[COPY6]], implicit [[COPY7]] + %0:sreg_64_xexec_xnull = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1, 4, 3, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1) + %3:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1, 8, 3, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1) + %4:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1, 12, 3, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1) + %5:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1, 16, 3, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1) + %6:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1, 20, 3, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1) + %7:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1, 24, 3, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1) + S_NOP 0, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7 +... + +--- +name: merge_flat_load_dwordx2_saddr +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_flat_load_dwordx2_saddr + ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[FLAT_LOAD_DWORDX4_SADDR:%[0-9]+]]:vreg_128_align2 = FLAT_LOAD_DWORDX4_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from `ptr addrspace(1) undef`, align 4, addrspace 1) + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY [[FLAT_LOAD_DWORDX4_SADDR]].sub0_sub1 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY killed [[FLAT_LOAD_DWORDX4_SADDR]].sub2_sub3 + ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]] + %0:sreg_64_xexec_xnull = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:vreg_64_align2 = FLAT_LOAD_DWORDX2_SADDR %0, %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from `ptr addrspace(1) undef`, align 4, addrspace 1) + %3:vreg_64_align2 = FLAT_LOAD_DWORDX2_SADDR %0, %1, 8, 0, implicit $exec, implicit $flat_scr :: (load (s64) from `ptr addrspace(1) undef`, align 4, addrspace 1) + S_NOP 0, implicit %2, implicit %3 +... + +--- +name: no_merge_flat_load_dword_and_flat_load_dword_saddr +body: | + bb.0.entry: + + ; GCN-LABEL: name: no_merge_flat_load_dword_and_flat_load_dword_saddr + ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[DEF1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: [[FLAT_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD_SADDR [[DEF]], [[DEF1]].sub0, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: S_NOP 0, implicit [[FLAT_LOAD_DWORD]], implicit [[FLAT_LOAD_DWORD_SADDR]] + %0:sreg_64_xexec_xnull = IMPLICIT_DEF + %1:vreg_64_align2 = IMPLICIT_DEF + %2:vgpr_32 = FLAT_LOAD_DWORD %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1) + %3:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1.sub0, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1) + S_NOP 0, implicit %2, implicit %3 +... + +--- +name: no_merge_flat_load_dword_saddr_different_saddr +body: | + bb.0.entry: + + ; GCN-LABEL: name: no_merge_flat_load_dword_saddr_different_saddr + ; GCN: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[FLAT_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD_SADDR [[DEF]].sub0_sub1, [[DEF1]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: [[FLAT_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD_SADDR [[DEF]].sub2_sub3, [[DEF1]], 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: S_NOP 0, implicit [[FLAT_LOAD_DWORD_SADDR]], implicit [[FLAT_LOAD_DWORD_SADDR1]] + %0:sgpr_128 = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0.sub0_sub1, %1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1) + %3:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0.sub2_sub3, %1, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1) + S_NOP 0, implicit %2, implicit %3 +... + +--- +name: no_merge_flat_load_dword_saddr_different_vaddr +body: | + bb.0.entry: + + ; GCN-LABEL: name: no_merge_flat_load_dword_saddr_different_vaddr + ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[FLAT_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD_SADDR [[DEF]], [[DEF1]].sub0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: [[FLAT_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD_SADDR [[DEF]], [[DEF1]].sub1, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: S_NOP 0, implicit [[FLAT_LOAD_DWORD_SADDR]], implicit [[FLAT_LOAD_DWORD_SADDR1]] + %0:sreg_64_xexec_xnull = IMPLICIT_DEF + %1:vreg_64_align2 = IMPLICIT_DEF + %2:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1.sub0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1) + %3:vgpr_32 = FLAT_LOAD_DWORD_SADDR %0, %1.sub1, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr addrspace(1) undef`, align 4, addrspace 1) + S_NOP 0, implicit %2, implicit %3 +... +--- +name: merge_flat_store_dword_saddr_2 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_flat_store_dword_saddr_2 + ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF2]], %subreg.sub0, [[DEF3]], %subreg.sub1 + ; GCN-NEXT: FLAT_STORE_DWORDX2_SADDR [[DEF1]], killed [[REG_SEQUENCE]], [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into `ptr addrspace(1) undef`, align 4, addrspace 1) + %0:sreg_64_xexec_xnull = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + %3:vgpr_32 = IMPLICIT_DEF + FLAT_STORE_DWORD_SADDR %1, %2, %0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(1) undef`, align 4, addrspace 1) + FLAT_STORE_DWORD_SADDR %1, %3, %0, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(1) undef`, align 4, addrspace 1) +... + +--- +name: merge_flat_store_dword_saddr_3 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_flat_store_dword_saddr_3 + ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF2]], %subreg.sub0, [[DEF3]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2 + ; GCN-NEXT: FLAT_STORE_DWORDX3_SADDR [[DEF1]], killed [[REG_SEQUENCE1]], [[DEF]], 4, 1, implicit $exec, implicit $flat_scr :: (store (s96) into `ptr addrspace(1) undef`, align 4, addrspace 1) + %0:sreg_64_xexec_xnull = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + %3:vgpr_32 = IMPLICIT_DEF + %4:vgpr_32 = IMPLICIT_DEF + FLAT_STORE_DWORD_SADDR %1, %2, %0, 4, 1, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(1) undef`, align 4, addrspace 1) + FLAT_STORE_DWORD_SADDR %1, %3, %0, 8, 1, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(1) undef`, align 4, addrspace 1) + FLAT_STORE_DWORD_SADDR %1, %4, %0, 12, 1, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(1) undef`, align 4, addrspace 1) +... + +--- +name: merge_flat_store_dword_saddr_4 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_flat_store_dword_saddr_4 + ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF2]], %subreg.sub0, [[DEF3]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF5]], %subreg.sub3 + ; GCN-NEXT: FLAT_STORE_DWORDX4_SADDR [[DEF1]], killed [[REG_SEQUENCE2]], [[DEF]], 4, 2, implicit $exec, implicit $flat_scr :: (store (s128) into `ptr addrspace(1) undef`, align 4, addrspace 1) + %0:sreg_64_xexec_xnull = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + %3:vgpr_32 = IMPLICIT_DEF + %4:vgpr_32 = IMPLICIT_DEF + %5:vgpr_32 = IMPLICIT_DEF + FLAT_STORE_DWORD_SADDR %1, %2, %0, 4, 2, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(1) undef`, align 4, addrspace 1) + FLAT_STORE_DWORD_SADDR %1, %3, %0, 8, 2, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(1) undef`, align 4, addrspace 1) + FLAT_STORE_DWORD_SADDR %1, %4, %0, 12, 2, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(1) undef`, align 4, addrspace 1) + FLAT_STORE_DWORD_SADDR %1, %5, %0, 16, 2, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(1) undef`, align 4, addrspace 1) +... + +--- +name: merge_flat_store_dword_saddr_6 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_flat_store_dword_saddr_6 + ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF2]], %subreg.sub0, [[DEF3]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF4]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF5]], %subreg.sub3 + ; GCN-NEXT: FLAT_STORE_DWORDX4_SADDR [[DEF1]], killed [[REG_SEQUENCE2]], [[DEF]], 4, 3, implicit $exec, implicit $flat_scr :: (store (s128) into `ptr addrspace(1) undef`, align 4, addrspace 1) + ; GCN-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF6]], %subreg.sub0, [[DEF7]], %subreg.sub1 + ; GCN-NEXT: FLAT_STORE_DWORDX2_SADDR [[DEF1]], killed [[REG_SEQUENCE3]], [[DEF]], 20, 3, implicit $exec, implicit $flat_scr :: (store (s64) into `ptr addrspace(1) undef`, align 4, addrspace 1) + %0:sreg_64_xexec_xnull = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + %3:vgpr_32 = IMPLICIT_DEF + %4:vgpr_32 = IMPLICIT_DEF + %5:vgpr_32 = IMPLICIT_DEF + %6:vgpr_32 = IMPLICIT_DEF + %7:vgpr_32 = IMPLICIT_DEF + FLAT_STORE_DWORD_SADDR %1, %2, %0, 4, 3, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(1) undef`, align 4, addrspace 1) + FLAT_STORE_DWORD_SADDR %1, %3, %0, 8, 3, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(1) undef`, align 4, addrspace 1) + FLAT_STORE_DWORD_SADDR %1, %4, %0, 12, 3, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(1) undef`, align 4, addrspace 1) + FLAT_STORE_DWORD_SADDR %1, %5, %0, 16, 3, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(1) undef`, align 4, addrspace 1) + FLAT_STORE_DWORD_SADDR %1, %6, %0, 20, 3, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(1) undef`, align 4, addrspace 1) + FLAT_STORE_DWORD_SADDR %1, %7, %0, 24, 3, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(1) undef`, align 4, addrspace 1) +... + +--- +name: no_merge_flat_store_dword_saddr_with_flat_store_dword +body: | + bb.0.entry: + + ; GCN-LABEL: name: no_merge_flat_store_dword_saddr_with_flat_store_dword + ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: FLAT_STORE_DWORD_SADDR [[DEF1]].sub0, [[DEF2]], [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD [[DEF1]], [[DEF3]], 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + %0:sreg_64_xexec_xnull = IMPLICIT_DEF + %1:vreg_64_align2 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + %3:vgpr_32 = IMPLICIT_DEF + FLAT_STORE_DWORD_SADDR %1.sub0, %2, %0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(1) undef`, align 4, addrspace 1) + FLAT_STORE_DWORD %1, %3, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(1) undef`, align 4, addrspace 1) +... + +--- +name: no_merge_flat_store_dword_saddr_different_vaddr +body: | + bb.0.entry: + + ; GCN-LABEL: name: no_merge_flat_store_dword_saddr_different_vaddr + ; GCN: [[DEF:%[0-9]+]]:sreg_64_xexec_xnull = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: FLAT_STORE_DWORD_SADDR [[DEF1]].sub0, [[DEF2]], [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD_SADDR [[DEF1]].sub1, [[DEF3]], [[DEF]], 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + %0:sreg_64_xexec_xnull = IMPLICIT_DEF + %1:vreg_64_align2 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + %3:vgpr_32 = IMPLICIT_DEF + FLAT_STORE_DWORD_SADDR %1.sub0, %2, %0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(1) undef`, align 4, addrspace 1) + FLAT_STORE_DWORD_SADDR %1.sub1, %3, %0, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(1) undef`, align 4, addrspace 1) +... + +--- +name: no_merge_flat_store_dword_saddr_different_saddr +body: | + bb.0.entry: + + ; GCN-LABEL: name: no_merge_flat_store_dword_saddr_different_saddr + ; GCN: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: FLAT_STORE_DWORD_SADDR [[DEF1]], [[DEF2]], [[DEF]].sub0_sub1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: FLAT_STORE_DWORD_SADDR [[DEF1]], [[DEF3]], [[DEF]].sub2_sub3, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + %0:sgpr_128 = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + %3:vgpr_32 = IMPLICIT_DEF + FLAT_STORE_DWORD_SADDR %1, %2, %0.sub0_sub1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(1) undef`, align 4, addrspace 1) + FLAT_STORE_DWORD_SADDR %1, %3, %0.sub2_sub3, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr addrspace(1) undef`, align 4, addrspace 1) +...