diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp index c28314f6ab124..d48c00688e878 100644 --- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -316,14 +316,12 @@ class Vectorizer { /// !IsLoad) to ChainBegin -- i.e. there are no intervening may-alias /// instructions. /// - /// The map ChainElemOffsets must contain all of the elements in - /// [ChainBegin, ChainElem] and their offsets from some arbitrary base - /// address. It's ok if it contains additional entries. + /// The set ChainSet must contain all of the elements in + /// [ChainBegin, ChainElem]. It's ok if it contains additional entries. template - bool isSafeToMove( - Instruction *ChainElem, Instruction *ChainBegin, - const DenseMap &ChainOffsets, - BatchAAResults &BatchAA); + bool isSafeToMove(Instruction *ChainElem, Instruction *ChainBegin, + const DenseSet &ChainSet, + BatchAAResults &BatchAA); /// Merges the equivalence classes if they have underlying objects that differ /// by one level of indirection (i.e., one is a getelementptr and the other is @@ -540,9 +538,9 @@ std::vector Vectorizer::splitChainByMayAliasInstrs(Chain &C) { // We know that elements in the chain with nonverlapping offsets can't // alias, but AA may not be smart enough to figure this out. Use a // hashtable. - DenseMap ChainOffsets; + DenseSet ChainSet; for (const auto &E : C) - ChainOffsets.insert({&*E.Inst, E.OffsetFromLeader}); + ChainSet.insert(E.Inst); // Across a single invocation of this function the IR is not changing, so // using a batched Alias Analysis is safe and can reduce compile time. 
@@ -573,8 +571,8 @@ std::vector Vectorizer::splitChainByMayAliasInstrs(Chain &C) { SmallVector NewChain; NewChain.emplace_back(*ChainBegin); for (auto ChainIt = std::next(ChainBegin); ChainIt != ChainEnd; ++ChainIt) { - if (isSafeToMove(ChainIt->Inst, NewChain.front().Inst, - ChainOffsets, BatchAA)) { + if (isSafeToMove(ChainIt->Inst, NewChain.front().Inst, ChainSet, + BatchAA)) { LLVM_DEBUG(dbgs() << "LSV: No intervening may-alias instrs; can merge " << *ChainIt->Inst << " into " << *ChainBegin->Inst << "\n"); @@ -1037,10 +1035,9 @@ bool Vectorizer::vectorizeChain(Chain &C) { } template -bool Vectorizer::isSafeToMove( - Instruction *ChainElem, Instruction *ChainBegin, - const DenseMap &ChainOffsets, - BatchAAResults &BatchAA) { +bool Vectorizer::isSafeToMove(Instruction *ChainElem, Instruction *ChainBegin, + const DenseSet &ChainSet, + BatchAAResults &BatchAA) { LLVM_DEBUG(dbgs() << "LSV: isSafeToMove(" << *ChainElem << " -> " << *ChainBegin << ")\n"); @@ -1066,10 +1063,6 @@ bool Vectorizer::isSafeToMove( return BasicBlock::iterator(ChainBegin); }()); - const APInt &ChainElemOffset = ChainOffsets.at(ChainElem); - const unsigned ChainElemSize = - DL.getTypeStoreSize(getLoadStoreType(ChainElem)); - for (; BBIt != BBItEnd; ++BBIt) { Instruction *I = &*BBIt; @@ -1084,39 +1077,10 @@ bool Vectorizer::isSafeToMove( if (!IsLoadChain && isInvariantLoad(I)) continue; - // If I is in the chain, we can tell whether it aliases ChainIt by checking - // what offset ChainIt accesses. This may be better than AA is able to do. - // - // We should really only have duplicate offsets for stores (the duplicate - // loads should be CSE'ed), but in case we have a duplicate load, we'll - // split the chain so we don't have to handle this case specially. 
- if (auto OffsetIt = ChainOffsets.find(I); OffsetIt != ChainOffsets.end()) { - // I and ChainElem overlap if: - // - I and ChainElem have the same offset, OR - // - I's offset is less than ChainElem's, but I touches past the - // beginning of ChainElem, OR - // - ChainElem's offset is less than I's, but ChainElem touches past the - // beginning of I. - const APInt &IOffset = OffsetIt->second; - unsigned IElemSize = DL.getTypeStoreSize(getLoadStoreType(I)); - if (IOffset == ChainElemOffset || - (IOffset.sle(ChainElemOffset) && - (IOffset + IElemSize).sgt(ChainElemOffset)) || - (ChainElemOffset.sle(IOffset) && - (ChainElemOffset + ChainElemSize).sgt(OffsetIt->second))) { - LLVM_DEBUG({ - // Double check that AA also sees this alias. If not, we probably - // have a bug. - ModRefInfo MR = - BatchAA.getModRefInfo(I, MemoryLocation::get(ChainElem)); - assert(IsLoadChain ? isModSet(MR) : isModOrRefSet(MR)); - dbgs() << "LSV: Found alias in chain: " << *I << "\n"; - }); - return false; // We found an aliasing instruction; bail. - } - - continue; // We're confident there's no alias. - } + // Allow on-chain aliasing because write-order is preserved when stores are + // vectorized. 
+ if (ChainSet.count(I)) + continue; LLVM_DEBUG(dbgs() << "LSV: Querying AA for " << *I << "\n"); ModRefInfo MR = BatchAA.getModRefInfo(I, MemoryLocation::get(ChainElem)); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll index 01367a5fad447..731d00dcde77d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll @@ -13,7 +13,6 @@ define amdgpu_ps void @test_div_scale(float %arg0, float %arg1) { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), -1 ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[INT1]](s1) - ; CHECK-NEXT: G_STORE [[INT]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) poison`, addrspace 1) ; CHECK-NEXT: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) poison`, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %call = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %arg0, float %arg1, i1 true) diff --git a/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll b/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll index 831d10480c51c..35bfb6664904a 100644 --- a/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll +++ b/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll @@ -1696,8 +1696,6 @@ define void @void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg ; GFX9-LABEL: void_func_i32_v2float_inreg: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, s16 -; GFX9-NEXT: global_store_dword v[0:1], v0, off ; GFX9-NEXT: v_mov_b32_e32 v0, s17 ; GFX9-NEXT: v_mov_b32_e32 v1, s18 ; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off @@ -1707,10 +1705,7 @@ define void @void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x 
float> inreg %arg ; GFX11-LABEL: void_func_i32_v2float_inreg: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v1, s2 -; GFX11-NEXT: v_mov_b32_e32 v0, s1 -; GFX11-NEXT: s_clause 0x1 -; GFX11-NEXT: global_store_b32 v[0:1], v2, off +; GFX11-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s2 ; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off ; GFX11-NEXT: s_setpc_b64 s[30:31] store i32 %arg0, ptr addrspace(1) poison diff --git a/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-preserve-cc.ll b/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-preserve-cc.ll index b1d382040addc..b6326d02fee12 100644 --- a/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-preserve-cc.ll +++ b/llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-preserve-cc.ll @@ -371,37 +371,33 @@ define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_cc_struct( {ptr, ; GISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GISEL-GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY6]], %subreg.sub3 - ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr8 - ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr9 - ; GISEL-GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr10 - ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr11 
- ; GISEL-GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY $vgpr12 - ; GISEL-GFX11-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY $vgpr13 - ; GISEL-GFX11-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY $vgpr14 - ; GISEL-GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY10]], %subreg.sub0, [[COPY11]], %subreg.sub1, [[COPY12]], %subreg.sub2, [[COPY13]], %subreg.sub3 + ; GISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; GISEL-GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr12 + ; GISEL-GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY $vgpr13 + ; GISEL-GFX11-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY $vgpr14 + ; GISEL-GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3 ; GISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; GISEL-GFX11-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX11-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; GISEL-GFX11-NEXT: GLOBAL_STORE_DWORDX2 [[COPY15]], [[COPY14]], 0, 0, implicit $exec :: (store (p0) into `ptr addrspace(1) poison`, addrspace 1) - ; GISEL-GFX11-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[DEF]] + ; GISEL-GFX11-NEXT: GLOBAL_STORE_DWORDX2 [[COPY14]], [[COPY13]], 0, 0, implicit $exec :: (store 
(p0) into `ptr addrspace(1) poison`, addrspace 1) + ; GISEL-GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 + ; GISEL-GFX11-NEXT: [[COPY15:%[0-9]+]]:vreg_128 = COPY [[REG_SEQUENCE3]] + ; GISEL-GFX11-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[DEF]] + ; GISEL-GFX11-NEXT: GLOBAL_STORE_DWORDX4 [[COPY16]], [[COPY15]], 0, 0, implicit $exec :: (store (<4 x s32>) into `ptr addrspace(1) poison`, align 4, addrspace 1) ; GISEL-GFX11-NEXT: [[COPY17:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; GISEL-GFX11-NEXT: GLOBAL_STORE_DWORD [[COPY17]], [[COPY16]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison`, addrspace 1) - ; GISEL-GFX11-NEXT: [[COPY18:%[0-9]+]]:vreg_128 = COPY [[REG_SEQUENCE1]] + ; GISEL-GFX11-NEXT: GLOBAL_STORE_DWORDX2 [[COPY17]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (store (p0) into `ptr addrspace(1) poison`, align 16, addrspace 1) + ; GISEL-GFX11-NEXT: [[COPY18:%[0-9]+]]:vreg_64 = COPY [[DEF]] + ; GISEL-GFX11-NEXT: GLOBAL_STORE_DWORD [[COPY18]], [[COPY8]], 8, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison` + 8, align 8, addrspace 1) ; GISEL-GFX11-NEXT: [[COPY19:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; GISEL-GFX11-NEXT: GLOBAL_STORE_DWORDX4 [[COPY19]], [[COPY18]], 0, 0, implicit $exec :: (store (<4 x s32>) into `ptr addrspace(1) poison`, addrspace 1) - ; GISEL-GFX11-NEXT: [[COPY20:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; GISEL-GFX11-NEXT: GLOBAL_STORE_DWORDX2 [[COPY20]], [[REG_SEQUENCE2]], 0, 0, implicit $exec :: (store (p0) into `ptr addrspace(1) poison`, align 16, addrspace 1) - ; GISEL-GFX11-NEXT: [[COPY21:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; GISEL-GFX11-NEXT: GLOBAL_STORE_DWORD [[COPY21]], [[COPY9]], 8, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison` + 8, align 8, addrspace 1) - ; GISEL-GFX11-NEXT: [[COPY22:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; GISEL-GFX11-NEXT: GLOBAL_STORE_DWORDX4 [[COPY22]], 
[[REG_SEQUENCE3]], 16, 0, implicit $exec :: (store (<4 x s32>) into `ptr addrspace(1) poison` + 16, addrspace 1) + ; GISEL-GFX11-NEXT: GLOBAL_STORE_DWORDX4 [[COPY19]], [[REG_SEQUENCE2]], 16, 0, implicit $exec :: (store (<4 x s32>) into `ptr addrspace(1) poison` + 16, addrspace 1) ; GISEL-GFX11-NEXT: S_ENDPGM 0 ; ; GISEL-GFX10-LABEL: name: amdgpu_cs_chain_preserve_cc_struct @@ -411,42 +407,38 @@ define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_cc_struct( {ptr, ; GISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 ; GISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 - ; GISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GISEL-GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY6]], %subreg.sub3 - ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr8 - ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr9 - ; GISEL-GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY8]], %subreg.sub1 - ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr10 - ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY $vgpr12 - ; GISEL-GFX10-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY $vgpr13 - ; GISEL-GFX10-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY $vgpr14 - ; GISEL-GFX10-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY10]], %subreg.sub0, [[COPY11]], %subreg.sub1, [[COPY12]], %subreg.sub2, [[COPY13]], %subreg.sub3 + ; GISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; 
GISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; GISEL-GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr11 + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr12 + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY $vgpr13 + ; GISEL-GFX10-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY $vgpr14 + ; GISEL-GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY9]], %subreg.sub0, [[COPY10]], %subreg.sub1, [[COPY11]], %subreg.sub2, [[COPY12]], %subreg.sub3 ; GISEL-GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; GISEL-GFX10-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX10-NEXT: [[COPY15:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; GISEL-GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[COPY15]], [[COPY14]], 0, 0, implicit $exec :: (store (p0) into `ptr addrspace(1) poison`, addrspace 1) - ; GISEL-GFX10-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[DEF]] + ; GISEL-GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[COPY14]], [[COPY13]], 0, 0, implicit $exec :: (store (p0) into `ptr addrspace(1) poison`, addrspace 1) + ; GISEL-GFX10-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 + ; GISEL-GFX10-NEXT: [[COPY15:%[0-9]+]]:vreg_128 = COPY [[REG_SEQUENCE3]] + ; GISEL-GFX10-NEXT: [[COPY16:%[0-9]+]]:vreg_64 = COPY [[DEF]] + ; GISEL-GFX10-NEXT: GLOBAL_STORE_DWORDX4 [[COPY16]], [[COPY15]], 0, 0, 
implicit $exec :: (store (<4 x s32>) into `ptr addrspace(1) poison`, align 4, addrspace 1) ; GISEL-GFX10-NEXT: [[COPY17:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; GISEL-GFX10-NEXT: GLOBAL_STORE_DWORD [[COPY17]], [[COPY16]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison`, addrspace 1) - ; GISEL-GFX10-NEXT: [[COPY18:%[0-9]+]]:vreg_128 = COPY [[REG_SEQUENCE1]] + ; GISEL-GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[COPY17]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (store (p0) into `ptr addrspace(1) poison`, align 16, addrspace 1) + ; GISEL-GFX10-NEXT: [[COPY18:%[0-9]+]]:vreg_64 = COPY [[DEF]] + ; GISEL-GFX10-NEXT: GLOBAL_STORE_DWORD [[COPY18]], [[COPY8]], 8, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison` + 8, align 8, addrspace 1) ; GISEL-GFX10-NEXT: [[COPY19:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; GISEL-GFX10-NEXT: GLOBAL_STORE_DWORDX4 [[COPY19]], [[COPY18]], 0, 0, implicit $exec :: (store (<4 x s32>) into `ptr addrspace(1) poison`, addrspace 1) - ; GISEL-GFX10-NEXT: [[COPY20:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; GISEL-GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[COPY20]], [[REG_SEQUENCE2]], 0, 0, implicit $exec :: (store (p0) into `ptr addrspace(1) poison`, align 16, addrspace 1) - ; GISEL-GFX10-NEXT: [[COPY21:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; GISEL-GFX10-NEXT: GLOBAL_STORE_DWORD [[COPY21]], [[COPY9]], 8, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison` + 8, align 8, addrspace 1) - ; GISEL-GFX10-NEXT: [[COPY22:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; GISEL-GFX10-NEXT: GLOBAL_STORE_DWORDX4 [[COPY22]], [[REG_SEQUENCE3]], 16, 0, implicit $exec :: (store (<4 x s32>) into `ptr addrspace(1) poison` + 16, addrspace 1) + ; GISEL-GFX10-NEXT: GLOBAL_STORE_DWORDX4 [[COPY19]], [[REG_SEQUENCE2]], 16, 0, implicit $exec :: (store (<4 x s32>) into `ptr addrspace(1) poison` + 16, addrspace 1) ; GISEL-GFX10-NEXT: S_ENDPGM 0 ; ; DAGISEL-GFX11-WF32-LABEL: name: amdgpu_cs_chain_preserve_cc_struct ; DAGISEL-GFX11-WF32: bb.0 (%ir-block.0): - ; 
DAGISEL-GFX11-WF32-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14 + ; DAGISEL-GFX11-WF32-NEXT: liveins: $sgpr0, $sgpr1, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14 ; DAGISEL-GFX11-WF32-NEXT: {{ $}} ; DAGISEL-GFX11-WF32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr14 ; DAGISEL-GFX11-WF32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr13 @@ -459,44 +451,39 @@ define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_cc_struct( {ptr, ; DAGISEL-GFX11-WF32-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr5 ; DAGISEL-GFX11-WF32-NEXT: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr4 ; DAGISEL-GFX11-WF32-NEXT: [[COPY10:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; DAGISEL-GFX11-WF32-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; DAGISEL-GFX11-WF32-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; DAGISEL-GFX11-WF32-NEXT: [[COPY13:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX11-WF32-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; DAGISEL-GFX11-WF32-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; DAGISEL-GFX11-WF32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[COPY]], %subreg.sub3 - ; DAGISEL-GFX11-WF32-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; DAGISEL-GFX11-WF32-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; DAGISEL-GFX11-WF32-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; DAGISEL-GFX11-WF32-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; DAGISEL-GFX11-WF32-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY14]], %subreg.sub0, [[COPY15]], %subreg.sub1, [[COPY16]], %subreg.sub2, [[COPY17]], %subreg.sub3 + ; DAGISEL-GFX11-WF32-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] + ; DAGISEL-GFX11-WF32-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; DAGISEL-GFX11-WF32-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; 
DAGISEL-GFX11-WF32-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; DAGISEL-GFX11-WF32-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY13]], %subreg.sub0, [[COPY14]], %subreg.sub1, [[COPY15]], %subreg.sub2, [[COPY16]], %subreg.sub3 ; DAGISEL-GFX11-WF32-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; DAGISEL-GFX11-WF32-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY13]] - ; DAGISEL-GFX11-WF32-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; DAGISEL-GFX11-WF32-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY18]], %subreg.sub0, [[COPY19]], %subreg.sub1 + ; DAGISEL-GFX11-WF32-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] + ; DAGISEL-GFX11-WF32-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] + ; DAGISEL-GFX11-WF32-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY17]], %subreg.sub0, [[COPY18]], %subreg.sub1 ; DAGISEL-GFX11-WF32-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; DAGISEL-GFX11-WF32-NEXT: [[COPY20:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; DAGISEL-GFX11-WF32-NEXT: GLOBAL_STORE_DWORDX2 [[COPY20]], killed [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, addrspace 1) + ; DAGISEL-GFX11-WF32-NEXT: [[COPY19:%[0-9]+]]:vreg_64 = COPY [[DEF]] + ; DAGISEL-GFX11-WF32-NEXT: GLOBAL_STORE_DWORDX2 [[COPY19]], killed [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, addrspace 1) ; DAGISEL-GFX11-WF32-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; DAGISEL-GFX11-WF32-NEXT: [[COPY21:%[0-9]+]]:vreg_64 = COPY [[DEF1]] - ; DAGISEL-GFX11-WF32-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; DAGISEL-GFX11-WF32-NEXT: GLOBAL_STORE_DWORD [[COPY21]], [[COPY22]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison`, addrspace 1) + ; DAGISEL-GFX11-WF32-NEXT: [[COPY20:%[0-9]+]]:vreg_64 = COPY [[DEF1]] + ; DAGISEL-GFX11-WF32-NEXT: GLOBAL_STORE_DWORDX4 [[COPY20]], killed 
[[REG_SEQUENCE1]], 0, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison`, align 4, addrspace 1) ; DAGISEL-GFX11-WF32-NEXT: [[DEF2:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; DAGISEL-GFX11-WF32-NEXT: [[COPY23:%[0-9]+]]:vreg_64 = COPY [[DEF2]] - ; DAGISEL-GFX11-WF32-NEXT: GLOBAL_STORE_DWORDX4 [[COPY23]], killed [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison`, addrspace 1) + ; DAGISEL-GFX11-WF32-NEXT: [[COPY21:%[0-9]+]]:vreg_64 = COPY [[DEF2]] + ; DAGISEL-GFX11-WF32-NEXT: GLOBAL_STORE_DWORDX4 [[COPY21]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison` + 16, addrspace 1) ; DAGISEL-GFX11-WF32-NEXT: [[DEF3:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; DAGISEL-GFX11-WF32-NEXT: [[COPY24:%[0-9]+]]:vreg_64 = COPY [[DEF3]] - ; DAGISEL-GFX11-WF32-NEXT: GLOBAL_STORE_DWORDX4 [[COPY24]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison` + 16, addrspace 1) + ; DAGISEL-GFX11-WF32-NEXT: [[COPY22:%[0-9]+]]:vreg_64 = COPY [[DEF3]] + ; DAGISEL-GFX11-WF32-NEXT: GLOBAL_STORE_DWORD [[COPY22]], [[COPY4]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison` + 8, align 8, basealign 16, addrspace 1) ; DAGISEL-GFX11-WF32-NEXT: [[DEF4:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; DAGISEL-GFX11-WF32-NEXT: [[COPY25:%[0-9]+]]:vreg_64 = COPY [[DEF4]] - ; DAGISEL-GFX11-WF32-NEXT: GLOBAL_STORE_DWORD [[COPY25]], [[COPY4]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison` + 8, align 8, basealign 16, addrspace 1) - ; DAGISEL-GFX11-WF32-NEXT: [[DEF5:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; DAGISEL-GFX11-WF32-NEXT: [[COPY26:%[0-9]+]]:vreg_64 = COPY [[DEF5]] - ; DAGISEL-GFX11-WF32-NEXT: [[COPY27:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]] - ; DAGISEL-GFX11-WF32-NEXT: GLOBAL_STORE_DWORDX2 [[COPY26]], killed [[COPY27]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, align 16, addrspace 1) + ; DAGISEL-GFX11-WF32-NEXT: 
[[COPY23:%[0-9]+]]:vreg_64 = COPY [[DEF4]] + ; DAGISEL-GFX11-WF32-NEXT: [[COPY24:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]] + ; DAGISEL-GFX11-WF32-NEXT: GLOBAL_STORE_DWORDX2 [[COPY23]], killed [[COPY24]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, align 16, addrspace 1) ; DAGISEL-GFX11-WF32-NEXT: S_ENDPGM 0 ; ; DAGISEL-GFX11-WF64-LABEL: name: amdgpu_cs_chain_preserve_cc_struct ; DAGISEL-GFX11-WF64: bb.0 (%ir-block.0): - ; DAGISEL-GFX11-WF64-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14 + ; DAGISEL-GFX11-WF64-NEXT: liveins: $sgpr0, $sgpr1, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14 ; DAGISEL-GFX11-WF64-NEXT: {{ $}} ; DAGISEL-GFX11-WF64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr14 ; DAGISEL-GFX11-WF64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr13 @@ -509,44 +496,39 @@ define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_cc_struct( {ptr, ; DAGISEL-GFX11-WF64-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr5 ; DAGISEL-GFX11-WF64-NEXT: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr4 ; DAGISEL-GFX11-WF64-NEXT: [[COPY10:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; DAGISEL-GFX11-WF64-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; DAGISEL-GFX11-WF64-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; DAGISEL-GFX11-WF64-NEXT: [[COPY13:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX11-WF64-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; DAGISEL-GFX11-WF64-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; DAGISEL-GFX11-WF64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[COPY]], %subreg.sub3 - ; DAGISEL-GFX11-WF64-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; DAGISEL-GFX11-WF64-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; DAGISEL-GFX11-WF64-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; DAGISEL-GFX11-WF64-NEXT: 
[[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; DAGISEL-GFX11-WF64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY14]], %subreg.sub0, [[COPY15]], %subreg.sub1, [[COPY16]], %subreg.sub2, [[COPY17]], %subreg.sub3 + ; DAGISEL-GFX11-WF64-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] + ; DAGISEL-GFX11-WF64-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; DAGISEL-GFX11-WF64-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; DAGISEL-GFX11-WF64-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; DAGISEL-GFX11-WF64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY13]], %subreg.sub0, [[COPY14]], %subreg.sub1, [[COPY15]], %subreg.sub2, [[COPY16]], %subreg.sub3 ; DAGISEL-GFX11-WF64-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; DAGISEL-GFX11-WF64-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY13]] - ; DAGISEL-GFX11-WF64-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; DAGISEL-GFX11-WF64-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY18]], %subreg.sub0, [[COPY19]], %subreg.sub1 + ; DAGISEL-GFX11-WF64-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] + ; DAGISEL-GFX11-WF64-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] + ; DAGISEL-GFX11-WF64-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY17]], %subreg.sub0, [[COPY18]], %subreg.sub1 ; DAGISEL-GFX11-WF64-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; DAGISEL-GFX11-WF64-NEXT: [[COPY20:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; DAGISEL-GFX11-WF64-NEXT: GLOBAL_STORE_DWORDX2 [[COPY20]], killed [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, addrspace 1) + ; DAGISEL-GFX11-WF64-NEXT: [[COPY19:%[0-9]+]]:vreg_64 = COPY [[DEF]] + ; DAGISEL-GFX11-WF64-NEXT: GLOBAL_STORE_DWORDX2 [[COPY19]], killed [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, addrspace 1) ; DAGISEL-GFX11-WF64-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; 
DAGISEL-GFX11-WF64-NEXT: [[COPY21:%[0-9]+]]:vreg_64 = COPY [[DEF1]] - ; DAGISEL-GFX11-WF64-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; DAGISEL-GFX11-WF64-NEXT: GLOBAL_STORE_DWORD [[COPY21]], [[COPY22]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison`, addrspace 1) + ; DAGISEL-GFX11-WF64-NEXT: [[COPY20:%[0-9]+]]:vreg_64 = COPY [[DEF1]] + ; DAGISEL-GFX11-WF64-NEXT: GLOBAL_STORE_DWORDX4 [[COPY20]], killed [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison`, align 4, addrspace 1) ; DAGISEL-GFX11-WF64-NEXT: [[DEF2:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; DAGISEL-GFX11-WF64-NEXT: [[COPY23:%[0-9]+]]:vreg_64 = COPY [[DEF2]] - ; DAGISEL-GFX11-WF64-NEXT: GLOBAL_STORE_DWORDX4 [[COPY23]], killed [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison`, addrspace 1) + ; DAGISEL-GFX11-WF64-NEXT: [[COPY21:%[0-9]+]]:vreg_64 = COPY [[DEF2]] + ; DAGISEL-GFX11-WF64-NEXT: GLOBAL_STORE_DWORDX4 [[COPY21]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison` + 16, addrspace 1) ; DAGISEL-GFX11-WF64-NEXT: [[DEF3:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; DAGISEL-GFX11-WF64-NEXT: [[COPY24:%[0-9]+]]:vreg_64 = COPY [[DEF3]] - ; DAGISEL-GFX11-WF64-NEXT: GLOBAL_STORE_DWORDX4 [[COPY24]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison` + 16, addrspace 1) + ; DAGISEL-GFX11-WF64-NEXT: [[COPY22:%[0-9]+]]:vreg_64 = COPY [[DEF3]] + ; DAGISEL-GFX11-WF64-NEXT: GLOBAL_STORE_DWORD [[COPY22]], [[COPY4]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison` + 8, align 8, basealign 16, addrspace 1) ; DAGISEL-GFX11-WF64-NEXT: [[DEF4:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; DAGISEL-GFX11-WF64-NEXT: [[COPY25:%[0-9]+]]:vreg_64 = COPY [[DEF4]] - ; DAGISEL-GFX11-WF64-NEXT: GLOBAL_STORE_DWORD [[COPY25]], [[COPY4]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison` + 8, align 8, basealign 16, addrspace 1) - ; 
DAGISEL-GFX11-WF64-NEXT: [[DEF5:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; DAGISEL-GFX11-WF64-NEXT: [[COPY26:%[0-9]+]]:vreg_64 = COPY [[DEF5]] - ; DAGISEL-GFX11-WF64-NEXT: [[COPY27:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]] - ; DAGISEL-GFX11-WF64-NEXT: GLOBAL_STORE_DWORDX2 [[COPY26]], killed [[COPY27]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, align 16, addrspace 1) + ; DAGISEL-GFX11-WF64-NEXT: [[COPY23:%[0-9]+]]:vreg_64 = COPY [[DEF4]] + ; DAGISEL-GFX11-WF64-NEXT: [[COPY24:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]] + ; DAGISEL-GFX11-WF64-NEXT: GLOBAL_STORE_DWORDX2 [[COPY23]], killed [[COPY24]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, align 16, addrspace 1) ; DAGISEL-GFX11-WF64-NEXT: S_ENDPGM 0 ; ; DAGISEL-GFX10-WF32-LABEL: name: amdgpu_cs_chain_preserve_cc_struct ; DAGISEL-GFX10-WF32: bb.0 (%ir-block.0): - ; DAGISEL-GFX10-WF32-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14 + ; DAGISEL-GFX10-WF32-NEXT: liveins: $sgpr0, $sgpr1, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14 ; DAGISEL-GFX10-WF32-NEXT: {{ $}} ; DAGISEL-GFX10-WF32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr14 ; DAGISEL-GFX10-WF32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr13 @@ -559,44 +541,39 @@ define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_cc_struct( {ptr, ; DAGISEL-GFX10-WF32-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr5 ; DAGISEL-GFX10-WF32-NEXT: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr4 ; DAGISEL-GFX10-WF32-NEXT: [[COPY10:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; DAGISEL-GFX10-WF32-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; DAGISEL-GFX10-WF32-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; DAGISEL-GFX10-WF32-NEXT: [[COPY13:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX10-WF32-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; DAGISEL-GFX10-WF32-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; 
DAGISEL-GFX10-WF32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[COPY]], %subreg.sub3 - ; DAGISEL-GFX10-WF32-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; DAGISEL-GFX10-WF32-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; DAGISEL-GFX10-WF32-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; DAGISEL-GFX10-WF32-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; DAGISEL-GFX10-WF32-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY14]], %subreg.sub0, [[COPY15]], %subreg.sub1, [[COPY16]], %subreg.sub2, [[COPY17]], %subreg.sub3 + ; DAGISEL-GFX10-WF32-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] + ; DAGISEL-GFX10-WF32-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; DAGISEL-GFX10-WF32-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; DAGISEL-GFX10-WF32-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; DAGISEL-GFX10-WF32-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY13]], %subreg.sub0, [[COPY14]], %subreg.sub1, [[COPY15]], %subreg.sub2, [[COPY16]], %subreg.sub3 ; DAGISEL-GFX10-WF32-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; DAGISEL-GFX10-WF32-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY13]] - ; DAGISEL-GFX10-WF32-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; DAGISEL-GFX10-WF32-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY18]], %subreg.sub0, [[COPY19]], %subreg.sub1 + ; DAGISEL-GFX10-WF32-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] + ; DAGISEL-GFX10-WF32-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] + ; DAGISEL-GFX10-WF32-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY17]], %subreg.sub0, [[COPY18]], %subreg.sub1 ; DAGISEL-GFX10-WF32-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; DAGISEL-GFX10-WF32-NEXT: [[COPY20:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; DAGISEL-GFX10-WF32-NEXT: GLOBAL_STORE_DWORDX2 [[COPY20]], 
killed [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, addrspace 1) + ; DAGISEL-GFX10-WF32-NEXT: [[COPY19:%[0-9]+]]:vreg_64 = COPY [[DEF]] + ; DAGISEL-GFX10-WF32-NEXT: GLOBAL_STORE_DWORDX2 [[COPY19]], killed [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, addrspace 1) ; DAGISEL-GFX10-WF32-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; DAGISEL-GFX10-WF32-NEXT: [[COPY21:%[0-9]+]]:vreg_64 = COPY [[DEF1]] - ; DAGISEL-GFX10-WF32-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; DAGISEL-GFX10-WF32-NEXT: GLOBAL_STORE_DWORD [[COPY21]], [[COPY22]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison`, addrspace 1) + ; DAGISEL-GFX10-WF32-NEXT: [[COPY20:%[0-9]+]]:vreg_64 = COPY [[DEF1]] + ; DAGISEL-GFX10-WF32-NEXT: GLOBAL_STORE_DWORDX4 [[COPY20]], killed [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison`, align 4, addrspace 1) ; DAGISEL-GFX10-WF32-NEXT: [[DEF2:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; DAGISEL-GFX10-WF32-NEXT: [[COPY23:%[0-9]+]]:vreg_64 = COPY [[DEF2]] - ; DAGISEL-GFX10-WF32-NEXT: GLOBAL_STORE_DWORDX4 [[COPY23]], killed [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison`, addrspace 1) + ; DAGISEL-GFX10-WF32-NEXT: [[COPY21:%[0-9]+]]:vreg_64 = COPY [[DEF2]] + ; DAGISEL-GFX10-WF32-NEXT: GLOBAL_STORE_DWORDX4 [[COPY21]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison` + 16, addrspace 1) ; DAGISEL-GFX10-WF32-NEXT: [[DEF3:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; DAGISEL-GFX10-WF32-NEXT: [[COPY24:%[0-9]+]]:vreg_64 = COPY [[DEF3]] - ; DAGISEL-GFX10-WF32-NEXT: GLOBAL_STORE_DWORDX4 [[COPY24]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison` + 16, addrspace 1) + ; DAGISEL-GFX10-WF32-NEXT: [[COPY22:%[0-9]+]]:vreg_64 = COPY [[DEF3]] + ; DAGISEL-GFX10-WF32-NEXT: GLOBAL_STORE_DWORD [[COPY22]], [[COPY4]], 0, 0, implicit 
$exec :: (store (s32) into `ptr addrspace(1) poison` + 8, align 8, basealign 16, addrspace 1) ; DAGISEL-GFX10-WF32-NEXT: [[DEF4:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; DAGISEL-GFX10-WF32-NEXT: [[COPY25:%[0-9]+]]:vreg_64 = COPY [[DEF4]] - ; DAGISEL-GFX10-WF32-NEXT: GLOBAL_STORE_DWORD [[COPY25]], [[COPY4]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison` + 8, align 8, basealign 16, addrspace 1) - ; DAGISEL-GFX10-WF32-NEXT: [[DEF5:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; DAGISEL-GFX10-WF32-NEXT: [[COPY26:%[0-9]+]]:vreg_64 = COPY [[DEF5]] - ; DAGISEL-GFX10-WF32-NEXT: [[COPY27:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]] - ; DAGISEL-GFX10-WF32-NEXT: GLOBAL_STORE_DWORDX2 [[COPY26]], killed [[COPY27]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, align 16, addrspace 1) + ; DAGISEL-GFX10-WF32-NEXT: [[COPY23:%[0-9]+]]:vreg_64 = COPY [[DEF4]] + ; DAGISEL-GFX10-WF32-NEXT: [[COPY24:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]] + ; DAGISEL-GFX10-WF32-NEXT: GLOBAL_STORE_DWORDX2 [[COPY23]], killed [[COPY24]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, align 16, addrspace 1) ; DAGISEL-GFX10-WF32-NEXT: S_ENDPGM 0 ; ; DAGISEL-GFX10-WF64-LABEL: name: amdgpu_cs_chain_preserve_cc_struct ; DAGISEL-GFX10-WF64: bb.0 (%ir-block.0): - ; DAGISEL-GFX10-WF64-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14 + ; DAGISEL-GFX10-WF64-NEXT: liveins: $sgpr0, $sgpr1, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14 ; DAGISEL-GFX10-WF64-NEXT: {{ $}} ; DAGISEL-GFX10-WF64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr14 ; DAGISEL-GFX10-WF64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr13 @@ -609,39 +586,34 @@ define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_cc_struct( {ptr, ; DAGISEL-GFX10-WF64-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr5 ; DAGISEL-GFX10-WF64-NEXT: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr4 ; 
DAGISEL-GFX10-WF64-NEXT: [[COPY10:%[0-9]+]]:sgpr_32 = COPY $sgpr3 - ; DAGISEL-GFX10-WF64-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr2 - ; DAGISEL-GFX10-WF64-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr1 - ; DAGISEL-GFX10-WF64-NEXT: [[COPY13:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX10-WF64-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr1 + ; DAGISEL-GFX10-WF64-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr0 ; DAGISEL-GFX10-WF64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[COPY]], %subreg.sub3 - ; DAGISEL-GFX10-WF64-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; DAGISEL-GFX10-WF64-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; DAGISEL-GFX10-WF64-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; DAGISEL-GFX10-WF64-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; DAGISEL-GFX10-WF64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY14]], %subreg.sub0, [[COPY15]], %subreg.sub1, [[COPY16]], %subreg.sub2, [[COPY17]], %subreg.sub3 + ; DAGISEL-GFX10-WF64-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] + ; DAGISEL-GFX10-WF64-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; DAGISEL-GFX10-WF64-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; DAGISEL-GFX10-WF64-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; DAGISEL-GFX10-WF64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY13]], %subreg.sub0, [[COPY14]], %subreg.sub1, [[COPY15]], %subreg.sub2, [[COPY16]], %subreg.sub3 ; DAGISEL-GFX10-WF64-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1 - ; DAGISEL-GFX10-WF64-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY13]] - ; DAGISEL-GFX10-WF64-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; DAGISEL-GFX10-WF64-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY18]], %subreg.sub0, [[COPY19]], %subreg.sub1 + ; DAGISEL-GFX10-WF64-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = 
COPY [[COPY12]] + ; DAGISEL-GFX10-WF64-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] + ; DAGISEL-GFX10-WF64-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY17]], %subreg.sub0, [[COPY18]], %subreg.sub1 ; DAGISEL-GFX10-WF64-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; DAGISEL-GFX10-WF64-NEXT: [[COPY20:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; DAGISEL-GFX10-WF64-NEXT: GLOBAL_STORE_DWORDX2 [[COPY20]], killed [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, addrspace 1) + ; DAGISEL-GFX10-WF64-NEXT: [[COPY19:%[0-9]+]]:vreg_64 = COPY [[DEF]] + ; DAGISEL-GFX10-WF64-NEXT: GLOBAL_STORE_DWORDX2 [[COPY19]], killed [[REG_SEQUENCE3]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, addrspace 1) ; DAGISEL-GFX10-WF64-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; DAGISEL-GFX10-WF64-NEXT: [[COPY21:%[0-9]+]]:vreg_64 = COPY [[DEF1]] - ; DAGISEL-GFX10-WF64-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; DAGISEL-GFX10-WF64-NEXT: GLOBAL_STORE_DWORD [[COPY21]], [[COPY22]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison`, addrspace 1) + ; DAGISEL-GFX10-WF64-NEXT: [[COPY20:%[0-9]+]]:vreg_64 = COPY [[DEF1]] + ; DAGISEL-GFX10-WF64-NEXT: GLOBAL_STORE_DWORDX4 [[COPY20]], killed [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison`, align 4, addrspace 1) ; DAGISEL-GFX10-WF64-NEXT: [[DEF2:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; DAGISEL-GFX10-WF64-NEXT: [[COPY23:%[0-9]+]]:vreg_64 = COPY [[DEF2]] - ; DAGISEL-GFX10-WF64-NEXT: GLOBAL_STORE_DWORDX4 [[COPY23]], killed [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison`, addrspace 1) + ; DAGISEL-GFX10-WF64-NEXT: [[COPY21:%[0-9]+]]:vreg_64 = COPY [[DEF2]] + ; DAGISEL-GFX10-WF64-NEXT: GLOBAL_STORE_DWORDX4 [[COPY21]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison` + 16, addrspace 1) ; DAGISEL-GFX10-WF64-NEXT: [[DEF3:%[0-9]+]]:sreg_64 = 
IMPLICIT_DEF - ; DAGISEL-GFX10-WF64-NEXT: [[COPY24:%[0-9]+]]:vreg_64 = COPY [[DEF3]] - ; DAGISEL-GFX10-WF64-NEXT: GLOBAL_STORE_DWORDX4 [[COPY24]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s128) into `ptr addrspace(1) poison` + 16, addrspace 1) + ; DAGISEL-GFX10-WF64-NEXT: [[COPY22:%[0-9]+]]:vreg_64 = COPY [[DEF3]] + ; DAGISEL-GFX10-WF64-NEXT: GLOBAL_STORE_DWORD [[COPY22]], [[COPY4]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison` + 8, align 8, basealign 16, addrspace 1) ; DAGISEL-GFX10-WF64-NEXT: [[DEF4:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; DAGISEL-GFX10-WF64-NEXT: [[COPY25:%[0-9]+]]:vreg_64 = COPY [[DEF4]] - ; DAGISEL-GFX10-WF64-NEXT: GLOBAL_STORE_DWORD [[COPY25]], [[COPY4]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison` + 8, align 8, basealign 16, addrspace 1) - ; DAGISEL-GFX10-WF64-NEXT: [[DEF5:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; DAGISEL-GFX10-WF64-NEXT: [[COPY26:%[0-9]+]]:vreg_64 = COPY [[DEF5]] - ; DAGISEL-GFX10-WF64-NEXT: [[COPY27:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]] - ; DAGISEL-GFX10-WF64-NEXT: GLOBAL_STORE_DWORDX2 [[COPY26]], killed [[COPY27]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, align 16, addrspace 1) + ; DAGISEL-GFX10-WF64-NEXT: [[COPY23:%[0-9]+]]:vreg_64 = COPY [[DEF4]] + ; DAGISEL-GFX10-WF64-NEXT: [[COPY24:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]] + ; DAGISEL-GFX10-WF64-NEXT: GLOBAL_STORE_DWORDX2 [[COPY23]], killed [[COPY24]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, align 16, addrspace 1) ; DAGISEL-GFX10-WF64-NEXT: S_ENDPGM 0 %p = extractvalue {ptr, i32, <4 x i32>} %a, 0 %i = extractvalue {ptr, i32, <4 x i32>} %a, 1 @@ -1360,100 +1332,72 @@ define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_many_regs(<36 x i ; GISEL-GFX11: bb.1 (%ir-block.0): ; GISEL-GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, 
$sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63, $vgpr64, $vgpr65, $vgpr66, $vgpr67, $vgpr68, $vgpr69, $vgpr70, $vgpr71, $vgpr72, $vgpr73, $vgpr74, $vgpr75, $vgpr76, $vgpr77, $vgpr78, $vgpr79, $vgpr80, $vgpr81, $vgpr82, $vgpr83, $vgpr84, $vgpr85, $vgpr86, $vgpr87, $vgpr88, $vgpr89, $vgpr90, $vgpr91, $vgpr92, $vgpr93, $vgpr94, $vgpr95, $vgpr96, $vgpr97, $vgpr98, $vgpr99, $vgpr100, $vgpr101, $vgpr102, $vgpr103, $vgpr104, $vgpr105, $vgpr106, $vgpr107, $vgpr108, $vgpr109, $vgpr110, $vgpr111, $vgpr112, $vgpr113, $vgpr114, $vgpr115, $vgpr116, $vgpr117, $vgpr118, $vgpr119, $vgpr120, $vgpr121, $vgpr122, $vgpr123, $vgpr124, $vgpr125, $vgpr126, $vgpr127, $vgpr128, $vgpr129, $vgpr130, $vgpr131, $vgpr132, $vgpr133, $vgpr134, $vgpr135 ; GISEL-GFX11-NEXT: {{ $}} - ; GISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr35 - ; GISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 - ; GISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr135 + ; GISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; GISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr135 ; GISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; GISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; GISEL-GFX11-NEXT: GLOBAL_STORE_DWORD [[COPY4]], [[COPY3]], 0, 0, implicit $exec :: (store (s32) into `ptr 
addrspace(1) poison`, addrspace 1) - ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; GISEL-GFX11-NEXT: GLOBAL_STORE_DWORDX2 [[COPY5]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (<2 x s32>) into `ptr addrspace(1) poison`, addrspace 1) + ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[DEF]] + ; GISEL-GFX11-NEXT: GLOBAL_STORE_DWORDX2 [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (<2 x s32>) into `ptr addrspace(1) poison`, align 4, addrspace 1) ; GISEL-GFX11-NEXT: S_ENDPGM 0 ; ; GISEL-GFX10-LABEL: name: amdgpu_cs_chain_preserve_many_regs ; GISEL-GFX10: bb.1 (%ir-block.0): ; GISEL-GFX10-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63, $vgpr64, $vgpr65, $vgpr66, $vgpr67, $vgpr68, $vgpr69, $vgpr70, $vgpr71, $vgpr72, $vgpr73, $vgpr74, $vgpr75, $vgpr76, $vgpr77, $vgpr78, $vgpr79, $vgpr80, $vgpr81, $vgpr82, $vgpr83, $vgpr84, $vgpr85, $vgpr86, $vgpr87, $vgpr88, $vgpr89, $vgpr90, $vgpr91, $vgpr92, $vgpr93, $vgpr94, $vgpr95, $vgpr96, 
$vgpr97, $vgpr98, $vgpr99, $vgpr100, $vgpr101, $vgpr102, $vgpr103, $vgpr104, $vgpr105, $vgpr106, $vgpr107, $vgpr108, $vgpr109, $vgpr110, $vgpr111, $vgpr112, $vgpr113, $vgpr114, $vgpr115, $vgpr116, $vgpr117, $vgpr118, $vgpr119, $vgpr120, $vgpr121, $vgpr122, $vgpr123, $vgpr124, $vgpr125, $vgpr126, $vgpr127, $vgpr128, $vgpr129, $vgpr130, $vgpr131, $vgpr132, $vgpr133, $vgpr134, $vgpr135 ; GISEL-GFX10-NEXT: {{ $}} - ; GISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr35 - ; GISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 - ; GISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr135 + ; GISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; GISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr135 ; GISEL-GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; GISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]] - ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; GISEL-GFX10-NEXT: GLOBAL_STORE_DWORD [[COPY4]], [[COPY3]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison`, addrspace 1) - ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1 - ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; GISEL-GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[COPY5]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (<2 x s32>) into `ptr addrspace(1) poison`, addrspace 1) + ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[DEF]] + ; GISEL-GFX10-NEXT: GLOBAL_STORE_DWORDX2 [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (<2 x s32>) into `ptr addrspace(1) poison`, align 4, addrspace 1) ; GISEL-GFX10-NEXT: S_ENDPGM 0 ; ; DAGISEL-GFX11-WF32-LABEL: name: amdgpu_cs_chain_preserve_many_regs ; DAGISEL-GFX11-WF32: bb.0 (%ir-block.0): - ; DAGISEL-GFX11-WF32-NEXT: liveins: $sgpr35, $vgpr8, $vgpr135 + ; DAGISEL-GFX11-WF32-NEXT: liveins: 
$vgpr8, $vgpr135 ; DAGISEL-GFX11-WF32-NEXT: {{ $}} ; DAGISEL-GFX11-WF32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr135 ; DAGISEL-GFX11-WF32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 - ; DAGISEL-GFX11-WF32-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr35 - ; DAGISEL-GFX11-WF32-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; DAGISEL-GFX11-WF32-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; DAGISEL-GFX11-WF32-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] - ; DAGISEL-GFX11-WF32-NEXT: GLOBAL_STORE_DWORD [[COPY3]], [[COPY4]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison`, addrspace 1) ; DAGISEL-GFX11-WF32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; DAGISEL-GFX11-WF32-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; DAGISEL-GFX11-WF32-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[DEF1]] - ; DAGISEL-GFX11-WF32-NEXT: GLOBAL_STORE_DWORDX2 [[COPY5]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, addrspace 1) + ; DAGISEL-GFX11-WF32-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF + ; DAGISEL-GFX11-WF32-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[DEF]] + ; DAGISEL-GFX11-WF32-NEXT: GLOBAL_STORE_DWORDX2 killed [[COPY2]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, align 4, addrspace 1) ; DAGISEL-GFX11-WF32-NEXT: S_ENDPGM 0 ; ; DAGISEL-GFX11-WF64-LABEL: name: amdgpu_cs_chain_preserve_many_regs ; DAGISEL-GFX11-WF64: bb.0 (%ir-block.0): - ; DAGISEL-GFX11-WF64-NEXT: liveins: $sgpr35, $vgpr8, $vgpr135 + ; DAGISEL-GFX11-WF64-NEXT: liveins: $vgpr8, $vgpr135 ; DAGISEL-GFX11-WF64-NEXT: {{ $}} ; DAGISEL-GFX11-WF64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr135 ; DAGISEL-GFX11-WF64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 - ; DAGISEL-GFX11-WF64-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr35 - ; DAGISEL-GFX11-WF64-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; DAGISEL-GFX11-WF64-NEXT: 
[[COPY3:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; DAGISEL-GFX11-WF64-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] - ; DAGISEL-GFX11-WF64-NEXT: GLOBAL_STORE_DWORD [[COPY3]], [[COPY4]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison`, addrspace 1) ; DAGISEL-GFX11-WF64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; DAGISEL-GFX11-WF64-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; DAGISEL-GFX11-WF64-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[DEF1]] - ; DAGISEL-GFX11-WF64-NEXT: GLOBAL_STORE_DWORDX2 [[COPY5]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, addrspace 1) + ; DAGISEL-GFX11-WF64-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF + ; DAGISEL-GFX11-WF64-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[DEF]] + ; DAGISEL-GFX11-WF64-NEXT: GLOBAL_STORE_DWORDX2 killed [[COPY2]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, align 4, addrspace 1) ; DAGISEL-GFX11-WF64-NEXT: S_ENDPGM 0 ; ; DAGISEL-GFX10-WF32-LABEL: name: amdgpu_cs_chain_preserve_many_regs ; DAGISEL-GFX10-WF32: bb.0 (%ir-block.0): - ; DAGISEL-GFX10-WF32-NEXT: liveins: $sgpr35, $vgpr8, $vgpr135 + ; DAGISEL-GFX10-WF32-NEXT: liveins: $vgpr8, $vgpr135 ; DAGISEL-GFX10-WF32-NEXT: {{ $}} ; DAGISEL-GFX10-WF32-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr135 ; DAGISEL-GFX10-WF32-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 - ; DAGISEL-GFX10-WF32-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr35 - ; DAGISEL-GFX10-WF32-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; DAGISEL-GFX10-WF32-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; DAGISEL-GFX10-WF32-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] - ; DAGISEL-GFX10-WF32-NEXT: GLOBAL_STORE_DWORD [[COPY3]], [[COPY4]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison`, addrspace 1) ; DAGISEL-GFX10-WF32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], 
%subreg.sub1 - ; DAGISEL-GFX10-WF32-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; DAGISEL-GFX10-WF32-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[DEF1]] - ; DAGISEL-GFX10-WF32-NEXT: GLOBAL_STORE_DWORDX2 [[COPY5]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, addrspace 1) + ; DAGISEL-GFX10-WF32-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF + ; DAGISEL-GFX10-WF32-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[DEF]] + ; DAGISEL-GFX10-WF32-NEXT: GLOBAL_STORE_DWORDX2 killed [[COPY2]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, align 4, addrspace 1) ; DAGISEL-GFX10-WF32-NEXT: S_ENDPGM 0 ; ; DAGISEL-GFX10-WF64-LABEL: name: amdgpu_cs_chain_preserve_many_regs ; DAGISEL-GFX10-WF64: bb.0 (%ir-block.0): - ; DAGISEL-GFX10-WF64-NEXT: liveins: $sgpr35, $vgpr8, $vgpr135 + ; DAGISEL-GFX10-WF64-NEXT: liveins: $vgpr8, $vgpr135 ; DAGISEL-GFX10-WF64-NEXT: {{ $}} ; DAGISEL-GFX10-WF64-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr135 ; DAGISEL-GFX10-WF64-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr8 - ; DAGISEL-GFX10-WF64-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr35 - ; DAGISEL-GFX10-WF64-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; DAGISEL-GFX10-WF64-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[DEF]] - ; DAGISEL-GFX10-WF64-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] - ; DAGISEL-GFX10-WF64-NEXT: GLOBAL_STORE_DWORD [[COPY3]], [[COPY4]], 0, 0, implicit $exec :: (store (s32) into `ptr addrspace(1) poison`, addrspace 1) ; DAGISEL-GFX10-WF64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; DAGISEL-GFX10-WF64-NEXT: [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF - ; DAGISEL-GFX10-WF64-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[DEF1]] - ; DAGISEL-GFX10-WF64-NEXT: GLOBAL_STORE_DWORDX2 [[COPY5]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, addrspace 1) + ; DAGISEL-GFX10-WF64-NEXT: [[DEF:%[0-9]+]]:sreg_64 = 
IMPLICIT_DEF + ; DAGISEL-GFX10-WF64-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[DEF]] + ; DAGISEL-GFX10-WF64-NEXT: GLOBAL_STORE_DWORDX2 killed [[COPY2]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (store (s64) into `ptr addrspace(1) poison`, align 4, addrspace 1) ; DAGISEL-GFX10-WF64-NEXT: S_ENDPGM 0 %c = extractelement <36 x i32> %a, i32 35 store i32 %c, ptr addrspace(1) poison diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll index 417b8e08cf669..d8ba765395e20 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll @@ -132,6 +132,7 @@ define amdgpu_kernel void @gws_barrier_vgpr_offset_add(i32 %val) #0 { ; LOOP: ds_write_b32 define amdgpu_kernel void @gws_barrier_save_m0_barrier_constant_offset(i32 %val) #0 { store i32 1, ptr addrspace(3) @lds + fence release call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 10) store i32 2, ptr addrspace(3) @lds ret void diff --git a/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll b/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll index 31708a9b738db..23086fb1c18fa 100644 --- a/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll +++ b/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll @@ -52,12 +52,15 @@ define amdgpu_ps float @valley_partially_undef_copy() #0 { ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: v_mov_b32_e32 v2, 0x7fc00000 -; CHECK-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 -; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], 0 +; CHECK-NEXT: s_mov_b32 s0, 0x7fc00000 +; CHECK-NEXT: v_mov_b32_e32 v5, v3 +; CHECK-NEXT: v_mov_b32_e32 v4, v2 +; CHECK-NEXT: v_mov_b32_e32 v3, v1 +; CHECK-NEXT: v_mov_b32_e32 v2, v0 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 -; CHECK-NEXT: s_waitcnt expcnt(1) ; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; CHECK-NEXT: 
v_mov_b32_e32 v2, s0 +; CHECK-NEXT: buffer_store_dwordx4 v[2:5], off, s[0:3], 0 ; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v1 ; CHECK-NEXT: .LBB1_1: ; %bb9 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/multiple_tails.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/multiple_tails.ll index 57da5976b3cfa..6f3c2fc5f387e 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/multiple_tails.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/multiple_tails.ll @@ -10,11 +10,7 @@ define amdgpu_kernel void @no_crash(i32 %arg) { ; GCN-SAME: i32 [[ARG:%.*]]) { ; GCN-NEXT: [[TEMP2:%.*]] = add i32 [[ARG]], 14 ; GCN-NEXT: [[TEMP3:%.*]] = getelementptr [16384 x i32], ptr addrspace(3) @[[GLOB0:[0-9]+]], i32 0, i32 [[TEMP2]] -; GCN-NEXT: [[TEMP4:%.*]] = add i32 [[ARG]], 15 -; GCN-NEXT: [[TEMP5:%.*]] = getelementptr [16384 x i32], ptr addrspace(3) @[[GLOB0]], i32 0, i32 [[TEMP4]] ; GCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(3) [[TEMP3]], align 4 -; GCN-NEXT: store i32 0, ptr addrspace(3) [[TEMP5]], align 4 -; GCN-NEXT: store i32 0, ptr addrspace(3) [[TEMP5]], align 4 ; GCN-NEXT: ret void ; %temp2 = add i32 %arg, 14 diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/vectorize-redund-stores.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/vectorize-redund-stores.ll new file mode 100644 index 0000000000000..cd3e3bded681a --- /dev/null +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/vectorize-redund-stores.ll @@ -0,0 +1,108 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=load-store-vectorizer -S -o - %s | FileCheck %s + +define void @onevec(ptr %ptr, <1 x i32> %sd0, i32 %sd1, i32 %sd2, <1 x i32> %sd3, <1 x i32> %sd4, <1 x i32> %sd5) { +; CHECK-LABEL: define void @onevec( +; CHECK-SAME: ptr [[PTR:%.*]], <1 x i32> [[SD0:%.*]], i32 [[SD1:%.*]], i32 [[SD2:%.*]], <1 x i32> 
[[SD3:%.*]], <1 x i32> [[SD4:%.*]], <1 x i32> [[SD5:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <1 x i32> [[SD0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <1 x i32> poison, i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <1 x i32> [[TMP2]], i32 [[SD1]], i32 0 +; CHECK-NEXT: store <1 x i32> [[TMP3]], ptr [[PTR]], align 4 +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i32 16 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <1 x i32> poison, i32 [[SD2]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i32> [[SD3]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <1 x i32> [[TMP4]], i32 [[TMP5]], i32 0 +; CHECK-NEXT: store <1 x i32> [[TMP6]], ptr [[GEP1]], align 4 +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i32 32 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i32> [[SD4]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <1 x i32> poison, i32 [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <1 x i32> [[SD5]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <1 x i32> [[TMP8]], i32 [[TMP9]], i32 0 +; CHECK-NEXT: store <1 x i32> [[TMP10]], ptr [[GEP2]], align 4 +; CHECK-NEXT: ret void +; + store <1 x i32> %sd0, ptr %ptr, align 4 + store i32 %sd1, ptr %ptr, align 4 + + %gep1 = getelementptr inbounds i8, ptr %ptr, i32 16 + store i32 %sd2, ptr %gep1, align 4 + store <1 x i32> %sd3, ptr %gep1, align 4 + + %gep2 = getelementptr inbounds i8, ptr %ptr, i32 32 + store <1 x i32> %sd4, ptr %gep2, align 4 + store <1 x i32> %sd5, ptr %gep2, align 4 + ret void +} + +define void @test(ptr %ptr, i32 %sd0, <2 x i32> %sd1, <2 x i32> %sd2, i32 %sd3) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: ptr [[PTR:%.*]], i32 [[SD0:%.*]], <2 x i32> [[SD1:%.*]], <2 x i32> [[SD2:%.*]], i32 [[SD3:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[SD0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[SD1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = 
insertelement <4 x i32> [[TMP1]], i32 [[TMP2]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[SD1]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[TMP4]], i32 2 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[SD2]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP6]], i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[SD2]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP8]], i32 3 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[SD3]], i32 2 +; CHECK-NEXT: store <4 x i32> [[TMP10]], ptr [[PTR]], align 4 +; CHECK-NEXT: ret void +; + store i32 %sd0, ptr %ptr, align 4 + %gep1 = getelementptr inbounds i8, ptr %ptr, i32 4 + store <2 x i32> %sd1, ptr %gep1, align 4 + %gep2 = getelementptr inbounds i8, ptr %ptr, i32 8 + store <2 x i32> %sd2, ptr %gep2, align 4 + %gep3 = getelementptr inbounds i8, ptr %ptr, i32 8 + store i32 %sd3, ptr %gep3, align 4 + ret void +} + +define void @vect_zext_bitcast_i8_st4_to_i32_idx(ptr addrspace(1) %arg1, i32 %base, i32 %sd1, i32 %sd2, i32 %sd25, i32 %sd3, i32 %sd4) { +; CHECK-LABEL: define void @vect_zext_bitcast_i8_st4_to_i32_idx( +; CHECK-SAME: ptr addrspace(1) [[ARG1:%.*]], i32 [[BASE:%.*]], i32 [[SD1:%.*]], i32 [[SD2:%.*]], i32 [[SD25:%.*]], i32 [[SD3:%.*]], i32 [[SD4:%.*]]) { +; CHECK-NEXT: [[ADD1:%.*]] = add nuw i32 [[BASE]], 0 +; CHECK-NEXT: [[ZEXT1:%.*]] = zext i32 [[ADD1]] to i64 +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ARG1]], i64 [[ZEXT1]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[SD1]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[SD2]], i32 1 +; CHECK-NEXT: store <2 x i32> [[TMP2]], ptr addrspace(1) [[GEP1]], align 4 +; CHECK-NEXT: [[ADD25:%.*]] = add nuw i32 [[BASE]], 6 +; CHECK-NEXT: [[ZEXT25:%.*]] = zext i32 [[ADD25]] to i64 +; CHECK-NEXT: [[GEP25:%.*]] = getelementptr inbounds i8, 
ptr addrspace(1) [[ARG1]], i64 [[ZEXT25]] +; CHECK-NEXT: store i32 [[SD25]], ptr addrspace(1) [[GEP25]], align 4 +; CHECK-NEXT: [[ADD3:%.*]] = add nuw i32 [[BASE]], 8 +; CHECK-NEXT: [[ZEXT3:%.*]] = zext i32 [[ADD3]] to i64 +; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ARG1]], i64 [[ZEXT3]] +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[SD3]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[SD4]], i32 1 +; CHECK-NEXT: store <2 x i32> [[TMP4]], ptr addrspace(1) [[GEP3]], align 4 +; CHECK-NEXT: ret void +; + %add1 = add nuw i32 %base, 0 + %zext1 = zext i32 %add1 to i64 + %gep1 = getelementptr inbounds i8, ptr addrspace(1) %arg1, i64 %zext1 + store i32 %sd1, ptr addrspace(1) %gep1, align 4 + %add2 = add nuw i32 %base, 4 + %zext2 = zext i32 %add2 to i64 + %gep2 = getelementptr inbounds i8,ptr addrspace(1) %arg1, i64 %zext2 + store i32 %sd2, ptr addrspace(1) %gep2, align 4 + + ; The i32 store at %base+6 overlaps the i32 stores at %base+4 and %base+8 by 2 bytes each, so it breaks continuity: only {sd1, sd2} and {sd3, sd4} are merged into <2 x i32> stores, with this store left scalar between them. 
+ %add25 = add nuw i32 %base, 6 + %zext25 = zext i32 %add25 to i64 + %gep25 = getelementptr inbounds i8,ptr addrspace(1) %arg1, i64 %zext25 + store i32 %sd25, ptr addrspace(1) %gep25, align 4 + + %add3 = add nuw i32 %base, 8 + %zext3 = zext i32 %add3 to i64 + %gep3 = getelementptr inbounds i8, ptr addrspace(1) %arg1, i64 %zext3 + store i32 %sd3, ptr addrspace(1) %gep3, align 4 + %add4 = add nuw i32 %base, 12 + %zext4 = zext i32 %add4 to i64 + %gep4 = getelementptr inbounds i8, ptr addrspace(1) %arg1, i64 %zext4 + store i32 %sd4, ptr addrspace(1) %gep4, align 4 + ret void +} diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected index a8c2531117f42..c5ec8c49fe7d2 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.generated.expected @@ -116,13 +116,13 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" } ; CHECK-NEXT: s_mov_b32 s33, s32 ; CHECK-NEXT: s_addk_i32 s32, 0x600 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: s_getpc_b64 s[4:5] -; CHECK-NEXT: s_add_u32 s4, s4, x@rel32@lo+4 -; CHECK-NEXT: s_addc_u32 s5, s5, x@rel32@hi+12 ; CHECK-NEXT: v_mov_b32_e32 v2, 1 ; CHECK-NEXT: v_mov_b32_e32 v3, 2 ; CHECK-NEXT: v_mov_b32_e32 v4, 3 ; CHECK-NEXT: v_mov_b32_e32 v5, 4 +; CHECK-NEXT: s_getpc_b64 s[4:5] +; CHECK-NEXT: s_add_u32 s4, s4, x@rel32@lo+4 +; CHECK-NEXT: s_addc_u32 s5, s5, x@rel32@hi+12 ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:4 ; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:8 diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected 
b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected index 34530f2f632e2..21523e9a46c93 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/amdgpu_generated_funcs.ll.nogenerated.expected @@ -93,13 +93,13 @@ define dso_local i32 @main() #0 { ; CHECK-NEXT: s_mov_b32 s33, s32 ; CHECK-NEXT: s_addk_i32 s32, 0x600 ; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: s_getpc_b64 s[4:5] -; CHECK-NEXT: s_add_u32 s4, s4, x@rel32@lo+4 -; CHECK-NEXT: s_addc_u32 s5, s5, x@rel32@hi+12 ; CHECK-NEXT: v_mov_b32_e32 v2, 1 ; CHECK-NEXT: v_mov_b32_e32 v3, 2 ; CHECK-NEXT: v_mov_b32_e32 v4, 3 ; CHECK-NEXT: v_mov_b32_e32 v5, 4 +; CHECK-NEXT: s_getpc_b64 s[4:5] +; CHECK-NEXT: s_add_u32 s4, s4, x@rel32@lo+4 +; CHECK-NEXT: s_addc_u32 s5, s5, x@rel32@hi+12 ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:4 ; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:8