diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index dd72191873483..d1f477f78772f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -74,6 +74,9 @@ def gi_flat_offset :
 def gi_flat_offset_signed :
     GIComplexOperandMatcher<s64, "selectFlatOffsetSigned">,
     GIComplexPatternEquiv<FLATOffsetSigned>;
+def gi_global_saddr :
+    GIComplexOperandMatcher<s64, "selectGlobalSAddr">,
+    GIComplexPatternEquiv<GlobalSAddr>;
 def gi_mubuf_scratch_offset :
     GIComplexOperandMatcher<s32, "selectMUBUFScratchOffset">,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index f64aaf5062c50..7e842835a5b44 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -3254,6 +3254,80 @@
 AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const {
   return selectFlatOffsetImpl(Root);
 }
 
+/// Match a zero extend from a 32-bit value to 64-bits.
+/// \returns the 32-bit source register, or an invalid Register() on failure.
+static Register matchZeroExtendFromS32(MachineRegisterInfo &MRI, Register Reg) {
+  Register ZExtSrc;
+  if (mi_match(Reg, MRI, m_GZExt(m_Reg(ZExtSrc))))
+    return MRI.getType(ZExtSrc) == LLT::scalar(32) ? ZExtSrc : Register();
+
+  // Match legalized form %zext = G_MERGE_VALUES (s32 %x), (s32 0)
+  //
+  // Require exactly one def + two sources: a wider merge (e.g. 4 x s32) with a
+  // zero in operand 2 must not be treated as a zero-extend of operand 1.
+  const MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
+  if (Def->getOpcode() != AMDGPU::G_MERGE_VALUES ||
+      Def->getNumOperands() != 3)
+    return Register(); // Was 'return false;' -- wrong type for Register.
+
+  int64_t MergeRHS;
+  if (mi_match(Def->getOperand(2).getReg(), MRI, m_ICst(MergeRHS)) &&
+      MergeRHS == 0) {
+    return Def->getOperand(1).getReg();
+  }
+
+  return Register();
+}
+
+// Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectGlobalSAddr(MachineOperand &Root) const {
+  Register PtrBase;
+  int64_t ImmOffset;
+
+  // Match the immediate offset first, which canonically is moved as low as
+  // possible.
+  std::tie(PtrBase, ImmOffset) = getPtrBaseWithConstantOffset(Root.getReg(),
+                                                              *MRI);
+
+  // TODO: Could split larger constant into VGPR offset.
+  if (ImmOffset != 0 &&
+      !TII.isLegalFLATOffset(ImmOffset, AMDGPUAS::GLOBAL_ADDRESS, true)) {
+    PtrBase = Root.getReg();
+    ImmOffset = 0;
+  }
+
+  // Match the variable offset.
+  const MachineInstr *PtrBaseDef = getDefIgnoringCopies(PtrBase, *MRI);
+  if (PtrBaseDef->getOpcode() != AMDGPU::G_PTR_ADD)
+    return None;
+
+  // Look through the SGPR->VGPR copy.
+  Register PtrBaseSrc =
+    getSrcRegIgnoringCopies(PtrBaseDef->getOperand(1).getReg(), *MRI);
+  if (!PtrBaseSrc)
+    return None;
+
+  const RegisterBank *BaseRB = RBI.getRegBank(PtrBaseSrc, *MRI, TRI);
+  if (BaseRB->getID() != AMDGPU::SGPRRegBankID)
+    return None;
+
+  Register SAddr = PtrBaseSrc;
+  Register PtrBaseOffset = PtrBaseDef->getOperand(2).getReg();
+
+  // It's possible voffset is an SGPR here, but the copy to VGPR will be
+  // inserted later.
+  Register VOffset = matchZeroExtendFromS32(*MRI, PtrBaseOffset);
+  if (!VOffset)
+    return None;
+
+  return {{[=](MachineInstrBuilder &MIB) { // saddr
+             MIB.addReg(SAddr);
+           },
+           [=](MachineInstrBuilder &MIB) { // voffset
+             MIB.addReg(VOffset);
+           },
+           [=](MachineInstrBuilder &MIB) { // offset
+             MIB.addImm(ImmOffset);
+           }}};
+}
+
 static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
   auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
   return PSV && PSV->isStack();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index c9129bf1105b0..b18867299baf9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -185,6 +185,9 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
   InstructionSelector::ComplexRendererFns
   selectFlatOffsetSigned(MachineOperand &Root) const;
 
+  InstructionSelector::ComplexRendererFns
+  selectGlobalSAddr(MachineOperand &Root) const;
+
InstructionSelector::ComplexRendererFns selectMUBUFScratchOffen(MachineOperand &Root) const; InstructionSelector::ComplexRendererFns diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir new file mode 100644 index 0000000000000..4f289d5559137 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global-saddr.mir @@ -0,0 +1,275 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s + +# TODO: Better to initialize 0 vgpr and use sgpr base +--- + +name: load_global_s32_from_sgpr +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; GFX9-LABEL: name: load_global_s32_from_sgpr + ; GFX9: liveins: $sgpr0_sgpr1 + ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]] + ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY1]], 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-LABEL: name: load_global_s32_from_sgpr + ; GFX10: liveins: $sgpr0_sgpr1 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]] + ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[COPY1]], 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + %0:sgpr(p1) = COPY $sgpr0_sgpr1 + %1:vgpr(p1) = COPY %0 + %2:vgpr(s32) = G_LOAD %1 :: (load 4, align 4, addrspace 1) + $vgpr0 = COPY %2 + +... + +# FIXME: This zext wouldn't select on its own. 
+--- + +name: load_global_s32_from_sgpr_zext_vgpr +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0 + + ; GFX9-LABEL: name: load_global_s32_from_sgpr_zext_vgpr + ; GFX9: liveins: $sgpr0_sgpr1, $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX10-LABEL: name: load_global_s32_from_sgpr_zext_vgpr + ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + %0:sgpr(p1) = COPY $sgpr0_sgpr1 + %1:vgpr(s32) = COPY $vgpr0 + %2:vgpr(p1) = COPY %0 + %3:vgpr(s64) = G_ZEXT %1 + %4:vgpr(p1) = G_PTR_ADD %2, %3 + %5:vgpr(s32) = G_LOAD %4 :: (load 4, align 4, addrspace 1) + $vgpr0 = COPY %5 + +... 
+ +# Test with zext lowered to G_MERGE_VALUES +--- + +name: load_global_s32_from_sgpr_merge_zext_vgpr +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0 + + ; GFX9-LABEL: name: load_global_s32_from_sgpr_merge_zext_vgpr + ; GFX9: liveins: $sgpr0_sgpr1, $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX10-LABEL: name: load_global_s32_from_sgpr_merge_zext_vgpr + ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + %0:sgpr(p1) = COPY $sgpr0_sgpr1 + %1:vgpr(s32) = COPY $vgpr0 + %2:vgpr(p1) = COPY %0 + %zero:vgpr(s32) = G_CONSTANT i32 0 + %3:vgpr(s64) = G_MERGE_VALUES %1, %zero + %4:vgpr(p1) = G_PTR_ADD %2, %3 + %5:vgpr(s32) = G_LOAD %4 :: (load 4, align 4, addrspace 1) + $vgpr0 = COPY %5 + +... 
+ +--- + +name: load_global_s32_from_sgpr_merge_not_0_vgpr +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0 + + ; GFX9-LABEL: name: load_global_s32_from_sgpr_merge_not_0_vgpr + ; GFX9: liveins: $sgpr0_sgpr1, $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]] + ; GFX9: %notzero:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, %notzero, %subreg.sub1 + ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 + ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX9: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub1 + ; GFX9: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX9: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec + ; GFX9: %12:vgpr_32, dead %14:sreg_64_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX9: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX9: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + ; GFX10-LABEL: name: load_global_s32_from_sgpr_merge_not_0_vgpr + ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]] + ; GFX10: %notzero:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, %notzero, %subreg.sub1 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 + ; GFX10: 
[[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub1 + ; GFX10: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec + ; GFX10: %12:vgpr_32, dead %14:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %12, %subreg.sub1 + ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + %0:sgpr(p1) = COPY $sgpr0_sgpr1 + %1:vgpr(s32) = COPY $vgpr0 + %2:vgpr(p1) = COPY %0 + %notzero:vgpr(s32) = G_CONSTANT i32 1 + %3:vgpr(s64) = G_MERGE_VALUES %1, %notzero + %4:vgpr(p1) = G_PTR_ADD %2, %3 + %5:vgpr(s32) = G_LOAD %4 :: (load 4, align 4, addrspace 1) + $vgpr0 = COPY %5 + +... 
+ +--- + +name: load_global_s32_from_sgpr_zext_vgpr_offset4095 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0 + + ; GFX9-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset4095 + ; GFX9: liveins: $sgpr0_sgpr1, $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], 4095, 0, 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX10-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset4095 + ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]] + ; GFX10: %zero:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX10: %zext:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, %zero, %subreg.sub1 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY %zext.sub0 + ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub1 + ; GFX10: [[COPY6:%[0-9]+]]:vgpr_32 = COPY %zext.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec + ; GFX10: %24:vgpr_32, dead %26:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %24, %subreg.sub1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1 + ; GFX10: 
[[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX10: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX10: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY7]], [[COPY8]], 0, implicit $exec + ; GFX10: %14:vgpr_32, dead %16:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY9]], [[COPY10]], killed [[V_ADD_CO_U32_e64_3]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_2]], %subreg.sub0, %14, %subreg.sub1 + ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + %0:sgpr(p1) = COPY $sgpr0_sgpr1 + %1:vgpr(s32) = COPY $vgpr0 + %2:vgpr(p1) = COPY %0 + %zero:vgpr(s32) = G_CONSTANT i32 0 + %zext:vgpr(s64) = G_MERGE_VALUES %1, %zero + %4:vgpr(p1) = G_PTR_ADD %2, %zext + %5:vgpr(s64) = G_CONSTANT i64 4095 + %6:vgpr(p1) = G_PTR_ADD %4, %5 + %7:vgpr(s32) = G_LOAD %6 :: (load 4, align 4, addrspace 1) + $vgpr0 = COPY %7 + +... 
+ +--- + +name: load_global_s32_from_sgpr_zext_vgpr_offset_neg4096 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0 + + ; GFX9-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset_neg4096 + ; GFX9: liveins: $sgpr0_sgpr1, $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[COPY]], [[COPY1]], -4096, 0, 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX9: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD_SADDR]] + ; GFX10-LABEL: name: load_global_s32_from_sgpr_zext_vgpr_offset_neg4096 + ; GFX10: liveins: $sgpr0_sgpr1, $vgpr0 + ; GFX10: $vcc_hi = IMPLICIT_DEF + ; GFX10: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX10: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[COPY]] + ; GFX10: %zero:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX10: %zext:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, %zero, %subreg.sub1 + ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 + ; GFX10: [[COPY4:%[0-9]+]]:vgpr_32 = COPY %zext.sub0 + ; GFX10: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub1 + ; GFX10: [[COPY6:%[0-9]+]]:vgpr_32 = COPY %zext.sub1 + ; GFX10: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY3]], [[COPY4]], 0, implicit $exec + ; GFX10: %24:vgpr_32, dead %26:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY5]], [[COPY6]], killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, %24, %subreg.sub1 + ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec + ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec + ; GFX10: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], 
%subreg.sub1 + ; GFX10: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; GFX10: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX10: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; GFX10: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX10: [[V_ADD_CO_U32_e64_2:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[COPY7]], [[COPY8]], 0, implicit $exec + ; GFX10: %14:vgpr_32, dead %16:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY9]], [[COPY10]], killed [[V_ADD_CO_U32_e64_3]], 0, implicit $exec + ; GFX10: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_2]], %subreg.sub0, %14, %subreg.sub1 + ; GFX10: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG_SEQUENCE2]], 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1) + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORD]] + %0:sgpr(p1) = COPY $sgpr0_sgpr1 + %1:vgpr(s32) = COPY $vgpr0 + %2:vgpr(p1) = COPY %0 + %zero:vgpr(s32) = G_CONSTANT i32 0 + %zext:vgpr(s64) = G_MERGE_VALUES %1, %zero + %4:vgpr(p1) = G_PTR_ADD %2, %zext + %5:vgpr(s64) = G_CONSTANT i64 -4096 + %6:vgpr(p1) = G_PTR_ADD %4, %5 + %7:vgpr(s32) = G_LOAD %6 :: (load 4, align 4, addrspace 1) + $vgpr0 = COPY %7 + +...