diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index a910a900e775f..b886bf4364a2f 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -2880,8 +2880,13 @@ bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) const { bool CombinerHelper::matchUndefStore(MachineInstr &MI) const { assert(MI.getOpcode() == TargetOpcode::G_STORE); - return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(), - MRI); + auto &Store = cast<GStore>(MI); + // An undef store can be erased only if it has no side effects beyond the + // value written. That holds for non-atomic and unordered atomic stores, but + // not for volatile or ordered atomic stores, which must be preserved. + if (!Store.isUnordered()) + return false; + return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Store.getValueReg(), MRI); } bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) const { diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 8b5076cdd7712..5c73b0b09c556 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -542,8 +542,20 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) { return EltSize != 32 && EltSize != 64; } +// True for a pointer in an address space beyond the known AMDGPU address +// spaces (e.g. produced by an illegal addrspacecast). Such pointers have no +// register class or load/store selection pattern. 
+static bool isUnsupportedPointerType(LLT Ty) { + return Ty.isPointer() && Ty.getAddressSpace() > AMDGPUAS::MAX_AMDGPU_ADDRESS; +} + static bool isLoadStoreLegal(const GCNSubtarget &ST, const LegalityQuery &Query) { const LLT Ty = Query.Types[0]; + // Stores of a pointer in an unsupported address space have no selection + // pattern; route them to custom legalization, which reinterprets the stored + // value as an integer of the same size. + if (Query.Opcode == TargetOpcode::G_STORE && isUnsupportedPointerType(Ty)) + return false; return isRegisterType(ST, Ty) && isLoadStoreSizeLegal(ST, Query) && !hasBufferRsrcWorkaround(Ty) && !loadStoreBitcastWorkaround(Ty); } @@ -1598,6 +1610,15 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, return isLoadStoreLegal(ST, Query); }); + // Stores of a pointer in an address space without a register class (e.g. + // produced by an illegal addrspacecast) have no selection pattern. + // Reinterpret the value as an integer of the same size, which does. + if (IsStore) { + Actions.customIf([=](const LegalityQuery &Query) -> bool { + return isUnsupportedPointerType(Query.Types[0]); + }); + } + // The custom pointers (fat pointers, buffer resources) don't work with load // and store at this level. Fat pointers should have been lowered to // intrinsics before the translation to MIR. @@ -3508,6 +3529,17 @@ bool AMDGPULegalizerInfo::legalizeStore(LegalizerHelper &Helper, Observer.changedInstr(MI); return true; } + + // A pointer in an address space without a register class (e.g. produced by + // an addrspacecast to an illegal address space) has no store pattern. + // Reinterpret the value as an integer of the same size, which does. 
+ if (isUnsupportedPointerType(DataTy)) { + auto Cast = B.buildPtrToInt(LLT::scalar(DataTy.getSizeInBits()), DataReg); + Observer.changingInstr(MI); + MI.getOperand(0).setReg(Cast.getReg(0)); + Observer.changedInstr(MI); + return true; + } return false; } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir index 0a009e702dea4..ea2671deb9e40 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir @@ -2352,7 +2352,8 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; CI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 1, addrspace 1) + ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) + ; CI-NEXT: G_STORE [[PTRTOINT]](s64), [[COPY]](p1) :: (store (p999), align 1, addrspace 1) ; ; VI-LABEL: name: test_store_global_p999_align1 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -2407,21 +2408,24 @@ body: | ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 1, addrspace 1) + ; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) + ; GFX9-NEXT: G_STORE [[PTRTOINT]](s64), [[COPY]](p1) :: (store (p999), align 1, addrspace 1) ; ; GFX11-LABEL: name: test_store_global_p999_align1 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 1, addrspace 1) + ; GFX11-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) + ; GFX11-NEXT: G_STORE [[PTRTOINT]](s64), [[COPY]](p1) :: (store (p999), align 1, 
addrspace 1) ; ; GFX12-LABEL: name: test_store_global_p999_align1 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; GFX12-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 1, addrspace 1) + ; GFX12-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) + ; GFX12-NEXT: G_STORE [[PTRTOINT]](s64), [[COPY]](p1) :: (store (p999), align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p999) = COPY $vgpr2_vgpr3 G_STORE %1, %0 :: (store (p999), align 1, addrspace 1) @@ -2464,7 +2468,8 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; CI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 2, addrspace 1) + ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) + ; CI-NEXT: G_STORE [[PTRTOINT]](s64), [[COPY]](p1) :: (store (p999), align 2, addrspace 1) ; ; VI-LABEL: name: test_store_global_p999_align2 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 @@ -2497,21 +2502,24 @@ body: | ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 2, addrspace 1) + ; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) + ; GFX9-NEXT: G_STORE [[PTRTOINT]](s64), [[COPY]](p1) :: (store (p999), align 2, addrspace 1) ; ; GFX11-LABEL: name: test_store_global_p999_align2 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 2, addrspace 1) + ; GFX11-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) + ; GFX11-NEXT: G_STORE 
[[PTRTOINT]](s64), [[COPY]](p1) :: (store (p999), align 2, addrspace 1) ; ; GFX12-LABEL: name: test_store_global_p999_align2 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; GFX12-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 2, addrspace 1) + ; GFX12-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) + ; GFX12-NEXT: G_STORE [[PTRTOINT]](s64), [[COPY]](p1) :: (store (p999), align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p999) = COPY $vgpr2_vgpr3 G_STORE %1, %0 :: (store (p999), align 2, addrspace 1) @@ -2528,42 +2536,48 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; SI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 4, addrspace 1) + ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) + ; SI-NEXT: G_STORE [[PTRTOINT]](s64), [[COPY]](p1) :: (store (p999), align 4, addrspace 1) ; ; CI-LABEL: name: test_store_global_p999_align4 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; CI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 4, addrspace 1) + ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) + ; CI-NEXT: G_STORE [[PTRTOINT]](s64), [[COPY]](p1) :: (store (p999), align 4, addrspace 1) ; ; VI-LABEL: name: test_store_global_p999_align4 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; VI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 4, addrspace 1) + ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) + ; VI-NEXT: G_STORE [[PTRTOINT]](s64), 
[[COPY]](p1) :: (store (p999), align 4, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_p999_align4 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 4, addrspace 1) + ; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) + ; GFX9-NEXT: G_STORE [[PTRTOINT]](s64), [[COPY]](p1) :: (store (p999), align 4, addrspace 1) ; ; GFX11-LABEL: name: test_store_global_p999_align4 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 4, addrspace 1) + ; GFX11-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) + ; GFX11-NEXT: G_STORE [[PTRTOINT]](s64), [[COPY]](p1) :: (store (p999), align 4, addrspace 1) ; ; GFX12-LABEL: name: test_store_global_p999_align4 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; GFX12-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 4, addrspace 1) + ; GFX12-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) + ; GFX12-NEXT: G_STORE [[PTRTOINT]](s64), [[COPY]](p1) :: (store (p999), align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p999) = COPY $vgpr2_vgpr3 G_STORE %1, %0 :: (store (p999), align 4, addrspace 1) @@ -2580,42 +2594,48 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; SI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), addrspace 1) + ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) + ; SI-NEXT: G_STORE 
[[PTRTOINT]](s64), [[COPY]](p1) :: (store (p999), addrspace 1) ; ; CI-LABEL: name: test_store_global_p999_align8 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; CI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), addrspace 1) + ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) + ; CI-NEXT: G_STORE [[PTRTOINT]](s64), [[COPY]](p1) :: (store (p999), addrspace 1) ; ; VI-LABEL: name: test_store_global_p999_align8 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; VI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), addrspace 1) + ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) + ; VI-NEXT: G_STORE [[PTRTOINT]](s64), [[COPY]](p1) :: (store (p999), addrspace 1) ; ; GFX9-LABEL: name: test_store_global_p999_align8 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), addrspace 1) + ; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) + ; GFX9-NEXT: G_STORE [[PTRTOINT]](s64), [[COPY]](p1) :: (store (p999), addrspace 1) ; ; GFX11-LABEL: name: test_store_global_p999_align8 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), addrspace 1) + ; GFX11-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) + ; GFX11-NEXT: G_STORE [[PTRTOINT]](s64), [[COPY]](p1) :: (store (p999), addrspace 1) ; ; GFX12-LABEL: name: test_store_global_p999_align8 ; GFX12: 
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; GFX12-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), addrspace 1) + ; GFX12-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) + ; GFX12-NEXT: G_STORE [[PTRTOINT]](s64), [[COPY]](p1) :: (store (p999), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p999) = COPY $vgpr2_vgpr3 G_STORE %1, %0 :: (store (p999), align 8, addrspace 1) @@ -2632,42 +2652,48 @@ body: | ; SI-NEXT: {{ $}} ; SI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; SI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 16, addrspace 1) + ; SI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) + ; SI-NEXT: G_STORE [[PTRTOINT]](s64), [[COPY]](p1) :: (store (p999), align 16, addrspace 1) ; ; CI-LABEL: name: test_store_global_p999_align16 ; CI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; CI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 16, addrspace 1) + ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) + ; CI-NEXT: G_STORE [[PTRTOINT]](s64), [[COPY]](p1) :: (store (p999), align 16, addrspace 1) ; ; VI-LABEL: name: test_store_global_p999_align16 ; VI: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; VI-NEXT: {{ $}} ; VI-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; VI-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 16, addrspace 1) + ; VI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) + ; VI-NEXT: G_STORE [[PTRTOINT]](s64), [[COPY]](p1) :: (store (p999), align 16, addrspace 1) ; ; GFX9-LABEL: name: test_store_global_p999_align16 ; GFX9: liveins: $vgpr0_vgpr1, 
$vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 16, addrspace 1) + ; GFX9-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) + ; GFX9-NEXT: G_STORE [[PTRTOINT]](s64), [[COPY]](p1) :: (store (p999), align 16, addrspace 1) ; ; GFX11-LABEL: name: test_store_global_p999_align16 ; GFX11: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX11-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; GFX11-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 16, addrspace 1) + ; GFX11-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) + ; GFX11-NEXT: G_STORE [[PTRTOINT]](s64), [[COPY]](p1) :: (store (p999), align 16, addrspace 1) ; ; GFX12-LABEL: name: test_store_global_p999_align16 ; GFX12: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:_(p999) = COPY $vgpr2_vgpr3 - ; GFX12-NEXT: G_STORE [[COPY1]](p999), [[COPY]](p1) :: (store (p999), align 16, addrspace 1) + ; GFX12-NEXT: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[COPY1]](p999) + ; GFX12-NEXT: G_STORE [[PTRTOINT]](s64), [[COPY]](p1) :: (store (p999), align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p999) = COPY $vgpr2_vgpr3 G_STORE %1, %0 :: (store (p999), align 16, addrspace 1) diff --git a/llvm/test/CodeGen/AMDGPU/invalid-addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/invalid-addrspacecast.ll index 294c9045ebc14..2ba9f6e1e8e5a 100644 --- a/llvm/test/CodeGen/AMDGPU/invalid-addrspacecast.ll +++ b/llvm/test/CodeGen/AMDGPU/invalid-addrspacecast.ll @@ -42,17 +42,16 @@ define amdgpu_kernel void @use_constant32bit_to_local_addrspacecast(ptr addrspac } define amdgpu_kernel void @use_local_to_42_addrspacecast(ptr addrspace(3) %ptr) { -; SDAG-LABEL: 
use_local_to_42_addrspacecast: -; SDAG: ; %bb.0: -; SDAG-NEXT: v_mov_b32_e32 v0, 0 -; SDAG-NEXT: v_mov_b32_e32 v1, 0 -; SDAG-NEXT: flat_store_dwordx2 v[0:1], v[0:1] -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_endpgm -; -; GISEL-LABEL: use_local_to_42_addrspacecast: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_endpgm +; CHECK-LABEL: use_local_to_42_addrspacecast: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_mov_b32 flat_scratch_lo, s13 +; CHECK-NEXT: s_add_i32 s12, s12, s17 +; CHECK-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8 +; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: v_mov_b32_e32 v1, 0 +; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[0:1] +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_endpgm %cast = addrspacecast ptr addrspace(3) %ptr to ptr addrspace(42) store volatile ptr addrspace(42) %cast, ptr addrspace(1) null ret void diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll index 94648892373e7..571c6215bd3aa 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll @@ -180,6 +180,12 @@ define amdgpu_gfx void @workgroup_ids_gfx(ptr addrspace(1) %outx, ptr addrspace( ; GFX9-GISEL-LABEL: workgroup_ids_gfx: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: global_store_dword v[0:1], v0, off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: global_store_dword v[2:3], v0, off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: global_store_dword v[4:5], v0, off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX9ARCH-SDAG-LABEL: workgroup_ids_gfx: diff --git a/llvm/test/CodeGen/X86/GlobalISel/undef.ll b/llvm/test/CodeGen/X86/GlobalISel/undef.ll index 34ada8a20999d..3199a96d38244 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/undef.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/undef.ll @@ -56,3 +56,45 @@ 
define float @test4(float %a) { ret float %r } +define void @plain_undef_store(ptr %p) { +; ALL-LABEL: plain_undef_store: +; ALL: # %bb.0: +; ALL-NEXT: retq + store i32 undef, ptr %p, align 4 + ret void +} + +define void @volatile_undef_store(ptr %p) { +; ALL-LABEL: volatile_undef_store: +; ALL: # %bb.0: +; ALL-NEXT: movl %eax, (%rdi) +; ALL-NEXT: retq + store volatile i32 undef, ptr %p, align 4 + ret void +} + +define void @seq_cst_atomic_undef_store(ptr %p) { +; ALL-LABEL: seq_cst_atomic_undef_store: +; ALL: # %bb.0: +; ALL-NEXT: movl %eax, (%rdi) +; ALL-NEXT: retq + store atomic i32 undef, ptr %p seq_cst, align 4 + ret void +} + +define void @unordered_atomic_undef_store(ptr %p) { +; ALL-LABEL: unordered_atomic_undef_store: +; ALL: # %bb.0: +; ALL-NEXT: retq + store atomic i32 undef, ptr %p unordered, align 4 + ret void +} + +define void @monotonic_atomic_undef_store(ptr %p) { +; ALL-LABEL: monotonic_atomic_undef_store: +; ALL: # %bb.0: +; ALL-NEXT: movl %eax, (%rdi) +; ALL-NEXT: retq + store atomic i32 undef, ptr %p monotonic, align 4 + ret void +}