-
Notifications
You must be signed in to change notification settings - Fork 15.3k
AMDGPU: Try to use zext to implement constant-32-bit addrspacecast #168977
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/arsenm/amdgpu/add-more-constant-address-space-32-bit-tests
Are you sure you want to change the base?
Conversation
If the high bits are assumed 0 for the cast, use zext. Previously we would emit a build_vector and a bitcast with the high element as 0. The zext is more easily optimized. I'm less convinced this is good for globalisel, since you still need to have the inttoptr back to the original pointer type. The default value is 0, though I'm not sure if this is meaningful in the real world. The real uses might always override the high bit value with the attribute.
|
Warning This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite.
This stack of pull requests is managed by Graphite. Learn more about stacking. |
|
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) ChangesIf the high bits are assumed 0 for the cast, use zext. Previously The default value is 0, though I'm not sure if this is meaningful Patch is 20.72 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/168977.diff 8 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 1a13b2226ecd6..4786ff53638da 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -2560,8 +2560,14 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
uint32_t AddrHiVal = Info->get32BitAddressHighBits();
auto PtrLo = B.buildPtrToInt(S32, Src);
- auto HighAddr = B.buildConstant(S32, AddrHiVal);
- B.buildMergeLikeInstr(Dst, {PtrLo, HighAddr});
+ if (AddrHiVal == 0) {
+ auto Zext = B.buildZExt(LLT::scalar(64), PtrLo);
+ B.buildIntToPtr(Dst, Zext);
+ } else {
+ auto HighAddr = B.buildConstant(S32, AddrHiVal);
+ B.buildMergeLikeInstr(Dst, {PtrLo, HighAddr});
+ }
+
MI.eraseFromParent();
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 875278a3b4f97..b9409846bd3f3 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -8374,6 +8374,9 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
Op.getValueType() == MVT::i64) {
const SIMachineFunctionInfo *Info =
DAG.getMachineFunction().getInfo<SIMachineFunctionInfo>();
+ if (Info->get32BitAddressHighBits() == 0)
+ return DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i64, Src);
+
SDValue Hi = DAG.getConstant(Info->get32BitAddressHighBits(), SL, MVT::i32);
SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, Src, Hi);
return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir
index 4471980c1ba1c..e83b4eabd5dc8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir
@@ -428,9 +428,9 @@ body: |
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $vgpr0
; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6)
- ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GCN-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32)
- ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4)
+ ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32)
+ ; GCN-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64)
+ ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4)
%0:_(p6) = COPY $vgpr0
%1:_(p4) = G_ADDRSPACE_CAST %0
$vgpr0_vgpr1 = COPY %1
@@ -485,9 +485,9 @@ body: |
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $vgpr0
; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6)
- ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; GCN-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32)
- ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p0)
+ ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32)
+ ; GCN-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[ZEXT]](s64)
+ ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p0)
%0:_(p6) = COPY $vgpr0
%1:_(p0) = G_ADDRSPACE_CAST %0
$vgpr0_vgpr1 = COPY %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir
index b91f1f408dc58..b9c0217aa591f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir
@@ -12,24 +12,24 @@ body: |
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $vgpr0
; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6)
- ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32)
- ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load (s8), addrspace 6)
- ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
- ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[MV]], [[C1]](s64)
+ ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32)
+ ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64)
+ ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[INTTOPTR]](p4) :: (load (s8), addrspace 6)
+ ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[INTTOPTR]], [[C]](s64)
; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 6)
- ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C2]](s32)
+ ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32)
; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
- ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[MV]], [[C3]](s64)
+ ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[INTTOPTR]], [[C2]](s64)
; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 6)
- ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
+ ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[PTR_ADD1]], [[C]](s64)
; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 6)
- ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C2]](s32)
+ ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]]
- ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C4]](s32)
+ ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32)
; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]]
; CI-NEXT: $vgpr0 = COPY [[OR2]](s32)
%0:_(p6) = COPY $vgpr0
@@ -48,9 +48,9 @@ body: |
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $vgpr0
; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6)
- ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32)
- ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), addrspace 6)
+ ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32)
+ ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64)
+ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p4) :: (load (s32), addrspace 6)
; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32)
%0:_(p6) = COPY $vgpr0
%1:_(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 6)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-constant-32bit.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-constant-32bit.mir
index d87212d64d625..067844d506ef5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-constant-32bit.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-constant-32bit.mir
@@ -13,9 +13,9 @@ body: |
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6)
- ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32)
- ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), addrspace 6)
+ ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32)
+ ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64)
+ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p4) :: (load (s32), addrspace 6)
; CI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32)
; CI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
%0:_(p6) = COPY $sgpr0
@@ -34,9 +34,9 @@ body: |
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6)
- ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32)
- ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), align 2, addrspace 6)
+ ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32)
+ ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64)
+ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p4) :: (load (s32), align 2, addrspace 6)
; CI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32)
; CI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
%0:_(p6) = COPY $sgpr0
@@ -55,9 +55,9 @@ body: |
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6)
- ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32)
- ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), align 1, addrspace 6)
+ ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32)
+ ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64)
+ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p4) :: (load (s32), align 1, addrspace 6)
; CI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32)
; CI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
%0:_(p6) = COPY $sgpr0
@@ -76,9 +76,9 @@ body: |
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6)
- ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32)
- ; CI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p4) :: (load (s8), addrspace 6)
+ ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32)
+ ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64)
+ ; CI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[INTTOPTR]](p4) :: (load (s8), addrspace 6)
; CI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
%0:_(p6) = COPY $sgpr0
%1:_(s32) = G_SEXTLOAD %0 :: (load (s8), align 1, addrspace 6)
@@ -96,9 +96,9 @@ body: |
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6)
- ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32)
- ; CI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p4) :: (load (s16), addrspace 6)
+ ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32)
+ ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64)
+ ; CI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[INTTOPTR]](p4) :: (load (s16), addrspace 6)
; CI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
%0:_(p6) = COPY $sgpr0
%1:_(s32) = G_SEXTLOAD %0 :: (load (s16), align 2, addrspace 6)
@@ -116,9 +116,9 @@ body: |
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6)
- ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32)
- ; CI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p4) :: (load (s16), align 1, addrspace 6)
+ ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32)
+ ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64)
+ ; CI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[INTTOPTR]](p4) :: (load (s16), align 1, addrspace 6)
; CI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32)
%0:_(p6) = COPY $sgpr0
%1:_(s32) = G_SEXTLOAD %0 :: (load (s16), align 1, addrspace 6)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-constant-32bit.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-constant-32bit.mir
index a4971e94e75f6..c72cdd5dbc8be 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-constant-32bit.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-constant-32bit.mir
@@ -14,11 +14,11 @@ body: |
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6)
- ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32)
- ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), addrspace 6)
- ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
- ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+ ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32)
+ ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64)
+ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p4) :: (load (s32), addrspace 6)
+ ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
+ ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT1]](s64)
%0:_(p6) = COPY $sgpr0
%1:_(s64) = G_ZEXTLOAD %0 :: (load (s32), align 4, addrspace 6)
$vgpr0_vgpr1 = COPY %1
@@ -35,11 +35,11 @@ body: |
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6)
- ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32)
- ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), align 2, addrspace 6)
- ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
- ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+ ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32)
+ ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64)
+ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p4) :: (load (s32), align 2, addrspace 6)
+ ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
+ ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT1]](s64)
%0:_(p6) = COPY $sgpr0
%1:_(s64) = G_ZEXTLOAD %0 :: (load (s32), align 2, addrspace 6)
$vgpr0_vgpr1 = COPY %1
@@ -56,11 +56,11 @@ body: |
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6)
- ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32)
- ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), align 1, addrspace 6)
- ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
- ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+ ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32)
+ ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64)
+ ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p4) :: (load (s32), align 1, addrspace 6)
+ ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32)
+ ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT1]](s64)
%0:_(p6) = COPY $sgpr0
%1:_(s64) = G_ZEXTLOAD %0 :: (load (s32), align 1, addrspace 6)
$vgpr0_vgpr1 = COPY %1
@@ -77,9 +77,9 @@ body: |
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6)
- ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32)
- ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load (s8), addrspace 6)
+ ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32)
+ ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64)
+ ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[INTTOPTR]](p4) :: (load (s8), addrspace 6)
; CI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32)
%0:_(p6) = COPY $sgpr0
%1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), align 1, addrspace 6)
@@ -97,9 +97,9 @@ body: |
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6)
- ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32)
- ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load (s16), addrspace 6)
+ ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32)
+ ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64)
+ ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[INTTOPTR]](p4) :: (load (s16), addrspace 6)
; CI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32)
%0:_(p6) = COPY $sgpr0
%1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), align 2, addrspace 6)
@@ -117,9 +117,9 @@ body: |
; CI-NEXT: {{ $}}
; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6)
- ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32)
- ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load (s16), align 1, addrspace 6)
+ ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32)
+ ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64)
+ ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[INTTOPTR]](p4) :: (load (s16), align 1, addrspace 6)
; CI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32)
%0:_(p6) = COPY $sgpr0
%1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), align 1, addrspace 6)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant32bit.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant32bit.ll
index 0038a097174c6..2e1b853ff8c58 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant32bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant32bit.ll
@@ -11,8 +11,8 @@ define amdgpu_ps float @load_constant32bit_vgpr_offset(i32 %arg) {
; GFX6-LABEL: load_constant32bit_vgpr_offset:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: v_mov_b32_e32 v1, 0
+; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b64 s[0:1], 0
; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64
@@ -59,8 +59,8 @@ define amdgpu_ps <8 x float> @load_constant32bit_vgpr_v8f32(ptr addrspace(6) %ar
; GFX6-LABEL: load_constant32bit_vgpr_v8f32:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: v_mov_b32_e32 v4, v0
-; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: v_mov_b32_e32 v5, 0
+; GFX6-NEXT: s_mov_b32 s2, 0
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b64 s[0:1], 0
; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[4:5], s[0:3], 0 addr64
diff --git a/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll b/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll
index 66d99b14e282d..a99aab7a23a3b 100644
--- a/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll
+++ b/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll
@@ -474,15 +474,24 @@ define i32 @cast_private_to_flat_to_global(ptr addrspace(6) %const32.ptr) {
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, ptr addrspace(3) [[LOCAL_PTR]], align 4
; OPT-NEXT: ret i32 [[LOAD]]
;
-; ASM-LABEL: cast_private_to_flat_to_global:
-; ASM: ; %bb.0:
-; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; ASM-NEXT: v_mov_b32_e32 v1, 0
-; ASM-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
-; ASM-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
-; ASM-NEXT: ds_read_b32 v0, v0
-; ASM-NEXT: s_waitcnt lgkmcnt(0)
-; ASM-NEXT: s_setpc_b64 s[30:31]
+; DAGISEL-ASM-LABEL: cast_private_to_flat_to_global:
+; DAGISEL-ASM: ; %bb.0:
+; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; DAGISEL-ASM-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; DAGISEL-ASM-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
+; D...
[truncated]
|
🐧 Linux x64 Test Results
|

If the high bits are assumed 0 for the cast, use zext. Previously
we would emit a build_vector and a bitcast with the high element
as 0. The zext is more easily optimized. I'm less convinced this is
good for globalisel, since you still need to have the inttoptr back
to the original pointer type.
The default value is 0, though I'm not sure if this is meaningful
in the real world. The real uses might always override the high
bit value with the attribute.