From 754ff45ec723d8f6ac09f4a5cf80b78471422fcc Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 20 Nov 2025 15:56:05 -0500 Subject: [PATCH] AMDGPU: Try to use zext to implement constant-32-bit addrspacecast If the high bits are assumed 0 for the cast, use zext. Previously we would emit a build_vector and a bitcast with the high element as 0. The zext is more easily optimized. I'm less convinced this is good for globalisel, since you still need to have the inttoptr back to the original pointer type. The default value is 0, though I'm not sure if this is meaningful in the real world. The real uses might always override the high bit value with the attribute. --- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 10 +++- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 3 ++ .../GlobalISel/legalize-addrspacecast.mir | 12 ++--- .../legalize-load-constant-32bit.mir | 32 ++++++------- .../legalize-sextload-constant-32bit.mir | 36 +++++++------- .../legalize-zextload-constant-32bit.mir | 48 +++++++++---------- .../AMDGPU/GlobalISel/load-constant32bit.ll | 4 +- .../codegen-prepare-addrspacecast-non-null.ll | 27 +++++++---- 8 files changed, 95 insertions(+), 77 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 1a13b2226ecd6..4786ff53638da 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -2560,8 +2560,14 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast( const SIMachineFunctionInfo *Info = MF.getInfo(); uint32_t AddrHiVal = Info->get32BitAddressHighBits(); auto PtrLo = B.buildPtrToInt(S32, Src); - auto HighAddr = B.buildConstant(S32, AddrHiVal); - B.buildMergeLikeInstr(Dst, {PtrLo, HighAddr}); + if (AddrHiVal == 0) { + auto Zext = B.buildZExt(LLT::scalar(64), PtrLo); + B.buildIntToPtr(Dst, Zext); + } else { + auto HighAddr = B.buildConstant(S32, AddrHiVal); + B.buildMergeLikeInstr(Dst, {PtrLo, HighAddr}); + } + MI.eraseFromParent(); return true; } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 875278a3b4f97..b9409846bd3f3 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -8374,6 +8374,9 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op, Op.getValueType() == MVT::i64) { const SIMachineFunctionInfo *Info = DAG.getMachineFunction().getInfo(); + if (Info->get32BitAddressHighBits() == 0) + return DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i64, Src); + SDValue Hi = DAG.getConstant(Info->get32BitAddressHighBits(), SL, MVT::i32); SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, Src, Hi); return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir index 4471980c1ba1c..e83b4eabd5dc8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir @@ -428,9 +428,9 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $vgpr0 ; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4) + ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32) + ; GCN-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64) + ; GCN-NEXT: 
$vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) %0:_(p6) = COPY $vgpr0 %1:_(p4) = G_ADDRSPACE_CAST %0 $vgpr0_vgpr1 = COPY %1 @@ -485,9 +485,9 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $vgpr0 ; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p0) + ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32) + ; GCN-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[ZEXT]](s64) + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p0) %0:_(p6) = COPY $vgpr0 %1:_(p0) = G_ADDRSPACE_CAST %0 $vgpr0_vgpr1 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir index b91f1f408dc58..b9c0217aa591f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir @@ -12,24 +12,24 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $vgpr0 ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load (s8), addrspace 6) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[MV]], [[C1]](s64) + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32) + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[INTTOPTR]](p4) :: (load (s8), addrspace 6) + ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[INTTOPTR]], [[C]](s64) ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 6) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C2]](s32) + ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[MV]], [[C3]](s64) + ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[INTTOPTR]], [[C2]](s64) ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 6) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[PTR_ADD1]], [[C]](s64) ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 6) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C2]](s32) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C4]](s32) + ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) ; CI-NEXT: 
[[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32) %0:_(p6) = COPY $vgpr0 @@ -48,9 +48,9 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $vgpr0 ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), addrspace 6) + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32) + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p4) :: (load (s32), addrspace 6) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p6) = COPY $vgpr0 %1:_(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 6) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-constant-32bit.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-constant-32bit.mir index d87212d64d625..067844d506ef5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-constant-32bit.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-constant-32bit.mir @@ -13,9 +13,9 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), addrspace 6) + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32) + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p4) :: (load (s32), addrspace 6) ; CI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64) %0:_(p6) = COPY $sgpr0 @@ -34,9 +34,9 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), align 2, addrspace 6) + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32) + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p4) :: (load (s32), align 2, addrspace 6) ; CI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64) %0:_(p6) = COPY $sgpr0 @@ -55,9 +55,9 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), align 1, addrspace 6) + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32) + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p4) :: (load (s32), align 1, addrspace 6) ; CI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64) %0:_(p6) = COPY $sgpr0 @@ -76,9 +76,9 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT 
[[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p4) :: (load (s8), addrspace 6) + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32) + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64) + ; CI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[INTTOPTR]](p4) :: (load (s8), addrspace 6) ; CI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) %0:_(p6) = COPY $sgpr0 %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), align 1, addrspace 6) @@ -96,9 +96,9 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p4) :: (load (s16), addrspace 6) + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32) + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64) + ; CI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[INTTOPTR]](p4) :: (load (s16), addrspace 6) ; CI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) %0:_(p6) = COPY $sgpr0 %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), align 2, addrspace 6) @@ -116,9 +116,9 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p4) :: (load (s16), align 1, addrspace 6) + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32) + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64) + ; CI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[INTTOPTR]](p4) :: (load (s16), align 1, addrspace 6) ; CI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) %0:_(p6) = COPY $sgpr0 %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), align 1, addrspace 6) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-constant-32bit.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-constant-32bit.mir index a4971e94e75f6..c72cdd5dbc8be 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-constant-32bit.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-constant-32bit.mir @@ -14,11 +14,11 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), addrspace 6) - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32) + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p4) :: (load (s32), addrspace 6) + ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT1]](s64) %0:_(p6) = COPY $sgpr0 %1:_(s64) = G_ZEXTLOAD %0 :: (load (s32), align 4, addrspace 6) $vgpr0_vgpr1 = COPY %1 @@ -35,11 +35,11 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT 
[[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), align 2, addrspace 6) - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32) + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p4) :: (load (s32), align 2, addrspace 6) + ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT1]](s64) %0:_(p6) = COPY $sgpr0 %1:_(s64) = G_ZEXTLOAD %0 :: (load (s32), align 2, addrspace 6) $vgpr0_vgpr1 = COPY %1 @@ -56,11 +56,11 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), align 1, addrspace 6) - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32) + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p4) :: (load (s32), align 1, addrspace 6) + ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT1]](s64) %0:_(p6) = COPY $sgpr0 %1:_(s64) = G_ZEXTLOAD %0 :: (load (s32), align 1, addrspace 6) $vgpr0_vgpr1 = COPY %1 @@ -77,9 +77,9 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load (s8), addrspace 6) + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32) + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[INTTOPTR]](p4) :: (load (s8), addrspace 6) ; CI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) %0:_(p6) = COPY $sgpr0 %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), align 1, addrspace 6) @@ -97,9 +97,9 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load (s16), addrspace 6) + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32) + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[INTTOPTR]](p4) :: (load (s16), addrspace 6) ; CI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) %0:_(p6) = COPY $sgpr0 %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), align 2, addrspace 6) @@ -117,9 +117,9 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: 
[[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load (s16), align 1, addrspace 6) + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32) + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[INTTOPTR]](p4) :: (load (s16), align 1, addrspace 6) ; CI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) %0:_(p6) = COPY $sgpr0 %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), align 1, addrspace 6) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant32bit.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant32bit.ll index 0038a097174c6..2e1b853ff8c58 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant32bit.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant32bit.ll @@ -11,8 +11,8 @@ define amdgpu_ps float @load_constant32bit_vgpr_offset(i32 %arg) { ; GFX6-LABEL: load_constant32bit_vgpr_offset: ; GFX6: ; %bb.0: ; %entry ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX6-NEXT: s_mov_b32 s2, 0 ; GFX6-NEXT: v_mov_b32_e32 v1, 0 +; GFX6-NEXT: s_mov_b32 s2, 0 ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: s_mov_b64 s[0:1], 0 ; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 @@ -59,8 +59,8 @@ define amdgpu_ps <8 x float> @load_constant32bit_vgpr_v8f32(ptr addrspace(6) %ar ; GFX6-LABEL: load_constant32bit_vgpr_v8f32: ; GFX6: ; %bb.0: ; %entry ; GFX6-NEXT: v_mov_b32_e32 v4, v0 -; GFX6-NEXT: s_mov_b32 s2, 0 ; GFX6-NEXT: v_mov_b32_e32 v5, 0 +; GFX6-NEXT: s_mov_b32 s2, 0 ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: s_mov_b64 s[0:1], 0 ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[4:5], s[0:3], 0 addr64 diff --git a/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll b/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll index 66d99b14e282d..a99aab7a23a3b 100644 --- a/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll +++ b/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll @@ -474,15 +474,24 @@ define i32 @cast_private_to_flat_to_global(ptr addrspace(6) %const32.ptr) { ; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, ptr addrspace(3) [[LOCAL_PTR]], align 4 ; OPT-NEXT: ret i32 [[LOAD]] ; -; ASM-LABEL: cast_private_to_flat_to_global: -; ASM: ; %bb.0: -; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; ASM-NEXT: v_mov_b32_e32 v1, 0 -; ASM-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] -; ASM-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc -; ASM-NEXT: ds_read_b32 v0, v0 -; ASM-NEXT: s_waitcnt lgkmcnt(0) -; ASM-NEXT: s_setpc_b64 s[30:31] +; DAGISEL-ASM-LABEL: cast_private_to_flat_to_global: +; DAGISEL-ASM: ; %bb.0: +; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DAGISEL-ASM-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; DAGISEL-ASM-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc +; DAGISEL-ASM-NEXT: ds_read_b32 v0, v0 +; DAGISEL-ASM-NEXT: s_waitcnt lgkmcnt(0) +; DAGISEL-ASM-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-ASM-LABEL: cast_private_to_flat_to_global: +; GISEL-ASM: ; %bb.0: +; GISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-ASM-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-ASM-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; GISEL-ASM-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc +; GISEL-ASM-NEXT: ds_read_b32 v0, v0 +; GISEL-ASM-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-ASM-NEXT: s_setpc_b64 s[30:31] %flat.ptr = addrspacecast ptr addrspace(6) %const32.ptr to ptr %local.ptr = addrspacecast ptr %flat.ptr to ptr addrspace(3) %load = load volatile i32, ptr addrspace(3) %local.ptr
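
Note (illustrative only, not part of the patch): a minimal IR sketch of the kind of input this change affects, modeled on the existing constant-32-bit tests. The function name here is made up for the example; with the 32-bit-address high-bits value left at its default of 0, the addrspace(6) cast below is the case that should now lower through a zero-extend of the low 32 bits instead of a build_vector/bitcast pair.

define amdgpu_ps float @example_const32_cast(ptr addrspace(6) %p) {
  ; Cast the 32-bit constant-address pointer to a full 64-bit constant pointer.
  ; Under the high-bits == 0 assumption, the 64-bit address is just
  ; zext(ptrtoint %p), which is easier to fold than building {lo, 0} as a
  ; v2i32 and bitcasting it to i64.
  %full = addrspacecast ptr addrspace(6) %p to ptr addrspace(4)
  %v = load float, ptr addrspace(4) %full, align 4
  ret float %v
}

On the GlobalISel side this still produces a G_ZEXT feeding a G_INTTOPTR back to the 64-bit pointer type, as the updated MIR tests above show, which is why the commit message hedges on whether it is a clear win there.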