diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 1a13b2226ecd6..4786ff53638da 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -2560,8 +2560,14 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast( const SIMachineFunctionInfo *Info = MF.getInfo(); uint32_t AddrHiVal = Info->get32BitAddressHighBits(); auto PtrLo = B.buildPtrToInt(S32, Src); - auto HighAddr = B.buildConstant(S32, AddrHiVal); - B.buildMergeLikeInstr(Dst, {PtrLo, HighAddr}); + if (AddrHiVal == 0) { + auto Zext = B.buildZExt(LLT::scalar(64), PtrLo); + B.buildIntToPtr(Dst, Zext); + } else { + auto HighAddr = B.buildConstant(S32, AddrHiVal); + B.buildMergeLikeInstr(Dst, {PtrLo, HighAddr}); + } + MI.eraseFromParent(); return true; } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 875278a3b4f97..b9409846bd3f3 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -8374,6 +8374,9 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op, Op.getValueType() == MVT::i64) { const SIMachineFunctionInfo *Info = DAG.getMachineFunction().getInfo(); + if (Info->get32BitAddressHighBits() == 0) + return DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i64, Src); + SDValue Hi = DAG.getConstant(Info->get32BitAddressHighBits(), SL, MVT::i32); SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, Src, Hi); return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir index 4471980c1ba1c..e83b4eabd5dc8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir @@ -428,9 +428,9 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $vgpr0 ; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p4) + ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32) + ; GCN-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64) + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p4) %0:_(p6) = COPY $vgpr0 %1:_(p4) = G_ADDRSPACE_CAST %0 $vgpr0_vgpr1 = COPY %1 @@ -485,9 +485,9 @@ body: | ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $vgpr0 ; GCN-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p0) + ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32) + ; GCN-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[ZEXT]](s64) + ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[INTTOPTR]](p0) %0:_(p6) = COPY $vgpr0 %1:_(p0) = G_ADDRSPACE_CAST %0 $vgpr0_vgpr1 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir index b91f1f408dc58..b9c0217aa591f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir @@ -12,24 +12,24 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $vgpr0 ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load (s8), addrspace 6) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[MV]], [[C1]](s64) + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32) + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[INTTOPTR]](p4) :: (load (s8), addrspace 6) + ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[INTTOPTR]], [[C]](s64) ; CI-NEXT: [[ZEXTLOAD1:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD]](p4) :: (load (s8) from unknown-address + 1, addrspace 6) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C2]](s32) + ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXTLOAD1]], [[C1]](s32) ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]] - ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[MV]], [[C3]](s64) + ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[INTTOPTR]], [[C2]](s64) ; CI-NEXT: [[ZEXTLOAD2:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[PTR_ADD1]](p4) :: (load (s8) from unknown-address + 2, addrspace 6) - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = nuw inbounds G_PTR_ADD [[PTR_ADD1]], [[C]](s64) ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load (s8) from unknown-address + 3, addrspace 6) - ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C2]](s32) + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32) ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[ZEXTLOAD2]] - ; CI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C4]](s32) + ; CI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[OR1]], [[C3]](s32) ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SHL2]], [[OR]] ; CI-NEXT: $vgpr0 = COPY [[OR2]](s32) %0:_(p6) = COPY $vgpr0 @@ -48,9 +48,9 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $vgpr0 ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), addrspace 6) + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32) + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p4) :: (load (s32), addrspace 6) ; CI-NEXT: $vgpr0 = COPY [[LOAD]](s32) %0:_(p6) = COPY $vgpr0 %1:_(s32) = G_LOAD %0 :: (load (s32), align 4, addrspace 6) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-constant-32bit.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-constant-32bit.mir index d87212d64d625..067844d506ef5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-constant-32bit.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-constant-32bit.mir @@ -13,9 +13,9 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), addrspace 6) + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32) + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p4) :: (load (s32), addrspace 6) ; CI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64) %0:_(p6) = COPY $sgpr0 @@ -34,9 +34,9 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), align 2, addrspace 6) + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32) + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p4) :: (load (s32), align 2, addrspace 6) ; CI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64) %0:_(p6) = COPY $sgpr0 @@ -55,9 +55,9 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), align 1, addrspace 6) + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32) + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p4) :: (load (s32), align 1, addrspace 6) ; CI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32) ; CI-NEXT: $vgpr0_vgpr1 = COPY [[SEXT]](s64) %0:_(p6) = COPY $sgpr0 @@ -76,9 +76,9 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p4) :: (load (s8), addrspace 6) + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32) + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64) + ; CI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[INTTOPTR]](p4) :: (load (s8), addrspace 6) ; CI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) %0:_(p6) = COPY $sgpr0 %1:_(s32) = G_SEXTLOAD %0 :: (load (s8), align 1, addrspace 6) @@ -96,9 +96,9 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p4) :: (load (s16), addrspace 6) + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32) + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64) + ; CI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[INTTOPTR]](p4) :: (load (s16), addrspace 6) ; CI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) %0:_(p6) = COPY $sgpr0 %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), align 2, addrspace 6) @@ -116,9 +116,9 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p4) :: (load (s16), align 1, addrspace 6) + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32) + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64) + ; CI-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[INTTOPTR]](p4) :: (load (s16), align 1, addrspace 6) ; CI-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) %0:_(p6) = COPY $sgpr0 %1:_(s32) = G_SEXTLOAD %0 :: (load (s16), align 1, addrspace 6) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-constant-32bit.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-constant-32bit.mir index a4971e94e75f6..c72cdd5dbc8be 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-constant-32bit.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-constant-32bit.mir @@ -14,11 +14,11 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), addrspace 6) - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32) + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p4) :: (load (s32), addrspace 6) + ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT1]](s64) %0:_(p6) = COPY $sgpr0 %1:_(s64) = G_ZEXTLOAD %0 :: (load (s32), align 4, addrspace 6) $vgpr0_vgpr1 = COPY %1 @@ -35,11 +35,11 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), align 2, addrspace 6) - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32) + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p4) :: (load (s32), align 2, addrspace 6) + ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT1]](s64) %0:_(p6) = COPY $sgpr0 %1:_(s64) = G_ZEXTLOAD %0 :: (load (s32), align 2, addrspace 6) $vgpr0_vgpr1 = COPY %1 @@ -56,11 +56,11 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load (s32), align 1, addrspace 6) - ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) - ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64) + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32) + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64) + ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p4) :: (load (s32), align 1, addrspace 6) + ; CI-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s32) + ; CI-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT1]](s64) %0:_(p6) = COPY $sgpr0 %1:_(s64) = G_ZEXTLOAD %0 :: (load (s32), align 1, addrspace 6) $vgpr0_vgpr1 = COPY %1 @@ -77,9 +77,9 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load (s8), addrspace 6) + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32) + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[INTTOPTR]](p4) :: (load (s8), addrspace 6) ; CI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) %0:_(p6) = COPY $sgpr0 %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8), align 1, addrspace 6) @@ -97,9 +97,9 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load (s16), addrspace 6) + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32) + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[INTTOPTR]](p4) :: (load (s16), addrspace 6) ; CI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) %0:_(p6) = COPY $sgpr0 %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), align 2, addrspace 6) @@ -117,9 +117,9 @@ body: | ; CI-NEXT: {{ $}} ; CI-NEXT: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0 ; CI-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p6) - ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CI-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[PTRTOINT]](s32), [[C]](s32) - ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[MV]](p4) :: (load (s16), align 1, addrspace 6) + ; CI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[PTRTOINT]](s32) + ; CI-NEXT: [[INTTOPTR:%[0-9]+]]:_(p4) = G_INTTOPTR [[ZEXT]](s64) + ; CI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[INTTOPTR]](p4) :: (load (s16), align 1, addrspace 6) ; CI-NEXT: $vgpr0 = COPY [[ZEXTLOAD]](s32) %0:_(p6) = COPY $sgpr0 %1:_(s32) = G_ZEXTLOAD %0 :: (load (s16), align 1, addrspace 6) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant32bit.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant32bit.ll index 0038a097174c6..2e1b853ff8c58 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant32bit.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant32bit.ll @@ -11,8 +11,8 @@ define amdgpu_ps float @load_constant32bit_vgpr_offset(i32 %arg) { ; GFX6-LABEL: load_constant32bit_vgpr_offset: ; GFX6: ; %bb.0: ; %entry ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX6-NEXT: s_mov_b32 s2, 0 ; GFX6-NEXT: v_mov_b32_e32 v1, 0 +; GFX6-NEXT: s_mov_b32 s2, 0 ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: s_mov_b64 s[0:1], 0 ; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 @@ -59,8 +59,8 @@ define amdgpu_ps <8 x float> @load_constant32bit_vgpr_v8f32(ptr addrspace(6) %ar ; GFX6-LABEL: load_constant32bit_vgpr_v8f32: ; GFX6: ; %bb.0: ; %entry ; GFX6-NEXT: v_mov_b32_e32 v4, v0 -; GFX6-NEXT: s_mov_b32 s2, 0 ; GFX6-NEXT: v_mov_b32_e32 v5, 0 +; GFX6-NEXT: s_mov_b32 s2, 0 ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: s_mov_b64 s[0:1], 0 ; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[4:5], s[0:3], 0 addr64 diff --git a/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll b/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll index 66d99b14e282d..a99aab7a23a3b 100644 --- a/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll +++ b/llvm/test/CodeGen/AMDGPU/codegen-prepare-addrspacecast-non-null.ll @@ -474,15 +474,24 @@ define i32 @cast_private_to_flat_to_global(ptr addrspace(6) %const32.ptr) { ; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, ptr addrspace(3) [[LOCAL_PTR]], align 4 ; OPT-NEXT: ret i32 [[LOAD]] ; -; ASM-LABEL: cast_private_to_flat_to_global: -; ASM: ; %bb.0: -; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; ASM-NEXT: v_mov_b32_e32 v1, 0 -; ASM-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] -; ASM-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc -; ASM-NEXT: ds_read_b32 v0, v0 -; ASM-NEXT: s_waitcnt lgkmcnt(0) -; ASM-NEXT: s_setpc_b64 s[30:31] +; DAGISEL-ASM-LABEL: cast_private_to_flat_to_global: +; DAGISEL-ASM: ; %bb.0: +; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; DAGISEL-ASM-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; DAGISEL-ASM-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc +; DAGISEL-ASM-NEXT: ds_read_b32 v0, v0 +; DAGISEL-ASM-NEXT: s_waitcnt lgkmcnt(0) +; DAGISEL-ASM-NEXT: s_setpc_b64 s[30:31] +; +; GISEL-ASM-LABEL: cast_private_to_flat_to_global: +; GISEL-ASM: ; %bb.0: +; GISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-ASM-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-ASM-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] +; GISEL-ASM-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc +; GISEL-ASM-NEXT: ds_read_b32 v0, v0 +; GISEL-ASM-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-ASM-NEXT: s_setpc_b64 s[30:31] %flat.ptr = addrspacecast ptr addrspace(6) %const32.ptr to ptr %local.ptr = addrspacecast ptr %flat.ptr to ptr addrspace(3) %load = load volatile i32, ptr addrspace(3) %local.ptr