diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 52823c16d72d9..edd8ea39e0f42 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1482,22 +1482,26 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
   Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
 
   if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
-    unsigned Imm = CAddr->getZExtValue();
-
-    SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
-    MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
-                                                        DL, MVT::i32, HighBits);
-    VAddr = SDValue(MovHighBits, 0);
-
-    // In a call sequence, stores to the argument stack area are relative to the
-    // stack pointer.
-    const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
-
-    SOffset = isStackPtrRelative(PtrInfo)
-                  ? CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32)
-                  : CurDAG->getTargetConstant(0, DL, MVT::i32);
-    ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
-    return true;
+    int64_t Imm = CAddr->getSExtValue();
+    const int64_t NullPtr =
+        AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS);
+    // Don't fold null pointer.
+    if (Imm != NullPtr) {
+      SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
+      MachineSDNode *MovHighBits = CurDAG->getMachineNode(
+        AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
+      VAddr = SDValue(MovHighBits, 0);
+
+      // In a call sequence, stores to the argument stack area are relative to the
+      // stack pointer.
+      const MachinePointerInfo &PtrInfo
+        = cast<MemSDNode>(Parent)->getPointerInfo();
+      SOffset = isStackPtrRelative(PtrInfo)
+        ? CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32)
+        : CurDAG->getTargetConstant(0, DL, MVT::i32);
+      ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
+      return true;
+    }
   }
 
   if (CurDAG->isBaseWithConstantOffset(Addr)) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index aee6c0dd8a8e0..5afec2188d66b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -3062,7 +3062,8 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
   const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
 
   int64_t Offset = 0;
-  if (mi_match(Root.getReg(), *MRI, m_ICst(Offset))) {
+  if (mi_match(Root.getReg(), *MRI, m_ICst(Offset)) &&
+      Offset != TM.getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS)) {
     Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
 
     // TODO: Should this be inside the render function? The iterator seems to
@@ -3091,7 +3092,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
     }}};
   }
 
-  assert(Offset == 0);
+  assert(Offset == 0 || Offset == -1);
 
   // Try to fold a frame index directly into the MUBUF vaddr field, and any
   // offsets.
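Both selector changes above implement the same rule: a constant private (scratch) address may be folded into the MUBUF vaddr and immediate-offset fields only when it is not the private null pointer value. Below is a minimal standalone sketch of that folding rule, using plain integers rather than the SelectionDAG/GlobalISel APIs; the helper name foldConstantScratchAddr is hypothetical, the -1 constant mirrors getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS), and the 4095 mask mirrors the 12-bit MUBUF immediate offset field.

#include <cassert>
#include <cstdint>
#include <optional>

// How a folded constant scratch address is split: the aligned high bits are
// materialized into vaddr with a V_MOV_B32, and the low 12 bits are encoded
// directly as the immediate offset.
struct MUBUFImm {
  uint32_t HighBits;
  uint16_t ImmOffset;
};

constexpr int64_t PrivateNull = -1; // getNullPointerValue(PRIVATE_ADDRESS)

std::optional<MUBUFImm> foldConstantScratchAddr(int64_t Imm) {
  if (Imm == PrivateNull)
    return std::nullopt; // don't fold null; leave it for the generic patterns
  return MUBUFImm{static_cast<uint32_t>(Imm & ~4095),
                  static_cast<uint16_t>(Imm & 4095)};
}

int main() {
  assert(foldConstantScratchAddr(4100)->HighBits == 4096);
  assert(foldConstantScratchAddr(4100)->ImmOffset == 4);
  assert(!foldConstantScratchAddr(-1)); // the new bail-out path
}

Declining to fold keeps a -1 address visible as an ordinary moved constant, which is what the new load_private_s32_from_neg1 test below checks.
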
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 2ef6cd5b3e338..e223fecc88195 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -57,8 +57,9 @@ class AMDGPUTargetMachine : public LLVMTargetMachine {
   void adjustPassManager(PassManagerBuilder &) override;
 
   /// Get the integer value of a null pointer in the given address space.
-  uint64_t getNullPointerValue(unsigned AddrSpace) const {
+  static int64_t getNullPointerValue(unsigned AddrSpace) {
     return (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
+            AddrSpace == AMDGPUAS::PRIVATE_ADDRESS ||
             AddrSpace == AMDGPUAS::REGION_ADDRESS) ? -1 : 0;
   }
 };
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir
index 13e4035a48828..79284fdfd05f7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir
@@ -843,3 +843,31 @@ body: |
     $vgpr0 = COPY %3
 
 ...
+
+# Should not fold offset if this is a null dereference.
+---
+
+name: load_private_s32_from_neg1
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+
+    ; GFX6-LABEL: name: load_private_s32_from_neg1
+    ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
+    ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+    ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+    ; GFX9-LABEL: name: load_private_s32_from_neg1
+    ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
+    ; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, -1, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
+    ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
+    %0:vgpr(p5) = G_CONSTANT i32 -1
+    %1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
+    $vgpr0 = COPY %1
+
+...
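The header change is the root of the patch: private now joins local and region as an address space whose null pointer is all ones rather than zero, and the accessor becomes static so the selectors can query it without a TargetMachine instance. A self-contained restatement follows; the numeric enum values are an assumption of this sketch (the usual AMDGPU address-space numbering), not part of the patch.

#include <cassert>
#include <cstdint>

namespace AMDGPUAS {
enum : unsigned {
  FLAT_ADDRESS = 0,
  GLOBAL_ADDRESS = 1,
  REGION_ADDRESS = 2,
  LOCAL_ADDRESS = 3,
  CONSTANT_ADDRESS = 4,
  PRIVATE_ADDRESS = 5
};
} // namespace AMDGPUAS

// Same body as the patched getNullPointerValue in AMDGPUTargetMachine.h.
static int64_t getNullPointerValue(unsigned AddrSpace) {
  return (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
          AddrSpace == AMDGPUAS::PRIVATE_ADDRESS ||
          AddrSpace == AMDGPUAS::REGION_ADDRESS) ? -1 : 0;
}

int main() {
  assert(getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS) == -1); // new with this patch
  assert(getNullPointerValue(AMDGPUAS::LOCAL_ADDRESS) == -1);   // unchanged
  assert(getNullPointerValue(AMDGPUAS::GLOBAL_ADDRESS) == 0);   // unchanged
}
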
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir
index 643bdd3b7d582..395d34a00081d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir
@@ -173,12 +173,12 @@ body: |
     ; VI-LABEL: name: test_addrspacecast_p5_to_p0
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5
     ; VI: [[COPY1:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 0
+    ; VI: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 -1
     ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
     ; VI: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY]](p4)
     ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 68
-    ; VI: [[GEP:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C2]](s64)
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (dereferenceable invariant load 4, addrspace 4)
+    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C2]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, addrspace 4)
     ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](p5), [[C]]
     ; VI: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p5)
     ; VI: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32)
@@ -186,7 +186,7 @@ body: |
     ; VI: $vgpr0_vgpr1 = COPY [[SELECT]](p0)
     ; GFX9-LABEL: name: test_addrspacecast_p5_to_p0
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 0
+    ; GFX9: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 -1
     ; GFX9: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
     ; GFX9: [[S_GETREG_B32_:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 30735
     ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
@@ -216,7 +216,7 @@ body: |
     ; VI-LABEL: name: test_addrspacecast_p0_to_p5
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 0
+    ; VI: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 -1
     ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
     ; VI: [[EXTRACT:%[0-9]+]]:_(p5) = G_EXTRACT [[COPY]](p0), 0
     ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p0), [[C1]]
@@ -224,7 +224,7 @@ body: |
     ; VI: $vgpr0 = COPY [[SELECT]](p5)
     ; GFX9-LABEL: name: test_addrspacecast_p0_to_p5
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 0
+    ; GFX9: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 -1
     ; GFX9: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
     ; GFX9: [[EXTRACT:%[0-9]+]]:_(p5) = G_EXTRACT [[COPY]](p0), 0
     ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p0), [[C1]]
@@ -232,7 +232,7 @@ body: |
     ; GFX9: $vgpr0 = COPY [[SELECT]](p5)
     ; SI-LABEL: name: test_addrspacecast_p0_to_p5
     ; SI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; SI: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 0
+    ; SI: [[C:%[0-9]+]]:_(p5) = G_CONSTANT i32 -1
     ; SI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
     ; SI: [[EXTRACT:%[0-9]+]]:_(p5) = G_EXTRACT [[COPY]](p0), 0
     ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p0), [[C1]]
@@ -260,8 +260,8 @@ body: |
    ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
     ; VI: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY]](p4)
     ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
-    ; VI: [[GEP:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C2]](s64)
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (dereferenceable invariant load 4, align 64, addrspace 4)
+    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C2]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, align 64, addrspace 4)
     ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](p3), [[C]]
     ; VI: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p3)
     ; VI: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32)
@@ -466,15 +466,15 @@ body: |
     ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0
     ; VI: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY]](p4)
     ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
-    ; VI: [[GEP:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C2]](s64)
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (dereferenceable invariant load 4, align 64, addrspace 4)
+    ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY2]], [[C2]](s64)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load 4, align 64, addrspace 4)
     ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p3), [[C]]
     ; VI: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3)
     ; VI: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32)
     ; VI: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]]
     ; VI: [[COPY3:%[0-9]+]]:_(p4) = COPY [[COPY]](p4)
-    ; VI: [[GEP1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY3]], [[C2]](s64)
-    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (dereferenceable invariant load 4, align 64, addrspace 4)
+    ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY3]], [[C2]](s64)
+    ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load 4, align 64, addrspace 4)
     ; VI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[C]]
     ; VI: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)
     ; VI: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT1]](s32), [[LOAD1]](s32)
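The updated legalizer checks encode the two cast directions as compare-and-select against the respective null values: p5 -> p0 compares the source against -1 and either glues the private aperture high half on top or produces the flat null, while p0 -> p5 compares against 0 and either truncates or produces -1. A standalone arithmetic sketch of that logic follows; the aperture constant is a placeholder and the helper names are hypothetical.

#include <cassert>
#include <cstdint>

constexpr uint32_t PrivateNull32 = UINT32_MAX; // -1, the new private null
constexpr uint64_t FlatNull = 0;
constexpr uint64_t ApertureHi = 0x5000;        // assumed high half of the private aperture

// p5 -> p0: valid pointers get the aperture as their high 32 bits;
// null maps to the flat null.
uint64_t privateToFlat(uint32_t P) {
  return P != PrivateNull32 ? (ApertureHi << 32) | P : FlatNull;
}

// p0 -> p5: valid pointers are truncated; null maps to the private null.
uint32_t flatToPrivate(uint64_t P) {
  return P != FlatNull ? static_cast<uint32_t>(P) : PrivateNull32;
}

int main() {
  assert(privateToFlat(PrivateNull32) == FlatNull); // null round-trips as null
  assert(flatToPrivate(FlatNull) == PrivateNull32);
  assert(flatToPrivate(privateToFlat(16)) == 16);   // valid pointers round-trip
}
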
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
index 764f935097642..d16edbac75fe2 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
@@ -76,7 +76,7 @@ define void @use_group_to_flat_addrspacecast_func(i32 addrspace(3)* %ptr) #0 {
 ; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
 ; CI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
-; CI-DAG: v_cmp_ne_u32_e64 vcc, [[PTR]], 0
+; CI-DAG: v_cmp_ne_u32_e64 vcc, [[PTR]], -1
 ; CI-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc
 ; CI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
 ; CI-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]]
@@ -89,7 +89,7 @@ define void @use_group_to_flat_addrspacecast_func(i32 addrspace(3)* %ptr) #0 {
 ; GFX9-XXX: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_private_base
 ; GFX9-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
-; GFX9: v_cmp_ne_u32_e64 vcc, [[PTR]], 0
+; GFX9: v_cmp_ne_u32_e64 vcc, [[PTR]], -1
 ; GFX9: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]], vcc
 ; GFX9: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
 ; GFX9-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]]
@@ -167,7 +167,7 @@ define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #0 {
 ; HSA: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}}
 ; HSA-DAG: v_cmp_ne_u64_e64 vcc, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}}
 ; HSA-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]]
-; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], 0, v[[VPTR_LO]]
+; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]]
 ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
 ; HSA: buffer_store_dword v[[K]], [[CASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}
 define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #0 {
@@ -252,12 +252,16 @@ define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 {
 
 ; FIXME: Shouldn't need to enable queue ptr
 ; HSA-LABEL: {{^}}cast_0_private_to_flat_addrspacecast:
-; CI: enable_sgpr_queue_ptr = 1
-; GFX9: enable_sgpr_queue_ptr = 0
+; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11
+; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
+; GFX9-DAG: s_getreg_b32 [[SSRC_SHARED:s[0-9]+]], hwreg(HW_REG_SH_MEM_BASES, 0, 16)
+; GFX9-DAG: s_lshl_b32 [[SSRC_SHARED_BASE:s[0-9]+]], [[SSRC_SHARED]], 16
+; GFX9-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SSRC_SHARED_BASE]]
+
+; GFX9-XXX: v_mov_b32_e32 v[[HI:[0-9]+]], src_shared_base
 ; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
 ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
-; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
 ; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
 define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 {
   %cast = addrspacecast i32 addrspace(5)* null to i32*
@@ -266,14 +270,41 @@ define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 {
 
 ; HSA-LABEL: {{^}}cast_0_flat_to_private_addrspacecast:
-; HSA: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
-; HSA: buffer_store_dword [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0
+; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
+; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
+; HSA: buffer_store_dword [[K]], [[PTR]], s{{\[[0-9]+:[0-9]+\]}}, 0
 define amdgpu_kernel void @cast_0_flat_to_private_addrspacecast() #0 {
   %cast = addrspacecast i32* null to i32 addrspace(5)*
   store volatile i32 7, i32 addrspace(5)* %cast
   ret void
 }
+
+; HSA-LABEL: {{^}}cast_neg1_private_to_flat_addrspacecast:
+; CI: enable_sgpr_queue_ptr = 1
+; GFX9: enable_sgpr_queue_ptr = 0
+
+; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
+; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
+; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
+; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
+define amdgpu_kernel void @cast_neg1_private_to_flat_addrspacecast() #0 {
+  %cast = addrspacecast i32 addrspace(5)* inttoptr (i32 -1 to i32 addrspace(5)*) to i32*
+  store volatile i32 7, i32* %cast
+  ret void
+}
+
+; HSA-LABEL: {{^}}cast_neg1_flat_to_private_addrspacecast:
+; HSA-DAG: v_mov_b32_e32 [[PTR:v[0-9]+]], -1{{$}}
+; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
+; HSA: buffer_store_dword [[K]], [[PTR]], s{{\[[0-9]+:[0-9]+\]}}, 0
+define amdgpu_kernel void @cast_neg1_flat_to_private_addrspacecast() #0 {
+  %cast = addrspacecast i32* inttoptr (i64 -1 to i32*) to i32 addrspace(5)*
+  store volatile i32 7, i32 addrspace(5)* %cast
+  ret void
+}
+
+
 ; Disable optimizations in case there are optimizations added that
 ; specialize away generic pointer accesses.
diff --git a/llvm/test/CodeGen/AMDGPU/nullptr.ll b/llvm/test/CodeGen/AMDGPU/nullptr.ll
index 4eaf9836bb9d3..16292f0ebee08 100644
--- a/llvm/test/CodeGen/AMDGPU/nullptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/nullptr.ll
@@ -4,7 +4,7 @@
 %struct.S = type { i32 addrspace(5)*, i32 addrspace(1)*, i32 addrspace(4)*, i32 addrspace(3)*, i32*, i32 addrspace(2)*}
 
 ; CHECK-LABEL: nullptr_priv:
-; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long -1
 @nullptr_priv = global i32 addrspace(5)* addrspacecast (i32* null to i32 addrspace(5)*)
 
 ; CHECK-LABEL: nullptr_glob:
@@ -98,7 +98,7 @@
 @nullptr23 = global i32 addrspace(23)* addrspacecast (i32* null to i32 addrspace(23)*)
 
 ; CHECK-LABEL: structWithPointers:
-; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long -1
 ; GCN-NEXT:  .zero 4
 ; GCN-NEXT:  .quad 0
 ; R600-NEXT: .long 0
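The nullptr.ll changes record the visible ABI consequence: a constant addrspacecast of the flat null pointer into the private address space now folds to the private null value, so the global initializer is emitted as the all-ones 32-bit pattern. A small sketch of the arithmetic behind the new .long -1 expectation:

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  const int64_t PrivateNull = -1; // getNullPointerValue(PRIVATE_ADDRESS)
  // Private pointers are 32 bits wide, so the emitted initializer is the
  // truncated bit pattern of the null value.
  const uint32_t Emitted = static_cast<uint32_t>(PrivateNull);
  assert(Emitted == 0xffffffffu);
  std::printf(".long %d\n", static_cast<int32_t>(Emitted)); // prints ".long -1"
}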