138 changes: 69 additions & 69 deletions llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll

Large diffs are not rendered by default.

@@ -4,10 +4,10 @@
; flat instructions. It's still flat, it just doesn't work.

; CHECK-LABEL: @load_flat_from_global(
; CHECK-NEXT: %tmp1 = load float, float addrspace(1)* %ptr
; CHECK-NEXT: %tmp1 = load float, ptr addrspace(1) %ptr
; CHECK-NEXT: ret float %tmp1
define float @load_flat_from_global(float addrspace(1)*%ptr) #0 {
%tmp0 = addrspacecast float addrspace(1)* %ptr to float*
%tmp1 = load float, float* %tmp0
define float @load_flat_from_global(ptr addrspace(1) %ptr) #0 {
%tmp0 = addrspacecast ptr addrspace(1) %ptr to ptr
%tmp1 = load float, ptr %tmp0
ret float %tmp1
}
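
As a reading aid, here is a hand-written sketch (not part of the test file; names are illustrative) of the flat-to-global propagation exercised here — the pass removes the addrspacecast and rewrites every user of the flat pointer, including derived GEPs:

define float @sketch(ptr addrspace(1) %p, i64 %i) {
  %flat = addrspacecast ptr addrspace(1) %p to ptr
  %gep = getelementptr inbounds float, ptr %flat, i64 %i
  %v = load float, ptr %gep
  ret float %v
}
; After infer-address-spaces the cast is dead and both users are rewritten:
;   %gep = getelementptr inbounds float, ptr addrspace(1) %p, i64 %i
;   %v = load float, ptr addrspace(1) %gep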
106 changes: 53 additions & 53 deletions llvm/test/Transforms/InferAddressSpaces/AMDGPU/noop-ptrint-pair.ll
@@ -4,108 +4,108 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7"

; COMMON-LABEL: @noop_ptrint_pair(
; AMDGCN-NEXT: store i32 0, i32 addrspace(1)* %{{.*}}
; AMDGCN-NEXT: store i32 0, ptr addrspace(1) %{{.*}}
; AMDGCN-NEXT: ret void
; NOTTI-NEXT: %1 = ptrtoint i32 addrspace(1)* %x.coerce to i64
; NOTTI-NEXT: %2 = inttoptr i64 %1 to i32*
; NOTTI-NEXT: store i32 0, i32* %2
; NOTTI-NEXT: %1 = ptrtoint ptr addrspace(1) %x.coerce to i64
; NOTTI-NEXT: %2 = inttoptr i64 %1 to ptr
; NOTTI-NEXT: store i32 0, ptr %2
; NOTTI-NEXT: ret void
define void @noop_ptrint_pair(i32 addrspace(1)* %x.coerce) {
%1 = ptrtoint i32 addrspace(1)* %x.coerce to i64
%2 = inttoptr i64 %1 to i32*
store i32 0, i32* %2
define void @noop_ptrint_pair(ptr addrspace(1) %x.coerce) {
%1 = ptrtoint ptr addrspace(1) %x.coerce to i64
%2 = inttoptr i64 %1 to ptr
store i32 0, ptr %2
ret void
}

; COMMON-LABEL: @non_noop_ptrint_pair(
; AMDGCN-NEXT: ptrtoint i32 addrspace(3)* %{{.*}} to i64
; AMDGCN-NEXT: inttoptr i64 %{{.*}} to i32*
; AMDGCN-NEXT: store i32 0, i32* %{{.*}}
; AMDGCN-NEXT: ptrtoint ptr addrspace(3) %{{.*}} to i64
; AMDGCN-NEXT: inttoptr i64 %{{.*}} to ptr
; AMDGCN-NEXT: store i32 0, ptr %{{.*}}
; AMDGCN-NEXT: ret void
; NOTTI-NEXT: ptrtoint i32 addrspace(3)* %{{.*}} to i64
; NOTTI-NEXT: inttoptr i64 %{{.*}} to i32*
; NOTTI-NEXT: store i32 0, i32* %{{.*}}
; NOTTI-NEXT: ptrtoint ptr addrspace(3) %{{.*}} to i64
; NOTTI-NEXT: inttoptr i64 %{{.*}} to ptr
; NOTTI-NEXT: store i32 0, ptr %{{.*}}
; NOTTI-NEXT: ret void
define void @non_noop_ptrint_pair(i32 addrspace(3)* %x.coerce) {
%1 = ptrtoint i32 addrspace(3)* %x.coerce to i64
%2 = inttoptr i64 %1 to i32*
store i32 0, i32* %2
define void @non_noop_ptrint_pair(ptr addrspace(3) %x.coerce) {
%1 = ptrtoint ptr addrspace(3) %x.coerce to i64
%2 = inttoptr i64 %1 to ptr
store i32 0, ptr %2
ret void
}

; COMMON-LABEL: @non_noop_ptrint_pair2(
; AMDGCN-NEXT: ptrtoint i32 addrspace(1)* %{{.*}} to i32
; AMDGCN-NEXT: inttoptr i32 %{{.*}} to i32*
; AMDGCN-NEXT: store i32 0, i32* %{{.*}}
; AMDGCN-NEXT: ptrtoint ptr addrspace(1) %{{.*}} to i32
; AMDGCN-NEXT: inttoptr i32 %{{.*}} to ptr
; AMDGCN-NEXT: store i32 0, ptr %{{.*}}
; AMDGCN-NEXT: ret void
; NOTTI-NEXT: ptrtoint i32 addrspace(1)* %{{.*}} to i32
; NOTTI-NEXT: inttoptr i32 %{{.*}} to i32*
; NOTTI-NEXT: store i32 0, i32* %{{.*}}
; NOTTI-NEXT: ptrtoint ptr addrspace(1) %{{.*}} to i32
; NOTTI-NEXT: inttoptr i32 %{{.*}} to ptr
; NOTTI-NEXT: store i32 0, ptr %{{.*}}
; NOTTI-NEXT: ret void
define void @non_noop_ptrint_pair2(i32 addrspace(1)* %x.coerce) {
%1 = ptrtoint i32 addrspace(1)* %x.coerce to i32
%2 = inttoptr i32 %1 to i32*
store i32 0, i32* %2
define void @non_noop_ptrint_pair2(ptr addrspace(1) %x.coerce) {
%1 = ptrtoint ptr addrspace(1) %x.coerce to i32
%2 = inttoptr i32 %1 to ptr
store i32 0, ptr %2
ret void
}
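
The three functions above differ only in widths. Under this datalayout, flat (p) and global (p1) pointers are 64-bit while local (p3) pointers are 32-bit, so — as a rough sketch of the rule, not the pass's exact code — a ptrtoint/inttoptr round-trip folds only when both casts preserve the pointer width and the target reports the implied address-space cast as a no-op (which is why the NOTTI run keeps the pair even in the first case):

; ptr addrspace(1) (64-bit) -> i64 -> ptr (64-bit)  ; no-op pair, folded on AMDGCN
; ptr addrspace(3) (32-bit) -> i64 -> ptr (64-bit)  ; widths differ, pair kept
; ptr addrspace(1) (64-bit) -> i32 -> ptr (64-bit)  ; truncating round-trip, pair kept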

@g = addrspace(1) global i32 0, align 4
@l = addrspace(3) global i32 0, align 4

; COMMON-LABEL: @noop_ptrint_pair_ce(
; AMDGCN-NEXT: store i32 0, i32 addrspace(1)* @g
; AMDGCN-NEXT: store i32 0, ptr addrspace(1) @g
; AMDGCN-NEXT: ret void
; NOTTI-NEXT: store i32 0, i32* inttoptr (i64 ptrtoint (i32 addrspace(1)* @g to i64) to i32*)
; NOTTI-NEXT: store i32 0, ptr inttoptr (i64 ptrtoint (ptr addrspace(1) @g to i64) to ptr)
; NOTTI-NEXT: ret void
define void @noop_ptrint_pair_ce() {
store i32 0, i32* inttoptr (i64 ptrtoint (i32 addrspace(1)* @g to i64) to i32*)
store i32 0, ptr inttoptr (i64 ptrtoint (ptr addrspace(1) @g to i64) to ptr)
ret void
}

; COMMON-LABEL: @noop_ptrint_pair_ce2(
; AMDGCN-NEXT: ret i32* addrspacecast (i32 addrspace(1)* @g to i32*)
; NOTTI-NEXT: ret i32* inttoptr (i64 ptrtoint (i32 addrspace(1)* @g to i64) to i32*)
define i32* @noop_ptrint_pair_ce2() {
ret i32* inttoptr (i64 ptrtoint (i32 addrspace(1)* @g to i64) to i32*)
; AMDGCN-NEXT: ret ptr addrspacecast (ptr addrspace(1) @g to ptr)
; NOTTI-NEXT: ret ptr inttoptr (i64 ptrtoint (ptr addrspace(1) @g to i64) to ptr)
define ptr @noop_ptrint_pair_ce2() {
ret ptr inttoptr (i64 ptrtoint (ptr addrspace(1) @g to i64) to ptr)
}

; COMMON-LABEL: @noop_ptrint_pair_ce3(
; AMDGCN-NEXT: %i = inttoptr i64 ptrtoint (i32 addrspace(1)* @g to i64) to i32*
; AMDGCN-NEXT: %i = inttoptr i64 ptrtoint (ptr addrspace(1) @g to i64) to ptr
; AMDGCN-NEXT: ret void
; NOTTI-NEXT: %i = inttoptr i64 ptrtoint (i32 addrspace(1)* @g to i64) to i32*
; NOTTI-NEXT: %i = inttoptr i64 ptrtoint (ptr addrspace(1) @g to i64) to ptr
; NOTTI-NEXT: ret void
define void @noop_ptrint_pair_ce3() {
%i = inttoptr i64 ptrtoint (i32 addrspace(1)* @g to i64) to i32*
%i = inttoptr i64 ptrtoint (ptr addrspace(1) @g to i64) to ptr
ret void
}

; COMMON-LABEL: @non_noop_ptrint_pair_ce(
; AMDGCN-NEXT: store i32 0, i32* inttoptr (i64 ptrtoint (i32 addrspace(3)* @l to i64) to i32*)
; AMDGCN-NEXT: store i32 0, ptr inttoptr (i64 ptrtoint (ptr addrspace(3) @l to i64) to ptr)
; AMDGCN-NEXT: ret void
; NOTTI-NEXT: store i32 0, i32* inttoptr (i64 ptrtoint (i32 addrspace(3)* @l to i64) to i32*)
; NOTTI-NEXT: store i32 0, ptr inttoptr (i64 ptrtoint (ptr addrspace(3) @l to i64) to ptr)
; NOTTI-NEXT: ret void
define void @non_noop_ptrint_pair_ce() {
store i32 0, i32* inttoptr (i64 ptrtoint (i32 addrspace(3)* @l to i64) to i32*)
store i32 0, ptr inttoptr (i64 ptrtoint (ptr addrspace(3) @l to i64) to ptr)
ret void
}

; COMMON-LABEL: @non_noop_ptrint_pair_ce2(
; AMDGCN-NEXT: ret i32* inttoptr (i64 ptrtoint (i32 addrspace(3)* @l to i64) to i32*)
; NOTTI-NEXT: ret i32* inttoptr (i64 ptrtoint (i32 addrspace(3)* @l to i64) to i32*)
define i32* @non_noop_ptrint_pair_ce2() {
ret i32* inttoptr (i64 ptrtoint (i32 addrspace(3)* @l to i64) to i32*)
; AMDGCN-NEXT: ret ptr inttoptr (i64 ptrtoint (ptr addrspace(3) @l to i64) to ptr)
; NOTTI-NEXT: ret ptr inttoptr (i64 ptrtoint (ptr addrspace(3) @l to i64) to ptr)
define ptr @non_noop_ptrint_pair_ce2() {
ret ptr inttoptr (i64 ptrtoint (ptr addrspace(3) @l to i64) to ptr)
}

; COMMON-LABEL: @non_noop_ptrint_pair_ce3(
; AMDGCN-NEXT: ret i32* inttoptr (i32 ptrtoint (i32 addrspace(1)* @g to i32) to i32*)
; NOTTI-NEXT: ret i32* inttoptr (i32 ptrtoint (i32 addrspace(1)* @g to i32) to i32*)
define i32* @non_noop_ptrint_pair_ce3() {
ret i32* inttoptr (i32 ptrtoint (i32 addrspace(1)* @g to i32) to i32*)
; AMDGCN-NEXT: ret ptr inttoptr (i32 ptrtoint (ptr addrspace(1) @g to i32) to ptr)
; NOTTI-NEXT: ret ptr inttoptr (i32 ptrtoint (ptr addrspace(1) @g to i32) to ptr)
define ptr @non_noop_ptrint_pair_ce3() {
ret ptr inttoptr (i32 ptrtoint (ptr addrspace(1) @g to i32) to ptr)
}

; COMMON-LABEL: @non_noop_ptrint_pair_ce4(
; AMDGCN-NEXT: ret i32* inttoptr (i128 ptrtoint (i32 addrspace(3)* @l to i128) to i32*)
; NOTTI-NEXT: ret i32* inttoptr (i128 ptrtoint (i32 addrspace(3)* @l to i128) to i32*)
define i32* @non_noop_ptrint_pair_ce4() {
ret i32* inttoptr (i128 ptrtoint (i32 addrspace(3)* @l to i128) to i32*)
; AMDGCN-NEXT: ret ptr inttoptr (i128 ptrtoint (ptr addrspace(3) @l to i128) to ptr)
; NOTTI-NEXT: ret ptr inttoptr (i128 ptrtoint (ptr addrspace(3) @l to i128) to ptr)
define ptr @non_noop_ptrint_pair_ce4() {
ret ptr inttoptr (i128 ptrtoint (ptr addrspace(3) @l to i128) to ptr)
}
@@ -8,13 +8,13 @@
; Should generate flat load

; CHECK-LABEL: @generic_address_bitcast_const(
; CHECK: %vecload1 = load <2 x double>, <2 x double> addrspace(1)* bitcast (double addrspace(1)* getelementptr inbounds ([100 x double], [100 x double] addrspace(1)* @data, i64 0, i64 4) to <2 x double> addrspace(1)*), align 8
define amdgpu_kernel void @generic_address_bitcast_const(i64 %arg0, i32 addrspace(1)* nocapture %results) #0 {
; CHECK: %vecload1 = load <2 x double>, ptr addrspace(1) getelementptr inbounds ([100 x double], ptr addrspace(1) @data, i64 0, i64 4), align 8
define amdgpu_kernel void @generic_address_bitcast_const(i64 %arg0, ptr addrspace(1) nocapture %results) #0 {
entry:
%tmp1 = call i32 @llvm.amdgcn.workitem.id.x()
%tmp2 = zext i32 %tmp1 to i64
%tmp3 = add i64 %tmp2, %arg0
%vecload1 = load <2 x double>, <2 x double>* bitcast (double* getelementptr ([100 x double], [100 x double]* addrspacecast ([100 x double] addrspace(1)* @data to [100 x double]*), i64 0, i64 4) to <2 x double>*), align 8
%vecload1 = load <2 x double>, ptr bitcast (ptr getelementptr ([100 x double], ptr addrspacecast (ptr addrspace(1) @data to ptr), i64 0, i64 4) to ptr), align 8
%cmp = fcmp ord <2 x double> %vecload1, zeroinitializer
%sext = sext <2 x i1> %cmp to <2 x i64>
%tmp4 = extractelement <2 x i64> %sext, i64 0
@@ -23,82 +23,79 @@ entry:
%tmp7 = lshr i64 %tmp6, 63
%tmp8 = trunc i64 %tmp7 to i32
%idxprom = and i64 %tmp3, 4294967295
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %results, i64 %idxprom
store i32 %tmp8, i32 addrspace(1)* %arrayidx, align 4
%arrayidx = getelementptr inbounds i32, ptr addrspace(1) %results, i64 %idxprom
store i32 %tmp8, ptr addrspace(1) %arrayidx, align 4
ret void
}
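
Worth noting: the opaque-pointer form shortens the CHECK line because the constant bitcast to a <2 x double> pointer folds away, leaving only the real transformation — the addrspacecast is stripped from the constant GEP so the load is issued directly on the global. Before and after, condensed from the diff above:

; typed, input:   load <2 x double>, <2 x double>* bitcast (double* getelementptr (..., [100 x double]* addrspacecast (... @data ...), ...) to <2 x double>*)
; opaque, output: load <2 x double>, ptr addrspace(1) getelementptr inbounds ([100 x double], ptr addrspace(1) @data, i64 0, i64 4)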

@generic_address_bug9749.val = internal addrspace(1) global float 0.0, align 4

declare i32 @_Z9get_fencePv(i8*)
declare i32 @_Z9get_fencePv(ptr)
%opencl.pipe_t = type opaque

; This is a compile-time assert bug, but we still want to check that the
; optimization is performed to generate ld_global.
; CHECK-LABEL: @generic_address_pipe_bug9673(
; CHECK: %tmp1 = bitcast %opencl.pipe_t addrspace(3)* %in_pipe to i32 addrspace(3)*
; CHECK: %add.ptr = getelementptr inbounds i32, i32 addrspace(3)* %tmp1, i32 2
; CHECK: %tmp2 = load i32, i32 addrspace(3)* %add.ptr, align 4
define amdgpu_kernel void @generic_address_pipe_bug9673(%opencl.pipe_t addrspace(3)* nocapture %in_pipe, i32 addrspace(1)* nocapture %dst) #0 {
; CHECK: %add.ptr = getelementptr inbounds i32, ptr addrspace(3) %in_pipe, i32 2
; CHECK: %tmp2 = load i32, ptr addrspace(3) %add.ptr, align 4
define amdgpu_kernel void @generic_address_pipe_bug9673(ptr addrspace(3) nocapture %in_pipe, ptr addrspace(1) nocapture %dst) #0 {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = bitcast %opencl.pipe_t addrspace(3)* %in_pipe to i32 addrspace(3)*
%add.ptr = getelementptr inbounds i32, i32 addrspace(3)* %tmp1, i32 2
%tmp2 = load i32, i32 addrspace(3)* %add.ptr, align 4
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %dst, i32 %tmp
store i32 %tmp2, i32 addrspace(1)* %arrayidx, align 4
%add.ptr = getelementptr inbounds i32, ptr addrspace(3) %in_pipe, i32 2
%tmp2 = load i32, ptr addrspace(3) %add.ptr, align 4
%arrayidx = getelementptr inbounds i32, ptr addrspace(1) %dst, i32 %tmp
store i32 %tmp2, ptr addrspace(1) %arrayidx, align 4
ret void
}

; Should generate flat load
; CHECK-LABEL: @generic_address_bug9749(
; CHECK: br i1
; CHECK: load float, float*
; CHECK: load float, ptr
; CHECK: br label
define amdgpu_kernel void @generic_address_bug9749(i32 addrspace(1)* nocapture %results) #0 {
define amdgpu_kernel void @generic_address_bug9749(ptr addrspace(1) nocapture %results) #0 {
entry:
%ptr = alloca float*, align 8, addrspace(5)
%ptr = alloca ptr, align 8, addrspace(5)
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = zext i32 %tmp to i64
store float 0x3FB99999A0000000, float addrspace(1)* @generic_address_bug9749.val, align 4
store volatile float* addrspacecast (float addrspace(1)* @generic_address_bug9749.val to float*), float* addrspace(5)* %ptr, align 8
%tmp2 = load volatile float*, float* addrspace(5)* %ptr, align 8
%tmp3 = load float, float addrspace(1)* @generic_address_bug9749.val, align 4
%tmp4 = bitcast float* %tmp2 to i8*
%call.i = call i32 @_Z9get_fencePv(i8* %tmp4) #1
store float 0x3FB99999A0000000, ptr addrspace(1) @generic_address_bug9749.val, align 4
store volatile ptr addrspacecast (ptr addrspace(1) @generic_address_bug9749.val to ptr), ptr addrspace(5) %ptr, align 8
%tmp2 = load volatile ptr, ptr addrspace(5) %ptr, align 8
%tmp3 = load float, ptr addrspace(1) @generic_address_bug9749.val, align 4
%call.i = call i32 @_Z9get_fencePv(ptr %tmp2) #1
%switch.i.i = icmp ult i32 %call.i, 4
br i1 %switch.i.i, label %if.end.i, label %helperFunction.exit

if.end.i: ; preds = %entry
%tmp5 = load float, float* %tmp2, align 4
%tmp5 = load float, ptr %tmp2, align 4
%not.cmp.i = fcmp oeq float %tmp5, %tmp3
%phitmp = zext i1 %not.cmp.i to i32
br label %helperFunction.exit

helperFunction.exit: ; preds = %if.end.i, %entry
%retval.0.i = phi i32 [ 0, %entry ], [ %phitmp, %if.end.i ]
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %results, i64 %tmp1
store i32 %retval.0.i, i32 addrspace(1)* %arrayidx, align 4
%arrayidx = getelementptr inbounds i32, ptr addrspace(1) %results, i64 %tmp1
store i32 %retval.0.i, ptr addrspace(1) %arrayidx, align 4
ret void
}
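
The surviving flat load here is the point of the test: %tmp2 is reloaded from a stack slot through a volatile load, so there is no SSA chain of addrspacecast/GEP/phi/select leading back to @generic_address_bug9749.val for the pass to walk. A condensed sketch of the barrier:

; store volatile ptr %cast, ptr addrspace(5) %ptr   ; provenance escapes to memory
; %tmp2 = load volatile ptr, ptr addrspace(5) %ptr  ; address space of %tmp2 is opaque again
; %tmp5 = load float, ptr %tmp2                     ; must stay flat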

; CHECK-LABEL: @generic_address_opt_phi_bug9776_simple_phi_kernel(
; CHECK: phi i32 addrspace(3)*
; CHECK: store i32 %i.03, i32 addrspace(3)* %
define amdgpu_kernel void @generic_address_opt_phi_bug9776_simple_phi_kernel(i32 addrspace(3)* nocapture %in, i32 %numElems) #0 {
; CHECK: phi ptr addrspace(3)
; CHECK: store i32 %i.03, ptr addrspace(3) %
define amdgpu_kernel void @generic_address_opt_phi_bug9776_simple_phi_kernel(ptr addrspace(3) nocapture %in, i32 %numElems) #0 {
entry:
%cmp1 = icmp eq i32 %numElems, 0
br i1 %cmp1, label %for.end, label %for.body.lr.ph

for.body.lr.ph: ; preds = %entry
%tmp = addrspacecast i32 addrspace(3)* %in to i32*
%tmp = addrspacecast ptr addrspace(3) %in to ptr
br label %for.body

for.body: ; preds = %for.body, %for.body.lr.ph
%i.03 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
%ptr.02 = phi i32* [ %tmp, %for.body.lr.ph ], [ %add.ptr, %for.body ]
store i32 %i.03, i32* %ptr.02, align 4
%add.ptr = getelementptr inbounds i32, i32* %ptr.02, i64 4
%ptr.02 = phi ptr [ %tmp, %for.body.lr.ph ], [ %add.ptr, %for.body ]
store i32 %i.03, ptr %ptr.02, align 4
%add.ptr = getelementptr inbounds i32, ptr %ptr.02, i64 4
%inc = add nuw i32 %i.03, 1
%exitcond = icmp eq i32 %inc, %numElems
br i1 %exitcond, label %for.end, label %for.body
@@ -108,31 +105,29 @@ for.end: ; preds = %for.body, %entry
}
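
The interesting rewrite in this kernel is the pointer PHI: every incoming value traces back to the addrspace(3) argument, so the PHI and the GEP feeding its back edge are both pulled into addrspace(3), which is what the two CHECK lines assert. Roughly (an illustrative sketch, not the test's literal output):

; before: %ptr.02 = phi ptr [ %tmp, %for.body.lr.ph ], [ %add.ptr, %for.body ]
; after:  %ptr.02 = phi ptr addrspace(3) [ %in, %for.body.lr.ph ], [ %add.ptr, %for.body ]
;         store i32 %i.03, ptr addrspace(3) %ptr.02, align 4
;         %add.ptr = getelementptr inbounds i32, ptr addrspace(3) %ptr.02, i64 4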

; CHECK-LABEL: @generic_address_bug9899(
; CHECK: %vecload = load <2 x i32>, <2 x i32> addrspace(3)*
; CHECK: store <2 x i32> %tmp16, <2 x i32> addrspace(3)*
define amdgpu_kernel void @generic_address_bug9899(i64 %arg0, i32 addrspace(3)* nocapture %sourceA, i32 addrspace(3)* nocapture %destValues) #0 {
; CHECK: %vecload = load <2 x i32>, ptr addrspace(3)
; CHECK: store <2 x i32> %tmp16, ptr addrspace(3)
define amdgpu_kernel void @generic_address_bug9899(i64 %arg0, ptr addrspace(3) nocapture %sourceA, ptr addrspace(3) nocapture %destValues) #0 {
entry:
%tmp1 = call i32 @llvm.amdgcn.workitem.id.x()
%tmp2 = zext i32 %tmp1 to i64
%tmp3 = add i64 %tmp2, %arg0
%sext = shl i64 %tmp3, 32
%tmp4 = addrspacecast i32 addrspace(3)* %destValues to i32*
%tmp5 = addrspacecast i32 addrspace(3)* %sourceA to i32*
%tmp4 = addrspacecast ptr addrspace(3) %destValues to ptr
%tmp5 = addrspacecast ptr addrspace(3) %sourceA to ptr
%tmp6 = ashr exact i64 %sext, 31
%tmp7 = getelementptr inbounds i32, i32* %tmp5, i64 %tmp6
%arrayidx_v4 = bitcast i32* %tmp7 to <2 x i32>*
%vecload = load <2 x i32>, <2 x i32>* %arrayidx_v4, align 4
%tmp7 = getelementptr inbounds i32, ptr %tmp5, i64 %tmp6
%vecload = load <2 x i32>, ptr %tmp7, align 4
%tmp8 = extractelement <2 x i32> %vecload, i32 0
%tmp9 = extractelement <2 x i32> %vecload, i32 1
%tmp10 = icmp eq i32 %tmp8, 0
%tmp11 = select i1 %tmp10, i32 32, i32 %tmp8
%tmp12 = icmp eq i32 %tmp9, 0
%tmp13 = select i1 %tmp12, i32 32, i32 %tmp9
%tmp14 = getelementptr inbounds i32, i32* %tmp4, i64 %tmp6
%tmp14 = getelementptr inbounds i32, ptr %tmp4, i64 %tmp6
%tmp15 = insertelement <2 x i32> poison, i32 %tmp11, i32 0
%tmp16 = insertelement <2 x i32> %tmp15, i32 %tmp13, i32 1
%arrayidx_v41 = bitcast i32* %tmp14 to <2 x i32>*
store <2 x i32> %tmp16, <2 x i32>* %arrayidx_v41, align 4
store <2 x i32> %tmp16, ptr %tmp14, align 4
ret void
}

@@ -8,13 +8,13 @@
; Should generate flat load

; CHECK-LABEL: @generic_address_bitcast_const(
; CHECK: %vecload1 = load <2 x double>, <2 x double> addrspace(1)* bitcast (double addrspace(1)* getelementptr inbounds ([100 x double], [100 x double] addrspace(1)* @data, i64 0, i64 4) to <2 x double> addrspace(1)*), align 8
define amdgpu_kernel void @generic_address_bitcast_const(i64 %arg0, i32 addrspace(1)* nocapture %results) #0 {
; CHECK: %vecload1 = load <2 x double>, ptr addrspace(1) getelementptr inbounds ([100 x double], ptr addrspace(1) @data, i64 0, i64 4), align 8
define amdgpu_kernel void @generic_address_bitcast_const(i64 %arg0, ptr addrspace(1) nocapture %results) #0 {
entry:
%tmp1 = call i32 @llvm.amdgcn.workitem.id.x()
%tmp2 = zext i32 %tmp1 to i64
%tmp3 = add i64 %tmp2, %arg0
%vecload1 = load <2 x double>, <2 x double>* bitcast (double* getelementptr ([100 x double], [100 x double]* addrspacecast ([100 x double] addrspace(1)* @data to [100 x double]*), i64 0, i64 4) to <2 x double>*), align 8
%vecload1 = load <2 x double>, ptr bitcast (ptr getelementptr ([100 x double], ptr addrspacecast (ptr addrspace(1) @data to ptr), i64 0, i64 4) to ptr), align 8
%cmp = fcmp ord <2 x double> %vecload1, zeroinitializer
%sext = sext <2 x i1> %cmp to <2 x i64>
%tmp4 = extractelement <2 x i64> %sext, i64 0
@@ -23,82 +23,79 @@ entry:
%tmp7 = lshr i64 %tmp6, 63
%tmp8 = trunc i64 %tmp7 to i32
%idxprom = and i64 %tmp3, 4294967295
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %results, i64 %idxprom
store i32 %tmp8, i32 addrspace(1)* %arrayidx, align 4
%arrayidx = getelementptr inbounds i32, ptr addrspace(1) %results, i64 %idxprom
store i32 %tmp8, ptr addrspace(1) %arrayidx, align 4
ret void
}

@generic_address_bug9749.val = internal addrspace(1) global float 0.0, align 4

declare i32 @_Z9get_fencePv(i8*)
declare i32 @_Z9get_fencePv(ptr)
%opencl.pipe_t = type opaque

; This is a compile-time assert bug, but we still want to check that the
; optimization is performed to generate ld_global.
; CHECK-LABEL: @generic_address_pipe_bug9673(
; CHECK: %tmp1 = bitcast %opencl.pipe_t addrspace(3)* %in_pipe to i32 addrspace(3)*
; CHECK: %add.ptr = getelementptr inbounds i32, i32 addrspace(3)* %tmp1, i32 2
; CHECK: %tmp2 = load i32, i32 addrspace(3)* %add.ptr, align 4
define amdgpu_kernel void @generic_address_pipe_bug9673(%opencl.pipe_t addrspace(3)* nocapture %in_pipe, i32 addrspace(1)* nocapture %dst) #0 {
; CHECK: %add.ptr = getelementptr inbounds i32, ptr addrspace(3) %in_pipe, i32 2
; CHECK: %tmp2 = load i32, ptr addrspace(3) %add.ptr, align 4
define amdgpu_kernel void @generic_address_pipe_bug9673(ptr addrspace(3) nocapture %in_pipe, ptr addrspace(1) nocapture %dst) #0 {
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = bitcast %opencl.pipe_t addrspace(3)* %in_pipe to i32 addrspace(3)*
%add.ptr = getelementptr inbounds i32, i32 addrspace(3)* %tmp1, i32 2
%tmp2 = load i32, i32 addrspace(3)* %add.ptr, align 4
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %dst, i32 %tmp
store i32 %tmp2, i32 addrspace(1)* %arrayidx, align 4
%add.ptr = getelementptr inbounds i32, ptr addrspace(3) %in_pipe, i32 2
%tmp2 = load i32, ptr addrspace(3) %add.ptr, align 4
%arrayidx = getelementptr inbounds i32, ptr addrspace(1) %dst, i32 %tmp
store i32 %tmp2, ptr addrspace(1) %arrayidx, align 4
ret void
}

; Should generate flat load
; CHECK-LABEL: @generic_address_bug9749(
; CHECK: br i1
; CHECK: load float, float*
; CHECK: load float, ptr
; CHECK: br label
define amdgpu_kernel void @generic_address_bug9749(i32 addrspace(1)* nocapture %results) #0 {
define amdgpu_kernel void @generic_address_bug9749(ptr addrspace(1) nocapture %results) #0 {
entry:
%ptr = alloca float*, align 8, addrspace(5)
%ptr = alloca ptr, align 8, addrspace(5)
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = zext i32 %tmp to i64
store float 0x3FB99999A0000000, float addrspace(1)* @generic_address_bug9749.val, align 4
store volatile float* addrspacecast (float addrspace(1)* @generic_address_bug9749.val to float*), float* addrspace(5)* %ptr, align 8
%tmp2 = load volatile float*, float* addrspace(5)* %ptr, align 8
%tmp3 = load float, float addrspace(1)* @generic_address_bug9749.val, align 4
%tmp4 = bitcast float* %tmp2 to i8*
%call.i = call i32 @_Z9get_fencePv(i8* %tmp4) #1
store float 0x3FB99999A0000000, ptr addrspace(1) @generic_address_bug9749.val, align 4
store volatile ptr addrspacecast (ptr addrspace(1) @generic_address_bug9749.val to ptr), ptr addrspace(5) %ptr, align 8
%tmp2 = load volatile ptr, ptr addrspace(5) %ptr, align 8
%tmp3 = load float, ptr addrspace(1) @generic_address_bug9749.val, align 4
%call.i = call i32 @_Z9get_fencePv(ptr %tmp2) #1
%switch.i.i = icmp ult i32 %call.i, 4
br i1 %switch.i.i, label %if.end.i, label %helperFunction.exit

if.end.i: ; preds = %entry
%tmp5 = load float, float* %tmp2, align 4
%tmp5 = load float, ptr %tmp2, align 4
%not.cmp.i = fcmp oeq float %tmp5, %tmp3
%phitmp = zext i1 %not.cmp.i to i32
br label %helperFunction.exit

helperFunction.exit: ; preds = %if.end.i, %entry
%retval.0.i = phi i32 [ 0, %entry ], [ %phitmp, %if.end.i ]
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %results, i64 %tmp1
store i32 %retval.0.i, i32 addrspace(1)* %arrayidx, align 4
%arrayidx = getelementptr inbounds i32, ptr addrspace(1) %results, i64 %tmp1
store i32 %retval.0.i, ptr addrspace(1) %arrayidx, align 4
ret void
}

; CHECK-LABEL: @generic_address_opt_phi_bug9776_simple_phi_kernel(
; CHECK: phi i32 addrspace(3)*
; CHECK: store i32 %i.03, i32 addrspace(3)* %
define amdgpu_kernel void @generic_address_opt_phi_bug9776_simple_phi_kernel(i32 addrspace(3)* nocapture %in, i32 %numElems) #0 {
; CHECK: phi ptr addrspace(3)
; CHECK: store i32 %i.03, ptr addrspace(3) %
define amdgpu_kernel void @generic_address_opt_phi_bug9776_simple_phi_kernel(ptr addrspace(3) nocapture %in, i32 %numElems) #0 {
entry:
%cmp1 = icmp eq i32 %numElems, 0
br i1 %cmp1, label %for.end, label %for.body.lr.ph

for.body.lr.ph: ; preds = %entry
%tmp = addrspacecast i32 addrspace(3)* %in to i32*
%tmp = addrspacecast ptr addrspace(3) %in to ptr
br label %for.body

for.body: ; preds = %for.body, %for.body.lr.ph
%i.03 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
%ptr.02 = phi i32* [ %tmp, %for.body.lr.ph ], [ %add.ptr, %for.body ]
store i32 %i.03, i32* %ptr.02, align 4
%add.ptr = getelementptr inbounds i32, i32* %ptr.02, i64 4
%ptr.02 = phi ptr [ %tmp, %for.body.lr.ph ], [ %add.ptr, %for.body ]
store i32 %i.03, ptr %ptr.02, align 4
%add.ptr = getelementptr inbounds i32, ptr %ptr.02, i64 4
%inc = add nuw i32 %i.03, 1
%exitcond = icmp eq i32 %inc, %numElems
br i1 %exitcond, label %for.end, label %for.body
@@ -108,31 +105,29 @@ for.end: ; preds = %for.body, %entry
}

; CHECK-LABEL: @generic_address_bug9899(
; CHECK: %vecload = load <2 x i32>, <2 x i32> addrspace(3)*
; CHECK: store <2 x i32> %tmp16, <2 x i32> addrspace(3)*
define amdgpu_kernel void @generic_address_bug9899(i64 %arg0, i32 addrspace(3)* nocapture %sourceA, i32 addrspace(3)* nocapture %destValues) #0 {
; CHECK: %vecload = load <2 x i32>, ptr addrspace(3)
; CHECK: store <2 x i32> %tmp16, ptr addrspace(3)
define amdgpu_kernel void @generic_address_bug9899(i64 %arg0, ptr addrspace(3) nocapture %sourceA, ptr addrspace(3) nocapture %destValues) #0 {
entry:
%tmp1 = call i32 @llvm.amdgcn.workitem.id.x()
%tmp2 = zext i32 %tmp1 to i64
%tmp3 = add i64 %tmp2, %arg0
%sext = shl i64 %tmp3, 32
%tmp4 = addrspacecast i32 addrspace(3)* %destValues to i32*
%tmp5 = addrspacecast i32 addrspace(3)* %sourceA to i32*
%tmp4 = addrspacecast ptr addrspace(3) %destValues to ptr
%tmp5 = addrspacecast ptr addrspace(3) %sourceA to ptr
%tmp6 = ashr exact i64 %sext, 31
%tmp7 = getelementptr inbounds i32, i32* %tmp5, i64 %tmp6
%arrayidx_v4 = bitcast i32* %tmp7 to <2 x i32>*
%vecload = load <2 x i32>, <2 x i32>* %arrayidx_v4, align 4
%tmp7 = getelementptr inbounds i32, ptr %tmp5, i64 %tmp6
%vecload = load <2 x i32>, ptr %tmp7, align 4
%tmp8 = extractelement <2 x i32> %vecload, i32 0
%tmp9 = extractelement <2 x i32> %vecload, i32 1
%tmp10 = icmp eq i32 %tmp8, 0
%tmp11 = select i1 %tmp10, i32 32, i32 %tmp8
%tmp12 = icmp eq i32 %tmp9, 0
%tmp13 = select i1 %tmp12, i32 32, i32 %tmp9
%tmp14 = getelementptr inbounds i32, i32* %tmp4, i64 %tmp6
%tmp14 = getelementptr inbounds i32, ptr %tmp4, i64 %tmp6
%tmp15 = insertelement <2 x i32> undef, i32 %tmp11, i32 0
%tmp16 = insertelement <2 x i32> %tmp15, i32 %tmp13, i32 1
%arrayidx_v41 = bitcast i32* %tmp14 to <2 x i32>*
store <2 x i32> %tmp16, <2 x i32>* %arrayidx_v41, align 4
store <2 x i32> %tmp16, ptr %tmp14, align 4
ret void
}

364 changes: 182 additions & 182 deletions llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll

Large diffs are not rendered by default.

@@ -5,23 +5,23 @@

; Make sure there is only one addrspacecast. The original cast should
; not be cloned to satisfy the second user.
define void @bar(%0 addrspace(1)* %orig.ptr) {
define void @bar(ptr addrspace(1) %orig.ptr) {
; CHECK-LABEL: @bar(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[ORIG_CAST:%.*]] = addrspacecast [[TMP0:%.*]] addrspace(1)* [[ORIG_PTR:%.*]] to %0*
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds [[TMP0]], %0* [[ORIG_CAST]], i64 0, i32 1
; CHECK-NEXT: call void @foo(i8* [[GEP0]])
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [[TMP0]], %0* [[ORIG_CAST]], i64 0, i32 2
; CHECK-NEXT: call void @foo(i8* [[GEP1]])
; CHECK-NEXT: [[ORIG_CAST:%.*]] = addrspacecast ptr addrspace(1) [[ORIG_PTR:%.*]] to ptr
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds [[TMP0:%.*]], ptr [[ORIG_CAST]], i64 0, i32 1
; CHECK-NEXT: call void @foo(ptr [[GEP0]])
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [[TMP0]], ptr [[ORIG_CAST]], i64 0, i32 2
; CHECK-NEXT: call void @foo(ptr [[GEP1]])
; CHECK-NEXT: ret void
;
bb:
%orig.cast = addrspacecast %0 addrspace(1)* %orig.ptr to %0*
%gep0 = getelementptr inbounds %0, %0* %orig.cast, i64 0, i32 1
call void @foo(i8* %gep0)
%gep1 = getelementptr inbounds %0, %0* %orig.cast, i64 0, i32 2
call void @foo(i8* %gep1)
%orig.cast = addrspacecast ptr addrspace(1) %orig.ptr to ptr
%gep0 = getelementptr inbounds %0, ptr %orig.cast, i64 0, i32 1
call void @foo(ptr %gep0)
%gep1 = getelementptr inbounds %0, ptr %orig.cast, i64 0, i32 2
call void @foo(ptr %gep1)
ret void
}

declare void @foo(i8*)
declare void @foo(ptr)
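
For contrast, the failure mode this test guards against would look like the following (hypothetical bad output, with one cloned cast per user):

; %orig.cast = addrspacecast ptr addrspace(1) %orig.ptr to ptr
; %gep0 = getelementptr inbounds %0, ptr %orig.cast, i64 0, i32 1
; call void @foo(ptr %gep0)
; %orig.cast.clone = addrspacecast ptr addrspace(1) %orig.ptr to ptr  ; redundant clone
; %gep1 = getelementptr inbounds %0, ptr %orig.cast.clone, i64 0, i32 2
; call void @foo(ptr %gep1)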
276 changes: 138 additions & 138 deletions llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions llvm/test/Transforms/InferAddressSpaces/AMDGPU/self-phi.ll
@@ -1,25 +1,25 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -S -infer-address-spaces %s | FileCheck %s

define amdgpu_kernel void @phi_self(i8 addrspace(1)* %arg) {
define amdgpu_kernel void @phi_self(ptr addrspace(1) %arg) {
; CHECK-LABEL: @phi_self(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[I:%.*]] = phi i8 addrspace(1)* [ [[I]], [[LOOP]] ], [ [[ARG:%.*]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[I1:%.*]] = load i8, i8 addrspace(1)* [[I]], align 1
; CHECK-NEXT: [[I:%.*]] = phi ptr addrspace(1) [ [[I]], [[LOOP]] ], [ [[ARG:%.*]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[I1:%.*]] = load i8, ptr addrspace(1) [[I]], align 1
; CHECK-NEXT: [[I2:%.*]] = icmp eq i8 [[I1]], 0
; CHECK-NEXT: br i1 [[I2]], label [[LOOP]], label [[RET:%.*]]
; CHECK: ret:
; CHECK-NEXT: ret void
;
entry:
%cast = addrspacecast i8 addrspace(1)* %arg to i8*
%cast = addrspacecast ptr addrspace(1) %arg to ptr
br label %loop

loop:
%i = phi i8* [%i, %loop], [%cast, %entry]
%i1 = load i8, i8* %i, align 1
%i = phi ptr [%i, %loop], [%cast, %entry]
%i1 = load i8, ptr %i, align 1
%i2 = icmp eq i8 %i1, 0
br i1 %i2, label %loop, label %ret

@@ -6,8 +6,8 @@ define amdgpu_kernel void @subclass_data_assert() {
; CHECK-NEXT: entry:
; CHECK-NEXT: unreachable
; CHECK: strlen.while11:
; CHECK-NEXT: [[I:%.*]] = getelementptr i8, i8* [[I]], i64 1
; CHECK-NEXT: [[I1:%.*]] = load i8, i8* [[I]], align 1
; CHECK-NEXT: [[I:%.*]] = getelementptr i8, ptr [[I]], i64 1
; CHECK-NEXT: [[I1:%.*]] = load i8, ptr [[I]], align 1
; CHECK-NEXT: [[I2:%.*]] = icmp eq i8 [[I1]], 0
; CHECK-NEXT: br i1 [[I2]], label [[STRLEN_WHILE_DONE12:%.*]], label [[STRLEN_WHILE11:%.*]]
; CHECK: strlen.while.done12:
@@ -17,8 +17,8 @@ entry:
unreachable

strlen.while11: ; preds = %strlen.while11
%i = getelementptr i8, i8* %i, i64 1
%i1 = load i8, i8* %i, align 1
%i = getelementptr i8, ptr %i, i64 1
%i1 = load i8, ptr %i, align 1
%i2 = icmp eq i8 %i1, 0
br i1 %i2, label %strlen.while.done12, label %strlen.while11

160 changes: 80 additions & 80 deletions llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll
@@ -3,137 +3,137 @@
; Check that volatile users of addrspacecast are not replaced.

; CHECK-LABEL: @volatile_load_flat_from_global(
; CHECK: load volatile i32, i32*
; CHECK: store i32 %val, i32 addrspace(1)*
define amdgpu_kernel void @volatile_load_flat_from_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 {
%tmp0 = addrspacecast i32 addrspace(1)* %input to i32*
%tmp1 = addrspacecast i32 addrspace(1)* %output to i32*
%val = load volatile i32, i32* %tmp0, align 4
store i32 %val, i32* %tmp1, align 4
; CHECK: load volatile i32, ptr
; CHECK: store i32 %val, ptr addrspace(1)
define amdgpu_kernel void @volatile_load_flat_from_global(ptr addrspace(1) nocapture %input, ptr addrspace(1) nocapture %output) #0 {
%tmp0 = addrspacecast ptr addrspace(1) %input to ptr
%tmp1 = addrspacecast ptr addrspace(1) %output to ptr
%val = load volatile i32, ptr %tmp0, align 4
store i32 %val, ptr %tmp1, align 4
ret void
}

; CHECK-LABEL: @volatile_load_flat_from_constant(
; CHECK: load volatile i32, i32*
; CHECK: store i32 %val, i32 addrspace(1)*
define amdgpu_kernel void @volatile_load_flat_from_constant(i32 addrspace(4)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 {
%tmp0 = addrspacecast i32 addrspace(4)* %input to i32*
%tmp1 = addrspacecast i32 addrspace(1)* %output to i32*
%val = load volatile i32, i32* %tmp0, align 4
store i32 %val, i32* %tmp1, align 4
; CHECK: load volatile i32, ptr
; CHECK: store i32 %val, ptr addrspace(1)
define amdgpu_kernel void @volatile_load_flat_from_constant(ptr addrspace(4) nocapture %input, ptr addrspace(1) nocapture %output) #0 {
%tmp0 = addrspacecast ptr addrspace(4) %input to ptr
%tmp1 = addrspacecast ptr addrspace(1) %output to ptr
%val = load volatile i32, ptr %tmp0, align 4
store i32 %val, ptr %tmp1, align 4
ret void
}

; CHECK-LABEL: @volatile_load_flat_from_group(
; CHECK: load volatile i32, i32*
; CHECK: store i32 %val, i32 addrspace(3)*
define amdgpu_kernel void @volatile_load_flat_from_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 {
%tmp0 = addrspacecast i32 addrspace(3)* %input to i32*
%tmp1 = addrspacecast i32 addrspace(3)* %output to i32*
%val = load volatile i32, i32* %tmp0, align 4
store i32 %val, i32* %tmp1, align 4
; CHECK: load volatile i32, ptr
; CHECK: store i32 %val, ptr addrspace(3)
define amdgpu_kernel void @volatile_load_flat_from_group(ptr addrspace(3) nocapture %input, ptr addrspace(3) nocapture %output) #0 {
%tmp0 = addrspacecast ptr addrspace(3) %input to ptr
%tmp1 = addrspacecast ptr addrspace(3) %output to ptr
%val = load volatile i32, ptr %tmp0, align 4
store i32 %val, ptr %tmp1, align 4
ret void
}

; CHECK-LABEL: @volatile_load_flat_from_private(
; CHECK: load volatile i32, i32*
; CHECK: store i32 %val, i32 addrspace(5)*
define amdgpu_kernel void @volatile_load_flat_from_private(i32 addrspace(5)* nocapture %input, i32 addrspace(5)* nocapture %output) #0 {
%tmp0 = addrspacecast i32 addrspace(5)* %input to i32*
%tmp1 = addrspacecast i32 addrspace(5)* %output to i32*
%val = load volatile i32, i32* %tmp0, align 4
store i32 %val, i32* %tmp1, align 4
; CHECK: load volatile i32, ptr
; CHECK: store i32 %val, ptr addrspace(5)
define amdgpu_kernel void @volatile_load_flat_from_private(ptr addrspace(5) nocapture %input, ptr addrspace(5) nocapture %output) #0 {
%tmp0 = addrspacecast ptr addrspace(5) %input to ptr
%tmp1 = addrspacecast ptr addrspace(5) %output to ptr
%val = load volatile i32, ptr %tmp0, align 4
store i32 %val, ptr %tmp1, align 4
ret void
}

; CHECK-LABEL: @volatile_store_flat_to_global(
; CHECK: load i32, i32 addrspace(1)*
; CHECK: store volatile i32 %val, i32*
define amdgpu_kernel void @volatile_store_flat_to_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 {
%tmp0 = addrspacecast i32 addrspace(1)* %input to i32*
%tmp1 = addrspacecast i32 addrspace(1)* %output to i32*
%val = load i32, i32* %tmp0, align 4
store volatile i32 %val, i32* %tmp1, align 4
; CHECK: load i32, ptr addrspace(1)
; CHECK: store volatile i32 %val, ptr
define amdgpu_kernel void @volatile_store_flat_to_global(ptr addrspace(1) nocapture %input, ptr addrspace(1) nocapture %output) #0 {
%tmp0 = addrspacecast ptr addrspace(1) %input to ptr
%tmp1 = addrspacecast ptr addrspace(1) %output to ptr
%val = load i32, ptr %tmp0, align 4
store volatile i32 %val, ptr %tmp1, align 4
ret void
}

; CHECK-LABEL: @volatile_store_flat_to_group(
; CHECK: load i32, i32 addrspace(3)*
; CHECK: store volatile i32 %val, i32*
define amdgpu_kernel void @volatile_store_flat_to_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 {
%tmp0 = addrspacecast i32 addrspace(3)* %input to i32*
%tmp1 = addrspacecast i32 addrspace(3)* %output to i32*
%val = load i32, i32* %tmp0, align 4
store volatile i32 %val, i32* %tmp1, align 4
; CHECK: load i32, ptr addrspace(3)
; CHECK: store volatile i32 %val, ptr
define amdgpu_kernel void @volatile_store_flat_to_group(ptr addrspace(3) nocapture %input, ptr addrspace(3) nocapture %output) #0 {
%tmp0 = addrspacecast ptr addrspace(3) %input to ptr
%tmp1 = addrspacecast ptr addrspace(3) %output to ptr
%val = load i32, ptr %tmp0, align 4
store volatile i32 %val, ptr %tmp1, align 4
ret void
}

; CHECK-LABEL: @volatile_store_flat_to_private(
; CHECK: load i32, i32 addrspace(5)*
; CHECK: store volatile i32 %val, i32*
define amdgpu_kernel void @volatile_store_flat_to_private(i32 addrspace(5)* nocapture %input, i32 addrspace(5)* nocapture %output) #0 {
%tmp0 = addrspacecast i32 addrspace(5)* %input to i32*
%tmp1 = addrspacecast i32 addrspace(5)* %output to i32*
%val = load i32, i32* %tmp0, align 4
store volatile i32 %val, i32* %tmp1, align 4
; CHECK: load i32, ptr addrspace(5)
; CHECK: store volatile i32 %val, ptr
define amdgpu_kernel void @volatile_store_flat_to_private(ptr addrspace(5) nocapture %input, ptr addrspace(5) nocapture %output) #0 {
%tmp0 = addrspacecast ptr addrspace(5) %input to ptr
%tmp1 = addrspacecast ptr addrspace(5) %output to ptr
%val = load i32, ptr %tmp0, align 4
store volatile i32 %val, ptr %tmp1, align 4
ret void
}

; CHECK-LABEL: @volatile_atomicrmw_add_group_to_flat(
; CHECK: addrspacecast i32 addrspace(3)* %group.ptr to i32*
; CHECK: atomicrmw volatile add i32*
define i32 @volatile_atomicrmw_add_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %y) #0 {
%cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
%ret = atomicrmw volatile add i32* %cast, i32 %y seq_cst
; CHECK: addrspacecast ptr addrspace(3) %group.ptr to ptr
; CHECK: atomicrmw volatile add ptr
define i32 @volatile_atomicrmw_add_group_to_flat(ptr addrspace(3) %group.ptr, i32 %y) #0 {
%cast = addrspacecast ptr addrspace(3) %group.ptr to ptr
%ret = atomicrmw volatile add ptr %cast, i32 %y seq_cst
ret i32 %ret
}

; CHECK-LABEL: @volatile_atomicrmw_add_global_to_flat(
; CHECK: addrspacecast i32 addrspace(1)* %global.ptr to i32*
; CHECK: %ret = atomicrmw volatile add i32*
define i32 @volatile_atomicrmw_add_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %y) #0 {
%cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
%ret = atomicrmw volatile add i32* %cast, i32 %y seq_cst
; CHECK: addrspacecast ptr addrspace(1) %global.ptr to ptr
; CHECK: %ret = atomicrmw volatile add ptr
define i32 @volatile_atomicrmw_add_global_to_flat(ptr addrspace(1) %global.ptr, i32 %y) #0 {
%cast = addrspacecast ptr addrspace(1) %global.ptr to ptr
%ret = atomicrmw volatile add ptr %cast, i32 %y seq_cst
ret i32 %ret
}

; CHECK-LABEL: @volatile_cmpxchg_global_to_flat(
; CHECK: addrspacecast i32 addrspace(1)* %global.ptr to i32*
; CHECK: cmpxchg volatile i32*
define { i32, i1 } @volatile_cmpxchg_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %cmp, i32 %val) #0 {
%cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
%ret = cmpxchg volatile i32* %cast, i32 %cmp, i32 %val seq_cst monotonic
; CHECK: addrspacecast ptr addrspace(1) %global.ptr to ptr
; CHECK: cmpxchg volatile ptr
define { i32, i1 } @volatile_cmpxchg_global_to_flat(ptr addrspace(1) %global.ptr, i32 %cmp, i32 %val) #0 {
%cast = addrspacecast ptr addrspace(1) %global.ptr to ptr
%ret = cmpxchg volatile ptr %cast, i32 %cmp, i32 %val seq_cst monotonic
ret { i32, i1 } %ret
}

; CHECK-LABEL: @volatile_cmpxchg_group_to_flat(
; CHECK: addrspacecast i32 addrspace(3)* %group.ptr to i32*
; CHECK: cmpxchg volatile i32*
define { i32, i1 } @volatile_cmpxchg_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %cmp, i32 %val) #0 {
%cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
%ret = cmpxchg volatile i32* %cast, i32 %cmp, i32 %val seq_cst monotonic
; CHECK: addrspacecast ptr addrspace(3) %group.ptr to ptr
; CHECK: cmpxchg volatile ptr
define { i32, i1 } @volatile_cmpxchg_group_to_flat(ptr addrspace(3) %group.ptr, i32 %cmp, i32 %val) #0 {
%cast = addrspacecast ptr addrspace(3) %group.ptr to ptr
%ret = cmpxchg volatile ptr %cast, i32 %cmp, i32 %val seq_cst monotonic
ret { i32, i1 } %ret
}

; CHECK-LABEL: @volatile_memset_group_to_flat(
; CHECK: %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8*
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 32, i1 true)
define amdgpu_kernel void @volatile_memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 {
%cast = addrspacecast i8 addrspace(3)* %group.ptr to i8*
call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 32, i1 true)
; CHECK: %cast = addrspacecast ptr addrspace(3) %group.ptr to ptr
; CHECK: call void @llvm.memset.p0.i64(ptr align 4 %cast, i8 4, i64 32, i1 true)
define amdgpu_kernel void @volatile_memset_group_to_flat(ptr addrspace(3) %group.ptr, i32 %y) #0 {
%cast = addrspacecast ptr addrspace(3) %group.ptr to ptr
call void @llvm.memset.p0.i64(ptr align 4 %cast, i8 4, i64 32, i1 true)
ret void
}

; CHECK-LABEL: @volatile_memset_global_to_flat(
; CHECK: %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8*
; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 32, i1 true)
define amdgpu_kernel void @volatile_memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 {
%cast = addrspacecast i8 addrspace(1)* %global.ptr to i8*
call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 32, i1 true)
; CHECK: %cast = addrspacecast ptr addrspace(1) %global.ptr to ptr
; CHECK: call void @llvm.memset.p0.i64(ptr align 4 %cast, i8 4, i64 32, i1 true)
define amdgpu_kernel void @volatile_memset_global_to_flat(ptr addrspace(1) %global.ptr, i32 %y) #0 {
%cast = addrspacecast ptr addrspace(1) %global.ptr to ptr
call void @llvm.memset.p0.i64(ptr align 4 %cast, i8 4, i64 32, i1 true)
ret void
}

declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #1
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1) #1

attributes #0 = { nounwind }
attributes #1 = { argmemonly nounwind }
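
Two things show through this file's diff beyond the pointer spelling: the pass still leaves every volatile access on the generic pointer (only the non-volatile half of each load/store pair is rewritten into its specific address space), and the memset intrinsic's mangling suffix drops the pointee type under opaque pointers:

; typed pointers:  @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1)
; opaque pointers: @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1)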
20 changes: 10 additions & 10 deletions llvm/test/Transforms/InferAddressSpaces/NVPTX/bug31948.ll
@@ -2,22 +2,22 @@

target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"

%struct.bar = type { float, float* }
%struct.bar = type { float, ptr }

@var1 = local_unnamed_addr addrspace(3) externally_initialized global %struct.bar undef, align 8

; CHECK-LABEL: @bug31948(
; CHECK: %tmp = load float*, float* addrspace(3)* getelementptr inbounds (%struct.bar, %struct.bar addrspace(3)* @var1, i64 0, i32 1), align 8
; CHECK: %tmp1 = load float, float* %tmp, align 4
; CHECK: store float %conv1, float* %tmp, align 4
; CHECK: store i32 32, i32 addrspace(3)* bitcast (float* addrspace(3)* getelementptr inbounds (%struct.bar, %struct.bar addrspace(3)* @var1, i64 0, i32 1) to i32 addrspace(3)*), align 4
define void @bug31948(float %a, float* nocapture readnone %x, float* nocapture readnone %y) local_unnamed_addr #0 {
; CHECK: %tmp = load ptr, ptr addrspace(3) getelementptr inbounds (%struct.bar, ptr addrspace(3) @var1, i64 0, i32 1), align 8
; CHECK: %tmp1 = load float, ptr %tmp, align 4
; CHECK: store float %conv1, ptr %tmp, align 4
; CHECK: store i32 32, ptr addrspace(3) getelementptr inbounds (%struct.bar, ptr addrspace(3) @var1, i64 0, i32 1), align 4
define void @bug31948(float %a, ptr nocapture readnone %x, ptr nocapture readnone %y) local_unnamed_addr #0 {
entry:
%tmp = load float*, float** getelementptr (%struct.bar, %struct.bar* addrspacecast (%struct.bar addrspace(3)* @var1 to %struct.bar*), i64 0, i32 1), align 8
%tmp1 = load float, float* %tmp, align 4
%tmp = load ptr, ptr getelementptr (%struct.bar, ptr addrspacecast (ptr addrspace(3) @var1 to ptr), i64 0, i32 1), align 8
%tmp1 = load float, ptr %tmp, align 4
%conv1 = fadd float %tmp1, 1.000000e+00
store float %conv1, float* %tmp, align 4
store i32 32, i32* bitcast (float** getelementptr (%struct.bar, %struct.bar* addrspacecast (%struct.bar addrspace(3)* @var1 to %struct.bar*), i64 0, i32 1) to i32*), align 4
store float %conv1, ptr %tmp, align 4
store i32 32, ptr bitcast (ptr getelementptr (%struct.bar, ptr addrspacecast (ptr addrspace(3) @var1 to ptr), i64 0, i32 1) to ptr), align 4
ret void
}

@@ -1,107 +1,102 @@
; RUN: opt -S -mtriple=nvptx64-nvidia-cuda -infer-address-spaces -o - %s | FileCheck %s

; CHECK-LABEL: @f0
; CHECK: addrspacecast float* {{%.*}} to float addrspace(4)*
; CHECK: getelementptr inbounds float, float addrspace(4)*
; CHECK: load float, float addrspace(4)*
define float @f0(float* %p) {
; CHECK: addrspacecast ptr {{%.*}} to ptr addrspace(4)
; CHECK: getelementptr inbounds float, ptr addrspace(4)
; CHECK: load float, ptr addrspace(4)
define float @f0(ptr %p) {
entry:
%0 = bitcast float* %p to i8*
%1 = call i1 @llvm.nvvm.isspacep.const(i8* %0)
tail call void @llvm.assume(i1 %1)
%2 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
%idxprom = zext i32 %2 to i64
%arrayidx = getelementptr inbounds float, float* %p, i64 %idxprom
%3 = load float, float* %arrayidx, align 4
ret float %3
%0 = call i1 @llvm.nvvm.isspacep.const(ptr %p)
tail call void @llvm.assume(i1 %0)
%1 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
%idxprom = zext i32 %1 to i64
%arrayidx = getelementptr inbounds float, ptr %p, i64 %idxprom
%2 = load float, ptr %arrayidx, align 4
ret float %2
}

; CHECK-LABEL: @f1
; CHECK: addrspacecast float* {{%.*}} to float addrspace(1)*
; CHECK: getelementptr inbounds float, float addrspace(1)*
; CHECK: load float, float addrspace(1)*
define float @f1(float* %p) {
; CHECK: addrspacecast ptr {{%.*}} to ptr addrspace(1)
; CHECK: getelementptr inbounds float, ptr addrspace(1)
; CHECK: load float, ptr addrspace(1)
define float @f1(ptr %p) {
entry:
%0 = bitcast float* %p to i8*
%1 = call i1 @llvm.nvvm.isspacep.global(i8* %0)
tail call void @llvm.assume(i1 %1)
%2 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
%idxprom = zext i32 %2 to i64
%arrayidx = getelementptr inbounds float, float* %p, i64 %idxprom
%3 = load float, float* %arrayidx, align 4
ret float %3
%0 = call i1 @llvm.nvvm.isspacep.global(ptr %p)
tail call void @llvm.assume(i1 %0)
%1 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
%idxprom = zext i32 %1 to i64
%arrayidx = getelementptr inbounds float, ptr %p, i64 %idxprom
%2 = load float, ptr %arrayidx, align 4
ret float %2
}

; CHECK-LABEL: @f2
; CHECK: addrspacecast float* {{%.*}} to float addrspace(5)*
; CHECK: getelementptr inbounds float, float addrspace(5)*
; CHECK: load float, float addrspace(5)*
define float @f2(float* %p) {
; CHECK: addrspacecast ptr {{%.*}} to ptr addrspace(5)
; CHECK: getelementptr inbounds float, ptr addrspace(5)
; CHECK: load float, ptr addrspace(5)
define float @f2(ptr %p) {
entry:
%0 = bitcast float* %p to i8*
%1 = call i1 @llvm.nvvm.isspacep.local(i8* %0)
tail call void @llvm.assume(i1 %1)
%2 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
%idxprom = zext i32 %2 to i64
%arrayidx = getelementptr inbounds float, float* %p, i64 %idxprom
%3 = load float, float* %arrayidx, align 4
ret float %3
%0 = call i1 @llvm.nvvm.isspacep.local(ptr %p)
tail call void @llvm.assume(i1 %0)
%1 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
%idxprom = zext i32 %1 to i64
%arrayidx = getelementptr inbounds float, ptr %p, i64 %idxprom
%2 = load float, ptr %arrayidx, align 4
ret float %2
}

; CHECK-LABEL: @f3
; CHECK: addrspacecast float* {{%.*}} to float addrspace(3)*
; CHECK: getelementptr inbounds float, float addrspace(3)*
; CHECK: load float, float addrspace(3)*
define float @f3(float* %p) {
; CHECK: addrspacecast ptr {{%.*}} to ptr addrspace(3)
; CHECK: getelementptr inbounds float, ptr addrspace(3)
; CHECK: load float, ptr addrspace(3)
define float @f3(ptr %p) {
entry:
%0 = bitcast float* %p to i8*
%1 = call i1 @llvm.nvvm.isspacep.shared(i8* %0)
tail call void @llvm.assume(i1 %1)
%2 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
%idxprom = zext i32 %2 to i64
%arrayidx = getelementptr inbounds float, float* %p, i64 %idxprom
%3 = load float, float* %arrayidx, align 4
ret float %3
%0 = call i1 @llvm.nvvm.isspacep.shared(ptr %p)
tail call void @llvm.assume(i1 %0)
%1 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
%idxprom = zext i32 %1 to i64
%arrayidx = getelementptr inbounds float, ptr %p, i64 %idxprom
%2 = load float, ptr %arrayidx, align 4
ret float %2
}

; CHECK-LABEL: @g0
; CHECK: if.then:
; CHECK: addrspacecast float* {{%.*}} to float addrspace(3)*
; CHECK: getelementptr inbounds float, float addrspace(3)*
; CHECK: load float, float addrspace(3)*
; CHECK: addrspacecast ptr {{%.*}} to ptr addrspace(3)
; CHECK: getelementptr inbounds float, ptr addrspace(3)
; CHECK: load float, ptr addrspace(3)
; CHECK: if.end:
; CHECK: getelementptr inbounds float, float*
; CHECK: load float, float*
define float @g0(i32 %c, float* %p) {
; CHECK: getelementptr inbounds float, ptr
; CHECK: load float, ptr
define float @g0(i32 %c, ptr %p) {
entry:
%tobool.not = icmp eq i32 %c, 0
br i1 %tobool.not, label %if.end, label %if.then

if.then:
%0 = bitcast float* %p to i8*
%1 = call i1 @llvm.nvvm.isspacep.shared(i8* %0)
tail call void @llvm.assume(i1 %1)
%2 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
%idxprom = zext i32 %2 to i64
%arrayidx = getelementptr inbounds float, float* %p, i64 %idxprom
%3 = load float, float* %arrayidx, align 4
%add = fadd float %3, 0.
%0 = call i1 @llvm.nvvm.isspacep.shared(ptr %p)
tail call void @llvm.assume(i1 %0)
%1 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
%idxprom = zext i32 %1 to i64
%arrayidx = getelementptr inbounds float, ptr %p, i64 %idxprom
%2 = load float, ptr %arrayidx, align 4
%add = fadd float %2, 0.
br label %if.end

if.end:
%s = phi float [ %add, %if.then ], [ 0., %entry ]
%4 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
%idxprom2 = zext i32 %4 to i64
%arrayidx2 = getelementptr inbounds float, float* %p, i64 %idxprom2
%5 = load float, float* %arrayidx2, align 4
%add2 = fadd float %s, %5
%3 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
%idxprom2 = zext i32 %3 to i64
%arrayidx2 = getelementptr inbounds float, ptr %p, i64 %idxprom2
%4 = load float, ptr %arrayidx2, align 4
%add2 = fadd float %s, %4
ret float %add2
}

declare void @llvm.assume(i1)
declare i1 @llvm.nvvm.isspacep.const(i8*)
declare i1 @llvm.nvvm.isspacep.global(i8*)
declare i1 @llvm.nvvm.isspacep.local(i8*)
declare i1 @llvm.nvvm.isspacep.shared(i8*)
declare i1 @llvm.nvvm.isspacep.const(ptr)
declare i1 @llvm.nvvm.isspacep.global(ptr)
declare i1 @llvm.nvvm.isspacep.local(ptr)
declare i1 @llvm.nvvm.isspacep.shared(ptr)
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
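
A sketch of what these checks establish (illustrative, not the pass's literal output): when an llvm.assume of an llvm.nvvm.isspacep.* result dominates a use of %p, the pass materializes a cast into the assumed space and rewrites the dominated users — hence @g0's if.then block moves to addrspace(3) while if.end, which the assume does not dominate, stays generic:

; %0 = call i1 @llvm.nvvm.isspacep.shared(ptr %p)
; call void @llvm.assume(i1 %0)
; %p.shared = addrspacecast ptr %p to ptr addrspace(3)  ; inserted by the pass
; %arrayidx = getelementptr inbounds float, ptr addrspace(3) %p.shared, i64 %idxprom
; %v = load float, ptr addrspace(3) %arrayidx, align 4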
44 changes: 20 additions & 24 deletions llvm/test/Transforms/InferAddressSpaces/NVPTX/clone_constexpr.ll
@@ -12,62 +12,58 @@ define void @foo() local_unnamed_addr #0 {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[X0:%.*]] = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #[[ATTR2:[0-9]+]]
; CHECK-NEXT: [[IDXPROM_I:%.*]] = zext i32 [[X0]] to i64
; CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* addrspacecast ([[STRUCT_S]] addrspace(3)* @g1 to %struct.S*), i64 0, i32 0, i64 [[IDXPROM_I]]
; CHECK-NEXT: tail call void @f1(i32* [[ARRAYIDX_I]], i32 undef) #[[ATTR0:[0-9]+]]
; CHECK-NEXT: [[X1:%.*]] = load i32, i32 addrspace(3)* getelementptr inbounds ([[STRUCT_S]], [[STRUCT_S]] addrspace(3)* @g1, i64 0, i32 0, i64 0), align 4
; CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr addrspacecast (ptr addrspace(3) @g1 to ptr), i64 0, i32 0, i64 [[IDXPROM_I]]
; CHECK-NEXT: tail call void @f1(ptr [[ARRAYIDX_I]], i32 undef) #[[ATTR0:[0-9]+]]
; CHECK-NEXT: [[X1:%.*]] = load i32, ptr addrspace(3) @g1, align 4
; CHECK-NEXT: [[L_SROA_0_0_INSERT_EXT_I:%.*]] = zext i32 [[X1]] to i64
; CHECK-NEXT: tail call void @f2(i64* null, i64 [[L_SROA_0_0_INSERT_EXT_I]]) #[[ATTR0]]
; CHECK-NEXT: tail call void @f2(ptr null, i64 [[L_SROA_0_0_INSERT_EXT_I]]) #[[ATTR0]]
; CHECK-NEXT: ret void
;
entry:
%x0 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #2
%idxprom.i = zext i32 %x0 to i64
%arrayidx.i = getelementptr %struct.S, %struct.S* addrspacecast (%struct.S addrspace(3)* @g1 to %struct.S*), i64 0, i32 0, i64 %idxprom.i
tail call void @f1(i32* %arrayidx.i, i32 undef) #0
%x1 = load i32, i32* getelementptr (%struct.S, %struct.S* addrspacecast (%struct.S addrspace(3)* @g1 to %struct.S*), i64 0, i32 0, i64 0), align 4
%arrayidx.i = getelementptr %struct.S, ptr addrspacecast (ptr addrspace(3) @g1 to ptr), i64 0, i32 0, i64 %idxprom.i
tail call void @f1(ptr %arrayidx.i, i32 undef) #0
%x1 = load i32, ptr addrspacecast (ptr addrspace(3) @g1 to ptr), align 4
%L.sroa.0.0.insert.ext.i = zext i32 %x1 to i64
tail call void @f2(i64* null, i64 %L.sroa.0.0.insert.ext.i) #0
tail call void @f2(ptr null, i64 %L.sroa.0.0.insert.ext.i) #0
ret void
}

declare void @f1(i32*, i32) local_unnamed_addr #0
declare void @f2(i64*, i64) local_unnamed_addr #0
declare void @f1(ptr, i32) local_unnamed_addr #0
declare void @f2(ptr, i64) local_unnamed_addr #0
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #1

; Make sure we can clone a GEP which uses complex constant expressions as indices.
; https://bugs.llvm.org/show_bug.cgi?id=51099
@g2 = internal addrspace(3) global [128 x i8] undef, align 1

define float @complex_ce(i8* nocapture readnone %a, i8* nocapture readnone %b, i8* nocapture readnone %c) local_unnamed_addr #0 {
define float @complex_ce(ptr nocapture readnone %a, ptr nocapture readnone %b, ptr nocapture readnone %c) local_unnamed_addr #0 {
; CHECK-LABEL: @complex_ce(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load float, float addrspace(3)* bitcast (i8 addrspace(3)* getelementptr (i8, i8 addrspace(3)* getelementptr inbounds ([128 x i8], [128 x i8] addrspace(3)* @g2, i64 0, i64 0), i64 sub (i64 ptrtoint (i8 addrspace(3)* getelementptr inbounds ([128 x i8], [128 x i8] addrspace(3)* @g2, i64 0, i64 123) to i64), i64 ptrtoint (i8 addrspace(3)* getelementptr inbounds ([128 x i8], [128 x i8] addrspace(3)* @g2, i64 2, i64 0) to i64))) to float addrspace(3)*), align 4
; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr addrspace(3) getelementptr (i8, ptr addrspace(3) @g2, i64 sub (i64 ptrtoint (ptr addrspace(3) getelementptr inbounds ([128 x i8], ptr addrspace(3) @g2, i64 0, i64 123) to i64), i64 ptrtoint (ptr addrspace(3) getelementptr inbounds ([128 x i8], ptr addrspace(3) @g2, i64 2, i64 0) to i64))), align 4
; CHECK-NEXT: ret float [[TMP0]]
;
entry:
%0 = load float, float* bitcast (
i8* getelementptr (
i8, i8* getelementptr inbounds (
[128 x i8],
[128 x i8]* addrspacecast ([128 x i8] addrspace(3)* @g2 to [128 x i8]*),
i64 0,
i64 0),
%0 = load float, ptr bitcast (
ptr getelementptr (
i8, ptr addrspacecast (ptr addrspace(3) @g2 to ptr),
i64 sub (
i64 ptrtoint (
i8* getelementptr inbounds (
ptr getelementptr inbounds (
[128 x i8],
[128 x i8]* addrspacecast ([128 x i8] addrspace(3)* @g2 to [128 x i8]*),
ptr addrspacecast (ptr addrspace(3) @g2 to ptr),
i64 0,
i64 123)
to i64),
i64 ptrtoint (
i8* getelementptr inbounds (
ptr getelementptr inbounds (
[128 x i8],
[128 x i8]* addrspacecast ([128 x i8] addrspace(3)* @g2 to [128 x i8]*),
ptr addrspacecast (ptr addrspace(3) @g2 to ptr),
i64 2,
i64 0)
to i64)))
to float*), align 4
to ptr), align 4
ret float %0
}
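
What makes this case tricky (the PR51099 reference above) is that the addrspacecast being eliminated appears both as the GEP's base and inside its ptrtoint'd index operand: the pass must clone the outer GEP onto ptr addrspace(3) @g2 while leaving the constant-expression index intact. The CHECK line above, reflowed for readability:

; load float, ptr addrspace(3) getelementptr (i8, ptr addrspace(3) @g2,
;   i64 sub (i64 ptrtoint (ptr addrspace(3) getelementptr inbounds ([128 x i8], ptr addrspace(3) @g2, i64 0, i64 123) to i64),
;            i64 ptrtoint (ptr addrspace(3) getelementptr inbounds ([128 x i8], ptr addrspace(3) @g2, i64 2, i64 0) to i64)))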

14 changes: 7 additions & 7 deletions llvm/test/Transforms/InferAddressSpaces/X86/noop-ptrint-pair.ll
@@ -5,12 +5,12 @@
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7"

; CHECK-LABEL: @noop_ptrint_pair(
; CHECK: addrspacecast i32 addrspace(1)* %x to i32 addrspace(4)*
; CHECK-NEXT: ptrtoint i32 addrspace(4)* %{{.*}} to i64
; CHECK-NEXT: inttoptr i64 %{{.*}} to i32 addrspace(4)*
define void @noop_ptrint_pair(i32 addrspace(1)* %x) {
%1 = addrspacecast i32 addrspace(1)* %x to i32 addrspace(4)*
%2 = ptrtoint i32 addrspace(4)* %1 to i64
%3 = inttoptr i64 %2 to i32 addrspace(4)*
; CHECK: addrspacecast ptr addrspace(1) %x to ptr addrspace(4)
; CHECK-NEXT: ptrtoint ptr addrspace(4) %{{.*}} to i64
; CHECK-NEXT: inttoptr i64 %{{.*}} to ptr addrspace(4)
define void @noop_ptrint_pair(ptr addrspace(1) %x) {
%1 = addrspacecast ptr addrspace(1) %x to ptr addrspace(4)
%2 = ptrtoint ptr addrspace(4) %1 to i64
%3 = inttoptr i64 %2 to ptr addrspace(4)
ret void
}
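
Unlike the AMDGPU copy of this test, no target hook on X86 vouches for the addrspace(1)-to-addrspace(4) cast being a no-op, so the CHECK lines above require that nothing is folded:

; %1 = addrspacecast ptr addrspace(1) %x to ptr addrspace(4)
; %2 = ptrtoint ptr addrspace(4) %1 to i64
; %3 = inttoptr i64 %2 to ptr addrspace(4)  ; the whole round-trip survives the pass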