[AMDGPU] Switch to the new addr space mapping by default
This requires a corresponding clang change.

Differential Revision: https://reviews.llvm.org/D40955

llvm-svn: 324101
yxsamliu committed Feb 2, 2018
1 parent a43e965 commit 2a22c5d
Showing 104 changed files with 3,238 additions and 3,258 deletions.
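In effect, the commit flips the default AMDGPU address-space numbering: under the old mapping private was 0, flat was 4, and region was 5; under the new mapping flat is 0, region is 4, and private is 5, while global (1), constant (2), and local (3) are untouched. A minimal LLVM IR sketch of what this means for pointer types (illustrative only, not part of the commit):

; Old default: 0 = private, 1 = global, 2 = constant, 3 = local, 4 = flat,   5 = region
; New default: 0 = flat,    1 = global, 2 = constant, 3 = local, 4 = region, 5 = private
define void @mapping_sketch(i32 addrspace(5)* %priv) {
  ; A generic (flat) pointer is now spelled plain i32*; private is addrspace(5).
  %flat = addrspacecast i32 addrspace(5)* %priv to i32*
  store volatile i32 7, i32* %flat
  ret void
}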
9 changes: 0 additions & 9 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -260,24 +260,15 @@ GCNILPSchedRegistry("gcn-ilp",
 static StringRef computeDataLayout(const Triple &TT) {
   if (TT.getArch() == Triple::r600) {
     // 32-bit pointers.
-    if (TT.getEnvironmentName() == "amdgiz" ||
-        TT.getEnvironmentName() == "amdgizcl")
     return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
            "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5";
-    return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
-           "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
   }

   // 32-bit private, local, and region pointers. 64-bit global, constant and
   // flat.
-  if (TT.getEnvironmentName() == "amdgiz" ||
-      TT.getEnvironmentName() == "amdgizcl")
   return "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32"
          "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
          "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5";
-  return "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32"
-         "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
-         "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
 }

 LLVM_READNONE
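The retained layout string encodes the new mapping directly: p:64:64 makes flat (address space 0) pointers 64-bit, p3:32:32 and p5:32:32 keep local and private pointers 32-bit, and the trailing A5 declares address space 5 as the alloca address space. A minimal module exercising that string (the triple is an assumption; the layout is copied from the hunk above):

target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
target triple = "amdgcn-amd-amdhsa"

define void @alloca_sketch() {
  ; With A5 in effect, allocas are created in address space 5 (private).
  %buf = alloca i32, align 4, addrspace(5)
  store i32 0, i32 addrspace(5)* %buf
  ret void
}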
14 changes: 3 additions & 11 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -935,18 +935,10 @@ namespace llvm {
 namespace AMDGPU {

 AMDGPUAS getAMDGPUAS(Triple T) {
-  auto Env = T.getEnvironmentName();
   AMDGPUAS AS;
-  if (Env == "amdgiz" || Env == "amdgizcl") {
-    AS.FLAT_ADDRESS = 0;
-    AS.PRIVATE_ADDRESS = 5;
-    AS.REGION_ADDRESS = 4;
-  }
-  else {
-    AS.FLAT_ADDRESS = 4;
-    AS.PRIVATE_ADDRESS = 0;
-    AS.REGION_ADDRESS = 5;
-  }
+  AS.FLAT_ADDRESS = 0;
+  AS.PRIVATE_ADDRESS = 5;
+  AS.REGION_ADDRESS = 4;
   return AS;
 }

48 changes: 24 additions & 24 deletions llvm/test/Analysis/CostModel/AMDGPU/addrspacecast.ll
@@ -1,45 +1,45 @@
 ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri < %s | FileCheck %s

 ; CHECK: 'addrspacecast_global_to_flat'
-; CHECK: estimated cost of 0 for {{.*}} addrspacecast i8 addrspace(1)* %ptr to i8 addrspace(4)*
-define i8 addrspace(4)* @addrspacecast_global_to_flat(i8 addrspace(1)* %ptr) #0 {
-  %cast = addrspacecast i8 addrspace(1)* %ptr to i8 addrspace(4)*
-  ret i8 addrspace(4)* %cast
+; CHECK: estimated cost of 0 for {{.*}} addrspacecast i8 addrspace(1)* %ptr to i8*
+define i8* @addrspacecast_global_to_flat(i8 addrspace(1)* %ptr) #0 {
+  %cast = addrspacecast i8 addrspace(1)* %ptr to i8*
+  ret i8* %cast
 }

 ; CHECK: 'addrspacecast_global_to_flat_v2'
-; CHECK: estimated cost of 0 for {{.*}} addrspacecast <2 x i8 addrspace(1)*> %ptr to <2 x i8 addrspace(4)*>
-define <2 x i8 addrspace(4)*> @addrspacecast_global_to_flat_v2(<2 x i8 addrspace(1)*> %ptr) #0 {
-  %cast = addrspacecast <2 x i8 addrspace(1)*> %ptr to <2 x i8 addrspace(4)*>
-  ret <2 x i8 addrspace(4)*> %cast
+; CHECK: estimated cost of 0 for {{.*}} addrspacecast <2 x i8 addrspace(1)*> %ptr to <2 x i8*>
+define <2 x i8*> @addrspacecast_global_to_flat_v2(<2 x i8 addrspace(1)*> %ptr) #0 {
+  %cast = addrspacecast <2 x i8 addrspace(1)*> %ptr to <2 x i8*>
+  ret <2 x i8*> %cast
 }

 ; CHECK: 'addrspacecast_global_to_flat_v32'
-; CHECK: estimated cost of 0 for {{.*}} addrspacecast <32 x i8 addrspace(1)*> %ptr to <32 x i8 addrspace(4)*>
-define <32 x i8 addrspace(4)*> @addrspacecast_global_to_flat_v32(<32 x i8 addrspace(1)*> %ptr) #0 {
-  %cast = addrspacecast <32 x i8 addrspace(1)*> %ptr to <32 x i8 addrspace(4)*>
-  ret <32 x i8 addrspace(4)*> %cast
+; CHECK: estimated cost of 0 for {{.*}} addrspacecast <32 x i8 addrspace(1)*> %ptr to <32 x i8*>
+define <32 x i8*> @addrspacecast_global_to_flat_v32(<32 x i8 addrspace(1)*> %ptr) #0 {
+  %cast = addrspacecast <32 x i8 addrspace(1)*> %ptr to <32 x i8*>
+  ret <32 x i8*> %cast
 }

 ; CHECK: 'addrspacecast_local_to_flat'
-; CHECK: estimated cost of 1 for {{.*}} addrspacecast i8 addrspace(3)* %ptr to i8 addrspace(4)*
-define i8 addrspace(4)* @addrspacecast_local_to_flat(i8 addrspace(3)* %ptr) #0 {
-  %cast = addrspacecast i8 addrspace(3)* %ptr to i8 addrspace(4)*
-  ret i8 addrspace(4)* %cast
+; CHECK: estimated cost of 1 for {{.*}} addrspacecast i8 addrspace(3)* %ptr to i8*
+define i8* @addrspacecast_local_to_flat(i8 addrspace(3)* %ptr) #0 {
+  %cast = addrspacecast i8 addrspace(3)* %ptr to i8*
+  ret i8* %cast
 }

 ; CHECK: 'addrspacecast_local_to_flat_v2'
-; CHECK: estimated cost of 2 for {{.*}} addrspacecast <2 x i8 addrspace(3)*> %ptr to <2 x i8 addrspace(4)*>
-define <2 x i8 addrspace(4)*> @addrspacecast_local_to_flat_v2(<2 x i8 addrspace(3)*> %ptr) #0 {
-  %cast = addrspacecast <2 x i8 addrspace(3)*> %ptr to <2 x i8 addrspace(4)*>
-  ret <2 x i8 addrspace(4)*> %cast
+; CHECK: estimated cost of 2 for {{.*}} addrspacecast <2 x i8 addrspace(3)*> %ptr to <2 x i8*>
+define <2 x i8*> @addrspacecast_local_to_flat_v2(<2 x i8 addrspace(3)*> %ptr) #0 {
+  %cast = addrspacecast <2 x i8 addrspace(3)*> %ptr to <2 x i8*>
+  ret <2 x i8*> %cast
 }

 ; CHECK: 'addrspacecast_local_to_flat_v32'
-; CHECK: estimated cost of 32 for {{.*}} addrspacecast <32 x i8 addrspace(3)*> %ptr to <32 x i8 addrspace(4)*>
-define <32 x i8 addrspace(4)*> @addrspacecast_local_to_flat_v32(<32 x i8 addrspace(3)*> %ptr) #0 {
-  %cast = addrspacecast <32 x i8 addrspace(3)*> %ptr to <32 x i8 addrspace(4)*>
-  ret <32 x i8 addrspace(4)*> %cast
+; CHECK: estimated cost of 32 for {{.*}} addrspacecast <32 x i8 addrspace(3)*> %ptr to <32 x i8*>
+define <32 x i8*> @addrspacecast_local_to_flat_v32(<32 x i8 addrspace(3)*> %ptr) #0 {
+  %cast = addrspacecast <32 x i8 addrspace(3)*> %ptr to <32 x i8*>
+  ret <32 x i8*> %cast
 }

 attributes #0 = { nounwind readnone }
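The costs above follow from how flat pointers are materialized: a global-to-flat cast reuses the same 64-bit value, so it is free, while a local-to-flat cast must combine the 32-bit offset with the shared-segment aperture base (plus a null check), roughly one operation per pointer element, which is why the vector cases scale with the element count. A hedged sketch of the pattern (a new example, not taken from this test):

define <2 x i8*> @per_element_cost(<2 x i8 addrspace(3)*> %v) {
  ; Each lane needs its own aperture combine, so the estimated cost here
  ; would be 2, and 32 for a <32 x ...> version.
  %cast = addrspacecast <2 x i8 addrspace(3)*> %v to <2 x i8*>
  ret <2 x i8*> %cast
}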
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/InlineAsmCrash.ll
@@ -4,9 +4,9 @@
 ; CHECK-NEXT: s_nop 0
 ; CHECK-NEXT: ;;#ASMEND

-define void @foo(i32* %ptr) {
+define void @foo(i32 addrspace(5)* %ptr) {
   %tmp = tail call { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } asm "s_nop 0", "=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,=v,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65"(i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2)
   %tmp2 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %tmp, 0
-  store i32 %tmp2, i32* %ptr, align 4
+  store i32 %tmp2, i32 addrspace(5)* %ptr, align 4
   ret void
 }
76 changes: 38 additions & 38 deletions llvm/test/CodeGen/AMDGPU/addrspacecast.ll
@@ -35,8 +35,8 @@
 ; CI: NumSgprs: {{[0-9][0-9]+}}
 ; GFX9: NumSgprs: {{[0-9]+}}
 define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #0 {
-  %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
-  store volatile i32 7, i32 addrspace(4)* %stof
+  %stof = addrspacecast i32 addrspace(3)* %ptr to i32*
+  store volatile i32 7, i32* %stof
   ret void
 }

@@ -73,9 +73,9 @@ define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %pt

 ; CI: NumSgprs: {{[0-9][0-9]+}}
 ; GFX9: NumSgprs: {{[0-9]+}}
-define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32* %ptr) #0 {
-  %stof = addrspacecast i32* %ptr to i32 addrspace(4)*
-  store volatile i32 7, i32 addrspace(4)* %stof
+define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #0 {
+  %stof = addrspacecast i32 addrspace(5)* %ptr to i32*
+  store volatile i32 7, i32* %stof
   ret void
 }

Expand All @@ -89,8 +89,8 @@ define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32* %ptr) #0 {
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
; HSA: flat_store_dword v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}, [[K]]
define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #0 {
%stof = addrspacecast i32 addrspace(1)* %ptr to i32 addrspace(4)*
store volatile i32 7, i32 addrspace(4)* %stof
%stof = addrspacecast i32 addrspace(1)* %ptr to i32*
store volatile i32 7, i32* %stof
ret void
}

Expand All @@ -101,8 +101,8 @@ define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %p
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA: flat_load_dword v{{[0-9]+}}, v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}
define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(2)* %ptr) #0 {
%stof = addrspacecast i32 addrspace(2)* %ptr to i32 addrspace(4)*
%ld = load volatile i32, i32 addrspace(4)* %stof
%stof = addrspacecast i32 addrspace(2)* %ptr to i32*
%ld = load volatile i32, i32* %stof
ret void
}

Expand All @@ -117,8 +117,8 @@ define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(2)*
; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]]
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
; HSA: ds_write_b32 [[CASTPTR]], v[[K]]
define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %ptr) #0 {
%ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(3)*
define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #0 {
%ftos = addrspacecast i32* %ptr to i32 addrspace(3)*
store volatile i32 0, i32 addrspace(3)* %ftos
ret void
}
Expand All @@ -134,9 +134,9 @@ define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32 addrspace(4)* %pt
; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], 0, v[[VPTR_LO]]
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
; HSA: buffer_store_dword v[[K]], [[CASTPTR]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %ptr) #0 {
%ftos = addrspacecast i32 addrspace(4)* %ptr to i32*
store volatile i32 0, i32* %ftos
define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #0 {
%ftos = addrspacecast i32* %ptr to i32 addrspace(5)*
store volatile i32 0, i32 addrspace(5)* %ftos
ret void
}

Expand All @@ -148,8 +148,8 @@ define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32 addrspace(4)* %
; HSA-DAG: v_mov_b32_e32 v[[VPTRHI:[0-9]+]], s[[PTRHI]]
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0
; HSA: {{flat|global}}_store_dword v{{\[}}[[VPTRLO]]:[[VPTRHI]]{{\]}}, [[K]]
define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %ptr) #0 {
%ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(1)*
define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #0 {
%ftos = addrspacecast i32* %ptr to i32 addrspace(1)*
store volatile i32 0, i32 addrspace(1)* %ftos
ret void
}
Expand All @@ -159,8 +159,8 @@ define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32 addrspace(4)* %p

; HSA: s_load_dwordx2 s{{\[}}[[PTRLO:[0-9]+]]:[[PTRHI:[0-9]+]]{{\]}}, s[4:5], 0x0
; HSA: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}}, 0x0
define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #0 {
%ftos = addrspacecast i32 addrspace(4)* %ptr to i32 addrspace(2)*
define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #0 {
%ftos = addrspacecast i32* %ptr to i32 addrspace(2)*
load volatile i32, i32 addrspace(2)* %ftos
ret void
}
Expand All @@ -178,8 +178,8 @@ define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32 addrspace(4)*
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() #0 {
%cast = addrspacecast i32 addrspace(3)* null to i32 addrspace(4)*
store volatile i32 7, i32 addrspace(4)* %cast
%cast = addrspacecast i32 addrspace(3)* null to i32*
store volatile i32 7, i32* %cast
ret void
}

Expand All @@ -188,7 +188,7 @@ define amdgpu_kernel void @cast_0_group_to_flat_addrspacecast() #0 {
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: ds_write_b32 [[PTR]], [[K]]
define amdgpu_kernel void @cast_0_flat_to_group_addrspacecast() #0 {
%cast = addrspacecast i32 addrspace(4)* null to i32 addrspace(3)*
%cast = addrspacecast i32* null to i32 addrspace(3)*
store volatile i32 7, i32 addrspace(3)* %cast
ret void
}
Expand All @@ -199,8 +199,8 @@ define amdgpu_kernel void @cast_0_flat_to_group_addrspacecast() #0 {
; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
define amdgpu_kernel void @cast_neg1_group_to_flat_addrspacecast() #0 {
%cast = addrspacecast i32 addrspace(3)* inttoptr (i32 -1 to i32 addrspace(3)*) to i32 addrspace(4)*
store volatile i32 7, i32 addrspace(4)* %cast
%cast = addrspacecast i32 addrspace(3)* inttoptr (i32 -1 to i32 addrspace(3)*) to i32*
store volatile i32 7, i32* %cast
ret void
}

Expand All @@ -209,7 +209,7 @@ define amdgpu_kernel void @cast_neg1_group_to_flat_addrspacecast() #0 {
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: ds_write_b32 [[PTR]], [[K]]
define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 {
%cast = addrspacecast i32 addrspace(4)* inttoptr (i64 -1 to i32 addrspace(4)*) to i32 addrspace(3)*
%cast = addrspacecast i32* inttoptr (i64 -1 to i32*) to i32 addrspace(3)*
store volatile i32 7, i32 addrspace(3)* %cast
ret void
}
Expand All @@ -224,17 +224,17 @@ define amdgpu_kernel void @cast_neg1_flat_to_group_addrspacecast() #0 {
; HSA: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
; HSA: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
define amdgpu_kernel void @cast_0_private_to_flat_addrspacecast() #0 {
%cast = addrspacecast i32* null to i32 addrspace(4)*
store volatile i32 7, i32 addrspace(4)* %cast
%cast = addrspacecast i32 addrspace(5)* null to i32*
store volatile i32 7, i32* %cast
ret void
}

; HSA-LABEL: {{^}}cast_0_flat_to_private_addrspacecast:
; HSA: v_mov_b32_e32 [[K:v[0-9]+]], 7{{$}}
; HSA: buffer_store_dword [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
define amdgpu_kernel void @cast_0_flat_to_private_addrspacecast() #0 {
%cast = addrspacecast i32 addrspace(4)* null to i32 addrspace(0)*
store volatile i32 7, i32* %cast
%cast = addrspacecast i32* null to i32 addrspace(5)*
store volatile i32 7, i32 addrspace(5)* %cast
ret void
}

Expand All @@ -250,17 +250,17 @@ entry:
br i1 %cmp, label %local, label %global

local:
%flat_local = addrspacecast i32 addrspace(3)* %lptr to i32 addrspace(4)*
%flat_local = addrspacecast i32 addrspace(3)* %lptr to i32*
br label %end

global:
%flat_global = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
%flat_global = addrspacecast i32 addrspace(1)* %gptr to i32*
br label %end

end:
%fptr = phi i32 addrspace(4)* [ %flat_local, %local ], [ %flat_global, %global ]
store volatile i32 %x, i32 addrspace(4)* %fptr, align 4
; %val = load i32, i32 addrspace(4)* %fptr, align 4
%fptr = phi i32* [ %flat_local, %local ], [ %flat_global, %global ]
store volatile i32 %x, i32* %fptr, align 4
; %val = load i32, i32* %fptr, align 4
; store i32 %val, i32 addrspace(1)* %out, align 4
ret void
}
Expand All @@ -278,14 +278,14 @@ end:
; HSA: s_barrier
; HSA: {{flat|global}}_load_dword
define amdgpu_kernel void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 {
%alloca = alloca i32, i32 9, align 4
%alloca = alloca i32, i32 9, align 4, addrspace(5)
%x = call i32 @llvm.amdgcn.workitem.id.x() #2
%pptr = getelementptr i32, i32* %alloca, i32 %x
%fptr = addrspacecast i32* %pptr to i32 addrspace(4)*
store volatile i32 %x, i32 addrspace(4)* %fptr
%pptr = getelementptr i32, i32 addrspace(5)* %alloca, i32 %x
%fptr = addrspacecast i32 addrspace(5)* %pptr to i32*
store volatile i32 %x, i32* %fptr
; Dummy call
call void @llvm.amdgcn.s.barrier() #1
%reload = load volatile i32, i32 addrspace(4)* %fptr, align 4
%reload = load volatile i32, i32* %fptr, align 4
store volatile i32 %reload, i32 addrspace(1)* %out, align 4
ret void
}
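The store_flat_scratch update shows the full pattern a frontend now emits for stack memory that escapes to a generic pointer: the alloca is created in address space 5 (matching the A5 data-layout change above) and explicitly cast to flat before the flat memory operations. A reduced sketch under those assumptions (names invented for illustration):

define amdgpu_kernel void @escaping_local() {
  %slot = alloca i32, align 4, addrspace(5)             ; private stack slot
  %gen = addrspacecast i32 addrspace(5)* %slot to i32*  ; decay to flat
  store volatile i32 42, i32* %gen                      ; becomes a flat/scratch store
  ret void
}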
14 changes: 7 additions & 7 deletions llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll
@@ -17,13 +17,13 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
 ; GCN: buffer_store_dword [[RESULT]]
 define amdgpu_kernel void @work_item_info(i32 addrspace(1)* %out, i32 %in) {
 entry:
-  %0 = alloca [2 x i32]
-  %1 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 0
-  %2 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 1
-  store i32 0, i32* %1
-  store i32 1, i32* %2
-  %3 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 %in
-  %4 = load i32, i32* %3
+  %0 = alloca [2 x i32], addrspace(5)
+  %1 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 0
+  %2 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 1
+  store i32 0, i32 addrspace(5)* %1
+  store i32 1, i32 addrspace(5)* %2
+  %3 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 %in
+  %4 = load i32, i32 addrspace(5)* %3
   %5 = call i32 @llvm.amdgcn.workitem.id.x()
   %6 = add i32 %4, %5
   store i32 %6, i32 addrspace(1)* %out
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll
@@ -1,9 +1,9 @@
 ; RUN: opt -mtriple=amdgcn-- -O3 -aa-eval -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s
 ; RUN: opt -mtriple=r600-- -O3 -aa-eval -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s

-; CHECK: NoAlias: i8 addrspace(1)* %p1, i8* %p
+; CHECK: NoAlias: i8 addrspace(1)* %p1, i8 addrspace(5)* %p

-define void @test(i8* %p, i8 addrspace(1)* %p1) {
+define void @test(i8 addrspace(5)* %p, i8 addrspace(1)* %p1) {
   ret void
 }
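The expectation itself is unchanged: private and global pointers still never alias, since they live in two distinct concrete address spaces (now 5 and 1); only the spelling of the private pointer changed. A flat pointer (now address space 0), by contrast, may address any segment, so a query like the following would be expected to remain MayAlias (a hypothetical addition, not part of this test):

; CHECK: MayAlias: i8 addrspace(1)* %g, i8* %flat
define void @flat_vs_global(i8* %flat, i8 addrspace(1)* %g) {
  ret void
}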
