diff --git a/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll b/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll
index edca16871ac670..b8c3fed5d257fd 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll
@@ -46,7 +46,59 @@ define i64 @atomic_load_monotonic_i64(i64 addrspace(3)* %ptr) {
 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
 ; GCN-NEXT: s_setpc_b64
 define i64 @atomic_load_monotonic_i64_offset(i64 addrspace(3)* %ptr) {
-  %gep = getelementptr inbounds i64, i64 addrspace(3)* %ptr, i64 16
+  %gep = getelementptr inbounds i64, i64 addrspace(3)* %ptr, i32 16
   %load = load atomic i64, i64 addrspace(3)* %gep monotonic, align 8
   ret i64 %load
 }
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_f32_offset:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define float @atomic_load_monotonic_f32_offset(float addrspace(3)* %ptr) {
+  %gep = getelementptr inbounds float, float addrspace(3)* %ptr, i32 16
+  %load = load atomic float, float addrspace(3)* %gep monotonic, align 4
+  ret float %load
+}
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_f64_offset:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define double @atomic_load_monotonic_f64_offset(double addrspace(3)* %ptr) {
+  %gep = getelementptr inbounds double, double addrspace(3)* %ptr, i32 16
+  %load = load atomic double, double addrspace(3)* %gep monotonic, align 8
+  ret double %load
+}
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_p0i8_offset:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define i8* @atomic_load_monotonic_p0i8_offset(i8* addrspace(3)* %ptr) {
+  %gep = getelementptr inbounds i8*, i8* addrspace(3)* %ptr, i32 16
+  %load = load atomic i8*, i8* addrspace(3)* %gep monotonic, align 8
+  ret i8* %load
+}
+
+; GCN-LABEL: {{^}}atomic_load_monotonic_p3i8_offset:
+; GCN: s_waitcnt
+; GFX9-NOT: s_mov_b32 m0
+; CI-NEXT: s_mov_b32 m0
+; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}}
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define i8 addrspace(3)* @atomic_load_monotonic_p3i8_offset(i8 addrspace(3)* addrspace(3)* %ptr) {
+  %gep = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %ptr, i32 16
+  %load = load atomic i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %gep monotonic, align 4
+  ret i8 addrspace(3)* %load
+}
diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics.ll
index d20894979ed20f..b465031bc01c87 100644
--- a/llvm/test/CodeGen/AMDGPU/flat_atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat_atomics.ll
@@ -1044,3 +1044,87 @@ entry:
   store atomic i32 %in, i32* %ptr seq_cst, align 4
   ret void
 }
+
+; GCN-LABEL: {{^}}atomic_load_f32_offset:
+; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @atomic_load_f32_offset(float* %in, float* %out) {
+entry:
+  %gep = getelementptr float, float* %in, i32 4
+  %val = load atomic float, float* %gep seq_cst, align 4
+  store float %val, float* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}atomic_load_f32:
+; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @atomic_load_f32(float* %in, float* %out) {
+entry:
+  %val = load atomic float, float* %in seq_cst, align 4
+  store float %val, float* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}atomic_load_f32_addr64_offset:
+; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @atomic_load_f32_addr64_offset(float* %in, float* %out, i64 %index) {
+entry:
+  %ptr = getelementptr float, float* %in, i64 %index
+  %gep = getelementptr float, float* %ptr, i32 4
+  %val = load atomic float, float* %gep seq_cst, align 4
+  store float %val, float* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}atomic_load_f32_addr64:
+; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @atomic_load_f32_addr64(float* %in, float* %out, i64 %index) {
+entry:
+  %ptr = getelementptr float, float* %in, i64 %index
+  %val = load atomic float, float* %ptr seq_cst, align 4
+  store float %val, float* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_f32_offset:
+; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}}
+define amdgpu_kernel void @atomic_store_f32_offset(float %in, float* %out) {
+entry:
+  %gep = getelementptr float, float* %out, i32 4
+  store atomic float %in, float* %gep seq_cst, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_f32:
+; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define amdgpu_kernel void @atomic_store_f32(float %in, float* %out) {
+entry:
+  store atomic float %in, float* %out seq_cst, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_f32_addr64_offset:
+; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}}
+define amdgpu_kernel void @atomic_store_f32_addr64_offset(float %in, float* %out, i64 %index) {
+entry:
+  %ptr = getelementptr float, float* %out, i64 %index
+  %gep = getelementptr float, float* %ptr, i32 4
+  store atomic float %in, float* %gep seq_cst, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_f32_addr64:
+; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
+define amdgpu_kernel void @atomic_store_f32_addr64(float %in, float* %out, i64 %index) {
+entry:
+  %ptr = getelementptr float, float* %out, i64 %index
+  store atomic float %in, float* %ptr seq_cst, align 4
+  ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
index 0cf28275b87b71..3e9b8603432f89 100644
--- a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
@@ -982,3 +982,83 @@ entry:
   store i64 %extract0, i64* %out2
   ret void
 }
+
+; GCN-LABEL: {{^}}atomic_load_f64_offset:
+; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @atomic_load_f64_offset(double* %in, double* %out) {
+entry:
+  %gep = getelementptr double, double* %in, i64 4
+  %val = load atomic double, double* %gep seq_cst, align 8
+  store double %val, double* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}atomic_load_f64:
+; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc
+; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @atomic_load_f64(double* %in, double* %out) {
+entry:
+  %val = load atomic double, double* %in seq_cst, align 8
+  store double %val, double* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}atomic_load_f64_addr64_offset:
+; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @atomic_load_f64_addr64_offset(double* %in, double* %out, i64 %index) {
+entry:
+  %ptr = getelementptr double, double* %in, i64 %index
+  %gep = getelementptr double, double* %ptr, i64 4
+  %val = load atomic double, double* %gep seq_cst, align 8
+  store double %val, double* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}atomic_load_f64_addr64:
+; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
+define amdgpu_kernel void @atomic_load_f64_addr64(double* %in, double* %out, i64 %index) {
+entry:
+  %ptr = getelementptr double, double* %in, i64 %index
+  %val = load atomic double, double* %ptr seq_cst, align 8
+  store double %val, double* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_f64_offset:
+; GCN: flat_store_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
+define amdgpu_kernel void @atomic_store_f64_offset(double %in, double* %out) {
+entry:
+  %gep = getelementptr double, double* %out, i64 4
+  store atomic double %in, double* %gep seq_cst, align 8
+  ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_f64:
+; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, v[{{[0-9]+}}:{{[0-9]+}}]
+define amdgpu_kernel void @atomic_store_f64(double %in, double* %out) {
+entry:
+  store atomic double %in, double* %out seq_cst, align 8
+  ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_f64_addr64_offset:
+; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
+define amdgpu_kernel void @atomic_store_f64_addr64_offset(double %in, double* %out, i64 %index) {
+entry:
+  %ptr = getelementptr double, double* %out, i64 %index
+  %gep = getelementptr double, double* %ptr, i64 4
+  store atomic double %in, double* %gep seq_cst, align 8
+  ret void
+}
+
+; GCN-LABEL: {{^}}atomic_store_f64_addr64:
+; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
+define amdgpu_kernel void @atomic_store_f64_addr64(double %in, double* %out, i64 %index) {
+entry:
+  %ptr = getelementptr double, double* %out, i64 %index
+  store atomic double %in, double* %ptr seq_cst, align 8
+  ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics.ll b/llvm/test/CodeGen/AMDGPU/global_atomics.ll
index 618c70083077d9..7730d7ebd49d74 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics.ll
@@ -1155,6 +1155,20 @@ entry:
   ret void
 }
 
+; GCN-LABEL: {{^}}atomic_load_f32_offset:
+; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
+; SIVI: buffer_store_dword [[RET]]
+
+; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], off offset:16 glc{{$}}
+define amdgpu_kernel void @atomic_load_f32_offset(float addrspace(1)* %in, float addrspace(1)* %out) {
+entry:
+  %gep = getelementptr float, float addrspace(1)* %in, i64 4
+  %val = load atomic float, float addrspace(1)* %gep seq_cst, align 4
+  store float %val, float addrspace(1)* %out
+  ret void
+}
+
 ; GCN-LABEL: {{^}}atomic_load_i32:
 ; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc
@@ -1197,6 +1211,21 @@ entry:
   ret void
 }
 
+; GCN-LABEL: {{^}}atomic_load_f32_addr64_offset:
+; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
+; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; SIVI: buffer_store_dword [[RET]]
+
+; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], off offset:16 glc{{$}}
+define amdgpu_kernel void @atomic_load_f32_addr64_offset(float addrspace(1)* %in, float addrspace(1)* %out, i64 %index) {
+entry:
+  %ptr = getelementptr float, float addrspace(1)* %in, i64 %index
+  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4
+  %val = load atomic float, float addrspace(1)* %gep seq_cst, align 4
+  store float %val, float addrspace(1)* %out
+  ret void
+}
+
 ; GCN-LABEL: {{^}}atomic_store_i32_offset:
 ; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
 ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
@@ -1218,6 +1247,16 @@ entry:
   ret void
 }
 
+; GCN-LABEL: {{^}}atomic_store_f32:
+; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
+; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off{{$}}
+define amdgpu_kernel void @atomic_store_f32(float %in, float addrspace(1)* %out) {
+entry:
+  store atomic float %in, float addrspace(1)* %out seq_cst, align 4
+  ret void
+}
+
 ; GCN-LABEL: {{^}}atomic_store_i32_addr64_offset:
 ; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
 ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
@@ -1230,6 +1269,18 @@ entry:
   ret void
 }
 
+; GCN-LABEL: {{^}}atomic_store_f32_addr64_offset:
+; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
+; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off offset:16{{$}}
+define amdgpu_kernel void @atomic_store_f32_addr64_offset(float %in, float addrspace(1)* %out, i64 %index) {
+entry:
+  %ptr = getelementptr float, float addrspace(1)* %out, i64 %index
+  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4
+  store atomic float %in, float addrspace(1)* %gep seq_cst, align 4
+  ret void
+}
+
 ; GCN-LABEL: {{^}}atomic_store_i32_addr64:
 ; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
 ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
@@ -1240,3 +1291,14 @@ entry:
   store atomic i32 %in, i32 addrspace(1)* %ptr seq_cst, align 4
   ret void
 }
+
+; GCN-LABEL: {{^}}atomic_store_f32_addr64:
+; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
+; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off{{$}}
+define amdgpu_kernel void @atomic_store_f32_addr64(float %in, float addrspace(1)* %out, i64 %index) {
+entry:
+  %ptr = getelementptr float, float addrspace(1)* %out, i64 %index
+  store atomic float %in, float addrspace(1)* %ptr seq_cst, align 4
+  ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll
index d00b6522d46f5d..6ccc1d92521a17 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll
@@ -1147,6 +1147,21 @@ entry:
   ret void
 }
 
+; GCN-LABEL: {{^}}atomic_load_f64_addr64_offset:
+; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
+; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
+; CIVI: buffer_store_dwordx2 [[RET]]
+
+; GFX9: global_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], off offset:32 glc{{$}}
+define amdgpu_kernel void @atomic_load_f64_addr64_offset(double addrspace(1)* %in, double addrspace(1)* %out, i64 %index) {
+entry:
+  %ptr = getelementptr double, double addrspace(1)* %in, i64 %index
+  %gep = getelementptr double, double addrspace(1)* %ptr, i64 4
+  %val = load atomic double, double addrspace(1)* %gep seq_cst, align 8
+  store double %val, double addrspace(1)* %out
+  ret void
+}
+
 ; GCN-LABEL: {{^}}atomic_store_i64_offset:
 ; CI: buffer_store_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
 ; VI: flat_store_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
@@ -1190,3 +1205,15 @@ entry:
   store atomic i64 %in, i64 addrspace(1)* %ptr seq_cst, align 8
   ret void
 }
+
+; GCN-LABEL: {{^}}atomic_store_f64_addr64_offset:
+; CI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
+; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
+; GFX9: global_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], off offset:32{{$}}
+define amdgpu_kernel void @atomic_store_f64_addr64_offset(double %in, double addrspace(1)* %out, i64 %index) {
+entry:
+  %ptr = getelementptr double, double addrspace(1)* %out, i64 %index
+  %gep = getelementptr double, double addrspace(1)* %ptr, i64 4
+  store atomic double %in, double addrspace(1)* %gep seq_cst, align 8
+  ret void
+}