AMDGPU: Add some missing atomics tests
We had no FP atomic load/store coverage.
arsenm committed Apr 26, 2020
1 parent 41eb0fc commit 4cef981
Showing 5 changed files with 306 additions and 1 deletion.
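For context on how these CodeGen tests are driven: each .ll file begins with RUN lines that feed the IR through llc and pipe the output to FileCheck, which matches the ; SI/CI/VI/GFX9/GCN check comments against the generated assembly. A minimal FP atomic load/store test in the same style is sketched below; the RUN line, target CPU, check prefix, and function name are assumptions for illustration, not lines taken from the changed files.

; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Illustrative sketch (assumed names): a sequentially consistent FP atomic
; load followed by an FP atomic store through flat pointers.
; GCN-LABEL: {{^}}load_store_atomic_f32:
; GCN: flat_load_dword
; GCN: flat_store_dword
define amdgpu_kernel void @load_store_atomic_f32(float* %in, float* %out) {
entry:
  %val = load atomic float, float* %in seq_cst, align 4
  store atomic float %val, float* %out seq_cst, align 4
  ret void
}

Such a file is normally run through llvm-lit as part of the AMDGPU CodeGen test suite.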
54 changes: 53 additions & 1 deletion llvm/test/CodeGen/AMDGPU/atomic_load_local.ll
@@ -46,7 +46,59 @@ define i64 @atomic_load_monotonic_i64(i64 addrspace(3)* %ptr) {
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i64 @atomic_load_monotonic_i64_offset(i64 addrspace(3)* %ptr) {
%gep = getelementptr inbounds i64, i64 addrspace(3)* %ptr, i64 16
%gep = getelementptr inbounds i64, i64 addrspace(3)* %ptr, i32 16
%load = load atomic i64, i64 addrspace(3)* %gep monotonic, align 8
ret i64 %load
}

; GCN-LABEL: {{^}}atomic_load_monotonic_f32_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define float @atomic_load_monotonic_f32_offset(float addrspace(3)* %ptr) {
%gep = getelementptr inbounds float, float addrspace(3)* %ptr, i32 16
%load = load atomic float, float addrspace(3)* %gep monotonic, align 4
ret float %load
}

; GCN-LABEL: {{^}}atomic_load_monotonic_f64_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define double @atomic_load_monotonic_f64_offset(double addrspace(3)* %ptr) {
%gep = getelementptr inbounds double, double addrspace(3)* %ptr, i32 16
%load = load atomic double, double addrspace(3)* %gep monotonic, align 8
ret double %load
}

; GCN-LABEL: {{^}}atomic_load_monotonic_p0i8_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i8* @atomic_load_monotonic_p0i8_offset(i8* addrspace(3)* %ptr) {
%gep = getelementptr inbounds i8*, i8* addrspace(3)* %ptr, i32 16
%load = load atomic i8*, i8* addrspace(3)* %gep monotonic, align 8
ret i8* %load
}

; GCN-LABEL: {{^}}atomic_load_monotonic_p3i8_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i8 addrspace(3)* @atomic_load_monotonic_p3i8_offset(i8 addrspace(3)* addrspace(3)* %ptr) {
%gep = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %ptr, i32 16
%load = load atomic i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %gep monotonic, align 4
ret i8 addrspace(3)* %load
}
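The ds_read offsets checked above follow directly from the GEP index and the element size (the DS offset field is a byte offset): an index of 16 gives offset:64 (16 * 4 bytes) for the f32 and p3i8 cases and offset:128 (16 * 8 bytes) for the f64 and p0i8 cases.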
84 changes: 84 additions & 0 deletions llvm/test/CodeGen/AMDGPU/flat_atomics.ll
@@ -1044,3 +1044,87 @@ entry:
store atomic i32 %in, i32* %ptr seq_cst, align 4
ret void
}

; GCN-LABEL: {{^}}atomic_load_f32_offset:
; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_f32_offset(float* %in, float* %out) {
entry:
%gep = getelementptr float, float* %in, i32 4
%val = load atomic float, float* %gep seq_cst, align 4
store float %val, float* %out
ret void
}

; GCN-LABEL: {{^}}atomic_load_f32:
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_f32(float* %in, float* %out) {
entry:
%val = load atomic float, float* %in seq_cst, align 4
store float %val, float* %out
ret void
}

; GCN-LABEL: {{^}}atomic_load_f32_addr64_offset:
; CIVI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GFX9: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_f32_addr64_offset(float* %in, float* %out, i64 %index) {
entry:
%ptr = getelementptr float, float* %in, i64 %index
%gep = getelementptr float, float* %ptr, i32 4
%val = load atomic float, float* %gep seq_cst, align 4
store float %val, float* %out
ret void
}

; GCN-LABEL: {{^}}atomic_load_f32_addr64:
; GCN: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_f32_addr64(float* %in, float* %out, i64 %index) {
entry:
%ptr = getelementptr float, float* %in, i64 %index
%val = load atomic float, float* %ptr seq_cst, align 4
store float %val, float* %out
ret void
}

; GCN-LABEL: {{^}}atomic_store_f32_offset:
; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_store_f32_offset(float %in, float* %out) {
entry:
%gep = getelementptr float, float* %out, i32 4
store atomic float %in, float* %gep seq_cst, align 4
ret void
}

; GCN-LABEL: {{^}}atomic_store_f32:
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @atomic_store_f32(float %in, float* %out) {
entry:
store atomic float %in, float* %out seq_cst, align 4
ret void
}

; GCN-LABEL: {{^}}atomic_store_f32_addr64_offset:
; CIVI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
; GFX9: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} offset:16{{$}}
define amdgpu_kernel void @atomic_store_f32_addr64_offset(float %in, float* %out, i64 %index) {
entry:
%ptr = getelementptr float, float* %out, i64 %index
%gep = getelementptr float, float* %ptr, i32 4
store atomic float %in, float* %gep seq_cst, align 4
ret void
}

; GCN-LABEL: {{^}}atomic_store_f32_addr64:
; GCN: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}{{$}}
define amdgpu_kernel void @atomic_store_f32_addr64(float %in, float* %out, i64 %index) {
entry:
%ptr = getelementptr float, float* %out, i64 %index
store atomic float %in, float* %ptr seq_cst, align 4
ret void
}
80 changes: 80 additions & 0 deletions llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
@@ -982,3 +982,83 @@ entry:
store i64 %extract0, i64* %out2
ret void
}

; GCN-LABEL: {{^}}atomic_load_f64_offset:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_f64_offset(double* %in, double* %out) {
entry:
%gep = getelementptr double, double* %in, i64 4
%val = load atomic double, double* %gep seq_cst, align 8
store double %val, double* %out
ret void
}

; GCN-LABEL: {{^}}atomic_load_f64:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_f64(double* %in, double* %out) {
entry:
%val = load atomic double, double* %in seq_cst, align 8
store double %val, double* %out
ret void
}

; GCN-LABEL: {{^}}atomic_load_f64_addr64_offset:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_f64_addr64_offset(double* %in, double* %out, i64 %index) {
entry:
%ptr = getelementptr double, double* %in, i64 %index
%gep = getelementptr double, double* %ptr, i64 4
%val = load atomic double, double* %gep seq_cst, align 8
store double %val, double* %out
ret void
}

; GCN-LABEL: {{^}}atomic_load_f64_addr64:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_f64_addr64(double* %in, double* %out, i64 %index) {
entry:
%ptr = getelementptr double, double* %in, i64 %index
%val = load atomic double, double* %ptr seq_cst, align 8
store double %val, double* %out
ret void
}

; GCN-LABEL: {{^}}atomic_store_f64_offset:
; GCN: flat_store_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_store_f64_offset(double %in, double* %out) {
entry:
%gep = getelementptr double, double* %out, i64 4
store atomic double %in, double* %gep seq_cst, align 8
ret void
}

; GCN-LABEL: {{^}}atomic_store_f64:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, v[{{[0-9]+}}:{{[0-9]+}}]
define amdgpu_kernel void @atomic_store_f64(double %in, double* %out) {
entry:
store atomic double %in, double* %out seq_cst, align 8
ret void
}

; GCN-LABEL: {{^}}atomic_store_f64_addr64_offset:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_store_f64_addr64_offset(double %in, double* %out, i64 %index) {
entry:
%ptr = getelementptr double, double* %out, i64 %index
%gep = getelementptr double, double* %ptr, i64 4
store atomic double %in, double* %gep seq_cst, align 8
ret void
}

; GCN-LABEL: {{^}}atomic_store_f64_addr64:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_store_f64_addr64(double %in, double* %out, i64 %index) {
entry:
%ptr = getelementptr double, double* %out, i64 %index
store atomic double %in, double* %ptr seq_cst, align 8
ret void
}
62 changes: 62 additions & 0 deletions llvm/test/CodeGen/AMDGPU/global_atomics.ll
@@ -1155,6 +1155,20 @@ entry:
ret void
}

; GCN-LABEL: {{^}}atomic_load_f32_offset:
; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], off offset:16 glc{{$}}
define amdgpu_kernel void @atomic_load_f32_offset(float addrspace(1)* %in, float addrspace(1)* %out) {
entry:
%gep = getelementptr float, float addrspace(1)* %in, i64 4
%val = load atomic float, float addrspace(1)* %gep seq_cst, align 4
store float %val, float addrspace(1)* %out
ret void
}

; GCN-LABEL: {{^}}atomic_load_i32:
; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc
@@ -1197,6 +1211,21 @@ entry:
ret void
}

; GCN-LABEL: {{^}}atomic_load_f32_addr64_offset:
; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], off offset:16 glc{{$}}
define amdgpu_kernel void @atomic_load_f32_addr64_offset(float addrspace(1)* %in, float addrspace(1)* %out, i64 %index) {
entry:
%ptr = getelementptr float, float addrspace(1)* %in, i64 %index
%gep = getelementptr float, float addrspace(1)* %ptr, i64 4
%val = load atomic float, float addrspace(1)* %gep seq_cst, align 4
store float %val, float addrspace(1)* %out
ret void
}

; GCN-LABEL: {{^}}atomic_store_i32_offset:
; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
@@ -1218,6 +1247,16 @@ entry:
ret void
}

; GCN-LABEL: {{^}}atomic_store_f32:
; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_store_f32(float %in, float addrspace(1)* %out) {
entry:
store atomic float %in, float addrspace(1)* %out seq_cst, align 4
ret void
}

; GCN-LABEL: {{^}}atomic_store_i32_addr64_offset:
; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
@@ -1230,6 +1269,18 @@ entry:
ret void
}

; GCN-LABEL: {{^}}atomic_store_f32_addr64_offset:
; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off offset:16{{$}}
define amdgpu_kernel void @atomic_store_f32_addr64_offset(float %in, float addrspace(1)* %out, i64 %index) {
entry:
%ptr = getelementptr float, float addrspace(1)* %out, i64 %index
%gep = getelementptr float, float addrspace(1)* %ptr, i64 4
store atomic float %in, float addrspace(1)* %gep seq_cst, align 4
ret void
}

; GCN-LABEL: {{^}}atomic_store_i32_addr64:
; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
@@ -1240,3 +1291,14 @@ entry:
store atomic i32 %in, i32 addrspace(1)* %ptr seq_cst, align 4
ret void
}

; GCN-LABEL: {{^}}atomic_store_f32_addr64:
; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}}, off{{$}}
define amdgpu_kernel void @atomic_store_f32_addr64(float %in, float addrspace(1)* %out, i64 %index) {
entry:
%ptr = getelementptr float, float addrspace(1)* %out, i64 %index
store atomic float %in, float addrspace(1)* %ptr seq_cst, align 4
ret void
}
27 changes: 27 additions & 0 deletions llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll
@@ -1147,6 +1147,21 @@ entry:
ret void
}

; GCN-LABEL: {{^}}atomic_load_f64_addr64_offset:
; CI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32 glc{{$}}
; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; CIVI: buffer_store_dwordx2 [[RET]]

; GFX9: global_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], off offset:32 glc{{$}}
define amdgpu_kernel void @atomic_load_f64_addr64_offset(double addrspace(1)* %in, double addrspace(1)* %out, i64 %index) {
entry:
%ptr = getelementptr double, double addrspace(1)* %in, i64 %index
%gep = getelementptr double, double addrspace(1)* %ptr, i64 4
%val = load atomic double, double addrspace(1)* %gep seq_cst, align 8
store double %val, double addrspace(1)* %out
ret void
}

; GCN-LABEL: {{^}}atomic_store_i64_offset:
; CI: buffer_store_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
; VI: flat_store_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
@@ -1190,3 +1205,15 @@ entry:
store atomic i64 %in, i64 addrspace(1)* %ptr seq_cst, align 8
ret void
}

; GCN-LABEL: {{^}}atomic_store_f64_addr64_offset:
; CI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:32{{$}}
; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
; GFX9: global_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], off offset:32{{$}}
define amdgpu_kernel void @atomic_store_f64_addr64_offset(double %in, double addrspace(1)* %out, i64 %index) {
entry:
%ptr = getelementptr double, double addrspace(1)* %out, i64 %index
%gep = getelementptr double, double addrspace(1)* %ptr, i64 4
store atomic double %in, double addrspace(1)* %gep seq_cst, align 8
ret void
}
