Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Remarks] Emit optimization remarks for atomics generating CAS loop
Implements ORE in AtomicExpand pass to report atomics generating a compare and swap loop. Differential Revision: https://reviews.llvm.org/D106891
- Loading branch information
Showing
11 changed files
with
249 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
// RUN: %clang_cc1 %s -triple=amdgcn-amd-amdhsa -fcuda-is-device \ | ||
// RUN: -target-cpu gfx90a -Rpass=atomic-expand -S -o - 2>&1 | \ | ||
// RUN: FileCheck %s --check-prefix=GFX90A-CAS | ||
|
||
// REQUIRES: amdgpu-registered-target | ||
|
||
#include "Inputs/cuda.h" | ||
#include <stdatomic.h> | ||
|
||
// GFX90A-CAS: A compare and swap loop was generated for an atomic fadd operation at system memory scope | ||
// GFX90A-CAS-LABEL: _Z14atomic_add_casPf | ||
// GFX90A-CAS: flat_atomic_cmpswap v0, v[2:3], v[4:5] glc | ||
// GFX90A-CAS: s_cbranch_execnz | ||
__device__ float atomic_add_cas(float *p) { | ||
return __atomic_fetch_add(p, 1.0f, memory_order_relaxed); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
// RUN: %clang_cc1 %s -cl-std=CL2.0 -O0 -triple=amdgcn-amd-amdhsa -target-cpu gfx90a \ | ||
// RUN: -Rpass=atomic-expand -S -o - 2>&1 | \ | ||
// RUN: FileCheck %s --check-prefix=REMARK | ||
|
||
// RUN: %clang_cc1 %s -cl-std=CL2.0 -O0 -triple=amdgcn-amd-amdhsa -target-cpu gfx90a \ | ||
// RUN: -Rpass=atomic-expand -S -emit-llvm -o - 2>&1 | \ | ||
// RUN: FileCheck %s --check-prefix=GFX90A-CAS | ||
|
||
// REQUIRES: amdgpu-registered-target | ||
|
||
typedef enum memory_order { | ||
memory_order_relaxed = __ATOMIC_RELAXED, | ||
memory_order_acquire = __ATOMIC_ACQUIRE, | ||
memory_order_release = __ATOMIC_RELEASE, | ||
memory_order_acq_rel = __ATOMIC_ACQ_REL, | ||
memory_order_seq_cst = __ATOMIC_SEQ_CST | ||
} memory_order; | ||
|
||
typedef enum memory_scope { | ||
memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, | ||
memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, | ||
memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, | ||
memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, | ||
#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) | ||
memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP | ||
#endif | ||
} memory_scope; | ||
|
||
// REMARK: remark: A compare and swap loop was generated for an atomic fadd operation at workgroup-one-as memory scope [-Rpass=atomic-expand] | ||
// REMARK: remark: A compare and swap loop was generated for an atomic fadd operation at agent-one-as memory scope [-Rpass=atomic-expand] | ||
// REMARK: remark: A compare and swap loop was generated for an atomic fadd operation at one-as memory scope [-Rpass=atomic-expand] | ||
// REMARK: remark: A compare and swap loop was generated for an atomic fadd operation at wavefront-one-as memory scope [-Rpass=atomic-expand] | ||
// GFX90A-CAS-LABEL: @atomic_cas | ||
// GFX90A-CAS: atomicrmw fadd float addrspace(1)* {{.*}} syncscope("workgroup-one-as") monotonic | ||
// GFX90A-CAS: atomicrmw fadd float addrspace(1)* {{.*}} syncscope("agent-one-as") monotonic | ||
// GFX90A-CAS: atomicrmw fadd float addrspace(1)* {{.*}} syncscope("one-as") monotonic | ||
// GFX90A-CAS: atomicrmw fadd float addrspace(1)* {{.*}} syncscope("wavefront-one-as") monotonic | ||
float atomic_cas(__global atomic_float *d, float a) { | ||
float ret1 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group); | ||
float ret2 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_device); | ||
float ret3 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_all_svm_devices); | ||
float ret4 = __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_sub_group); | ||
} | ||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs --pass-remarks=atomic-expand \ | ||
; RUN: %s -o - 2>&1 | FileCheck %s --check-prefix=GFX90A-CAS | ||
|
||
; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd operation at system memory scope | ||
; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd operation at agent memory scope | ||
; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd operation at workgroup memory scope | ||
; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd operation at wavefront memory scope | ||
; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd operation at singlethread memory scope | ||
; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd operation at one-as memory scope | ||
; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd operation at agent-one-as memory scope | ||
; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd operation at workgroup-one-as memory scope | ||
; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd operation at wavefront-one-as memory scope | ||
; GFX90A-CAS: A compare and swap loop was generated for an atomic fadd operation at singlethread-one-as memory scope | ||
|
||
; GFX90A-CAS-LABEL: atomic_add_cas: | ||
; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc | ||
; GFX90A-CAS: s_cbranch_execnz | ||
define dso_local void @atomic_add_cas(float* %p, float %q) { | ||
entry: | ||
%ret = atomicrmw fadd float* %p, float %q monotonic, align 4 | ||
ret void | ||
} | ||
|
||
; GFX90A-CAS-LABEL: atomic_add_cas_agent: | ||
; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc | ||
; GFX90A-CAS: s_cbranch_execnz | ||
define dso_local void @atomic_add_cas_agent(float* %p, float %q) { | ||
entry: | ||
%ret = atomicrmw fadd float* %p, float %q syncscope("agent") monotonic, align 4 | ||
ret void | ||
} | ||
|
||
; GFX90A-CAS-LABEL: atomic_add_cas_workgroup: | ||
; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc | ||
; GFX90A-CAS: s_cbranch_execnz | ||
define dso_local void @atomic_add_cas_workgroup(float* %p, float %q) { | ||
entry: | ||
%ret = atomicrmw fadd float* %p, float %q syncscope("workgroup") monotonic, align 4 | ||
ret void | ||
} | ||
|
||
; GFX90A-CAS-LABEL: atomic_add_cas_wavefront: | ||
; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc | ||
; GFX90A-CAS: s_cbranch_execnz | ||
define dso_local void @atomic_add_cas_wavefront(float* %p, float %q) { | ||
entry: | ||
%ret = atomicrmw fadd float* %p, float %q syncscope("wavefront") monotonic, align 4 | ||
ret void | ||
} | ||
|
||
; GFX90A-CAS-LABEL: atomic_add_cas_singlethread: | ||
; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc | ||
; GFX90A-CAS: s_cbranch_execnz | ||
define dso_local void @atomic_add_cas_singlethread(float* %p, float %q) { | ||
entry: | ||
%ret = atomicrmw fadd float* %p, float %q syncscope("singlethread") monotonic, align 4 | ||
ret void | ||
} | ||
|
||
; GFX90A-CAS-LABEL: atomic_add_cas_one_as: | ||
; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc | ||
; GFX90A-CAS: s_cbranch_execnz | ||
define dso_local void @atomic_add_cas_one_as(float* %p, float %q) { | ||
entry: | ||
%ret = atomicrmw fadd float* %p, float %q syncscope("one-as") monotonic, align 4 | ||
ret void | ||
} | ||
|
||
; GFX90A-CAS-LABEL: atomic_add_cas_agent_one_as: | ||
; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc | ||
; GFX90A-CAS: s_cbranch_execnz | ||
define dso_local void @atomic_add_cas_agent_one_as(float* %p, float %q) { | ||
entry: | ||
%ret = atomicrmw fadd float* %p, float %q syncscope("agent-one-as") monotonic, align 4 | ||
ret void | ||
} | ||
|
||
; GFX90A-CAS-LABEL: atomic_add_cas_workgroup_one_as: | ||
; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc | ||
; GFX90A-CAS: s_cbranch_execnz | ||
define dso_local void @atomic_add_cas_workgroup_one_as(float* %p, float %q) { | ||
entry: | ||
%ret = atomicrmw fadd float* %p, float %q syncscope("workgroup-one-as") monotonic, align 4 | ||
ret void | ||
} | ||
|
||
; GFX90A-CAS-LABEL: atomic_add_cas_wavefront_one_as: | ||
; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc | ||
; GFX90A-CAS: s_cbranch_execnz | ||
define dso_local void @atomic_add_cas_wavefront_one_as(float* %p, float %q) { | ||
entry: | ||
%ret = atomicrmw fadd float* %p, float %q syncscope("wavefront-one-as") monotonic, align 4 | ||
ret void | ||
} | ||
|
||
; GFX90A-CAS-LABEL: atomic_add_cas_singlethread_one_as: | ||
; GFX90A-CAS: flat_atomic_cmpswap v3, v[0:1], v[4:5] glc | ||
; GFX90A-CAS: s_cbranch_execnz | ||
define dso_local void @atomic_add_cas_singlethread_one_as(float* %p, float %q) { | ||
entry: | ||
%ret = atomicrmw fadd float* %p, float %q syncscope("singlethread-one-as") monotonic, align 4 | ||
ret void | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.