-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU][NFC] Test clean up. #85775
Merged
Merged
[AMDGPU][NFC] Test clean up. #85775
Conversation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Added common checks for the DPP and Iterative strategies in the uniform-value case, since the optimization applied is the same.
@llvm/pr-subscribers-backend-amdgpu Author: Pravin Jagtap (pravinjagtap) Changes: Added common checks for the DPP and Iterative strategies in the uniform-value case, since the optimization applied is the same. Patch is 47.32 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/85775.diff 2 Files Affected:
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomic_optimizer_fp_rtn.ll b/llvm/test/CodeGen/AMDGPU/global_atomic_optimizer_fp_rtn.ll
index e3fada3459a07f..538ef42121b83b 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomic_optimizer_fp_rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomic_optimizer_fp_rtn.ll
@@ -1,71 +1,43 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -mtriple=amdgcn-- -mcpu=gfx906 -amdgpu-atomic-optimizer-strategy=Iterative -passes='amdgpu-atomic-optimizer,verify<domtree>' %s | FileCheck -check-prefix=IR-ITERATIVE %s
-; RUN: opt -S -mtriple=amdgcn-- -mcpu=gfx906 -amdgpu-atomic-optimizer-strategy=DPP -passes='amdgpu-atomic-optimizer,verify<domtree>' %s | FileCheck -check-prefix=IR-DPP %s
+; RUN: opt -S -mtriple=amdgcn-- -mcpu=gfx906 -amdgpu-atomic-optimizer-strategy=Iterative -passes='amdgpu-atomic-optimizer,verify<domtree>' %s | FileCheck --check-prefixes=IR,IR-ITERATIVE %s
+; RUN: opt -S -mtriple=amdgcn-- -mcpu=gfx906 -amdgpu-atomic-optimizer-strategy=DPP -passes='amdgpu-atomic-optimizer,verify<domtree>' %s | FileCheck --check-prefixes=IR,IR-DPP %s
+
+; Tests various combinations of uniform/divergent address and uniform/divergent value inputs of various types for atomic operations.
+; Optimization remains same for Iterative and DPP strategies when value is uniform. These different scan/reduction
+; strategies are valid for only divergent values. This optimization is valid for divergent addresses. Test also covers different scopes.
define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float inreg %val) #0 {
-; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe(
-; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
-; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP24:%.*]]
-; IR-ITERATIVE: 2:
-; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
-; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
-; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
-; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
-; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
-; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
-; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]])
-; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
-; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = uitofp i32 [[TMP10]] to float
-; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = fmul float [[VAL:%.*]], [[TMP11]]
-; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
-; IR-ITERATIVE-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
-; IR-ITERATIVE: 14:
-; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("agent") monotonic, align 4
-; IR-ITERATIVE-NEXT: br label [[TMP16]]
-; IR-ITERATIVE: 16:
-; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
-; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = bitcast float [[TMP17]] to i32
-; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP18]])
-; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = bitcast i32 [[TMP19]] to float
-; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = uitofp i32 [[TMP8]] to float
-; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = fmul float [[VAL]], [[TMP21]]
-; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = fadd float [[TMP20]], [[TMP22]]
-; IR-ITERATIVE-NEXT: br label [[TMP24]]
-; IR-ITERATIVE: 24:
-; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP23]], [[TMP16]] ]
-; IR-ITERATIVE-NEXT: ret float [[TMP25]]
-;
-; IR-DPP-LABEL: @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe(
-; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
-; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP24:%.*]]
-; IR-DPP: 2:
-; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
-; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
-; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
-; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
-; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
-; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
-; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]])
-; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
-; IR-DPP-NEXT: [[TMP11:%.*]] = uitofp i32 [[TMP10]] to float
-; IR-DPP-NEXT: [[TMP12:%.*]] = fmul float [[VAL:%.*]], [[TMP11]]
-; IR-DPP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
-; IR-DPP-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
-; IR-DPP: 14:
-; IR-DPP-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("agent") monotonic, align 4
-; IR-DPP-NEXT: br label [[TMP16]]
-; IR-DPP: 16:
-; IR-DPP-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
-; IR-DPP-NEXT: [[TMP18:%.*]] = bitcast float [[TMP17]] to i32
-; IR-DPP-NEXT: [[TMP19:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP18]])
-; IR-DPP-NEXT: [[TMP20:%.*]] = bitcast i32 [[TMP19]] to float
-; IR-DPP-NEXT: [[TMP21:%.*]] = uitofp i32 [[TMP8]] to float
-; IR-DPP-NEXT: [[TMP22:%.*]] = fmul float [[VAL]], [[TMP21]]
-; IR-DPP-NEXT: [[TMP23:%.*]] = fadd float [[TMP20]], [[TMP22]]
-; IR-DPP-NEXT: br label [[TMP24]]
-; IR-DPP: 24:
-; IR-DPP-NEXT: [[TMP25:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP23]], [[TMP16]] ]
-; IR-DPP-NEXT: ret float [[TMP25]]
+; IR-LABEL: @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe(
+; IR-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
+; IR-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP24:%.*]]
+; IR: 2:
+; IR-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
+; IR-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
+; IR-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
+; IR-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
+; IR-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
+; IR-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
+; IR-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]])
+; IR-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
+; IR-NEXT: [[TMP11:%.*]] = uitofp i32 [[TMP10]] to float
+; IR-NEXT: [[TMP12:%.*]] = fmul float [[VAL:%.*]], [[TMP11]]
+; IR-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
+; IR-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
+; IR: 14:
+; IR-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("agent") monotonic, align 4
+; IR-NEXT: br label [[TMP16]]
+; IR: 16:
+; IR-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
+; IR-NEXT: [[TMP18:%.*]] = bitcast float [[TMP17]] to i32
+; IR-NEXT: [[TMP19:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP18]])
+; IR-NEXT: [[TMP20:%.*]] = bitcast i32 [[TMP19]] to float
+; IR-NEXT: [[TMP21:%.*]] = uitofp i32 [[TMP8]] to float
+; IR-NEXT: [[TMP22:%.*]] = fmul float [[VAL]], [[TMP21]]
+; IR-NEXT: [[TMP23:%.*]] = fadd float [[TMP20]], [[TMP22]]
+; IR-NEXT: br label [[TMP24]]
+; IR: 24:
+; IR-NEXT: [[TMP25:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP23]], [[TMP16]] ]
+; IR-NEXT: ret float [[TMP25]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4
ret float %result
@@ -411,7 +383,6 @@ define amdgpu_ps float @global_atomic_fsub_uni_address_uni_value_agent_scope_str
ret float %result
}
-
define amdgpu_ps float @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, float %val) #2 {
; IR-ITERATIVE-LABEL: @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
@@ -514,61 +485,33 @@ define amdgpu_ps float @global_atomic_fsub_uni_address_div_value_agent_scope_str
}
define amdgpu_ps float @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float inreg %val) #0 {
-; IR-ITERATIVE-LABEL: @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe(
-; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
-; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP20:%.*]]
-; IR-ITERATIVE: 2:
-; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
-; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
-; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
-; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
-; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
-; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
-; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
-; IR-ITERATIVE-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]]
-; IR-ITERATIVE: 10:
-; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-ITERATIVE-NEXT: br label [[TMP12]]
-; IR-ITERATIVE: 12:
-; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ]
-; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = bitcast float [[TMP13]] to i32
-; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP14]])
-; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = bitcast i32 [[TMP15]] to float
-; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = uitofp i32 [[TMP8]] to float
-; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = select i1 [[TMP9]], float 0x7FF0000000000000, float [[VAL]]
-; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call float @llvm.minnum.f32(float [[TMP16]], float [[TMP18]])
-; IR-ITERATIVE-NEXT: br label [[TMP20]]
-; IR-ITERATIVE: 20:
-; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP19]], [[TMP12]] ]
-; IR-ITERATIVE-NEXT: ret float [[TMP21]]
-;
-; IR-DPP-LABEL: @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe(
-; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
-; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP20:%.*]]
-; IR-DPP: 2:
-; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
-; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
-; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
-; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
-; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
-; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
-; IR-DPP-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
-; IR-DPP-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]]
-; IR-DPP: 10:
-; IR-DPP-NEXT: [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-DPP-NEXT: br label [[TMP12]]
-; IR-DPP: 12:
-; IR-DPP-NEXT: [[TMP13:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ]
-; IR-DPP-NEXT: [[TMP14:%.*]] = bitcast float [[TMP13]] to i32
-; IR-DPP-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP14]])
-; IR-DPP-NEXT: [[TMP16:%.*]] = bitcast i32 [[TMP15]] to float
-; IR-DPP-NEXT: [[TMP17:%.*]] = uitofp i32 [[TMP8]] to float
-; IR-DPP-NEXT: [[TMP18:%.*]] = select i1 [[TMP9]], float 0x7FF0000000000000, float [[VAL]]
-; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.minnum.f32(float [[TMP16]], float [[TMP18]])
-; IR-DPP-NEXT: br label [[TMP20]]
-; IR-DPP: 20:
-; IR-DPP-NEXT: [[TMP21:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP19]], [[TMP12]] ]
-; IR-DPP-NEXT: ret float [[TMP21]]
+; IR-LABEL: @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe(
+; IR-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
+; IR-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP20:%.*]]
+; IR: 2:
+; IR-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
+; IR-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
+; IR-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
+; IR-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
+; IR-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
+; IR-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
+; IR-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
+; IR-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]]
+; IR: 10:
+; IR-NEXT: [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
+; IR-NEXT: br label [[TMP12]]
+; IR: 12:
+; IR-NEXT: [[TMP13:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ]
+; IR-NEXT: [[TMP14:%.*]] = bitcast float [[TMP13]] to i32
+; IR-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP14]])
+; IR-NEXT: [[TMP16:%.*]] = bitcast i32 [[TMP15]] to float
+; IR-NEXT: [[TMP17:%.*]] = uitofp i32 [[TMP8]] to float
+; IR-NEXT: [[TMP18:%.*]] = select i1 [[TMP9]], float 0x7FF0000000000000, float [[VAL]]
+; IR-NEXT: [[TMP19:%.*]] = call float @llvm.minnum.f32(float [[TMP16]], float [[TMP18]])
+; IR-NEXT: br label [[TMP20]]
+; IR: 20:
+; IR-NEXT: [[TMP21:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP19]], [[TMP12]] ]
+; IR-NEXT: ret float [[TMP21]]
;
%result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
ret float %result
@@ -1007,159 +950,109 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_system_scope_st
ret float %result
}
-
define amdgpu_ps float @global_atomic_fadd_div_address_uni_value_agent_scope_unsafe(ptr addrspace(1) %ptr, float inreg %val) #0 {
-; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_address_uni_value_agent_scope_unsafe(
-; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-ITERATIVE-NEXT: ret float [[RESULT]]
-;
-; IR-DPP-LABEL: @global_atomic_fadd_div_address_uni_value_agent_scope_unsafe(
-; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-DPP-NEXT: ret float [[RESULT]]
+; IR-LABEL: @global_atomic_fadd_div_address_uni_value_agent_scope_unsafe(
+; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
+; IR-NEXT: ret float [[RESULT]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4
ret float %result
}
define amdgpu_ps float @global_atomic_fadd_div_address_div_value_agent_scope_unsafe(ptr addrspace(1) %ptr, float %val) #0 {
-; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_address_div_value_agent_scope_unsafe(
-; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-ITERATIVE-NEXT: ret float [[RESULT]]
-;
-; IR-DPP-LABEL: @global_atomic_fadd_div_address_div_value_agent_scope_unsafe(
-; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-DPP-NEXT: ret float [[RESULT]]
+; IR-LABEL: @global_atomic_fadd_div_address_div_value_agent_scope_unsafe(
+; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
+; IR-NEXT: ret float [[RESULT]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4
ret float %result
}
define amdgpu_ps float @global_atomic_fadd_div_address_uni_value_one_as_scope_unsafe_structfp(ptr addrspace(1) %ptr, float inreg %val) #1 {
-; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_address_uni_value_one_as_scope_unsafe_structfp(
-; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
-; IR-ITERATIVE-NEXT: ret float [[RESULT]]
-;
-; IR-DPP-LABEL: @global_atomic_fadd_div_address_uni_value_one_as_scope_unsafe_structfp(
-; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
-; IR-DPP-NEXT: ret float [[RESULT]]
+; IR-LABEL: @global_atomic_fadd_div_address_uni_value_one_as_scope_unsafe_structfp(
+; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
+; IR-NEXT: ret float [[RESULT]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic
ret float %result
}
define amdgpu_ps float @global_atomic_fadd_div_address_div_value_one_as_scope_unsafe_structfp(ptr addrspace(1) %ptr, float %val) #1 {
-; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_address_div_value_one_as_scope_unsafe_structfp(
-; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
-; IR-ITERATIVE-NEXT: ret float [[RESULT]]
-;
-; IR-DPP-LABEL: @global_atomic_fadd_div_address_div_value_one_as_scope_unsafe_structfp(
-; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
-; IR-DPP-NEXT: ret float [[RESULT]]
+; IR-LABEL: @global_atomic_fadd_div_address_div_value_one_as_scope_unsafe_structfp(
+; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
+; IR-NEXT: ret float [[RESULT]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic
ret float %result
}
define amdgpu_ps float @global_atomic_fsub_div_address_uni_value_agent_scope_strictfp(ptr addrspace(1) %ptr, float inreg %val) #2 {
-; IR-ITERATIVE-LABEL: @global_atomic_fsub_div_address_uni_value_agent_scope_strictfp(
-; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-ITERATIVE-NEXT: ret float [[RESULT]]
-;
-; IR-DPP-LABEL: @global_atomic_fsub_div_address_uni_value_agent_scope_strictfp(
-; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-DPP-NEXT: ret float [[RESULT]]
+; IR-LABEL: @global_atomic_fsub_div_address_uni_value_agent_scope_strictfp(
+; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
+; IR-NEXT: ret float [[RESULT]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
ret float %result
}
-
define amdgpu_ps float @global_atomic_fsub_div_address_div_value_agent_scope_strictfp(ptr addrspace(1) %ptr, float %val) #2 {
-; IR-ITERATIVE-LABEL: @global_atomic_fsub_div_address_div_value_agent_scope_strictfp(
-; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-ITERATIVE-NEXT: ret float [[RESULT]]
-;
-; IR-DPP-LABEL: @global_atomic_fsub_div_address_div_value_agent_scope_strictfp(
-; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-DPP-NEXT: ret float [[RESULT]]
+; IR-LABEL: @global_atomic_fsub_div_address_div_value_agent_scope_strictfp(
+; IR-NEXT: [[RESULT:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
+; IR-NEXT: ret float [[RESULT]]
;
%result = atomicrmw fsub ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
ret float %result
}
define amdgpu_ps float @global_atomic_fmin_div_address_uni_value_agen...
[truncated]
|
arsenm
approved these changes
Mar 19, 2024
chencha3
pushed a commit
to chencha3/llvm-project
that referenced
this pull request
Mar 23, 2024
Added common check for DPP and Iterative strategies for uniform value case since optimization applied is same. Authored-by: Pravin Jagtap <Pravin.Jagtap@amd.com>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Added common checks for the DPP and Iterative strategies in the uniform-value case, since the optimization applied is the same.