Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[AMDGPU] Enable divergence predicates for negative inline constant su…
…btraction We have a pattern that undoes the sub x, c -> add x, -c canonicalization, since c is more likely to be an inline immediate than -c. This patch enables it to select scalar or vector subtraction based on the divergence of the input node. Reviewed By: foad Differential Revision: https://reviews.llvm.org/D121360
- Loading branch information
Showing
3 changed files
with
88 additions
and
60 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
28 changes: 28 additions & 0 deletions
28
llvm/test/CodeGen/AMDGPU/divergence-driven-negsubinlineconst.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
; RUN: llc -march=amdgcn -stop-after=amdgpu-isel < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s | ||
; RUN: llc -march=amdgcn -mcpu=gfx900 -stop-after=amdgpu-isel < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX900 %s | ||
|
||
; Uniform case: the value is loaded directly through the %in kernel argument, | ||
; with no work-item id involved, and the add of -32 is expected to be selected | ||
; as a scalar S_SUB_I32 of the positive constant 32. | ||
; The label uses the enabled GCN prefix and the MIR "name:" field so that it is | ||
; actually checked against the -stop-after=amdgpu-isel output. | ||
; GCN-LABEL: name: uniform_add_SIC | ||
; GCN: S_SUB_I32 killed %{{[0-9]+}}, 32 | ||
define amdgpu_kernel void @uniform_add_SIC(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { | ||
%a = load i32, i32 addrspace(1)* %in | ||
%result = add i32 %a, -32 | ||
store i32 %result, i32 addrspace(1)* %out | ||
ret void | ||
} | ||
|
||
; Divergent case: the load address is indexed by workitem.id.x, and the add of | ||
; -32 is expected to be selected as a vector subtract of the positive constant | ||
; 32 (V_SUB_CO_U32_e64 for the default target, V_SUB_U32_e64 for gfx900). | ||
; The label names this function (not uniform_add_SIC) and uses the enabled GCN | ||
; prefix with the MIR "name:" field so that it is actually checked. | ||
; GCN-LABEL: name: divergent_add_SIC | ||
; SI: V_SUB_CO_U32_e64 killed %{{[0-9]+}}, 32 | ||
; GFX900: V_SUB_U32_e64 killed %{{[0-9]+}}, 32 | ||
define amdgpu_kernel void @divergent_add_SIC(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { | ||
%tid = call i32 @llvm.amdgcn.workitem.id.x() | ||
%gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid | ||
%a = load volatile i32, i32 addrspace(1)* %gep | ||
%result = add i32 %a, -32 | ||
store i32 %result, i32 addrspace(1)* %out | ||
ret void | ||
} | ||
|
||
; Intrinsic called by @divergent_add_SIC; its result is used as the GEP index into %in. | ||
declare i32 @llvm.amdgcn.workitem.id.x() #1 | ||
|
||
; Attribute group #0 is attached to both kernels, #1 to the intrinsic declaration. | ||
attributes #0 = { nounwind } | ||
attributes #1 = { nounwind readnone speculatable } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters