-
Notifications
You must be signed in to change notification settings - Fork 10.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Reapply [ValueTracking] Support min/max selects in computeConstantRange()
Add support for min/max flavor selects in computeConstantRange(), which allows us to fold comparisons of a min/max against a constant in InstSimplify. This fixes an infinite InstCombine loop, with the test case taken from D59378. Relative to the previous iteration, this contains some adjustments for AMDGPU med3 tests: The AMDGPU target runs InstSimplify prior to codegen, which ends up constant folding some existing med3 tests after this change. To preserve these tests a hidden -amdgpu-scalar-ir-passes option is added, which allows disabling scalar IR passes (that use InstSimplify) for testing purposes. Differential Revision: https://reviews.llvm.org/D59506 llvm-svn: 357870
- Loading branch information
Showing
8 changed files
with
115 additions
and
58 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
; RUN: llc -march=amdgcn -verify-machineinstrs -amdgpu-scalar-ir-passes=false < %s | FileCheck -check-prefix=GCN -check-prefix=SICIVI -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -amdgpu-scalar-ir-passes=false < %s | FileCheck -check-prefix=GCN -check-prefix=SICIVI -check-prefix=VI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs -amdgpu-scalar-ir-passes=false < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
|
||
; These tests are split out from umed3.ll and smed3.ll and use the
; -amdgpu-scalar-ir-passes=false flag, because InstSimplify would constant
; fold these functions otherwise.
|
||
; Intrinsic returning an i32; the kernels in this file use its result as the
; element index into their input and output buffers.
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
|
||
; Unsigned clamp with the constants in "inverted" order: the loaded value is
; first raised to at least 17 (unsigned max) and then capped at 12 (unsigned
; min). Since the lower bound is larger than the upper bound, the result is
; always 12, which is why this select chain is foldable by IR simplification.
; The checks below require that codegen still emits a distinct max
; instruction followed by a distinct min instruction.
; GCN-LABEL: {{^}}v_test_umed3_r_i_i_constant_order_i32:
; GCN: v_max_u32_e32 v{{[0-9]+}}, 17, v{{[0-9]+}}
; GCN: v_min_u32_e32 v{{[0-9]+}}, 12, v{{[0-9]+}}
define amdgpu_kernel void @v_test_umed3_r_i_i_constant_order_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #1 {
  ; Address aptr[tid] for the input and out[tid] for the result.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
  %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %a = load i32, i32 addrspace(1)* %gep0

  ; i0 = umax(a, 17), spelled as icmp + select.
  %icmp0 = icmp ugt i32 %a, 17
  %i0 = select i1 %icmp0, i32 %a, i32 17

  ; i1 = umin(i0, 12), spelled as icmp + select.
  %icmp1 = icmp ult i32 %i0, 12
  %i1 = select i1 %icmp1, i32 %i0, i32 12

  store i32 %i1, i32 addrspace(1)* %outgep
  ret void
}
|
||
; Signed clamp with the constants in "inverted" order: the loaded value is
; first raised to at least 17 (signed max) and then capped at 12 (signed min).
; Since the lower bound is larger than the upper bound, the result is always
; 12, which is why this select chain is foldable by IR simplification. The
; checks below require that codegen still emits a distinct max instruction
; followed by a distinct min instruction.
; GCN-LABEL: {{^}}v_test_smed3_r_i_i_constant_order_i32:
; GCN: v_max_i32_e32 v{{[0-9]+}}, 17, v{{[0-9]+}}
; GCN: v_min_i32_e32 v{{[0-9]+}}, 12, v{{[0-9]+}}
define amdgpu_kernel void @v_test_smed3_r_i_i_constant_order_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #1 {
  ; Address aptr[tid] for the input and out[tid] for the result.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
  %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
  %a = load i32, i32 addrspace(1)* %gep0

  ; i0 = smax(a, 17), spelled as icmp + select.
  %icmp0 = icmp sgt i32 %a, 17
  %i0 = select i1 %icmp0, i32 %a, i32 17

  ; i1 = smin(i0, 12), spelled as icmp + select.
  %icmp1 = icmp slt i32 %i0, 12
  %i1 = select i1 %icmp1, i32 %i0, i32 12

  store i32 %i1, i32 addrspace(1)* %outgep
  ret void
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters