-
Notifications
You must be signed in to change notification settings - Fork 10.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
SimpleLoopUnswitch: Restore uniform unswitch test
This was supposed to document the new PM limitation but was deleted in fb4113e. Switch to generated checks, since that's more reliable than XFAIL, and just preserve the preferred results as comments.
- Loading branch information
Showing
1 changed file
with
77 additions
and
0 deletions.
There are no files selected for viewing
77 changes: 77 additions & 0 deletions
77
llvm/test/Transforms/SimpleLoopUnswitch/AMDGPU/uniform-unswitch.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 | ||
; RUN: opt -mtriple=amdgcn-- -passes='default<O3>' -S %s | FileCheck %s | ||
|
||
; Check that loop unswitching happens and that the condition is hoisted out of the loop. | ||
; The condition is uniform, so even targets with divergence should perform unswitching. | ||
|
||
; This fails with the new pass manager: | ||
; https://bugs.llvm.org/show_bug.cgi?id=48819 | ||
; The correct behaviour (allow uniform non-trivial branches to be | ||
; unswitched on all targets) requires access to the function-level | ||
; divergence analysis from a loop transform, which is currently not | ||
; supported in the new pass manager. | ||
|
||
; SHOULDBE-LABEL: {{^}}define amdgpu_kernel void @uniform_unswitch | ||
; SHOULDBE: entry: | ||
; SHOULDBE-NEXT: [[LOOP_COND:%[a-z0-9]+]] = icmp | ||
; SHOULDBE-NEXT: [[IF_COND:%[a-z0-9]+]] = icmp eq i32 %x, 123456 | ||
; SHOULDBE-NEXT: and i1 [[LOOP_COND]], [[IF_COND]] | ||
; SHOULDBE-NEXT: br i1 | ||
|
||
; Kernel under test: for each i in [0, %n), store i to %out[i] when %x == 123456. | ||
; The guard compare (%cmp1) is computed once before the loop, but the branch on | ||
; it sits inside the loop body. The autogenerated assertions that follow pin the | ||
; current opt output, in which that branch still lives in for.body. | ||
define amdgpu_kernel void @uniform_unswitch(ptr nocapture %out, i32 %n, i32 %x) { | ||
; CHECK-LABEL: define amdgpu_kernel void @uniform_unswitch | ||
; CHECK-SAME: (ptr nocapture writeonly [[OUT:%.*]], i32 [[N:%.*]], i32 [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { | ||
; CHECK-NEXT: entry: | ||
; CHECK-NEXT: [[OUT_GLOBAL:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1) | ||
; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N]], 0 | ||
; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]] | ||
; CHECK: for.body.lr.ph: | ||
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[X]], 123456 | ||
; CHECK-NEXT: br label [[FOR_BODY:%.*]] | ||
; CHECK: for.cond.cleanup: | ||
; CHECK-NEXT: ret void | ||
; CHECK: for.body: | ||
; CHECK-NEXT: [[I_07:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] | ||
; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] | ||
; CHECK: if.then: | ||
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[I_07]] to i64 | ||
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_GLOBAL]], i64 [[TMP0]] | ||
; CHECK-NEXT: store i32 [[I_07]], ptr addrspace(1) [[ARRAYIDX]], align 4 | ||
; CHECK-NEXT: br label [[FOR_INC]] | ||
; CHECK: for.inc: | ||
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_07]], 1 | ||
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] | ||
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] | ||
; | ||
; Guard: the loop is entered only when %n > 0; otherwise return immediately. | ||
entry: | ||
%cmp6 = icmp sgt i32 %n, 0 | ||
br i1 %cmp6, label %for.body.lr.ph, label %for.cond.cleanup | ||
|
||
; Sole entry edge into the loop. %cmp1 depends only on the kernel argument %x | ||
; and is evaluated here, outside the loop, so it is loop-invariant. | ||
for.body.lr.ph: ; preds = %entry | ||
%cmp1 = icmp eq i32 %x, 123456 | ||
br label %for.body | ||
|
||
; Dedicated exit block for the loop; only forwards to the shared return block. | ||
for.cond.cleanup.loopexit: ; preds = %for.inc | ||
br label %for.cond.cleanup | ||
|
||
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry | ||
ret void | ||
|
||
; Loop header: %i.07 is the induction variable (starts at 0, advanced in for.inc). | ||
; The branch on the invariant %cmp1 is the unswitching candidate this test targets. | ||
for.body: ; preds = %for.inc, %for.body.lr.ph | ||
%i.07 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] | ||
br i1 %cmp1, label %if.then, label %for.inc | ||
|
||
; Taken only when %x == 123456: out[i] = i. | ||
if.then: ; preds = %for.body | ||
%arrayidx = getelementptr inbounds i32, ptr %out, i32 %i.07 | ||
store i32 %i.07, ptr %arrayidx, align 4 | ||
br label %for.inc | ||
|
||
; Latch: i = i + 1; leave the loop once i == %n, otherwise iterate again. | ||
for.inc: ; preds = %for.body, %if.then | ||
%inc = add nuw nsw i32 %i.07, 1 | ||
%exitcond = icmp eq i32 %inc, %n | ||
br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body | ||
} | ||
|
||
; Declaration of the AMDGPU workitem-id intrinsic, carrying attribute group #0. | ||
; NOTE(review): no call to this intrinsic is visible in this file; confirm | ||
; whether the declaration is still needed before removing it. | ||
declare i32 @llvm.amdgcn.workitem.id.x() #0 | ||
|
||
; Attribute group referenced by the declaration above. | ||
attributes #0 = { nounwind readnone } |