Skip to content

Commit

Permalink
SimpleLoopUnswitch: Restore uniform unswitch test
Browse files Browse the repository at this point in the history
This was supposed to document the new PM limitation but
was deleted in fb4113e

Switch to generated checks since that's more reliable than XFAIL, and
just preserve the preferred results as comments.
  • Loading branch information
arsenm committed Jul 7, 2023
1 parent 5a67fa2 commit 7128b12
Showing 1 changed file with 77 additions and 0 deletions.
77 changes: 77 additions & 0 deletions llvm/test/Transforms/SimpleLoopUnswitch/AMDGPU/uniform-unswitch.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -mtriple=amdgcn-- -passes='default<O3>' -S %s | FileCheck %s

; Desired outcome: loop unswitching happens and the condition is hoisted out of the loop.
; The condition is uniform, so even targets with divergence should perform unswitching.

; The unswitching currently does not happen with the new pass manager:
; https://bugs.llvm.org/show_bug.cgi?id=48819
; The correct behaviour (allow uniform non-trivial branches to be
; unswitched on all targets) requires access to the function-level
; divergence analysis from a loop transform, which is currently not
; supported in the new pass manager.

; SHOULDBE-LABEL: {{^}}define amdgpu_kernel void @uniform_unswitch
; SHOULDBE: entry:
; SHOULDBE-NEXT: [[LOOP_COND:%[a-z0-9]+]] = icmp
; SHOULDBE-NEXT: [[IF_COND:%[a-z0-9]+]] = icmp eq i32 %x, 123456
; SHOULDBE-NEXT: and i1 [[LOOP_COND]], [[IF_COND]]
; SHOULDBE-NEXT: br i1

; @uniform_unswitch is an amdgpu_kernel taking an output pointer %out, a trip
; count %n and a value %x. It runs a counted loop over i = 0 .. %n-1 (guarded so
; the loop is skipped when %n <= 0) and, on every iteration where %x == 123456,
; stores i into element i of %out. The comparison on %x is computed once in the
; block that precedes the loop, so the branch inside the loop body tests a
; loop-invariant value; per the comments at the top of this file, that branch is
; the one unswitching is expected to hoist.
define amdgpu_kernel void @uniform_unswitch(ptr nocapture %out, i32 %n, i32 %x) {
; CHECK-LABEL: define amdgpu_kernel void @uniform_unswitch
; CHECK-SAME: (ptr nocapture writeonly [[OUT:%.*]], i32 [[N:%.*]], i32 [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[OUT_GLOBAL:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK: for.body.lr.ph:
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[X]], 123456
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
; CHECK: for.body:
; CHECK-NEXT: [[I_07:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; CHECK: if.then:
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[I_07]] to i64
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[OUT_GLOBAL]], i64 [[TMP0]]
; CHECK-NEXT: store i32 [[I_07]], ptr addrspace(1) [[ARRAYIDX]], align 4
; CHECK-NEXT: br label [[FOR_INC]]
; CHECK: for.inc:
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_07]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
;
; Loop guard: enter the loop only when %n is strictly positive (signed compare);
; otherwise go straight to the return block.
entry:
%cmp6 = icmp sgt i32 %n, 0
br i1 %cmp6, label %for.body.lr.ph, label %for.cond.cleanup

; Block preceding the loop: the comparison on %x is evaluated here, once, so
; %cmp1 is invariant across all loop iterations.
for.body.lr.ph: ; preds = %entry
%cmp1 = icmp eq i32 %x, 123456
br label %for.body

; Loop exit block: only forwards to the shared return block.
for.cond.cleanup.loopexit: ; preds = %for.inc
br label %for.cond.cleanup

; Shared return, reached from the guard in entry or from the loop exit.
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
ret void

; Loop header: %i.07 is the induction variable (0 on entry, %inc on the back
; edge). The branch tests the invariant %cmp1 on every iteration.
for.body: ; preds = %for.inc, %for.body.lr.ph
%i.07 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
br i1 %cmp1, label %if.then, label %for.inc

; Conditional body: writes the current index value to element %i.07 of %out.
if.then: ; preds = %for.body
%arrayidx = getelementptr inbounds i32, ptr %out, i32 %i.07
store i32 %i.07, ptr %arrayidx, align 4
br label %for.inc

; Loop latch: increment the index and leave the loop once it equals %n.
for.inc: ; preds = %for.body, %if.then
%inc = add nuw nsw i32 %i.07, 1
%exitcond = icmp eq i32 %inc, %n
br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}

; Declaration of the AMDGPU work-item id (x dimension) intrinsic, tagged with
; attribute group #0.
; Review remark: @uniform_unswitch above contains no call to this intrinsic, so
; this declaration and the attribute group below look unused in this file;
; confirm before relying on or removing them.
declare i32 @llvm.amdgcn.workitem.id.x() #0

; Attribute group #0, referenced only by the intrinsic declaration above.
attributes #0 = { nounwind readnone }

0 comments on commit 7128b12

Please sign in to comment.