From c0d9bf2f6afdceca53cda1a294ceeb74385142d1 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 4 Nov 2021 14:46:20 -0700 Subject: [PATCH] [indvars] Allow rotation (narrowing) of exit test when discovering trip count This relaxes the one-use requirement on the rotation transform specifically for the case where we know we're zexting an IV of the loop. This allows us to discover trip count information in SCEV, which seems worth a single extra loop invariant truncate. Honestly, I'd prefer if SCEV could just compute the trip count directly (e.g. D109457), but this unblocks practical benefit. --- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 13 ++-- .../IndVarSimplify/finite-exit-comparisons.ll | 61 +++++++++++++++++++ 2 files changed, 70 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index ce180777e9784..d9858f2f79f88 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1493,10 +1493,6 @@ bool IndVarSimplify::canonicalizeExitCondition(Loop *L) { } assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS)); - if (!LHS->hasOneUse()) - // Can't rotate without increasing instruction count - continue; - // Match (icmp unsigned-cond zext, RHS) // TODO: Extend to handle corresponding sext/signed-cmp case // TODO: Extend to other invertible functions @@ -1504,6 +1500,15 @@ bool IndVarSimplify::canonicalizeExitCondition(Loop *L) { if (!match(LHS, m_ZExt(m_Value(LHSOp)))) continue; + // In general, we only rotate if we can do so without increasing the number + // of instructions. The exception is when we have a zext(add-rec). The + // reason for allowing this exception is that we know we need to get rid + // of the zext for SCEV to be able to compute a trip count for said loops; + // we consider the new trip count valuable enough to increase instruction + // count by one. 
+ if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp))) + continue; + // Given a icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS) // when zext is loop varying and RHS is loop invariant. This converts diff --git a/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll b/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll index d4336dfde43e8..74a85703fe8ca 100644 --- a/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll +++ b/llvm/test/Transforms/IndVarSimplify/finite-exit-comparisons.ll @@ -928,3 +928,64 @@ for.body: ; preds = %entry, %for.body for.end: ; preds = %for.body, %entry ret void } + +define i16 @ult_multiuse_profit(i16 %n.raw, i8 %start) mustprogress { +; CHECK-LABEL: @ult_multiuse_profit( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 254 to i8 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 +; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[IV_NEXT]] to i16 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[TMP0]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.end: +; CHECK-NEXT: [[ZEXT_LCSSA:%.*]] = phi i16 [ [[ZEXT]], [[FOR_BODY]] ] +; CHECK-NEXT: ret i16 [[ZEXT_LCSSA]] +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i8 [ %iv.next, %for.body ], [ %start, %entry ] + %iv.next = add i8 %iv, 1 + %zext = zext i8 %iv.next to i16 + %cmp = icmp ult i16 %zext, 254 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret i16 %zext +} + +define i16 @ult_multiuse_profit2(i16 %n.raw, i8 %start) mustprogress { +; CHECK-LABEL: @ult_multiuse_profit2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 254 to i8 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; 
CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[START:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[IV2:%.*]] = phi i16 [ [[ZEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1 +; CHECK-NEXT: [[ZEXT]] = zext i8 [[IV_NEXT]] to i16 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[TMP0]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.end: +; CHECK-NEXT: [[IV2_LCSSA:%.*]] = phi i16 [ [[IV2]], [[FOR_BODY]] ] +; CHECK-NEXT: ret i16 [[IV2_LCSSA]] +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i8 [ %iv.next, %for.body ], [ %start, %entry ] + %iv2 = phi i16 [%zext, %for.body], [0, %entry] + %iv.next = add i8 %iv, 1 + %zext = zext i8 %iv.next to i16 + %cmp = icmp ult i16 %zext, 254 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret i16 %iv2 +} +