From f847094c246810d698d3d6d476f066e9b8779456 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Tue, 3 Nov 2020 13:18:46 +0700 Subject: [PATCH] [IndVars] Use knowledge about execution on last iteration when removing checks If we know that some check will not be executed on the last iteration, we can use this fact to eliminate its check. Differential Revision: https://reviews.llvm.org/D88210 Reviwed By: ebrevnov --- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 38 +++++++++++++++++-- .../IndVarSimplify/predicated_ranges.ll | 5 +-- 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 2a5d1da6717d5..0ee0d6e87a0d9 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -2418,6 +2418,7 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { }; bool Changed = false; + bool SkipLastIter = false; SmallSet DominatingExitCounts; for (BasicBlock *ExitingBB : ExitingBlocks) { const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); @@ -2425,18 +2426,49 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) { // Okay, we do not know the exit count here. Can we at least prove that it // will remain the same within iteration space? auto *BI = cast(ExitingBB->getTerminator()); - auto OptimizeCond = [&](bool Inverted) { - if (isTrivialCond(L, BI, SE, Inverted, MaxExitCount)) { + auto OptimizeCond = [this, L, BI, ExitingBB, MaxExitCount, &FoldExit]( + bool Inverted, bool SkipLastIter) { + const SCEV *MaxIter = MaxExitCount; + if (SkipLastIter) { + const SCEV *One = SE->getOne(MaxIter->getType()); + MaxIter = SE->getMinusSCEV(MaxIter, One); + } + if (isTrivialCond(L, BI, SE, Inverted, MaxIter)) { FoldExit(ExitingBB, Inverted); return true; } return false; }; - if (OptimizeCond(false) || OptimizeCond(true)) + + // TODO: We might have proved that we can skip the last iteration for + // this check. In this case, we only want to check the condition on the + // pre-last iteration (MaxExitCount - 1). However, there is a nasty + // corner case: + // + // for (i = len; i != 0; i--) { ... check (i ult X) ... } + // + // If we could not prove that len != 0, then we also could not prove that + // (len - 1) is not a UINT_MAX. If we simply query (len - 1), then + // OptimizeCond will likely not prove anything for it, even if it could + // prove the same fact for len. + // + // As a temporary solution, we query both last and pre-last iterations in + // hope that we will be able to prove triviality for at least one of + // them. We can stop querying MaxExitCount for this case once SCEV + // understands that (MaxExitCount - 1) will not overflow here. + if (OptimizeCond(false, false) || OptimizeCond(true, false)) Changed = true; + else if (SkipLastIter) + if (OptimizeCond(false, true) || OptimizeCond(true, true)) + Changed = true; continue; } + if (MaxExitCount == ExitCount) + // If the loop has more than 1 iteration, all further checks will be + // executed 1 iteration less. + SkipLastIter = true; + // If we know we'd exit on the first iteration, rewrite the exit to // reflect this. This does not imply the loop must exit through this // exit; there may be an earlier one taken on the first iteration. diff --git a/llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll b/llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll index 9e6908c308d2b..a01f12d303a7a 100644 --- a/llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll +++ b/llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll @@ -2,7 +2,7 @@ ; RUN: opt -indvars -S < %s | FileCheck %s ; RUN: opt -passes=indvars -S < %s | FileCheck %s -; TODO: should be able to remove the range check basing on the following facts: +; Check that we are able to remove the range check basing on the following facts: ; 0 <= len <= MAX_INT [1]; ; iv starts from len and goes down stopping at zero and [1], therefore ; 0 <= iv <= len [2]; @@ -21,8 +21,7 @@ define void @test_predicated_simple_unsigned(i32* %p, i32* %arr) { ; CHECK-NEXT: br i1 [[ZERO_COND]], label [[EXIT:%.*]], label [[RANGE_CHECK_BLOCK:%.*]] ; CHECK: range_check_block: ; CHECK-NEXT: [[IV_NEXT]] = sub i32 [[IV]], 1 -; CHECK-NEXT: [[RANGE_CHECK:%.*]] = icmp ult i32 [[IV_NEXT]], [[LEN]] -; CHECK-NEXT: br i1 [[RANGE_CHECK]], label [[BACKEDGE]], label [[FAIL:%.*]] +; CHECK-NEXT: br i1 true, label [[BACKEDGE]], label [[FAIL:%.*]] ; CHECK: backedge: ; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, i32* [[P]], i32 [[IV]] ; CHECK-NEXT: [[EL:%.*]] = load i32, i32* [[EL_PTR]], align 4