240 changes: 1 addition & 239 deletions llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/LoopUnrollAnalyzer.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
Expand Down Expand Up @@ -168,245 +169,6 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
return UP;
}

namespace {
// This class is used to get an estimate of the optimization effects that we
// could get from complete loop unrolling. It comes from the fact that some
// loads might be replaced with concrete constant values and that could trigger
// a chain of instruction simplifications.
//
// E.g. we might have:
//   int a[] = {0, 1, 0};
//   v = 0;
//   for (i = 0; i < 3; i ++)
//     v += b[i]*a[i];
// If we completely unroll the loop, we would get:
//   v = b[0]*a[0] + b[1]*a[1] + b[2]*a[2]
// Which then will be simplified to:
//   v = b[0]* 0 + b[1]* 1 + b[2]* 0
// And finally:
//   v = b[1]
//
// Every visit* method returns true when the visited instruction was
// simplified for the simulated iteration (either to a constant recorded in
// SimplifiedValues, or to a base+constant-offset address recorded in
// SimplifiedAddresses), and false otherwise.
class UnrolledInstAnalyzer : private InstVisitor<UnrolledInstAnalyzer, bool> {
  typedef InstVisitor<UnrolledInstAnalyzer, bool> Base;
  friend class InstVisitor<UnrolledInstAnalyzer, bool>;

  /// \brief An address expressed as a base value plus a constant offset.
  struct SimplifiedAddress {
    Value *Base = nullptr;
    ConstantInt *Offset = nullptr;
  };

public:
  /// \brief Create an analyzer that simulates iteration number \p Iteration.
  ///
  /// \p SimplifiedValues is owned by the caller; the analyzer both reads it
  /// and adds to it while instructions are visited.
  UnrolledInstAnalyzer(unsigned Iteration,
                       DenseMap<Value *, Constant *> &SimplifiedValues,
                       ScalarEvolution &SE)
      : SimplifiedValues(SimplifiedValues), SE(SE) {
    IterationNumber = SE.getConstant(APInt(64, Iteration));
  }

  // Allow access to the initial visit method.
  using Base::visit;

private:
  /// \brief A cache of pointer bases and constant-folded offsets corresponding
  /// to GEP (or derived from GEP) instructions.
  ///
  /// In order to find the base pointer one needs to perform non-trivial
  /// traversal of the corresponding SCEV expression, so it's good to have the
  /// results saved.
  DenseMap<Value *, SimplifiedAddress> SimplifiedAddresses;

  /// \brief SCEV expression corresponding to number of currently simulated
  /// iteration.
  const SCEV *IterationNumber;

  /// \brief A Value->Constant map for keeping values that we managed to
  /// constant-fold on the given iteration.
  ///
  /// While we walk the loop instructions, we build up and maintain a mapping
  /// of simplified values specific to this iteration. The idea is to propagate
  /// any special information we have about loads that can be replaced with
  /// constants after complete unrolling, and account for likely simplifications
  /// post-unrolling.
  DenseMap<Value *, Constant *> &SimplifiedValues;

  ScalarEvolution &SE;

  /// \brief Try to simplify instruction \p I using its SCEV expression.
  ///
  /// The idea is that some AddRec expressions become constants, which then
  /// could trigger folding of other instructions. However, that only happens
  /// for expressions whose start value is also constant, which isn't always the
  /// case. In another common and important case the start value is just some
  /// address (i.e. SCEVUnknown) - in this case we compute the offset and save
  /// it along with the base address instead.
  ///
  /// \returns true if \p I was recorded in SimplifiedValues or in
  /// SimplifiedAddresses.
  bool simplifyInstWithSCEV(Instruction *I) {
    if (!SE.isSCEVable(I->getType()))
      return false;

    const SCEV *S = SE.getSCEV(I);
    if (auto *SC = dyn_cast<SCEVConstant>(S)) {
      SimplifiedValues[I] = SC->getValue();
      return true;
    }

    auto *AR = dyn_cast<SCEVAddRecExpr>(S);
    if (!AR)
      return false;

    const SCEV *ValueAtIteration = AR->evaluateAtIteration(IterationNumber, SE);
    // Check if the AddRec expression becomes a constant.
    if (auto *SC = dyn_cast<SCEVConstant>(ValueAtIteration)) {
      SimplifiedValues[I] = SC->getValue();
      return true;
    }

    // Check if the offset from the base address becomes a constant.
    auto *Base = dyn_cast<SCEVUnknown>(SE.getPointerBase(S));
    if (!Base)
      return false;
    auto *Offset =
        dyn_cast<SCEVConstant>(SE.getMinusSCEV(ValueAtIteration, Base));
    if (!Offset)
      return false;
    SimplifiedAddress Address;
    Address.Base = Base->getValue();
    Address.Offset = Offset->getValue();
    SimplifiedAddresses[I] = Address;
    return true;
  }

  /// Base case for the instruction visitor.
  bool visitInstruction(Instruction &I) {
    return simplifyInstWithSCEV(&I);
  }

  /// Try to simplify binary operator I.
  ///
  /// Operands already present in SimplifiedValues are replaced by their
  /// constants before the simplifier is queried.
  ///
  /// TODO: Probably it's worth to hoist the code for estimating the
  /// simplifications effects to a separate class, since we have a very similar
  /// code in InlineCost already.
  bool visitBinaryOperator(BinaryOperator &I) {
    Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
    if (!isa<Constant>(LHS))
      if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
        LHS = SimpleLHS;
    if (!isa<Constant>(RHS))
      if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
        RHS = SimpleRHS;

    Value *SimpleV = nullptr;
    const DataLayout &DL = I.getModule()->getDataLayout();
    if (auto FI = dyn_cast<FPMathOperator>(&I))
      SimpleV =
          SimplifyFPBinOp(I.getOpcode(), LHS, RHS, FI->getFastMathFlags(), DL);
    else
      SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, DL);

    // Only constants are remembered, but any successful simplification counts.
    if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
      SimplifiedValues[&I] = C;

    if (SimpleV)
      return true;
    return Base::visitBinaryOperator(I);
  }

  /// Try to fold load I.
  ///
  /// The load is folded only when its address was previously recorded in
  /// SimplifiedAddresses, the base is a constant global with a definitive
  /// ConstantDataSequential initializer, and the constant byte offset selects
  /// exactly one in-bounds element of that initializer.
  bool visitLoad(LoadInst &I) {
    Value *AddrOp = I.getPointerOperand();

    auto AddressIt = SimplifiedAddresses.find(AddrOp);
    if (AddressIt == SimplifiedAddresses.end())
      return false;
    ConstantInt *SimplifiedAddrOp = AddressIt->second.Offset;

    auto *GV = dyn_cast<GlobalVariable>(AddressIt->second.Base);
    // We're only interested in loads that can be completely folded to a
    // constant.
    if (!GV || !GV->hasDefinitiveInitializer() || !GV->isConstant())
      return false;

    ConstantDataSequential *CDS =
        dyn_cast<ConstantDataSequential>(GV->getInitializer());
    if (!CDS)
      return false;

    // We might have a vector load from an array. FIXME: for now we just bail
    // out in this case, but we should be able to resolve and simplify such
    // loads.
    if (!CDS->isElementTypeCompatible(I.getType()))
      return false;

    unsigned ElemSize = CDS->getElementType()->getPrimitiveSizeInBits() / 8U;

    // The offset is a signed quantity. A negative offset (or one too large to
    // be represented as a non-negative 64-bit value) addresses memory outside
    // the initializer.
    // FIXME: For now we conservatively ignore out of bound accesses, but
    // we're allowed to perform the optimization in this case.
    const APInt &ByteOffset = SimplifiedAddrOp->getValue();
    if (ByteOffset.isNegative() || ByteOffset.getActiveBits() >= 64)
      return false;

    // An offset that is not a multiple of the element size straddles two
    // elements; the load cannot be folded to a single element's value.
    uint64_t Offset = ByteOffset.getZExtValue();
    if (Offset % ElemSize != 0)
      return false;

    uint64_t Index = Offset / ElemSize;
    if (Index >= CDS->getNumElements()) {
      // FIXME: For now we conservatively ignore out of bound accesses, but
      // we're allowed to perform the optimization in this case.
      return false;
    }

    Constant *CV = CDS->getElementAsConstant(static_cast<unsigned>(Index));
    assert(CV && "Constant expected.");
    SimplifiedValues[&I] = CV;

    return true;
  }

  /// Try to fold cast I when its operand is (or was simplified to) a constant.
  bool visitCastInst(CastInst &I) {
    // Propagate constants through casts.
    Constant *COp = dyn_cast<Constant>(I.getOperand(0));
    if (!COp)
      COp = SimplifiedValues.lookup(I.getOperand(0));

    // A constant taken from SimplifiedValues may have a type that differs from
    // the operand it stands in for (simplifyInstWithSCEV stores the SCEV
    // constant as is), so make sure the cast is well-formed before building
    // the constant expression.
    if (COp && CastInst::castIsValid(I.getOpcode(), COp, I.getType()))
      if (Constant *C =
              ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) {
        SimplifiedValues[&I] = C;
        return true;
      }

    return Base::visitCastInst(I);
  }

  /// Try to fold comparison I.
  ///
  /// Operands are first replaced by known constants. If neither operand is a
  /// constant but both are known addresses off the same base, their constant
  /// offsets are compared instead.
  bool visitCmpInst(CmpInst &I) {
    Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);

    // First try to handle simplified comparisons.
    if (!isa<Constant>(LHS))
      if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
        LHS = SimpleLHS;
    if (!isa<Constant>(RHS))
      if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
        RHS = SimpleRHS;

    if (!isa<Constant>(LHS) && !isa<Constant>(RHS)) {
      auto SimplifiedLHS = SimplifiedAddresses.find(LHS);
      if (SimplifiedLHS != SimplifiedAddresses.end()) {
        auto SimplifiedRHS = SimplifiedAddresses.find(RHS);
        if (SimplifiedRHS != SimplifiedAddresses.end()) {
          SimplifiedAddress &LHSAddr = SimplifiedLHS->second;
          SimplifiedAddress &RHSAddr = SimplifiedRHS->second;
          if (LHSAddr.Base == RHSAddr.Base) {
            LHS = LHSAddr.Offset;
            RHS = RHSAddr.Offset;
          }
        }
      }
    }

    if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
      if (Constant *CRHS = dyn_cast<Constant>(RHS)) {
        // After substitution the two constants are not guaranteed to share a
        // type; a compare constant expression requires matching operand types.
        if (CLHS->getType() == CRHS->getType()) {
          if (Constant *C =
                  ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) {
            SimplifiedValues[&I] = C;
            return true;
          }
        }
      }
    }

    return Base::visitCmpInst(I);
  }
};
} // namespace


namespace {
struct EstimatedUnrollCost {
/// \brief The estimated cost after unrolling.
Expand Down
1 change: 1 addition & 0 deletions llvm/unittests/Analysis/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ add_llvm_unittest(AnalysisTests
ScalarEvolutionTest.cpp
MixedTBAATest.cpp
ValueTrackingTest.cpp
UnrollAnalyzer.cpp
)
133 changes: 133 additions & 0 deletions llvm/unittests/Analysis/UnrollAnalyzer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
//===- UnrollAnalyzer.cpp - UnrollAnalyzer unit tests ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Analysis/LoopUnrollAnalyzer.h"
#include "llvm/IR/Dominators.h"
#include "gtest/gtest.h"

using namespace llvm;
namespace llvm {
// Forward declaration of the pass initializer that the UnrollAnalyzerTest
// constructor calls. NOTE(review): presumably defined by the INITIALIZE_PASS_*
// macro invocations at the bottom of this file - confirm.
void initializeUnrollAnalyzerTestPass(PassRegistry &);

// Results handed from the pass to the test body: runOnFunction clears this
// vector and then appends one Value->Constant map per simulated iteration.
static SmallVector<DenseMap<Value *, Constant *>, 16> SimplifiedValuesVector;
// Trip count of the analyzed loop, as computed in runOnFunction via
// getSmallConstantTripCount.
static unsigned TripCount = 0;

namespace {
struct UnrollAnalyzerTest : public FunctionPass {
static char ID;
bool runOnFunction(Function &F) override {
LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();

Function::iterator FI = F.begin();
FI++; // First basic block is entry - skip it.
BasicBlock *Header = &*FI++;
Loop *L = LI->getLoopFor(Header);

SimplifiedValuesVector.clear();
TripCount = SE->getSmallConstantTripCount(L, Header);
for (unsigned Iteration = 0; Iteration < TripCount; Iteration++) {
DenseMap<Value *, Constant *> SimplifiedValues;
UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, *SE);
for (Instruction &I : *Header)
Analyzer.visit(I);
SimplifiedValuesVector.push_back(SimplifiedValues);
}
return false;
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<ScalarEvolutionWrapperPass>();
AU.setPreservesAll();
}
UnrollAnalyzerTest() : FunctionPass(ID) {
initializeUnrollAnalyzerTestPass(*PassRegistry::getPassRegistry());
}
};
}

// Out-of-line definition of the pass's static ID member, which the
// UnrollAnalyzerTest constructor passes to the FunctionPass base.
char UnrollAnalyzerTest::ID = 0;

/// Parses the textual IR in \p ModuleStr into a module living in the global
/// context and returns whatever parseAssemblyString produces. Parse
/// diagnostics are collected locally and discarded. \p P is not used.
std::unique_ptr<Module> makeLLVMModule(UnrollAnalyzerTest *P,
                                       const char *ModuleStr) {
  SMDiagnostic Diag;
  return parseAssemblyString(ModuleStr, Diag, getGlobalContext());
}

// Runs the analyzer over a loop with a constant trip count of 8 and checks
// that the induction increment and the exit condition are folded to the
// expected constants on the first and on the last simulated iteration.
TEST(UnrollAnalyzerTest, BasicSimplifications) {
  const char *ModuleStr =
      "target datalayout = \"e-m:o-i64:64-f80:128-n8:16:32:64-S128\"\n"
      "define i64 @propagate_loop_phis() {\n"
      "entry:\n"
      "  br label %loop\n"
      "loop:\n"
      "  %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]\n"
      "  %x0 = phi i64 [ 0, %entry ], [ %x2, %loop ]\n"
      "  %x1 = or i64 %x0, 1\n"
      "  %x2 = or i64 %x1, 2\n"
      "  %inc = add nuw nsw i64 %iv, 1\n"
      "  %cond = icmp sge i64 %inc, 8\n"
      "  br i1 %cond, label %loop.end, label %loop\n"
      "loop.end:\n"
      "  %x.lcssa = phi i64 [ %x2, %loop ]\n"
      "  ret i64 %x.lcssa\n"
      "}\n";
  UnrollAnalyzerTest *P = new UnrollAnalyzerTest();
  std::unique_ptr<Module> M = makeLLVMModule(P, ModuleStr);
  legacy::PassManager Passes;
  // Hand the pass to the pass manager before any fatal assertion can return
  // from the test, so it is not leaked on failure.
  Passes.add(P);
  ASSERT_TRUE(M != nullptr) << "failed to parse the test module";
  Passes.run(*M);

  // The checks below index the first and the last iteration's results; stop
  // here if the pass did not produce them.
  ASSERT_GT(TripCount, 0U);
  ASSERT_EQ(SimplifiedValuesVector.size(), TripCount);

  // Perform checks
  Module::iterator MI = M->begin();
  Function *F = &*MI++;
  Function::iterator FI = F->begin();
  FI++; // First basic block is entry - skip it.
  BasicBlock *Header = &*FI++;

  // Skip %iv, %x0, %x1 and %x2 to reach %inc; %cond follows it.
  BasicBlock::iterator BBI = Header->begin();
  std::advance(BBI, 4);
  Instruction *Y1 = &*BBI++;
  Instruction *Y2 = &*BBI++;

  // Check simplification expected on the 1st iteration.
  // Check that "%inc = add nuw nsw i64 %iv, 1" is simplified to 1
  auto I1 = SimplifiedValuesVector[0].find(Y1);
  ASSERT_TRUE(I1 != SimplifiedValuesVector[0].end());
  auto *FirstInc = dyn_cast<ConstantInt>(I1->second);
  ASSERT_TRUE(FirstInc != nullptr);
  EXPECT_EQ(FirstInc->getZExtValue(), 1U);

  // Check that "%cond = icmp sge i64 %inc, 8" is simplified to false
  auto I2 = SimplifiedValuesVector[0].find(Y2);
  ASSERT_TRUE(I2 != SimplifiedValuesVector[0].end());
  auto *FirstCond = dyn_cast<ConstantInt>(I2->second);
  ASSERT_TRUE(FirstCond != nullptr);
  EXPECT_FALSE(FirstCond->getZExtValue());

  // Check simplification expected on the last iteration.
  // Check that "%inc = add nuw nsw i64 %iv, 1" is simplified to 8
  I1 = SimplifiedValuesVector[TripCount - 1].find(Y1);
  ASSERT_TRUE(I1 != SimplifiedValuesVector[TripCount - 1].end());
  auto *LastInc = dyn_cast<ConstantInt>(I1->second);
  ASSERT_TRUE(LastInc != nullptr);
  EXPECT_EQ(LastInc->getZExtValue(), TripCount);

  // Check that "%cond = icmp sge i64 %inc, 8" is simplified to true
  I2 = SimplifiedValuesVector[TripCount - 1].find(Y2);
  ASSERT_TRUE(I2 != SimplifiedValuesVector[TripCount - 1].end());
  auto *LastCond = dyn_cast<ConstantInt>(I2->second);
  ASSERT_TRUE(LastCond != nullptr);
  EXPECT_TRUE(LastCond->getZExtValue());
}
} // end namespace llvm

// Pass registration for UnrollAnalyzerTest under the name
// "unrollanalyzertestpass". The listed dependencies are the same three
// analyses that getAnalysisUsage marks as required.
// NOTE(review): these macros presumably expand to the definition of
// initializeUnrollAnalyzerTestPass declared near the top of the file - confirm.
INITIALIZE_PASS_BEGIN(UnrollAnalyzerTest, "unrollanalyzertestpass",
"unrollanalyzertestpass", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(UnrollAnalyzerTest, "unrollanalyzertestpass",
"unrollanalyzertestpass", false, false)