Expand Up
@@ -209,6 +209,29 @@ namespace {
class LoopVectorizationLegality ;
class LoopVectorizationCostModel ;
/// Optimization analysis message produced during vectorization. Messages inform
/// the user why vectorization did not occur.
///
/// Every message starts with a fixed prefix written by the constructor; further
/// text is appended with operator<<. An instruction may be attached so that the
/// consumer of the message can refer to it.
class Report {
  // Backing storage for the message text. Declared before Out, which is
  // constructed from it.
  std::string Message;
  // Stream used to append text to Message.
  raw_string_ostream Out;
  // Instruction associated with the message; null when none was supplied.
  Instruction *Instr;

public:
  /// Begin a new message, optionally associated with instruction \p I.
  Report(Instruction *I = nullptr) : Out(Message), Instr(I) {
    Out << " loop not vectorized: " ;
  }

  /// Append \p Value to the message and return this report so that calls can
  /// be chained.
  template <typename A> Report &operator<<(const A &Value) {
    Out << Value;
    return *this;
  }

  /// Return the instruction passed to the constructor (may be null).
  /// Const-qualified: it only reads a member.
  Instruction *getInstr() const { return Instr; }

  /// Return the message text as produced by the underlying stream.
  std::string &str() { return Out.str(); }

  /// Convert the message text to a Twine.
  operator Twine() { return Out.str(); }
};
// / InnerLoopVectorizer vectorizes loops which contain only one basic
// / block to a specified vectorization factor (VF).
// / This class performs the widening of scalars into vectors, or multiple
Expand Down
Expand Up
@@ -515,10 +538,12 @@ class LoopVectorizationLegality {
unsigned NumPredStores;
LoopVectorizationLegality (Loop *L, ScalarEvolution *SE, const DataLayout *DL,
DominatorTree *DT, TargetLibraryInfo *TLI)
DominatorTree *DT, TargetLibraryInfo *TLI,
Function *F)
: NumLoads(0 ), NumStores(0 ), NumPredStores(0 ), TheLoop(L), SE(SE), DL(DL),
DT (DT), TLI(TLI), Induction(nullptr ), WidestIndTy(nullptr ),
HasFunNoNaNAttr(false ), MaxSafeDepDistBytes(-1U ) {}
DT (DT), TLI(TLI), TheFunction(F), Induction(nullptr ),
WidestIndTy(nullptr ), HasFunNoNaNAttr(false ), MaxSafeDepDistBytes(-1U ) {
}
// / This enum represents the kinds of reductions that we support.
enum ReductionKind {
Expand Down
Expand Up
@@ -747,6 +772,16 @@ class LoopVectorizationLegality {
// / invariant.
void collectStridedAcccess (Value *LoadOrStoreInst);
/// Report an analysis message to assist the user in diagnosing loops that are
/// not vectorized.
///
/// The remark is attributed to the instruction attached to \p Message when that
/// instruction carries a debug location; otherwise it is attributed to the
/// start location of the loop.
void emitAnalysis(Report &Message) {
  // Named Loc rather than DL so the local does not hide the DL member of this
  // class.
  DebugLoc Loc = TheLoop->getStartLoc();
  if (Instruction *I = Message.getInstr()) {
    // An instruction without debug info would replace the loop location with
    // an unknown one; keep the loop's location in that case.
    const DebugLoc &InstrLoc = I->getDebugLoc();
    if (!InstrLoc.isUnknown())
      Loc = InstrLoc;
  }
  emitOptimizationRemarkAnalysis(TheFunction->getContext(), DEBUG_TYPE,
                                 *TheFunction, Loc, Message.str());
}
// / The loop that we evaluate.
Loop *TheLoop;
// / Scev analysis.
Expand All
@@ -757,6 +792,8 @@ class LoopVectorizationLegality {
DominatorTree *DT;
// / Target Library Info.
TargetLibraryInfo *TLI;
// / Parent function
Function *TheFunction;
// --- vectorization state --- //
Expand Down
Expand Up
@@ -942,6 +979,29 @@ class LoopVectorizeHints {
LoopID = NewLoopID;
}
std::string emitRemark () const {
Report R;
R << " vectorization " ;
switch (Force) {
case LoopVectorizeHints::FK_Disabled:
R << " is explicitly disabled" ;
break ;
case LoopVectorizeHints::FK_Enabled:
R << " is explicitly enabled" ;
if (Width != 0 && Unroll != 0 )
R << " with width " << Width << " and interleave count " << Unroll;
else if (Width != 0 )
R << " with width " << Width;
else if (Unroll != 0 )
R << " with interleave count " << Unroll;
break ;
case LoopVectorizeHints::FK_Undefined:
R << " was not specified" ;
break ;
}
return R.str ();
}
unsigned getWidth () const { return Width; }
unsigned getUnroll () const { return Unroll; }
enum ForceKind getForce () const { return Force; }
Expand Down
Expand Up
@@ -1125,18 +1185,37 @@ struct LoopVectorize : public FunctionPass {
: " ?" )) << " width=" << Hints.getWidth ()
<< " unroll=" << Hints.getUnroll () << " \n " );
// Function containing loop
Function *F = L->getHeader ()->getParent ();
// Looking at the diagnostic output is the only way to determine if a loop
// was vectorized (other than looking at the IR or machine code), so it
// is important to generate an optimization remark for each loop. Most of
// these messages are generated by emitOptimizationRemarkAnalysis. Remarks
// generated by emitOptimizationRemark and emitOptimizationRemarkMissed are
// less verbose; they report vectorized loops and unvectorized loops that
// may benefit from vectorization, respectively.
if (Hints.getForce () == LoopVectorizeHints::FK_Disabled) {
DEBUG (dbgs () << " LV: Not vectorizing: #pragma vectorize disable.\n " );
emitOptimizationRemarkAnalysis (F->getContext (), DEBUG_TYPE, *F,
L->getStartLoc (), Hints.emitRemark ());
return false ;
}
if (!AlwaysVectorize && Hints.getForce () != LoopVectorizeHints::FK_Enabled) {
DEBUG (dbgs () << " LV: Not vectorizing: No #pragma vectorize enable.\n " );
emitOptimizationRemarkAnalysis (F->getContext (), DEBUG_TYPE, *F,
L->getStartLoc (), Hints.emitRemark ());
return false ;
}
if (Hints.getWidth () == 1 && Hints.getUnroll () == 1 ) {
DEBUG (dbgs () << " LV: Not vectorizing: Disabled/already vectorized.\n " );
emitOptimizationRemarkAnalysis (
F->getContext (), DEBUG_TYPE, *F, L->getStartLoc (),
" loop not vectorized: vector width and interleave count are "
" explicitly set to 1" );
return false ;
}
Expand All
@@ -1151,14 +1230,19 @@ struct LoopVectorize : public FunctionPass {
DEBUG (dbgs () << " But vectorizing was explicitly forced.\n " );
else {
DEBUG (dbgs () << " \n " );
emitOptimizationRemarkAnalysis (
F->getContext (), DEBUG_TYPE, *F, L->getStartLoc (),
" vectorization is not beneficial and is not explicitly forced" );
return false ;
}
}
// Check if it is legal to vectorize the loop.
LoopVectorizationLegality LVL (L, SE, DL, DT, TLI);
LoopVectorizationLegality LVL (L, SE, DL, DT, TLI, F );
if (!LVL.canVectorize ()) {
DEBUG (dbgs () << " LV: Not vectorizing: Cannot prove legality.\n " );
emitOptimizationRemarkMissed (F->getContext (), DEBUG_TYPE, *F,
L->getStartLoc (), Hints.emitRemark ());
return false ;
}
Expand All
@@ -1167,7 +1251,6 @@ struct LoopVectorize : public FunctionPass {
// Check the function attributes to find out if this function should be
// optimized for size.
Function *F = L->getHeader ()->getParent ();
bool OptForSize = Hints.getForce () != LoopVectorizeHints::FK_Enabled &&
F->hasFnAttribute (Attribute::OptimizeForSize);
Expand All
@@ -1190,6 +1273,11 @@ struct LoopVectorize : public FunctionPass {
if (F->hasFnAttribute (Attribute::NoImplicitFloat)) {
DEBUG (dbgs () << " LV: Can't vectorize when the NoImplicitFloat"
" attribute is used.\n " );
emitOptimizationRemarkAnalysis (
F->getContext (), DEBUG_TYPE, *F, L->getStartLoc (),
" loop not vectorized due to NoImplicitFloat attribute" );
emitOptimizationRemarkMissed (F->getContext (), DEBUG_TYPE, *F,
L->getStartLoc (), Hints.emitRemark ());
return false ;
}
Expand All
@@ -1208,9 +1296,14 @@ struct LoopVectorize : public FunctionPass {
DEBUG (dbgs () << " LV: Unroll Factor is " << UF << ' \n ' );
if (VF.Width == 1 ) {
DEBUG (dbgs () << " LV: Vectorization is possible but not beneficial.\n " );
if (UF == 1 )
DEBUG (dbgs () << " LV: Vectorization is possible but not beneficial\n " );
if (UF == 1 ) {
emitOptimizationRemarkAnalysis (
F->getContext (), DEBUG_TYPE, *F, L->getStartLoc (),
" not beneficial to vectorize and user disabled interleaving" );
return false ;
}
DEBUG (dbgs () << " LV: Trying to at least unroll the loops.\n " );
// Report the unrolling decision.
Expand All
@@ -1220,6 +1313,7 @@ struct LoopVectorize : public FunctionPass {
" (vectorization not beneficial)" ));
// We decided not to vectorize, but we may want to unroll.
InnerLoopUnroller Unroller (L, SE, LI, DT, DL, TLI, UF);
Unroller.vectorize (&LVL);
} else {
Expand Down
Expand Up
@@ -3213,8 +3307,10 @@ static bool canIfConvertPHINodes(BasicBlock *BB) {
}
bool LoopVectorizationLegality::canVectorizeWithIfConvert () {
if (!EnableIfConversion)
if (!EnableIfConversion) {
emitAnalysis (Report () << " if-conversion is disabled" );
return false ;
}
assert (TheLoop->getNumBlocks () > 1 && " Single block loops are vectorizable" );
Expand Down
Expand Up
@@ -3244,16 +3340,24 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
BasicBlock *BB = *BI;
// We don't support switch statements inside loops.
if (!isa<BranchInst>(BB->getTerminator ()))
if (!isa<BranchInst>(BB->getTerminator ())) {
emitAnalysis (Report (BB->getTerminator ())
<< " loop contains a switch statement" );
return false ;
}
// We must be able to predicate all blocks that need to be predicated.
if (blockNeedsPredication (BB)) {
if (!blockCanBePredicated (BB, SafePointes))
if (!blockCanBePredicated (BB, SafePointes)) {
emitAnalysis (Report (BB->getTerminator ())
<< " control flow cannot be substituted for a select" );
return false ;
} else if (BB != Header && !canIfConvertPHINodes (BB))
}
} else if (BB != Header && !canIfConvertPHINodes (BB)) {
emitAnalysis (Report (BB->getTerminator ())
<< " control flow cannot be substituted for a select" );
return false ;
}
}
// We can if-convert this loop.
Expand All
@@ -3263,20 +3367,31 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
bool LoopVectorizationLegality::canVectorize () {
// We must have a loop in canonical form. Loops with indirectbr in them cannot
// be canonicalized.
if (!TheLoop->getLoopPreheader ())
if (!TheLoop->getLoopPreheader ()) {
emitAnalysis (
Report () << " loop control flow is not understood by vectorizer" );
return false ;
}
// We can only vectorize innermost loops.
if (TheLoop->getSubLoopsVector ().size ())
if (TheLoop->getSubLoopsVector ().size ()) {
emitAnalysis (Report () << " loop is not the innermost loop" );
return false ;
}
// We must have a single backedge.
if (TheLoop->getNumBackEdges () != 1 )
if (TheLoop->getNumBackEdges () != 1 ) {
emitAnalysis (
Report () << " loop control flow is not understood by vectorizer" );
return false ;
}
// We must have a single exiting block.
if (!TheLoop->getExitingBlock ())
if (!TheLoop->getExitingBlock ()) {
emitAnalysis (
Report () << " loop control flow is not understood by vectorizer" );
return false ;
}
// We need to have a loop header.
DEBUG (dbgs () << " LV: Found a loop: " <<
Expand All
@@ -3292,6 +3407,7 @@ bool LoopVectorizationLegality::canVectorize() {
// ScalarEvolution needs to be able to find the exit count.
const SCEV *ExitCount = SE->getBackedgeTakenCount (TheLoop);
if (ExitCount == SE->getCouldNotCompute ()) {
emitAnalysis (Report () << " could not determine number of loop iterations" );
DEBUG (dbgs () << " LV: SCEV could not compute the loop exit count.\n " );
return false ;
}
Expand Down
Expand Up
@@ -3385,6 +3501,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (!PhiTy->isIntegerTy () &&
!PhiTy->isFloatingPointTy () &&
!PhiTy->isPointerTy ()) {
emitAnalysis (Report (it)
<< " loop control flow is not understood by vectorizer" );
DEBUG (dbgs () << " LV: Found an non-int non-pointer PHI.\n " );
return false ;
}
Expand All
@@ -3395,13 +3513,17 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (*bb != Header) {
// Check that this instruction has no outside users or is an
// identified reduction value with an outside user.
if (!hasOutsideLoopUser (TheLoop, it, AllowedExit))
if (!hasOutsideLoopUser (TheLoop, it, AllowedExit))
continue ;
emitAnalysis (Report (it) << " value that could not be identified as "
" reduction is used outside the loop" );
return false ;
}
// We only allow if-converted PHIs with exactly two incoming values.
if (Phi->getNumIncomingValues () != 2 ) {
emitAnalysis (Report (it)
<< " control flow not understood by vectorizer" );
DEBUG (dbgs () << " LV: Found an invalid PHI.\n " );
return false ;
}
Expand Down
Expand Up
@@ -3432,8 +3554,11 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Until we explicitly handle the case of an induction variable with
// an outside loop user we have to give up vectorizing this loop.
if (hasOutsideLoopUser (TheLoop, it, AllowedExit))
if (hasOutsideLoopUser (TheLoop, it, AllowedExit)) {
emitAnalysis (Report (it) << " use of induction value outside of the "
" loop is not handled by vectorizer" );
return false ;
}
continue ;
}
Expand Down
Expand Up
@@ -3476,6 +3601,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
continue ;
}
emitAnalysis (Report (it) << " unvectorizable operation" );
DEBUG (dbgs () << " LV: Found an unidentified PHI." << *Phi <<" \n " );
return false ;
}// end of PHI handling
Expand All
@@ -3484,6 +3610,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// calls and we do handle certain intrinsic and libm functions.
CallInst *CI = dyn_cast<CallInst>(it);
if (CI && !getIntrinsicIDForCall (CI, TLI) && !isa<DbgInfoIntrinsic>(CI)) {
emitAnalysis (Report (it) << " call instruction cannot be vectorized" );
DEBUG (dbgs () << " LV: Found a call site.\n " );
return false ;
}
Expand All
@@ -3493,6 +3620,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (CI &&
hasVectorInstrinsicScalarOpd (getIntrinsicIDForCall (CI, TLI), 1 )) {
if (!SE->isLoopInvariant (SE->getSCEV (CI->getOperand (1 )), TheLoop)) {
emitAnalysis (Report (it)
<< " intrinsic instruction cannot be vectorized" );
DEBUG (dbgs () << " LV: Found unvectorizable intrinsic " << *CI << " \n " );
return false ;
}
Expand All
@@ -3502,15 +3631,19 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Also, we can't vectorize extractelement instructions.
if ((!VectorType::isValidElementType (it->getType ()) &&
!it->getType ()->isVoidTy ()) || isa<ExtractElementInst>(it)) {
emitAnalysis (Report (it)
<< " instruction return type cannot be vectorized" );
DEBUG (dbgs () << " LV: Found unvectorizable type.\n " );
return false ;
}
// Check that the stored type is vectorizable.
if (StoreInst *ST = dyn_cast<StoreInst>(it)) {
Type *T = ST->getValueOperand ()->getType ();
if (!VectorType::isValidElementType (T))
if (!VectorType::isValidElementType (T)) {
emitAnalysis (Report (ST) << " store instruction cannot be vectorized" );
return false ;
}
if (EnableMemAccessVersioning)
collectStridedAcccess (ST);
}
Expand All
@@ -3521,17 +3654,22 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Reduction instructions are allowed to have exit users.
// All other instructions must not have external users.
if (hasOutsideLoopUser (TheLoop, it, AllowedExit))
if (hasOutsideLoopUser (TheLoop, it, AllowedExit)) {
emitAnalysis (Report (it) << " value cannot be used outside the loop" );
return false ;
}
} // next instr.
}
if (!Induction) {
DEBUG (dbgs () << " LV: Did not find one integer induction var.\n " );
if (Inductions.empty ())
if (Inductions.empty ()) {
emitAnalysis (Report ()
<< " loop induction variable could not be identified" );
return false ;
}
}
return true ;
Expand Down
Expand Up
@@ -4438,8 +4576,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
continue ;
LoadInst *Ld = dyn_cast<LoadInst>(it);
if (!Ld) return false ;
if (!Ld->isSimple () && !IsAnnotatedParallel) {
if (!Ld || (!Ld->isSimple () && !IsAnnotatedParallel)) {
emitAnalysis (Report (Ld)
<< " read with atomic ordering or volatile read" );
DEBUG (dbgs () << " LV: Found a non-simple load.\n " );
return false ;
}
Expand All
@@ -4452,8 +4591,13 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
// Save 'store' instructions. Abort if other instructions write to memory.
if (it->mayWriteToMemory ()) {
StoreInst *St = dyn_cast<StoreInst>(it);
if (!St) return false ;
if (!St) {
emitAnalysis (Report (it) << " instruction cannot be vectorized" );
return false ;
}
if (!St->isSimple () && !IsAnnotatedParallel) {
emitAnalysis (Report (St)
<< " write with atomic ordering or volatile write" );
DEBUG (dbgs () << " LV: Found a non-simple store.\n " );
return false ;
}
Expand Down
Expand Up
@@ -4490,6 +4634,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
Value* Ptr = ST->getPointerOperand ();
if (isUniform (Ptr )) {
emitAnalysis (
Report (ST)
<< " write to a loop invariant address could not be vectorized" );
DEBUG (dbgs () << " LV: We don't allow storing to uniform addresses\n " );
return false ;
}
Expand Down
Expand Up
@@ -4568,6 +4715,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
}
if (NeedRTCheck && !CanDoRT) {
emitAnalysis (Report () << " cannot identify array bounds" );
DEBUG (dbgs () << " LV: We can't vectorize because we can't find " <<
" the array bounds.\n " );
PtrRtCheck.reset ();
Expand Down
Expand Up
@@ -4598,6 +4746,14 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
// Check that we did not collect too many pointers or found an unsizeable
// pointer.
if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) {
if (!CanDoRT && NumComparisons > 0 )
emitAnalysis (Report ()
<< " cannot check memory dependencies at runtime" );
else
emitAnalysis (Report ()
<< NumComparisons << " exceeds limit of "
<< RuntimeMemoryCheckThreshold
<< " dependent memory operations checked at runtime" );
DEBUG (dbgs () << " LV: Can't vectorize with memory checks\n " );
PtrRtCheck.reset ();
return false ;
Expand All
@@ -4607,6 +4763,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
}
}
if (!CanVecMem)
emitAnalysis (Report () << " unsafe dependent memory operations in loop" );
DEBUG (dbgs () << " LV: We" << (NeedRTCheck ? " " : " don't" ) <<
" need a runtime memory check.\n " );
Expand Down