Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion flang/test/Integration/unroll-loops.f90
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ subroutine unroll(a)
! NO-UNROLL-NEXT: %[[GEP:.*]] = getelementptr i64, ptr %[[ARG0]], i64 %[[IND]]
! NO-UNROLL-NEXT: store <2 x i64> %[[VIND]], ptr %[[GEP]]
! NO-UNROLL-NEXT: %[[NIV:.*]] = add nuw i64 %{{.*}}, 2
! NO-UNROLL-NEXT: %[[NVIND]] = add <2 x i64> %[[VIND]], splat (i64 2)
! NO-UNROLL-NEXT: %[[NVIND]] = add nuw nsw <2 x i64> %[[VIND]], splat (i64 2)
!
! UNROLL-NEXT: %[[VIND1:.*]] = add <2 x i64> %[[VIND]], splat (i64 2)
! UNROLL-NEXT: %[[GEP0:.*]] = getelementptr i64, ptr %[[ARG0]], i64 %[[IND]]
Expand Down
2 changes: 1 addition & 1 deletion flang/test/Lower/HLFIR/unroll-loops.fir
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ func.func @unroll(%arg0: !fir.ref<!fir.array<1000 x index>> {fir.bindc_name = "a
// NO-UNROLL-NEXT: %[[GEP:.*]] = getelementptr i64, ptr %[[ARG0]], i64 %[[IND]]
// NO-UNROLL-NEXT: store <2 x i64> %[[VIND]], ptr %[[GEP]]
// NO-UNROLL-NEXT: %[[NIV:.*]] = add nuw i64 %{{.*}}, 2
// NO-UNROLL-NEXT: %[[NVIND]] = add <2 x i64> %[[VIND]], splat (i64 2)
// NO-UNROLL-NEXT: %[[NVIND]] = add nuw nsw <2 x i64> %[[VIND]], splat (i64 2)

// UNROLL-NEXT: %[[VIND1:.*]] = add <2 x i64> %[[VIND]], splat (i64 2)
// UNROLL-NEXT: %[[GEP0:.*]] = getelementptr i64, ptr %[[ARG0]], i64 %[[IND]]
Expand Down
9 changes: 4 additions & 5 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,11 +178,10 @@ class VPBuilder {
new VPInstructionWithType(Opcode, Operands, ResultTy, Flags, DL, Name));
}

VPInstruction *createOverflowingOp(unsigned Opcode,
ArrayRef<VPValue *> Operands,
VPRecipeWithIRFlags::WrapFlagsTy WrapFlags,
DebugLoc DL = DebugLoc::getUnknown(),
const Twine &Name = "") {
VPInstruction *createOverflowingOp(
unsigned Opcode, ArrayRef<VPValue *> Operands,
VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false},
DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "") {
return tryInsertInstruction(
new VPInstruction(Opcode, Operands, WrapFlags, {}, DL, Name));
}
Expand Down
15 changes: 12 additions & 3 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7639,6 +7639,10 @@ createWidenInductionRecipes(VPInstruction *PhiR,
assert(Plan.getLiveIn(IndDesc.getStartValue()) == Start &&
"Start VPValue must match IndDesc's start value");

// It is always safe to copy over the NoWrap and FastMath flags. In
// particular, when folding tail by masking, the masked-off lanes are never
// used, so it is safe.
VPIRFlags Flags = vputils::getFlagsFromIndDesc(IndDesc);
VPValue *Step =
vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep());

Expand All @@ -7651,7 +7655,7 @@ createWidenInductionRecipes(VPInstruction *PhiR,

PHINode *Phi = cast<PHINode>(PhiR->getUnderlyingInstr());
return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, &Plan.getVF(),
IndDesc, PhiR->getDebugLoc());
IndDesc, Flags, PhiR->getDebugLoc());
}

VPHeaderPHIRecipe *
Expand Down Expand Up @@ -7705,10 +7709,15 @@ VPRecipeBuilder::tryToOptimizeInductionTruncate(VPInstruction *VPI,
PHINode *Phi = WidenIV->getPHINode();
VPValue *Start = WidenIV->getStartValue();
const InductionDescriptor &IndDesc = WidenIV->getInductionDescriptor();

// It is always safe to copy over the NoWrap and FastMath flags. In
// particular, when folding tail by masking, the masked-off lanes are never
// used, so it is safe.
VPIRFlags Flags = vputils::getFlagsFromIndDesc(IndDesc);
VPValue *Step =
vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep());
return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, &Plan.getVF(),
IndDesc, I, VPI->getDebugLoc());
return new VPWidenIntOrFpInductionRecipe(
Phi, Start, Step, &Plan.getVF(), IndDesc, I, Flags, VPI->getDebugLoc());
}

VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(VPInstruction *VPI,
Expand Down
14 changes: 8 additions & 6 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -2123,7 +2123,8 @@ class VPWidenInductionRecipe : public VPHeaderPHIRecipe {
/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their vector values. This is an abstract recipe and must be
/// converted to concrete recipes before executing.
class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe,
public VPIRFlags {
TruncInst *Trunc;

// If this recipe is unrolled it will have 2 additional operands.
Expand All @@ -2132,19 +2133,20 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
public:
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
VPValue *VF, const InductionDescriptor &IndDesc,
DebugLoc DL)
const VPIRFlags &Flags, DebugLoc DL)
: VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
Step, IndDesc, DL),
Trunc(nullptr) {
VPIRFlags(Flags), Trunc(nullptr) {
addOperand(VF);
}

VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
VPValue *VF, const InductionDescriptor &IndDesc,
TruncInst *Trunc, DebugLoc DL)
TruncInst *Trunc, const VPIRFlags &Flags,
DebugLoc DL)
: VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
Step, IndDesc, DL),
Trunc(Trunc) {
VPIRFlags(Flags), Trunc(Trunc) {
addOperand(VF);
SmallVector<std::pair<unsigned, MDNode *>> Metadata;
(void)Metadata;
Expand All @@ -2158,7 +2160,7 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
VPWidenIntOrFpInductionRecipe *clone() override {
return new VPWidenIntOrFpInductionRecipe(
getPHINode(), getStartValue(), getStepValue(), getVFValue(),
getInductionDescriptor(), Trunc, getDebugLoc());
getInductionDescriptor(), Trunc, *this, getDebugLoc());
}

VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2382,7 +2382,9 @@ void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent;
printAsOperand(O, SlotTracker);
O << " = WIDEN-INDUCTION ";
O << " = WIDEN-INDUCTION";
printFlags(O);
O << " ";
printOperands(O, SlotTracker);

if (auto *TI = getTruncInst())
Expand Down
21 changes: 14 additions & 7 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,13 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
VPValue *Start = Plan.getOrAddLiveIn(II->getStartValue());
VPValue *Step =
vputils::getOrCreateVPValueForSCEVExpr(Plan, II->getStep());
// It is always safe to copy over the NoWrap and FastMath flags. In
// particular, when folding tail by masking, the masked-off lanes are
// never used, so it is safe.
VPIRFlags Flags = vputils::getFlagsFromIndDesc(*II);
NewRecipe = new VPWidenIntOrFpInductionRecipe(
Phi, Start, Step, &Plan.getVF(), *II, Ingredient.getDebugLoc());
Phi, Start, Step, &Plan.getVF(), *II, Flags,
Ingredient.getDebugLoc());
}
} else {
assert(isa<VPInstruction>(&Ingredient) &&
Expand Down Expand Up @@ -542,6 +547,11 @@ static void removeRedundantCanonicalIVs(VPlan &Plan) {
// only.
if (!vputils::onlyScalarValuesUsed(WidenOriginalIV) ||
vputils::onlyFirstLaneUsed(WidenNewIV)) {
// We are replacing a wide canonical iv with a suitable wide induction.
// This is used to compute header mask, hence all lanes will be used and
// we need to drop wrap flags only applying to lanes guaranteed to execute
// in the original scalar loop.
WidenOriginalIV->dropPoisonGeneratingFlags();
WidenNewIV->replaceAllUsesWith(WidenOriginalIV);
WidenNewIV->eraseFromParent();
return;
Expand Down Expand Up @@ -3285,16 +3295,13 @@ expandVPWidenIntOrFpInduction(VPWidenIntOrFpInductionRecipe *WidenIVR,
const InductionDescriptor &ID = WidenIVR->getInductionDescriptor();
Instruction::BinaryOps AddOp;
Instruction::BinaryOps MulOp;
// FIXME: The newly created binary instructions should contain nsw/nuw
// flags, which can be found from the original scalar operations.
VPIRFlags Flags;
VPIRFlags Flags = *WidenIVR;
if (ID.getKind() == InductionDescriptor::IK_IntInduction) {
AddOp = Instruction::Add;
MulOp = Instruction::Mul;
} else {
AddOp = ID.getInductionOpcode();
MulOp = Instruction::FMul;
Flags = ID.getInductionBinOp()->getFastMathFlags();
}

// If the phi is truncated, truncate the start and step values.
Expand Down Expand Up @@ -3406,7 +3413,7 @@ static void expandVPWidenPointerInduction(VPWidenPointerInductionRecipe *R,
Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
Type *StepTy = TypeInfo.inferScalarType(Step);
VPValue *Offset = Builder.createNaryOp(VPInstruction::StepVector, {}, StepTy);
Offset = Builder.createNaryOp(Instruction::Mul, {Offset, Step});
Offset = Builder.createOverflowingOp(Instruction::Mul, {Offset, Step});
VPValue *PtrAdd = Builder.createNaryOp(
VPInstruction::WidePtrAdd, {ScalarPtrPhi, Offset}, DL, "vector.gep");
R->replaceAllUsesWith(PtrAdd);
Expand All @@ -3416,7 +3423,7 @@ static void expandVPWidenPointerInduction(VPWidenPointerInductionRecipe *R,
Builder.setInsertPoint(ExitingBB, ExitingBB->getTerminator()->getIterator());
VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.inferScalarType(VF),
DL);
VPValue *Inc = Builder.createNaryOp(Instruction::Mul, {Step, VF});
VPValue *Inc = Builder.createOverflowingOp(Instruction::Mul, {Step, VF});

VPValue *InductionGEP =
Builder.createPtrAdd(ScalarPtrPhi, Inc, DL, "ptr.ind");
Expand Down
13 changes: 13 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,19 @@ std::optional<VPValue *>
getRecipesForUncountableExit(VPlan &Plan,
SmallVectorImpl<VPRecipeBase *> &Recipes,
SmallVectorImpl<VPRecipeBase *> &GEPs);

/// Builds the VPIRFlags described by the induction binop of \p ID.
///
/// For a floating-point induction the result holds the fast-math flags of the
/// induction binop. Otherwise, if the induction binop is an
/// OverflowingBinaryOperator, the result holds its nuw/nsw wrap flags. In every
/// remaining case default-constructed flags are returned.
inline VPIRFlags getFlagsFromIndDesc(const InductionDescriptor &ID) {
  auto *IndBinOp = ID.getInductionBinOp();

  // FP inductions: forward the fast-math flags. The binop is dereferenced
  // without a null check on this path.
  if (ID.getKind() == InductionDescriptor::IK_FpInduction)
    return IndBinOp->getFastMathFlags();

  // dyn_cast_if_present tolerates a null binop, so a missing or
  // non-overflowing binop falls through to default flags.
  auto *WrappingOp = dyn_cast_if_present<OverflowingBinaryOperator>(IndBinOp);
  if (!WrappingOp)
    return {};

  const bool HasNUW = WrappingOp->hasNoUnsignedWrap();
  const bool HasNSW = WrappingOp->hasNoSignedWrap();
  return VPIRFlags::WrapFlagsTy(HasNUW, HasNSW);
}
} // namespace vputils

//===----------------------------------------------------------------------===//
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
; CHECK-NEXT: [[TMP3:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP3]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP1]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
Expand Down Expand Up @@ -76,8 +76,8 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
; CHECK-NEXT: [[TMP3:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP3]]
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP1]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1052,7 +1052,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) {
; DEFAULT-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
; DEFAULT-NEXT: store i32 [[TMP2]], ptr [[DST]], align 4
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4)
; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[STEP_ADD]], splat (i64 4)
; DEFAULT-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
; DEFAULT-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
; DEFAULT: [[MIDDLE_BLOCK]]:
Expand Down
Loading
Loading