-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[LV] Don't create partial reductions if factor doesn't match accumulator #158603
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
4ef6a5e
3608890
b9c39a1
6240599
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8197,8 +8197,11 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R, | |
if (isa<LoadInst>(Instr) || isa<StoreInst>(Instr)) | ||
return tryToWidenMemory(Instr, Operands, Range); | ||
|
||
if (std::optional<unsigned> ScaleFactor = getScalingForReduction(Instr)) | ||
return tryToCreatePartialReduction(Instr, Operands, ScaleFactor.value()); | ||
if (std::optional<unsigned> ScaleFactor = getScalingForReduction(Instr)) { | ||
if (auto PartialRed = | ||
tryToCreatePartialReduction(Instr, Operands, ScaleFactor.value())) | ||
return PartialRed; | ||
} | ||
|
||
if (!shouldWiden(Instr, Range)) | ||
return nullptr; | ||
|
@@ -8232,6 +8235,10 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction, | |
isa<VPPartialReductionRecipe>(BinOpRecipe)) | ||
std::swap(BinOp, Accumulator); | ||
|
||
if (ScaleFactor != | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why does the scale-factor of the accumulator (PHI) not match that of the reduction? Is there something missing elsewhere that causes this to not match? Possibly unrelated, but when I print the values of BinOp/Accumulator, I see that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Ths can happen when there's a chain and the first entry can use a partial reduction but the second entry in the chain cannot.
I think it is poorly named, as this also supports using dot product instructions for widening adds, by multiplying with 1 |
||
vputils::getVFScaleFactor(Accumulator->getDefiningRecipe())) | ||
return nullptr; | ||
|
||
unsigned ReductionOpcode = Reduction->getOpcode(); | ||
if (ReductionOpcode == Instruction::Sub) { | ||
auto *const Zero = ConstantInt::get(Reduction->getType(), 0); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -141,6 +141,20 @@ VPBasicBlock *vputils::getFirstLoopHeader(VPlan &Plan, VPDominatorTree &VPDT) { | |
return I == DepthFirst.end() ? nullptr : cast<VPBasicBlock>(*I); | ||
} | ||
|
||
unsigned vputils::getVFScaleFactor(VPRecipeBase *R) { | ||
if (!R) | ||
return 1; | ||
if (auto *RR = dyn_cast<VPReductionPHIRecipe>(R)) | ||
return RR->getVFScaleFactor(); | ||
if (auto *RR = dyn_cast<VPPartialReductionRecipe>(R)) | ||
return RR->getVFScaleFactor(); | ||
assert( | ||
(!isa<VPInstruction>(R) || cast<VPInstruction>(R)->getOpcode() != | ||
VPInstruction::ReductionStartVector) && | ||
"getting scaling factor of reduction-start-vector not implemented yet"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will this now result in the compiler (with asserts enabled) to fail on valid code when trying to vectorise it? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This just moves the existing code and allows |
||
return 1; | ||
} | ||
|
||
std::optional<VPValue *> | ||
vputils::getRecipesForUncountableExit(VPlan &Plan, | ||
SmallVectorImpl<VPRecipeBase *> &Recipes, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1381,8 +1381,8 @@ for.body: ; preds = %for.body.preheader, | |
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !loop !1 | ||
} | ||
|
||
define i32 @red_extended_add_chain(ptr %start, ptr %end, i32 %offset) { | ||
; CHECK-NEON-LABEL: define i32 @red_extended_add_chain( | ||
define i32 @red_extended_add_incomplete_chain(ptr %start, ptr %end, i32 %offset) { | ||
; CHECK-NEON-LABEL: define i32 @red_extended_add_incomplete_chain( | ||
; CHECK-NEON-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR1:[0-9]+]] { | ||
; CHECK-NEON-NEXT: entry: | ||
; CHECK-NEON-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64 | ||
|
@@ -1404,7 +1404,7 @@ define i32 @red_extended_add_chain(ptr %start, ptr %end, i32 %offset) { | |
; CHECK-NEON-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]] | ||
; CHECK-NEON-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1 | ||
; CHECK-NEON-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> | ||
; CHECK-NEON-NEXT: [[PARTIAL_REDUCE:%.*]] = call <16 x i32> @llvm.vector.partial.reduce.add.v16i32.v16i32(<16 x i32> [[VEC_PHI]], <16 x i32> [[TMP3]]) | ||
; CHECK-NEON-NEXT: [[PARTIAL_REDUCE:%.*]] = add <16 x i32> [[VEC_PHI]], [[TMP3]] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test should probably be renamed now that it doesn't produce a partial reduction. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. thanks, updated to |
||
; CHECK-NEON-NEXT: [[TMP4]] = add <16 x i32> [[PARTIAL_REDUCE]], [[BROADCAST_SPLAT]] | ||
; CHECK-NEON-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 | ||
; CHECK-NEON-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] | ||
|
@@ -1415,7 +1415,7 @@ define i32 @red_extended_add_chain(ptr %start, ptr %end, i32 %offset) { | |
; CHECK-NEON-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] | ||
; CHECK-NEON: scalar.ph: | ||
; | ||
; CHECK-SVE-LABEL: define i32 @red_extended_add_chain( | ||
; CHECK-SVE-LABEL: define i32 @red_extended_add_incomplete_chain( | ||
; CHECK-SVE-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR1:[0-9]+]] { | ||
; CHECK-SVE-NEXT: entry: | ||
; CHECK-SVE-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64 | ||
|
@@ -1452,7 +1452,7 @@ define i32 @red_extended_add_chain(ptr %start, ptr %end, i32 %offset) { | |
; CHECK-SVE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] | ||
; CHECK-SVE: scalar.ph: | ||
; | ||
; CHECK-SVE-MAXBW-LABEL: define i32 @red_extended_add_chain( | ||
; CHECK-SVE-MAXBW-LABEL: define i32 @red_extended_add_incomplete_chain( | ||
; CHECK-SVE-MAXBW-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR1:[0-9]+]] { | ||
; CHECK-SVE-MAXBW-NEXT: entry: | ||
; CHECK-SVE-MAXBW-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64 | ||
|
@@ -1478,7 +1478,7 @@ define i32 @red_extended_add_chain(ptr %start, ptr %end, i32 %offset) { | |
; CHECK-SVE-MAXBW-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]] | ||
; CHECK-SVE-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[NEXT_GEP]], align 1 | ||
; CHECK-SVE-MAXBW-NEXT: [[TMP7:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i32> | ||
; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE:%.*]] = call <vscale x 8 x i32> @llvm.vector.partial.reduce.add.nxv8i32.nxv8i32(<vscale x 8 x i32> [[VEC_PHI]], <vscale x 8 x i32> [[TMP7]]) | ||
; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE:%.*]] = add <vscale x 8 x i32> [[VEC_PHI]], [[TMP7]] | ||
; CHECK-SVE-MAXBW-NEXT: [[TMP8]] = add <vscale x 8 x i32> [[PARTIAL_REDUCE]], [[BROADCAST_SPLAT]] | ||
; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] | ||
; CHECK-SVE-MAXBW-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Bad change?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this is intentional? tryToCreatePartialReduction can now return nullptr on line 8185 so I presume we need to check it here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, we need to account for
tryToCreatePartialReduction
now possibly returningnullptr