Skip to content

Commit

Permalink
Revert "[SLP]Alternate vectorization for cmp instructions."
Browse files Browse the repository at this point in the history
This reverts commit afaaecc.

Crashes when compiling SciPy, test case https://reviews.llvm.org/P8276
  • Loading branch information
d0k committed Feb 1, 2022
1 parent 762f0b5 commit 5281f0d
Show file tree
Hide file tree
Showing 6 changed files with 232 additions and 393 deletions.
167 changes: 6 additions & 161 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Expand Up @@ -471,36 +471,17 @@ static bool isValidForAlternation(unsigned Opcode) {
return true;
}

static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
unsigned BaseIndex = 0);

/// Checks if the provided operands of 2 cmp instructions are compatible, i.e.
/// compatible instructions or constants, or just some other regular values.
static bool areCompatibleCmpOps(Value *BaseOp0, Value *BaseOp1, Value *Op0,
Value *Op1) {
return (isConstant(BaseOp0) && isConstant(Op0)) ||
(isConstant(BaseOp1) && isConstant(Op1)) ||
(!isa<Instruction>(BaseOp0) && !isa<Instruction>(Op0) &&
!isa<Instruction>(BaseOp1) && !isa<Instruction>(Op1)) ||
getSameOpcode({BaseOp0, Op0}).getOpcode() ||
getSameOpcode({BaseOp1, Op1}).getOpcode();
}

/// \returns analysis of the Instructions in \p VL described in
/// InstructionsState, the Opcode that we suppose the whole list
/// could be vectorized even if its structure is diverse.
static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
unsigned BaseIndex) {
unsigned BaseIndex = 0) {
// Make sure these are all Instructions.
if (llvm::any_of(VL, [](Value *V) { return !isa<Instruction>(V); }))
return InstructionsState(VL[BaseIndex], nullptr, nullptr);

bool IsCastOp = isa<CastInst>(VL[BaseIndex]);
bool IsBinOp = isa<BinaryOperator>(VL[BaseIndex]);
bool IsCmpOp = isa<CmpInst>(VL[BaseIndex]);
CmpInst::Predicate BasePred =
IsCmpOp ? cast<CmpInst>(VL[BaseIndex])->getPredicate()
: CmpInst::BAD_ICMP_PREDICATE;
unsigned Opcode = cast<Instruction>(VL[BaseIndex])->getOpcode();
unsigned AltOpcode = Opcode;
unsigned AltIndex = BaseIndex;
Expand Down Expand Up @@ -533,47 +514,6 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
continue;
}
}
} else if (IsCmpOp && isa<CmpInst>(VL[Cnt])) {
auto *BaseInst = cast<Instruction>(VL[BaseIndex]);
auto *Inst = cast<Instruction>(VL[Cnt]);
Type *Ty0 = BaseInst->getOperand(0)->getType();
Type *Ty1 = Inst->getOperand(0)->getType();
if (Ty0 == Ty1) {
Value *BaseOp0 = BaseInst->getOperand(0);
Value *BaseOp1 = BaseInst->getOperand(1);
Value *Op0 = Inst->getOperand(0);
Value *Op1 = Inst->getOperand(1);
CmpInst::Predicate CurrentPred =
cast<CmpInst>(VL[Cnt])->getPredicate();
// Check for compatible operands. If the corresponding operands are not
// compatible - need to perform alternate vectorization.
if (InstOpcode == Opcode) {
if (BasePred == CurrentPred &&
areCompatibleCmpOps(BaseOp0, BaseOp1, Op0, Op1))
continue;
if (BasePred == CmpInst::getSwappedPredicate(CurrentPred) &&
areCompatibleCmpOps(BaseOp0, BaseOp1, Op1, Op0))
continue;
auto *AltInst = cast<CmpInst>(VL[AltIndex]);
CmpInst::Predicate AltPred = AltInst->getPredicate();
Value *AltOp0 = AltInst->getOperand(0);
Value *AltOp1 = AltInst->getOperand(1);
// Check if operands are compatible with alternate operands.
if (AltPred == CurrentPred &&
areCompatibleCmpOps(AltOp0, AltOp1, Op0, Op1))
continue;
if (AltPred == CmpInst::getSwappedPredicate(CurrentPred) &&
areCompatibleCmpOps(AltOp0, AltOp1, Op1, Op0))
continue;
}
if (BaseIndex == AltIndex) {
assert(isValidForAlternation(Opcode) &&
isValidForAlternation(InstOpcode) &&
"Cast isn't safe for alternation, logic needs to be updated!");
AltIndex = Cnt;
continue;
}
}
} else if (InstOpcode == Opcode || InstOpcode == AltOpcode)
continue;
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
Expand Down Expand Up @@ -4414,41 +4354,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");

// Reorder operands if reordering would enable vectorization.
auto *CI = dyn_cast<CmpInst>(VL0);
if (isa<BinaryOperator>(VL0) || CI) {
if (isa<BinaryOperator>(VL0)) {
ValueList Left, Right;
if (!CI || all_of(VL, [](Value *V) {
return cast<CmpInst>(V)->isCommutative();
})) {
reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
} else {
CmpInst::Predicate P0 = CI->getPredicate();
CmpInst::Predicate AltP0 = cast<CmpInst>(S.AltOp)->getPredicate();
CmpInst::Predicate AltP0Swapped = CmpInst::getSwappedPredicate(AltP0);
Value *BaseOp0 = VL0->getOperand(0);
Value *BaseOp1 = VL0->getOperand(1);
// Collect operands - commute if it uses the swapped predicate or
// alternate operation.
for (Value *V : VL) {
auto *Cmp = cast<CmpInst>(V);
Value *LHS = Cmp->getOperand(0);
Value *RHS = Cmp->getOperand(1);
CmpInst::Predicate CurrentPred = CI->getPredicate();
CmpInst::Predicate CurrentPredSwapped =
CmpInst::getSwappedPredicate(CurrentPred);
if (P0 == AltP0 || P0 == AltP0Swapped) {
if ((P0 == CurrentPred &&
!areCompatibleCmpOps(BaseOp0, BaseOp1, LHS, RHS)) ||
(P0 == CurrentPredSwapped &&
!areCompatibleCmpOps(BaseOp0, BaseOp1, RHS, LHS)))
std::swap(LHS, RHS);
} else if (!areCompatibleCmpOps(BaseOp0, BaseOp1, LHS, RHS)) {
std::swap(LHS, RHS);
}
Left.push_back(LHS);
Right.push_back(RHS);
}
}
reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
TE->setOperand(0, Left);
TE->setOperand(1, Right);
buildTree_rec(Left, Depth + 1, {TE, 0});
Expand Down Expand Up @@ -5380,8 +5288,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
((Instruction::isBinaryOp(E->getOpcode()) &&
Instruction::isBinaryOp(E->getAltOpcode())) ||
(Instruction::isCast(E->getOpcode()) &&
Instruction::isCast(E->getAltOpcode())) ||
(isa<CmpInst>(VL0) && isa<CmpInst>(E->getAltOp()))) &&
Instruction::isCast(E->getAltOpcode()))) &&
"Invalid Shuffle Vector Operand");
InstructionCost ScalarCost = 0;
if (NeedToShuffleReuses) {
Expand Down Expand Up @@ -5429,14 +5336,6 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy,
CostKind);
} else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) {
VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy,
Builder.getInt1Ty(),
CI0->getPredicate(), CostKind, VL0);
VecCost += TTI->getCmpSelInstrCost(
E->getOpcode(), ScalarTy, Builder.getInt1Ty(),
cast<CmpInst>(E->getAltOp())->getPredicate(), CostKind,
E->getAltOp());
} else {
Type *Src0SclTy = E->getMainOp()->getOperand(0)->getType();
Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType();
Expand All @@ -5453,28 +5352,6 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
[E](Instruction *I) {
assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
if (auto *CI0 = dyn_cast<CmpInst>(E->getMainOp())) {
auto *AltCI0 = cast<CmpInst>(E->getAltOp());
auto *CI = cast<CmpInst>(I);
CmpInst::Predicate P0 = CI0->getPredicate();
CmpInst::Predicate AltP0 = AltCI0->getPredicate();
CmpInst::Predicate AltP0Swapped =
CmpInst::getSwappedPredicate(AltP0);
CmpInst::Predicate CurrentPred = CI->getPredicate();
CmpInst::Predicate CurrentPredSwapped =
CmpInst::getSwappedPredicate(CurrentPred);
if (P0 == AltP0 || P0 == AltP0Swapped) {
unsigned Idx =
std::distance(E->Scalars.begin(), find(E->Scalars, I));
// Alternate cmps have same/swapped predicate as main cmps but
// different order of compatible operands.
ArrayRef<Value *> VLOp0 = E->getOperand(0);
return (P0 == CurrentPred && CI->getOperand(0) != VLOp0[Idx]) ||
(P0 == CurrentPredSwapped &&
CI->getOperand(1) != VLOp0[Idx]);
}
return CurrentPred != P0 && CurrentPredSwapped != P0;
}
return I->getOpcode() == E->getAltOpcode();
},
Mask);
Expand Down Expand Up @@ -6957,12 +6834,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
((Instruction::isBinaryOp(E->getOpcode()) &&
Instruction::isBinaryOp(E->getAltOpcode())) ||
(Instruction::isCast(E->getOpcode()) &&
Instruction::isCast(E->getAltOpcode())) ||
(isa<CmpInst>(VL0) && isa<CmpInst>(E->getAltOp()))) &&
Instruction::isCast(E->getAltOpcode()))) &&
"Invalid Shuffle Vector Operand");

Value *LHS = nullptr, *RHS = nullptr;
if (Instruction::isBinaryOp(E->getOpcode()) || isa<CmpInst>(VL0)) {
if (Instruction::isBinaryOp(E->getOpcode())) {
setInsertPointAfterBundle(E);
LHS = vectorizeTree(E->getOperand(0));
RHS = vectorizeTree(E->getOperand(1));
Expand All @@ -6982,15 +6858,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS, RHS);
V1 = Builder.CreateBinOp(
static_cast<Instruction::BinaryOps>(E->getAltOpcode()), LHS, RHS);
} else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) {
V0 = Builder.CreateCmp(CI0->getPredicate(), LHS, RHS);
auto *AltCI = cast<CmpInst>(E->getAltOp());
CmpInst::Predicate AltPred = AltCI->getPredicate();
unsigned AltIdx =
std::distance(E->Scalars.begin(), find(E->Scalars, AltCI));
if (AltCI->getOperand(0) != E->getOperand(0)[AltIdx])
AltPred = CmpInst::getSwappedPredicate(AltPred);
V1 = Builder.CreateCmp(AltPred, LHS, RHS);
} else {
V0 = Builder.CreateCast(
static_cast<Instruction::CastOps>(E->getOpcode()), LHS, VecTy);
Expand All @@ -7015,28 +6882,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
[E](Instruction *I) {
assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
if (auto *CI0 = dyn_cast<CmpInst>(E->getMainOp())) {
auto *AltCI0 = cast<CmpInst>(E->getAltOp());
auto *CI = cast<CmpInst>(I);
CmpInst::Predicate P0 = CI0->getPredicate();
CmpInst::Predicate AltP0 = AltCI0->getPredicate();
CmpInst::Predicate AltP0Swapped =
CmpInst::getSwappedPredicate(AltP0);
CmpInst::Predicate CurrentPred = CI->getPredicate();
CmpInst::Predicate CurrentPredSwapped =
CmpInst::getSwappedPredicate(CurrentPred);
if (P0 == AltP0 || P0 == AltP0Swapped) {
unsigned Idx =
std::distance(E->Scalars.begin(), find(E->Scalars, I));
// Alternate cmps have same/swapped predicate as main cmps but
// different order of compatible operands.
ArrayRef<Value *> VLOp0 = E->getOperand(0);
return (P0 == CurrentPred && CI->getOperand(0) != VLOp0[Idx]) ||
(P0 == CurrentPredSwapped &&
CI->getOperand(1) != VLOp0[Idx]);
}
return CurrentPred != P0 && CurrentPredSwapped != P0;
}
return I->getOpcode() == E->getAltOpcode();
},
Mask, &OpScalars, &AltScalars);
Expand Down
57 changes: 35 additions & 22 deletions llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll
Expand Up @@ -90,17 +90,24 @@ return:
define float @test_merge_anyof_v4sf(<4 x float> %t) {
; CHECK-LABEL: @test_merge_anyof_v4sf(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[T:%.*]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP0:%.*]] = fcmp olt <8 x float> [[SHUFFLE]], <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt <8 x float> [[SHUFFLE]], <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP3:%.*]] = freeze <8 x i1> [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i1> [[TMP3]] to i8
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[T]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[SHIFT]], [[T]]
; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x float> [[TMP5]], i64 0
; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[DOTNOT]], float [[ADD]], float 0.000000e+00
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[T:%.*]], i64 3
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[T]], i64 2
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[T]], i64 1
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[T]], i64 0
; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x float> [[T]]
; CHECK-NEXT: [[TMP4:%.*]] = fcmp olt <4 x float> [[T_FR]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4
; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i4 [[TMP5]], 0
; CHECK-NEXT: [[CMP19:%.*]] = fcmp ogt float [[TMP3]], 1.000000e+00
; CHECK-NEXT: [[OR_COND3:%.*]] = select i1 [[TMP6]], i1 true, i1 [[CMP19]]
; CHECK-NEXT: [[CMP24:%.*]] = fcmp ogt float [[TMP2]], 1.000000e+00
; CHECK-NEXT: [[OR_COND4:%.*]] = select i1 [[OR_COND3]], i1 true, i1 [[CMP24]]
; CHECK-NEXT: [[CMP29:%.*]] = fcmp ogt float [[TMP1]], 1.000000e+00
; CHECK-NEXT: [[OR_COND5:%.*]] = select i1 [[OR_COND4]], i1 true, i1 [[CMP29]]
; CHECK-NEXT: [[CMP34:%.*]] = fcmp ogt float [[TMP0]], 1.000000e+00
; CHECK-NEXT: [[OR_COND6:%.*]] = select i1 [[OR_COND5]], i1 true, i1 [[CMP34]]
; CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND6]], float 0.000000e+00, float [[ADD]]
; CHECK-NEXT: ret float [[RETVAL_0]]
;
entry:
Expand Down Expand Up @@ -413,18 +420,24 @@ return:
define float @test_merge_anyof_v4si(<4 x i32> %t) {
; CHECK-LABEL: @test_merge_anyof_v4si(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[T:%.*]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <8 x i32> [[SHUFFLE]], <i32 1, i32 1, i32 1, i32 1, i32 255, i32 255, i32 255, i32 255>
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <8 x i32> [[SHUFFLE]], <i32 1, i32 1, i32 1, i32 1, i32 255, i32 255, i32 255, i32 255>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i1> [[TMP0]], <8 x i1> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[TMP3:%.*]] = freeze <8 x i1> [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i1> [[TMP3]] to i8
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[SHIFT]], [[T]]
; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x i32> [[TMP5]], i64 0
; CHECK-NEXT: [[T_FR:%.*]] = freeze <4 x i32> [[T:%.*]]
; CHECK-NEXT: [[TMP0:%.*]] = icmp slt <4 x i32> [[T_FR]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i4 [[TMP1]], 0
; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[T_FR]], <i32 255, i32 255, i32 255, i32 255>
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i64 0
; CHECK-NEXT: [[OR_COND3:%.*]] = or i1 [[TMP2]], [[TMP4]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1
; CHECK-NEXT: [[OR_COND4:%.*]] = or i1 [[OR_COND3]], [[TMP5]]
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP3]], i64 2
; CHECK-NEXT: [[OR_COND5:%.*]] = or i1 [[OR_COND4]], [[TMP6]]
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP3]], i64 3
; CHECK-NEXT: [[OR_COND6:%.*]] = or i1 [[OR_COND5]], [[TMP7]]
; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x i32> [[T_FR]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[SHIFT]], [[T_FR]]
; CHECK-NEXT: [[ADD:%.*]] = extractelement <4 x i32> [[TMP8]], i64 0
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[ADD]] to float
; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[DOTNOT]], float [[CONV]], float 0.000000e+00
; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND6]], float 0.000000e+00, float [[CONV]]
; CHECK-NEXT: ret float [[RETVAL_0]]
;
entry:
Expand Down

0 comments on commit 5281f0d

Please sign in to comment.