Skip to content

Commit

Permalink
[SimplifyDemandedVec] Strengthen handling all undef lanes (particularly GEPs)
Browse files Browse the repository at this point in the history

A change of two parts:
1) A generic enhancement for all callers of SimplifyDemandedVectorElts (SDVE) to exploit the fact that if all lanes are undef, the result is undef.
2) A GEP specific piece to strengthen/fix the vector index undef element handling, and call into the generic infrastructure when visiting the GEP.

The result is that we replace a vector gep with at least one undef in each lane with an undef. We can also do the same for vector intrinsics. Once the masked.load patch (D57372) has landed, I'll update to include call tests as well.

Differential Revision: https://reviews.llvm.org/D57468

llvm-svn: 356293
  • Loading branch information
preames committed Mar 15, 2019
1 parent d33e62c commit 68a2e4d
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 10 deletions.
3 changes: 1 addition & 2 deletions llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
Expand Up @@ -1908,8 +1908,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (Changed) return II;
}

// For vector result intrinsics, use the generic demanded vector support to
// simplify any operands before moving on to the per-intrinsic rules.
// For vector result intrinsics, use the generic demanded vector support.
if (II->getType()->isVectorTy()) {
auto VWidth = II->getType()->getVectorNumElements();
APInt UndefElts(VWidth, 0);
Expand Down
20 changes: 17 additions & 3 deletions llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
Expand Up @@ -1175,9 +1175,18 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// wouldn't have a vector result to get here. Note that we intentionally
// merge the undef bits here since gepping with either an undef base or
// index results in undef.
for (unsigned i = 0; i < I->getNumOperands(); i++)
if (I->getOperand(i)->getType()->isVectorTy())
simplifyAndSetOp(I, i, DemandedElts, UndefElts);
for (unsigned i = 0; i < I->getNumOperands(); i++) {
if (isa<UndefValue>(I->getOperand(i))) {
// If the entire vector is undefined, just return this info.
UndefElts = EltMask;
return nullptr;
}
if (I->getOperand(i)->getType()->isVectorTy()) {
APInt UndefEltsOp(VWidth, 0);
simplifyAndSetOp(I, i, DemandedElts, UndefEltsOp);
UndefElts |= UndefEltsOp;
}
}

break;
}
Expand Down Expand Up @@ -1663,5 +1672,10 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
UndefElts &= UndefElts2;
}

// If we've proven all of the lanes undef, return an undef value.
// TODO: Intersect w/demanded lanes
if (UndefElts.isAllOnesValue())
return UndefValue::get(I->getType());;

return MadeChange ? I : nullptr;
}
13 changes: 13 additions & 0 deletions llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
Expand Up @@ -1557,6 +1557,19 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (Value *V = SimplifyGEPInst(GEPEltType, Ops, SQ.getWithInstruction(&GEP)))
return replaceInstUsesWith(GEP, V);

// For vector geps, use the generic demanded vector support.
if (GEP.getType()->isVectorTy()) {
auto VWidth = GEP.getType()->getVectorNumElements();
APInt UndefElts(VWidth, 0);
APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
UndefElts)) {
if (V != &GEP)
return replaceInstUsesWith(GEP, V);
return &GEP;
}
}

Value *PtrOp = GEP.getOperand(0);

// Eliminate unneeded casts for indices, and replace indices which displace
Expand Down
6 changes: 1 addition & 5 deletions llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
Expand Up @@ -620,10 +620,7 @@ define i32* @gep_splat_both(i32* %base, i64 %idx) {

define <2 x i32*> @gep_all_lanes_undef(i32* %base, i64 %idx) {;
; CHECK-LABEL: @gep_all_lanes_undef(
; CHECK-NEXT: [[BASEVEC:%.*]] = insertelement <2 x i32*> undef, i32* [[BASE:%.*]], i32 0
; CHECK-NEXT: [[IDXVEC:%.*]] = insertelement <2 x i64> undef, i64 [[IDX:%.*]], i32 1
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASEVEC]], <2 x i64> [[IDXVEC]]
; CHECK-NEXT: ret <2 x i32*> [[GEP]]
; CHECK-NEXT: ret <2 x i32*> undef
;
%basevec = insertelement <2 x i32*> undef, i32* %base, i32 0
%idxvec = insertelement <2 x i64> undef, i64 %idx, i32 1
Expand All @@ -641,4 +638,3 @@ define i32* @gep_demanded_lane_undef(i32* %base, i64 %idx) {
%ee = extractelement <2 x i32*> %gep, i32 1
ret i32* %ee
}

0 comments on commit 68a2e4d

Please sign in to comment.