Skip to content

Commit

Permalink
[X86] Add initial SimplifyDemandedVectorEltsForTargetNode support
Browse files Browse the repository at this point in the history
This patch adds an initial x86 SimplifyDemandedVectorEltsForTargetNode implementation to handle target shuffles.

Currently the patch only decodes a target shuffle, calls SimplifyDemandedVectorElts on its input operands and removes any shuffle that reduces to undef/zero/identity.

Future work will need to integrate this with combineX86ShufflesRecursively, add support for other x86 ops, etc.

NOTE: There is a minor regression that appears to be affecting further (extractelement?) combines which I haven't been able to solve yet - possibly something to do with how nodes are added to the worklist after simplification.

Differential Revision: https://reviews.llvm.org/D52140

llvm-svn: 342564
  • Loading branch information
RKSimon committed Sep 19, 2018
1 parent 8306f76 commit 8191d63
Show file tree
Hide file tree
Showing 19 changed files with 439 additions and 395 deletions.
93 changes: 93 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -31765,11 +31765,104 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
{Op}, 0, Op, {0}, {}, /*Depth*/ 1,
/*HasVarMask*/ false, /*AllowVarMask*/ true, DAG, Subtarget))
return Res;

// Simplify source operands based on shuffle mask.
// TODO - merge this into combineX86ShufflesRecursively.
APInt KnownUndef, KnownZero;
APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
if (TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, DCI))
return SDValue(N, 0);
}

return SDValue();
}

/// X86 implementation of the demanded-vector-elements hook for target nodes.
///
/// Given which elements of \p Op are actually used (\p DemandedElts), this
/// tries to simplify the operands of \p Op and, for target shuffles, to
/// replace \p Op outright when its decoded mask reduces to all-undef,
/// all-undef-or-zero, or an in-order (identity) copy of one input.
///
/// \param Op           The target node being simplified.
/// \param DemandedElts One bit per element of \p Op; a set bit means the
///                     element is demanded. Its bit width is used as the
///                     element count.
/// \param KnownUndef   Bits are set for result elements found to be undef.
/// \param KnownZero    Bits are set for result elements found to be zero.
///                     NOTE(review): both masks are only ever set here, never
///                     resized or cleared - presumably the caller passes them
///                     zeroed and NumElts wide; confirm against the caller.
/// \param TLO          Receives any replacement through CombineTo.
/// \param Depth        Current recursion depth; operands are queried at
///                     Depth + 1.
/// \returns true if an operand was simplified or \p Op was replaced (the
///          result of the nested SimplifyDemandedVectorElts / CombineTo call),
///          false otherwise.
bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
    SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
    TargetLoweringOpt &TLO, unsigned Depth) const {
  int NumElts = DemandedElts.getBitWidth();
  unsigned Opc = Op.getOpcode();
  EVT VT = Op.getValueType();

  // Handle special case opcodes.
  switch (Opc) {
  case X86ISD::VBROADCAST: {
    SDValue Src = Op.getOperand(0);
    MVT SrcVT = Src.getSimpleValueType();
    // A non-vector broadcast source has no elements to narrow.
    if (!SrcVT.isVector())
      return false;
    // Only element 0 of a vector source is requested from the operand.
    APInt SrcUndef, SrcZero;
    APInt SrcElts = APInt::getOneBitSet(SrcVT.getVectorNumElements(), 0);
    if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
                                   Depth + 1))
      return true;
    break;
  }
  }

  // Simplify target shuffles.
  if (!isTargetShuffle(Opc))
    return false;

  // Get target shuffle mask.
  SmallVector<int, 64> OpMask;
  SmallVector<SDValue, 2> OpInputs;
  if (!resolveTargetShuffleInputs(Op, OpInputs, OpMask, TLO.DAG))
    return false;

  // Everything below indexes OpMask by result element (0 .. NumElts-1) and
  // splits mask values into per-source ranges of NumElts entries. Bail out if
  // the decoded mask uses a different element count, rather than reading or
  // writing past the end of OpMask or misattributing elements to sources.
  if (OpMask.size() != static_cast<size_t>(NumElts))
    return false;

  // Shuffle inputs must be the same type as the result.
  if (llvm::any_of(OpInputs,
                   [VT](SDValue V) { return VT != V.getValueType(); }))
    return false;

  // Attempt to simplify inputs.
  int NumSrcs = OpInputs.size();
  for (int Src = 0; Src != NumSrcs; ++Src) {
    // Mask values in [Lo, Lo + NumElts) refer to elements of input #Src.
    int Lo = Src * NumElts;
    APInt SrcElts = APInt::getNullValue(NumElts);
    for (int i = 0; i != NumElts; ++i)
      if (DemandedElts[i]) {
        // Sentinel (negative) entries and other sources fall outside the
        // range test and demand nothing from this input.
        int M = OpMask[i] - Lo;
        if (0 <= M && M < NumElts)
          SrcElts.setBit(M);
      }

    APInt SrcUndef, SrcZero;
    if (SimplifyDemandedVectorElts(OpInputs[Src], SrcElts, SrcUndef, SrcZero,
                                   TLO, Depth + 1))
      return true;
  }

  // Check if shuffle mask can be simplified to undef/zero/identity.
  // Elements nobody demands may be treated as undef for this purpose.
  for (int i = 0; i != NumElts; ++i)
    if (!DemandedElts[i])
      OpMask[i] = SM_SentinelUndef;

  if (isUndefInRange(OpMask, 0, NumElts)) {
    KnownUndef.setAllBits();
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  }
  if (isUndefOrZeroInRange(OpMask, 0, NumElts)) {
    KnownZero.setAllBits();
    return TLO.CombineTo(
        Op, getZeroVector(VT.getSimpleVT(), Subtarget, TLO.DAG, SDLoc(Op)));
  }
  // An in-order (or undef) mask over a single input is just that input.
  for (int Src = 0; Src != NumSrcs; ++Src)
    if (isSequentialOrUndefInRange(OpMask, 0, NumElts, Src * NumElts))
      return TLO.CombineTo(Op, OpInputs[Src]);

  // Extract known zero/undef elements.
  // TODO - Propagate input undef/zero elts.
  for (int i = 0; i != NumElts; ++i) {
    if (OpMask[i] == SM_SentinelUndef)
      KnownUndef.setBit(i);
    if (OpMask[i] == SM_SentinelZero)
      KnownZero.setBit(i);
  }

  return false;
}

/// Check if a vector extract from a target-specific shuffle of a load can be
/// folded into a single element load.
/// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.h
Expand Up @@ -868,6 +868,13 @@ namespace llvm {
const SelectionDAG &DAG,
unsigned Depth) const override;

/// Override of the demanded-vector-elements hook for target nodes.
/// Given the demanded elements of \p Op, attempts to simplify it through
/// \p TLO and sets bits in \p KnownUndef / \p KnownZero for elements found
/// to be undef / zero. Returns true if a simplification was made.
bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
const APInt &DemandedElts,
APInt &KnownUndef,
APInt &KnownZero,
TargetLoweringOpt &TLO,
unsigned Depth) const override;

SDValue unwrapAddress(SDValue N) const override;

bool isGAPlusOffset(SDNode *N, const GlobalValue* &GA,
Expand Down
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll
Expand Up @@ -6,9 +6,8 @@ define void @endless_loop() {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vmovaps (%eax), %ymm0
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1]
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
Expand Down

0 comments on commit 8191d63

Please sign in to comment.