Skip to content

Commit

Permalink
[X86][AVX] Add lowerVectorShuffleAsLanePermuteAndPermute for v4f64 sh…
Browse files Browse the repository at this point in the history
…uffles (PR39161)

Add shuffle lowering for the case where we can shuffle the lanes into place followed by an in-lane permute.

This is mainly for cases where we can have non-repeating permutes in each lane, but for now I've just enabled it for v4f64 unary shuffles to fix PR39161 - there is no test coverage for other shuffles that might benefit yet.

We now have several cross-lane shuffle lowering methods that all do something similar - I've looked at merging some of these (notably by making the repeated mask mechanism in lowerVectorShuffleByMerging128BitLanes optional), but there is a lot of assertions/assumptions in the way that makes this tricky - I ended up going for adding yet another relatively simple method instead.

Differential Revision: https://reviews.llvm.org/D53148

llvm-svn: 344446
  • Loading branch information
RKSimon committed Oct 13, 2018
1 parent af1f374 commit f395241
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 10 deletions.
60 changes: 60 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -13430,6 +13430,60 @@ static SDValue lowerVectorShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT,
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, DAG);
}

/// Lower a vector shuffle crossing multiple 128-bit lanes as
/// a lane permutation followed by a per-lane permutation.
///
/// This is mainly for cases where we can have non-repeating permutes
/// in each lane.
///
/// TODO: This is very similar to lowerVectorShuffleByMerging128BitLanes,
/// we should investigate merging them.
static SDValue lowerVectorShuffleAsLanePermuteAndPermute(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
SelectionDAG &DAG, const X86Subtarget &Subtarget) {
int NumElts = VT.getVectorNumElements();
int NumLanes = VT.getSizeInBits() / 128;
int NumEltsPerLane = NumElts / NumLanes;

SmallVector<int, 4> SrcLaneMask(NumLanes, SM_SentinelUndef);
SmallVector<int, 16> LaneMask(NumElts, SM_SentinelUndef);
SmallVector<int, 16> PermMask(NumElts, SM_SentinelUndef);

for (int i = 0; i != NumElts; ++i) {
int M = Mask[i];
if (M < 0)
continue;

// Ensure that each lane comes from a single source lane.
int SrcLane = M / NumEltsPerLane;
int DstLane = i / NumEltsPerLane;
if (!isUndefOrEqual(SrcLaneMask[DstLane], SrcLane))
return SDValue();
SrcLaneMask[DstLane] = SrcLane;

LaneMask[i] = (SrcLane * NumEltsPerLane) + (i % NumEltsPerLane);
PermMask[i] = (DstLane * NumEltsPerLane) + (M % NumEltsPerLane);
}

// If we're only shuffling a single lowest lane and the rest are identity
// then don't bother.
// TODO - isShuffleMaskInputInPlace could be extended to something like this.
int NumIdentityLanes = 0;
bool OnlyShuffleLowestLane = true;
for (int i = 0; i != NumLanes; ++i) {
if (isSequentialOrUndefInRange(PermMask, i * NumEltsPerLane, NumEltsPerLane,
i * NumEltsPerLane))
NumIdentityLanes++;
else if (SrcLaneMask[i] != 0 && SrcLaneMask[i] != NumLanes)
OnlyShuffleLowestLane = false;
}
if (OnlyShuffleLowestLane && NumIdentityLanes == (NumLanes - 1))
return SDValue();

SDValue LanePermute = DAG.getVectorShuffle(VT, DL, V1, V2, LaneMask);
return DAG.getVectorShuffle(VT, DL, LanePermute, DAG.getUNDEF(VT), PermMask);
}

/// Lower a vector shuffle crossing multiple 128-bit lanes as
/// a permutation and blend of those lanes.
///
Expand Down Expand Up @@ -14166,6 +14220,11 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
return V;

// Try to permute the lanes and then use a per-lane permute.
if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute(
DL, MVT::v4f64, V1, V2, Mask, DAG, Subtarget))
return V;

// Otherwise, fall back.
return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v4f64, V1, V2, Mask,
DAG, Subtarget);
Expand Down Expand Up @@ -14200,6 +14259,7 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
return Result;

// If we have VLX support, we can use VEXPAND.
if (Subtarget.hasVLX())
if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v4f64, Zeroable, Mask,
Expand Down
16 changes: 6 additions & 10 deletions llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
Expand Up @@ -91,9 +91,8 @@ define <4 x double> @shuffle_v4f64_0300(<4 x double> %a, <4 x double> %b) {
define <4 x double> @shuffle_v4f64_1000(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_1000:
; AVX1: # %bb.0:
; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_1000:
Expand Down Expand Up @@ -174,10 +173,8 @@ define <4 x double> @shuffle_v4f64_2222_bc(<4 x i64> %a, <4 x i64> %b) {
define <4 x double> @shuffle_v4f64_2233(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_2233:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,3,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_2233:
Expand Down Expand Up @@ -766,9 +763,8 @@ define <4 x i64> @shuffle_v4i64_0300(<4 x i64> %a, <4 x i64> %b) {
define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1000:
; AVX1: # %bb.0:
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_1000:
Expand Down

0 comments on commit f395241

Please sign in to comment.