diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1806e633e989e..646241a50fca1 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -17591,6 +17591,9 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef Mask, Zeroable, Subtarget, DAG)) return Op; + bool V1IsInPlace = isShuffleMaskInputInPlace(0, Mask); + bool V2IsInPlace = isShuffleMaskInputInPlace(1, Mask); + // If we have lane crossing shuffles AND they don't all come from the lower // lane elements, lower to SHUFPD(VPERM2F128(V1, V2), VPERM2F128(V1, V2)). // TODO: Handle BUILD_VECTOR sources which getVectorShuffle currently @@ -17605,7 +17608,7 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef Mask, // If we have one input in place, then we can permute the other input and // blend the result. - if (isShuffleMaskInputInPlace(0, Mask) || isShuffleMaskInputInPlace(1, Mask)) + if (V1IsInPlace || V2IsInPlace) return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG); @@ -17619,8 +17622,7 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef Mask, // shuffle. However, if we have AVX2 and either inputs are already in place, // we will be able to shuffle even across lanes the other input in a single // instruction so skip this pattern. - if (!(Subtarget.hasAVX2() && (isShuffleMaskInputInPlace(0, Mask) || - isShuffleMaskInputInPlace(1, Mask)))) + if (!(Subtarget.hasAVX2() && (V1IsInPlace || V2IsInPlace))) if (SDValue V = lowerShuffleAsLanePermuteAndRepeatedMask( DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG)) return V; @@ -17713,9 +17715,12 @@ static SDValue lowerV4I64Shuffle(const SDLoc &DL, ArrayRef Mask, if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4i64, Mask, V1, V2, DAG)) return V; + bool V1IsInPlace = isShuffleMaskInputInPlace(0, Mask); + bool V2IsInPlace = isShuffleMaskInputInPlace(1, Mask); + // If we have one input in place, then we can permute the other input and // blend the result. - if (isShuffleMaskInputInPlace(0, Mask) || isShuffleMaskInputInPlace(1, Mask)) + if (V1IsInPlace || V2IsInPlace) return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG); @@ -17734,8 +17739,7 @@ static SDValue lowerV4I64Shuffle(const SDLoc &DL, ArrayRef Mask, // shuffle. However, if we have AVX2 and either inputs are already in place, // we will be able to shuffle even across lanes the other input in a single // instruction so skip this pattern. - if (!isShuffleMaskInputInPlace(0, Mask) && - !isShuffleMaskInputInPlace(1, Mask)) + if (!V1IsInPlace && !V2IsInPlace) if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask( DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG)) return Result;