Skip to content

Commit

Permalink
[X86] combineVectorHADDSUB - remove the broken HOP(x,x) merging code …
Browse files Browse the repository at this point in the history
…(PR51974)

The intent of this code turns out to be superfluous, as we can handle this with shuffle combining, and it has a critical flaw in that it doesn't check for dependencies.

Fixes PR51974

(cherry picked from commit 468ff70)
  • Loading branch information
RKSimon authored and tstellar committed Jan 20, 2022
1 parent 6349865 commit 75e33f7
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 23 deletions.
23 changes: 0 additions & 23 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -44076,32 +44076,9 @@ static SDValue combineVectorHADDSUB(SDNode *N, SelectionDAG &DAG,
"Unexpected horizontal add/sub opcode");

if (!shouldUseHorizontalOp(true, DAG, Subtarget)) {
// For slow-hop targets, if we have a hop with a single op, see if we already
// have another user that we can reuse and shuffle the result.
MVT VT = N->getSimpleValueType(0);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
if (VT.is128BitVector() && LHS == RHS) {
for (SDNode *User : LHS->uses()) {
if (User != N && User->getOpcode() == N->getOpcode()) {
MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f32 : MVT::v4i32;
if (User->getOperand(0) == LHS && !User->getOperand(1).isUndef()) {
return DAG.getBitcast(
VT,
DAG.getVectorShuffle(ShufVT, SDLoc(N),
DAG.getBitcast(ShufVT, SDValue(User, 0)),
DAG.getUNDEF(ShufVT), {0, 1, 0, 1}));
}
if (User->getOperand(1) == LHS && !User->getOperand(0).isUndef()) {
return DAG.getBitcast(
VT,
DAG.getVectorShuffle(ShufVT, SDLoc(N),
DAG.getBitcast(ShufVT, SDValue(User, 0)),
DAG.getUNDEF(ShufVT), {2, 3, 2, 3}));
}
}
}
}

// HOP(HOP'(X,X),HOP'(Y,Y)) -> HOP(PERMUTE(HOP'(X,Y)),PERMUTE(HOP'(X,Y)).
if (LHS != RHS && LHS.getOpcode() == N->getOpcode() &&
Expand Down
19 changes: 19 additions & 0 deletions llvm/test/CodeGen/X86/horizontal-shuffle-2.ll
Expand Up @@ -171,6 +171,25 @@ define <4 x float> @test_unpacklo_hadd_v4f32_unary(<4 x float> %0) {
ret <4 x float> %3
}

; Regression test for PR51974.
; %r0 is a horizontal add of %a0 with itself, and %r1 is a horizontal add of
; %r0 with %a0, so the second call depends on the first while both use %a0.
; The checks below expect both horizontal adds to be emitted.
define <8 x i16> @PR51974(<8 x i16> %a0) {
; SSE-LABEL: PR51974:
; SSE: ## %bb.0:
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: phaddw %xmm0, %xmm1
; SSE-NEXT: phaddw %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: PR51974:
; AVX: ## %bb.0:
; AVX-NEXT: vphaddw %xmm0, %xmm0, %xmm1
; AVX-NEXT: vphaddw %xmm0, %xmm1, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%r0 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a0)
%r1 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %r0, <8 x i16> %a0)
ret <8 x i16> %r1
}

declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>)
declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>)
Expand Down

0 comments on commit 75e33f7

Please sign in to comment.