Skip to content

Commit

Permalink
[X86] Add DAG combine to turn (vzext_movl (vbroadcast_load)) -> vzext…
Browse files Browse the repository at this point in the history
…_load.

If we're zeroing the other elements then we don't need the broadcast.
  • Loading branch information
topperc committed Mar 8, 2020
1 parent 0bec7e4 commit 70e4fb8
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 6 deletions.
25 changes: 23 additions & 2 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -35965,9 +35965,30 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
VT.getVectorElementType(),
LN->getPointerInfo(),
LN->getAlignment(),
MachineMemOperand::MOLoad);
LN->getMemOperand()->getFlags());
DCI.CombineTo(N, VZLoad);
DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
DCI.recursivelyDeleteUnusedNodes(LN);
return SDValue(N, 0);
}
}

// If this is a VZEXT_MOVL of a VBROADCAST_LOAD, we don't need the broadcast
// and can just use a VZEXT_LOAD.
// FIXME: Is there some way to do this with SimplifyDemandedVectorElts?
if (N->getOpcode() == X86ISD::VZEXT_MOVL && N->getOperand(0).hasOneUse() &&
N->getOperand(0).getOpcode() == X86ISD::VBROADCAST_LOAD) {
auto *LN = cast<MemSDNode>(N->getOperand(0));
if (VT.getScalarSizeInBits() == LN->getMemoryVT().getSizeInBits()) {
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
SDValue Ops[] = { LN->getChain(), LN->getBasePtr() };
SDValue VZLoad =
DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
LN->getMemoryVT(), LN->getMemOperand());
DCI.CombineTo(N, VZLoad);
DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
return VZLoad;
DCI.recursivelyDeleteUnusedNodes(LN);
return SDValue(N, 0);
}
}

Expand Down
7 changes: 3 additions & 4 deletions llvm/test/CodeGen/X86/vector-extend-inreg.ll
Expand Up @@ -71,17 +71,16 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) noun
; X32-AVX-NEXT: andl $-128, %esp
; X32-AVX-NEXT: subl $384, %esp # imm = 0x180
; X32-AVX-NEXT: movl 40(%ebp), %ecx
; X32-AVX-NEXT: vpbroadcastq 32(%ebp), %ymm0
; X32-AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; X32-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
; X32-AVX-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%esp)
; X32-AVX-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
; X32-AVX-NEXT: vmovaps %ymm1, {{[0-9]+}}(%esp)
; X32-AVX-NEXT: vmovaps %ymm1, (%esp)
; X32-AVX-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%esp)
; X32-AVX-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
; X32-AVX-NEXT: leal (%ecx,%ecx), %eax
; X32-AVX-NEXT: andl $31, %eax
; X32-AVX-NEXT: movl 128(%esp,%eax,4), %eax
Expand Down

0 comments on commit 70e4fb8

Please sign in to comment.