Skip to content

Commit

Permalink
[ARM] Extract from a VDUP
Browse files Browse the repository at this point in the history
If we are extracting an element from a VDUP, the extracted value is just
the VDUP's original scalar operand, so long as the types match or we can
bitcast between the two.

Differential Revision: https://reviews.llvm.org/D78708
  • Loading branch information
davemgreen committed May 6, 2020
1 parent 3197873 commit a349949
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 78 deletions.
25 changes: 25 additions & 0 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
Expand Up @@ -934,6 +934,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::BUILD_VECTOR);
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::ZERO_EXTEND);
Expand Down Expand Up @@ -13313,6 +13314,29 @@ static SDValue PerformInsertEltCombine(SDNode *N,
return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
}

/// PerformExtractEltCombine - Target-specific dag combine xforms for
/// ISD::EXTRACT_VECTOR_ELT.
///
/// Folds "extract (vdup x)" down to x: every lane of a VDUP holds the same
/// scalar, so the extracted element is that scalar whichever lane is read.
/// The fold is only done when x can be produced in the result type of the
/// extract:
///   * i32 source, f16 result -> ARMISD::VMOVhr node
///   * f16 source, i32 result -> ARMISD::VMOVrh node
///   * otherwise, x itself or a value reached by peeling ISD::BITCAST nodes
///     off x, provided its type equals the result type.
///
/// \param N   The EXTRACT_VECTOR_ELT node being combined.
/// \param DCI Combiner state; only its SelectionDAG is used, to create nodes.
/// \returns The replacement value, or an empty SDValue if nothing was folded.
static SDValue PerformExtractEltCombine(SDNode *N,
                                        TargetLowering::DAGCombinerInfo &DCI) {
  SDValue Vec = N->getOperand(0);

  // Only extracts that read directly from a VDUP are handled here.
  if (Vec->getOpcode() != ARMISD::VDUP)
    return SDValue();

  EVT ResultVT = N->getValueType(0);
  SDValue Scalar = Vec->getOperand(0);
  EVT ScalarVT = Scalar.getValueType();

  // An f16/i32 mismatch between the duplicated scalar and the result is
  // bridged with the dedicated ARM move nodes.
  if (ResultVT == MVT::f16 && ScalarVT == MVT::i32)
    return DCI.DAG.getNode(ARMISD::VMOVhr, SDLoc(N), ResultVT, Scalar);
  if (ResultVT == MVT::i32 && ScalarVT == MVT::f16)
    return DCI.DAG.getNode(ARMISD::VMOVrh, SDLoc(N), ResultVT, Scalar);

  // Otherwise look back through bitcasts until a value of the result type is
  // found; stop without folding as soon as a non-bitcast node of a different
  // type is reached.
  while (Scalar.getValueType() != ResultVT) {
    if (Scalar->getOpcode() != ISD::BITCAST)
      return SDValue();
    Scalar = Scalar->getOperand(0);
  }
  return Scalar;
}

/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
/// ISD::VECTOR_SHUFFLE.
static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
Expand Down Expand Up @@ -15301,6 +15325,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::STORE: return PerformSTORECombine(N, DCI, Subtarget);
case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
case ISD::EXTRACT_VECTOR_ELT: return PerformExtractEltCombine(N, DCI);
case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
case ARMISD::VDUP: return PerformVDUPCombine(N, DCI, Subtarget);
Expand Down
142 changes: 69 additions & 73 deletions llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
Expand Up @@ -5174,116 +5174,112 @@ entry:
define arm_aapcs_vfpcc <8 x half> @vcmp_oeq_v8f16_bc(<8 x half> %src, half* %src2p, <8 x half> %a, <8 x half> %b) {
; CHECK-MVE-LABEL: vcmp_oeq_v8f16_bc:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-MVE-NEXT: vpush {d8, d9, d10, d11}
; CHECK-MVE-NEXT: ldrh r0, [r0]
; CHECK-MVE-NEXT: .vsave {d8, d9, d10}
; CHECK-MVE-NEXT: vpush {d8, d9, d10}
; CHECK-MVE-NEXT: ldrh r1, [r0]
; CHECK-MVE-NEXT: vmovx.f16 s12, s0
; CHECK-MVE-NEXT: vmovx.f16 s14, s8
; CHECK-MVE-NEXT: movs r2, #0
; CHECK-MVE-NEXT: movs r1, #0
; CHECK-MVE-NEXT: vdup.16 q4, r0
; CHECK-MVE-NEXT: vmovx.f16 s20, s9
; CHECK-MVE-NEXT: movs r0, #0
; CHECK-MVE-NEXT: vmovx.f16 s14, s16
; CHECK-MVE-NEXT: vmovx.f16 s22, s17
; CHECK-MVE-NEXT: vcmp.f16 s12, s14
; CHECK-MVE-NEXT: vmov.f16 s16, r1
; CHECK-MVE-NEXT: movs r1, #0
; CHECK-MVE-NEXT: vcmp.f16 s12, s16
; CHECK-MVE-NEXT: vmovx.f16 s12, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: moveq r1, #1
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vcmp.f16 s0, s16
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s14, s8
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: cset r1, ne
; CHECK-MVE-NEXT: vmovx.f16 s0, s3
; CHECK-MVE-NEXT: lsls r1, r1, #31
; CHECK-MVE-NEXT: vseleq.f16 s12, s14, s12
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r2, #1
; CHECK-MVE-NEXT: cmp r2, #0
; CHECK-MVE-NEXT: cset r2, ne
; CHECK-MVE-NEXT: vmov r0, s12
; CHECK-MVE-NEXT: vmov r1, s12
; CHECK-MVE-NEXT: lsls r2, r2, #31
; CHECK-MVE-NEXT: vcmp.f16 s1, s17
; CHECK-MVE-NEXT: vcmp.f16 s1, s16
; CHECK-MVE-NEXT: vseleq.f16 s12, s8, s4
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r2, s12
; CHECK-MVE-NEXT: vmov.16 q3[0], r2
; CHECK-MVE-NEXT: vmov.16 q3[1], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s20, s9, s5
; CHECK-MVE-NEXT: vmov r0, s20
; CHECK-MVE-NEXT: vmovx.f16 s20, s1
; CHECK-MVE-NEXT: vcmp.f16 s20, s22
; CHECK-MVE-NEXT: vmov.16 q3[2], r0
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: vmov.16 q3[1], r1
; CHECK-MVE-NEXT: mov.w r1, #0
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s20, s5
; CHECK-MVE-NEXT: vmovx.f16 s22, s9
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20
; CHECK-MVE-NEXT: vcmp.f16 s2, s18
; CHECK-MVE-NEXT: vmov r0, s20
; CHECK-MVE-NEXT: moveq r1, #1
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: cset r1, ne
; CHECK-MVE-NEXT: lsls r1, r1, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s9, s5
; CHECK-MVE-NEXT: vmov r1, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s1
; CHECK-MVE-NEXT: vcmp.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[2], r1
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov.16 q3[3], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: mov.w r1, #0
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s22, s18
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s20, s10, s6
; CHECK-MVE-NEXT: vmov r0, s20
; CHECK-MVE-NEXT: vmovx.f16 s20, s2
; CHECK-MVE-NEXT: vcmp.f16 s20, s22
; CHECK-MVE-NEXT: vmov.16 q3[4], r0
; CHECK-MVE-NEXT: moveq r1, #1
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: cset r1, ne
; CHECK-MVE-NEXT: vmovx.f16 s18, s5
; CHECK-MVE-NEXT: lsls r1, r1, #31
; CHECK-MVE-NEXT: vcmp.f16 s2, s16
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: vmov r1, s18
; CHECK-MVE-NEXT: vmovx.f16 s20, s10
; CHECK-MVE-NEXT: vmov.16 q3[3], r1
; CHECK-MVE-NEXT: mov.w r1, #0
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s20, s6
; CHECK-MVE-NEXT: vmovx.f16 s22, s10
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s20, s22, s20
; CHECK-MVE-NEXT: vcmp.f16 s3, s19
; CHECK-MVE-NEXT: vmov r0, s20
; CHECK-MVE-NEXT: moveq r1, #1
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: cset r1, ne
; CHECK-MVE-NEXT: lsls r1, r1, #31
; CHECK-MVE-NEXT: vseleq.f16 s18, s10, s6
; CHECK-MVE-NEXT: vmov r1, s18
; CHECK-MVE-NEXT: vmovx.f16 s18, s2
; CHECK-MVE-NEXT: vcmp.f16 s18, s16
; CHECK-MVE-NEXT: vmov.16 q3[4], r1
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov.16 q3[5], r0
; CHECK-MVE-NEXT: mov.w r0, #0
; CHECK-MVE-NEXT: mov.w r1, #0
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmovx.f16 s2, s19
; CHECK-MVE-NEXT: vcmp.f16 s0, s2
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vseleq.f16 s20, s11, s7
; CHECK-MVE-NEXT: moveq r1, #1
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: cset r1, ne
; CHECK-MVE-NEXT: vmovx.f16 s18, s6
; CHECK-MVE-NEXT: lsls r1, r1, #31
; CHECK-MVE-NEXT: vcmp.f16 s3, s16
; CHECK-MVE-NEXT: vseleq.f16 s18, s20, s18
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: vmov r1, s18
; CHECK-MVE-NEXT: vcmp.f16 s0, s16
; CHECK-MVE-NEXT: vmov.16 q3[5], r1
; CHECK-MVE-NEXT: mov.w r1, #0
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r1, #1
; CHECK-MVE-NEXT: vmov r0, s20
; CHECK-MVE-NEXT: cmp r1, #0
; CHECK-MVE-NEXT: vmov.16 q3[6], r0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: cset r1, ne
; CHECK-MVE-NEXT: vmovx.f16 s0, s7
; CHECK-MVE-NEXT: lsls r1, r1, #31
; CHECK-MVE-NEXT: vmovx.f16 s2, s11
; CHECK-MVE-NEXT: vseleq.f16 s18, s11, s7
; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-MVE-NEXT: it eq
; CHECK-MVE-NEXT: moveq r0, #1
; CHECK-MVE-NEXT: cmp r0, #0
; CHECK-MVE-NEXT: cset r0, ne
; CHECK-MVE-NEXT: vmov r1, s18
; CHECK-MVE-NEXT: lsls r0, r0, #31
; CHECK-MVE-NEXT: vmov.16 q3[6], r1
; CHECK-MVE-NEXT: vseleq.f16 s0, s2, s0
; CHECK-MVE-NEXT: vmov r0, s0
; CHECK-MVE-NEXT: vmov.16 q3[7], r0
; CHECK-MVE-NEXT: vmov q0, q3
; CHECK-MVE-NEXT: vpop {d8, d9, d10, d11}
; CHECK-MVE-NEXT: vpop {d8, d9, d10}
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: vcmp_oeq_v8f16_bc:
Expand Down
7 changes: 2 additions & 5 deletions llvm/test/CodeGen/Thumb2/mve-vdup.ll
Expand Up @@ -240,9 +240,6 @@ entry:
define arm_aapcs_vfpcc float @vdup_f32_extract(float %src) {
; CHECK-LABEL: vdup_f32_extract:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: vdup.32 q0, r0
; CHECK-NEXT: vmov.f32 s0, s2
; CHECK-NEXT: bx lr
entry:
%srcbc = bitcast float %src to i32
Expand All @@ -260,8 +257,8 @@ define arm_aapcs_vfpcc half @vdup_f16_extract(half* %src1, half* %src2) {
; CHECK-NEXT: vldr.16 s2, [r1]
; CHECK-NEXT: vadd.f16 s0, s2, s0
; CHECK-NEXT: vmov.f16 r1, s0
; CHECK-NEXT: vdup.16 q0, r1
; CHECK-NEXT: vstr.16 s1, [r0]
; CHECK-NEXT: vmov.f16 s0, r1
; CHECK-NEXT: vstr.16 s0, [r0]
; CHECK-NEXT: bx lr
entry:
%0 = load half, half *%src1, align 2
Expand Down

0 comments on commit a349949

Please sign in to comment.