Skip to content

Commit

Permalink
[ARM] MVE vector for 64bit types
Browse files Browse the repository at this point in the history
We need to make sure that we are sensibly dealing with vectors of types v2i64
and v2f64, even if most of the time we cannot generate native operations for
them. This mostly adds a lot of testing, plus fixes up a couple of the issues
found. AND, OR and XOR can be legal for v2i64, and the shift combine needs a
slight fixup.

Differential Revision: https://reviews.llvm.org/D64316

llvm-svn: 366106
  • Loading branch information
davemgreen committed Jul 15, 2019
1 parent 4885978 commit dc56995
Show file tree
Hide file tree
Showing 19 changed files with 1,875 additions and 40 deletions.
6 changes: 6 additions & 0 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
Expand Up @@ -320,6 +320,10 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
}
// We can do bitwise operations on v2i64 vectors
setOperationAction(ISD::AND, MVT::v2i64, Legal);
setOperationAction(ISD::OR, MVT::v2i64, Legal);
setOperationAction(ISD::XOR, MVT::v2i64, Legal);

// It is legal to extload from v4i8 to v4i16 or v4i32.
addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
Expand Down Expand Up @@ -12855,6 +12859,8 @@ static SDValue PerformShiftCombine(SDNode *N,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!VT.isVector() || !TLI.isTypeLegal(VT))
return SDValue();
if (ST->hasMVEIntegerOps() && VT == MVT::v2i64)
return SDValue();

int64_t Cnt;

Expand Down
14 changes: 13 additions & 1 deletion llvm/lib/Target/ARM/ARMInstrMVE.td
Expand Up @@ -1035,6 +1035,8 @@ let Predicates = [HasMVEInt] in {
(v8i16 (MVE_VMVN (v8i16 MQPR:$val1)))>;
def : Pat<(v4i32 (vnotq (v4i32 MQPR:$val1))),
(v4i32 (MVE_VMVN (v4i32 MQPR:$val1)))>;
def : Pat<(v2i64 (vnotq (v2i64 MQPR:$val1))),
(v2i64 (MVE_VMVN (v2i64 MQPR:$val1)))>;
}

class MVE_bit_ops<string iname, bits<2> bit_21_20, bit bit_28>
Expand Down Expand Up @@ -1081,34 +1083,44 @@ let Predicates = [HasMVEInt] in {
(v8i16 (MVE_VAND (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
(v4i32 (MVE_VAND (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
def : Pat<(v2i64 (and (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))),
(v2i64 (MVE_VAND (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;

def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
(v16i8 (MVE_VORR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
def : Pat<(v8i16 (or (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
(v8i16 (MVE_VORR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
(v4i32 (MVE_VORR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
def : Pat<(v2i64 (or (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))),
(v2i64 (MVE_VORR (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;

def : Pat<(v16i8 (xor (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
(v16i8 (MVE_VEOR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
def : Pat<(v8i16 (xor (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
(v8i16 (MVE_VEOR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
def : Pat<(v4i32 (xor (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
(v4i32 (MVE_VEOR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
def : Pat<(v2i64 (xor (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))),
(v2i64 (MVE_VEOR (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;

def : Pat<(v16i8 (and (v16i8 MQPR:$val1), (vnotq MQPR:$val2))),
(v16i8 (MVE_VBIC (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
def : Pat<(v8i16 (and (v8i16 MQPR:$val1), (vnotq MQPR:$val2))),
(v8i16 (MVE_VBIC (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (vnotq MQPR:$val2))),
(v4i32 (MVE_VBIC (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
def : Pat<(v2i64 (and (v2i64 MQPR:$val1), (vnotq MQPR:$val2))),
(v2i64 (MVE_VBIC (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;

def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (vnotq (v16i8 MQPR:$val2)))),
def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (vnotq MQPR:$val2))),
(v16i8 (MVE_VORN (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
def : Pat<(v8i16 (or (v8i16 MQPR:$val1), (vnotq MQPR:$val2))),
(v8i16 (MVE_VORN (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (vnotq MQPR:$val2))),
(v4i32 (MVE_VORN (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
def : Pat<(v2i64 (or (v2i64 MQPR:$val1), (vnotq MQPR:$val2))),
(v2i64 (MVE_VORN (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>;
}

class MVE_bit_cmode<string iname, string suffix, bits<4> cmode, dag inOps>
Expand Down
47 changes: 47 additions & 0 deletions llvm/test/CodeGen/Thumb2/mve-abs.ll
Expand Up @@ -36,3 +36,50 @@ entry:
%2 = select <4 x i1> %0, <4 x i32> %1, <4 x i32> %s1
ret <4 x i32> %2
}

; Absolute value of a <2 x i64> vector, written as select(x < 0, 0 - x, x).
; The expected output below contains no vector arithmetic: each lane is moved
; into core registers with vmov, negated with an rsbs/sbc pair, chosen with
; IT-predicated moves, and the result is rebuilt one 32-bit lane at a time
; with vmov.32.
define arm_aapcs_vfpcc <2 x i64> @abs_v2i64(<2 x i64> %s1) {
; CHECK-LABEL: abs_v2i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
; CHECK-NEXT: vmov r12, s2
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: vmov r3, s3
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: rsbs.w lr, r12, #0
; CHECK-NEXT: sbc.w r5, r0, r3
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: mov r2, lr
; CHECK-NEXT: lsrl r2, r5, #32
; CHECK-NEXT: mov.w r5, #0
; CHECK-NEXT: it mi
; CHECK-NEXT: movmi r5, #1
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: it eq
; CHECK-NEXT: moveq r2, r3
; CHECK-NEXT: vmov r3, s1
; CHECK-NEXT: rsbs r4, r1, #0
; CHECK-NEXT: mov r6, r4
; CHECK-NEXT: sbc.w r7, r0, r3
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: lsrl r6, r7, #32
; CHECK-NEXT: it mi
; CHECK-NEXT: movmi r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: ite eq
; CHECK-NEXT: moveq r6, r3
; CHECK-NEXT: movne r1, r4
; CHECK-NEXT: vmov.32 q0[0], r1
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: vmov.32 q0[1], r6
; CHECK-NEXT: it eq
; CHECK-NEXT: moveq lr, r12
; CHECK-NEXT: vmov.32 q0[2], lr
; CHECK-NEXT: vmov.32 q0[3], r2
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
entry:
%0 = icmp slt <2 x i64> %s1, zeroinitializer
%1 = sub nsw <2 x i64> zeroinitializer, %s1
%2 = select <2 x i1> %0, <2 x i64> %1, <2 x i64> %s1
ret <2 x i64> %2
}
65 changes: 65 additions & 0 deletions llvm/test/CodeGen/Thumb2/mve-bitarith.ll
Expand Up @@ -31,6 +31,16 @@ entry:
ret <4 x i32> %0
}

; Bitwise AND of two <2 x i64> vectors. The expected output is a single
; full-width vand on the q registers followed by the return.
define arm_aapcs_vfpcc <2 x i64> @and_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: and_int64_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = and <2 x i64> %src1, %src2
ret <2 x i64> %0
}


define arm_aapcs_vfpcc <16 x i8> @or_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: or_int8_t:
Expand Down Expand Up @@ -62,6 +72,16 @@ entry:
ret <4 x i32> %0
}

; Bitwise OR of two <2 x i64> vectors. The expected output is a single
; full-width vorr on the q registers followed by the return.
define arm_aapcs_vfpcc <2 x i64> @or_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: or_int64_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vorr q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = or <2 x i64> %src1, %src2
ret <2 x i64> %0
}


define arm_aapcs_vfpcc <16 x i8> @xor_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: xor_int8_t:
Expand Down Expand Up @@ -93,6 +113,16 @@ entry:
ret <4 x i32> %0
}

; Bitwise XOR of two <2 x i64> vectors. The expected output is a single
; full-width veor on the q registers followed by the return.
define arm_aapcs_vfpcc <2 x i64> @xor_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: xor_int64_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: veor q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = xor <2 x i64> %src1, %src2
ret <2 x i64> %0
}

define arm_aapcs_vfpcc <16 x i8> @v_mvn_i8(<16 x i8> %src) {
; CHECK-LABEL: v_mvn_i8:
; CHECK: @ %bb.0: @ %entry
Expand Down Expand Up @@ -123,6 +153,17 @@ entry:
ret <4 x i32> %0
}

; Bitwise NOT of a <2 x i64> vector, expressed as xor with an all-ones
; constant. The expected output is a single vmvn.
define arm_aapcs_vfpcc <2 x i64> @v_mvn_i64(<2 x i64> %src) {
; CHECK-LABEL: v_mvn_i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmvn q0, q0
; CHECK-NEXT: bx lr
entry:
%0 = xor <2 x i64> %src, <i64 -1, i64 -1>
ret <2 x i64> %0
}


define arm_aapcs_vfpcc <16 x i8> @v_bic_i8(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: v_bic_i8:
; CHECK: @ %bb.0: @ %entry
Expand Down Expand Up @@ -156,6 +197,18 @@ entry:
ret <4 x i32> %1
}

; Bit-clear on <2 x i64>: computes %src2 & ~%src1. The NOT (xor with all-ones)
; and the AND are expected to fold into a single vbic.
; NOTE(review): the operand order "q1, q0" presumably reflects %src1 arriving
; in q0 and %src2 in q1 -- confirm against the calling convention.
define arm_aapcs_vfpcc <2 x i64> @v_bic_i64(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: v_bic_i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vbic q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%0 = xor <2 x i64> %src1, <i64 -1, i64 -1>
%1 = and <2 x i64> %src2, %0
ret <2 x i64> %1
}


define arm_aapcs_vfpcc <16 x i8> @v_or_i8(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: v_or_i8:
; CHECK: @ %bb.0: @ %entry
Expand Down Expand Up @@ -188,3 +241,15 @@ entry:
%1 = or <4 x i32> %src2, %0
ret <4 x i32> %1
}

; OR-NOT on <2 x i64>: computes %src2 | ~%src1. The NOT (xor with all-ones)
; and the OR are expected to fold into a single vorn.
; NOTE(review): the operand order "q1, q0" presumably reflects %src1 arriving
; in q0 and %src2 in q1 -- confirm against the calling convention.
define arm_aapcs_vfpcc <2 x i64> @v_or_i64(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: v_or_i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vorn q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%0 = xor <2 x i64> %src1, <i64 -1, i64 -1>
%1 = or <2 x i64> %src2, %0
ret <2 x i64> %1
}

0 comments on commit dc56995

Please sign in to comment.