Skip to content

Commit

Permalink
[ARM] MVE bitwise instruction patterns
Browse files Browse the repository at this point in the history
This adds patterns for the simpler VAND, VORR and VEOR bitwise vector
instructions. It also adjusts the top16Zero PatLeaf to not match on vector
instructions, which can otherwise cause problems.

Code written by David Sherwood.

Differential Revision: https://reviews.llvm.org/D63867

llvm-svn: 365113
  • Loading branch information
davemgreen committed Jul 4, 2019
1 parent c1ead0e commit d2a9ec2
Show file tree
Hide file tree
Showing 5 changed files with 122 additions and 3 deletions.
23 changes: 23 additions & 0 deletions llvm/lib/Target/ARM/ARMInstrMVE.td
Expand Up @@ -1567,6 +1567,29 @@ foreach s=["s8", "s16", "s32", "u8", "u16", "u32", "i8", "i16", "i32", "f16", "f
(MVE_VAND MQPR:$QdSrc, MQPR:$QnSrc, MQPR:$QmSrc, vpred_r:$vp)>;
}

// Instruction-selection patterns for the plain (register-register) vector
// bitwise operations. They only apply when the HasMVEInt predicate holds.
// Each generic node (and / or / xor) on two MQPR operands is mapped to the
// corresponding MVE instruction. The same instruction is used for all three
// integer vector types (v16i8, v8i16, v4i32); only the type annotations on
// the pattern differ.
let Predicates = [HasMVEInt] in {
// and -> MVE_VAND
def : Pat<(v16i8 (and (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
(v16i8 (MVE_VAND (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
def : Pat<(v8i16 (and (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
(v8i16 (MVE_VAND (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
(v4i32 (MVE_VAND (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;

// or -> MVE_VORR
def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
(v16i8 (MVE_VORR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
def : Pat<(v8i16 (or (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
(v8i16 (MVE_VORR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
(v4i32 (MVE_VORR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;

// xor -> MVE_VEOR
def : Pat<(v16i8 (xor (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))),
(v16i8 (MVE_VEOR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>;
def : Pat<(v8i16 (xor (v8i16 MQPR:$val1), (v8i16 MQPR:$val2))),
(v8i16 (MVE_VEOR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>;
def : Pat<(v4i32 (xor (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))),
(v4i32 (MVE_VEOR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>;
}

class MVE_bit_cmode<string iname, string suffix, bits<4> cmode, dag inOps>
: MVE_p<(outs MQPR:$Qd), inOps, NoItinerary,
iname, suffix, "$Qd, $imm", vpred_n, "$Qd = $Qd_src"> {
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/ARM/ARMInstrThumb2.td
Expand Up @@ -2756,7 +2756,8 @@ def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm),

// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise
// The two-line form of the predicate first rejects vector-typed values, so
// the MaskedValueIsZero query (which builds a 32-bit-wide mask with the high
// 16 bits set) is only issued for non-vector values.
// NOTE(review): this hunk is rendered without +/- diff markers; the first
// single-line `return` appears to be the removed line and the two-line
// `return` its replacement (the hunk header reports 2 additions & 1
// deletion) - confirm against the actual file.
def top16Zero: PatLeaf<(i32 rGPR:$src), [{
return CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
return !SDValue(N,0)->getValueType(0).isVector() &&
CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
}]>;

// so_imm_notSext is needed instead of so_imm_not, as the value of imm
Expand Down
95 changes: 95 additions & 0 deletions llvm/test/CodeGen/Thumb2/mve-bitarith.ll
@@ -0,0 +1,95 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s

; Bitwise AND of two 16 x i8 vectors; expected to select a single vand on the
; incoming q registers followed by the return.
define arm_aapcs_vfpcc <16 x i8> @and_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: and_int8_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = and <16 x i8> %src1, %src2
ret <16 x i8> %0
}

; Bitwise AND of two 8 x i16 vectors; expected to select the same vand
; instruction as the i8 case.
define arm_aapcs_vfpcc <8 x i16> @and_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: and_int16_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = and <8 x i16> %src1, %src2
ret <8 x i16> %0
}

; Bitwise AND of two 4 x i32 vectors; expected to select the same vand
; instruction as the narrower element types.
define arm_aapcs_vfpcc <4 x i32> @and_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: and_int32_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = and <4 x i32> %src1, %src2
ret <4 x i32> %0
}


; Bitwise OR of two 16 x i8 vectors; expected to select a single vorr on the
; incoming q registers followed by the return.
define arm_aapcs_vfpcc <16 x i8> @or_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: or_int8_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vorr q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = or <16 x i8> %src1, %src2
ret <16 x i8> %0
}

; Bitwise OR of two 8 x i16 vectors; expected to select the same vorr
; instruction as the i8 case.
define arm_aapcs_vfpcc <8 x i16> @or_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: or_int16_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vorr q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = or <8 x i16> %src1, %src2
ret <8 x i16> %0
}

; Bitwise OR of two 4 x i32 vectors; expected to select the same vorr
; instruction as the narrower element types.
define arm_aapcs_vfpcc <4 x i32> @or_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: or_int32_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vorr q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = or <4 x i32> %src1, %src2
ret <4 x i32> %0
}


; Bitwise XOR of two 16 x i8 vectors; expected to select a single veor on the
; incoming q registers followed by the return.
define arm_aapcs_vfpcc <16 x i8> @xor_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: xor_int8_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: veor q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = xor <16 x i8> %src1, %src2
ret <16 x i8> %0
}

; Bitwise XOR of two 8 x i16 vectors; expected to select the same veor
; instruction as the i8 case.
define arm_aapcs_vfpcc <8 x i16> @xor_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: xor_int16_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: veor q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = xor <8 x i16> %src1, %src2
ret <8 x i16> %0
}

; Bitwise XOR of two 4 x i32 vectors; expected to select the same veor
; instruction as the narrower element types.
define arm_aapcs_vfpcc <4 x i32> @xor_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: xor_int32_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: veor q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%0 = xor <4 x i32> %src1, %src2
ret <4 x i32> %0
}

2 changes: 1 addition & 1 deletion llvm/test/CodeGen/Thumb2/mve-div-expand.ll
Expand Up @@ -870,12 +870,12 @@ define arm_aapcs_vfpcc <8 x half> @frem_f16(<8 x half> %in1, <8 x half> %in2) {
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT: .pad #64
; CHECK-NEXT: sub sp, #64
; CHECK-NEXT: vmov q5, q1
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov.u16 r0, q1[0]
; CHECK-NEXT: vmov s0, r0
; CHECK-NEXT: vmov.u16 r0, q4[0]
; CHECK-NEXT: vmov s2, r0
; CHECK-NEXT: vmov q5, q1
; CHECK-NEXT: vcvtb.f32.f16 s2, s2
; CHECK-NEXT: vstr s2, [sp, #56]
; CHECK-NEXT: vcvtb.f32.f16 s0, s0
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/Thumb2/mve-fmath.ll
Expand Up @@ -1042,12 +1042,12 @@ define arm_aapcs_vfpcc <8 x half> @pow_float16_t(<8 x half> %src1, <8 x half> %s
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT: .pad #64
; CHECK-NEXT: sub sp, #64
; CHECK-NEXT: vmov q5, q1
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov.u16 r0, q1[0]
; CHECK-NEXT: vmov s0, r0
; CHECK-NEXT: vmov.u16 r0, q4[0]
; CHECK-NEXT: vmov s2, r0
; CHECK-NEXT: vmov q5, q1
; CHECK-NEXT: vcvtb.f32.f16 s2, s2
; CHECK-NEXT: vstr s2, [sp, #56]
; CHECK-NEXT: vcvtb.f32.f16 s0, s0
Expand Down

0 comments on commit d2a9ec2

Please sign in to comment.