Skip to content
Permalink
Browse files

[ARM] MVE predicate register support

This adds support code for building and shuffling i1 predicate registers. It
generally uses two basic principles, either converting the predicate into an
scalar (through a PREDICATE_CAST) and doing scalar operations on it there, or
by converting the register to an full vector register and back.

Some of the code here is a not super efficient but will hopefully cover most
cases of moving i1 vectors around and can be improved in subsequent patches.

Some code by David Sherwood.

Differential Revision: https://reviews.llvm.org/D65052

llvm-svn: 366890
  • Loading branch information...
DMG862 committed Jul 24, 2019
1 parent b09bc8a commit c7e55d4f5213d27456c8a2f74ee5284181067d73

Large diffs are not rendered by default.

@@ -129,6 +129,8 @@ class VectorType;
LOOP_DEC, // Really a part of LE, performs the sub
LE, // Low-overhead loops, Loop End

PREDICATE_CAST, // Predicate cast for MVE i1 types

VCEQ, // Vector compare equal.
VCEQZ, // Vector compare equal to zero.
VCNE, // Vector compare not equal (MVE)
@@ -3016,6 +3016,26 @@ let Predicates = [HasMVEInt] in {
defm MVE_VCGEU : unpred_vcmp_r<ARMvcgeu, "u", 2>;
}

// Occasionally we need to cast between a i32 and a boolean vector, for
// example when moving between rGPR and VPR.P0 as part of predicate vector
// shuffles. We also sometimes need to cast between different predicate
// vector types (v4i1<>v8i1, etc.) also as part of lowering vector shuffles.

def predicate_cast : SDNode<"ARMISD::PREDICATE_CAST", SDTUnaryOp>;

let Predicates = [HasMVEInt] in {
foreach VT = [ v4i1, v8i1, v16i1 ] in {
def : Pat<(i32 (predicate_cast (VT VCCR:$src))),
(i32 (COPY_TO_REGCLASS (VT VCCR:$src), VCCR))>;
def : Pat<(VT (predicate_cast (i32 VCCR:$src))),
(VT (COPY_TO_REGCLASS (i32 VCCR:$src), VCCR))>;

foreach VT2 = [ v4i1, v8i1, v16i1 ] in
def : Pat<(VT (predicate_cast (VT2 VCCR:$src))),
(VT (COPY_TO_REGCLASS (VT2 VCCR:$src), VCCR))>;
}
}

// end of MVE compares

// start of MVE_qDest_qSrc
@@ -4410,6 +4430,37 @@ let Predicates = [HasMVEInt] in {
(v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;
def : Pat<(v4i32 (vselect (v4i1 VCCR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))),
(v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, VCCR:$pred))>;

def : Pat<(v16i8 (vselect (v16i8 MQPR:$pred), (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))),
(v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
(MVE_VCMPi8 (v16i8 MQPR:$pred), (MVE_VMOVimmi8 0), 1)))>;
def : Pat<(v8i16 (vselect (v8i16 MQPR:$pred), (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))),
(v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
(MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), 1)))>;
def : Pat<(v4i32 (vselect (v4i32 MQPR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))),
(v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0,
(MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), 1)))>;

def : Pat<(v16i8 (zext (v16i1 VCCR:$pred))),
(v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>;
def : Pat<(v8i16 (zext (v8i1 VCCR:$pred))),
(v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), 0, VCCR:$pred))>;
def : Pat<(v4i32 (zext (v4i1 VCCR:$pred))),
(v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>;

def : Pat<(v16i8 (sext (v16i1 VCCR:$pred))),
(v16i8 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>;
def : Pat<(v8i16 (sext (v8i1 VCCR:$pred))),
(v8i16 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi16 0), 0, VCCR:$pred))>;
def : Pat<(v4i32 (sext (v4i1 VCCR:$pred))),
(v4i32 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>;

def : Pat<(v16i8 (anyext (v16i1 VCCR:$pred))),
(v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), 0, VCCR:$pred))>;
def : Pat<(v8i16 (anyext (v8i1 VCCR:$pred))),
(v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), 0, VCCR:$pred))>;
def : Pat<(v4i32 (anyext (v4i1 VCCR:$pred))),
(v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>;
}

def MVE_VPNOT : MVE_p<(outs), (ins), NoItinerary,
@@ -0,0 +1,196 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s


define arm_aapcs_vfpcc <4 x i32> @build_true_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: build_true_v4i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: bx lr
entry:
%s = select <4 x i1> <i1 1, i1 1, i1 1, i1 1>, <4 x i32> %a, <4 x i32> %b
ret <4 x i32> %s
}

define arm_aapcs_vfpcc <4 x i32> @build_false_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: build_false_v4i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
%s = select <4 x i1> <i1 0, i1 0, i1 0, i1 0>, <4 x i32> %a, <4 x i32> %b
ret <4 x i32> %s
}

define arm_aapcs_vfpcc <4 x i32> @build_upper_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: build_upper_v4i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mov.w r0, #65280
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%s = select <4 x i1> <i1 0, i1 0, i1 1, i1 1>, <4 x i32> %a, <4 x i32> %b
ret <4 x i32> %s
}

define arm_aapcs_vfpcc <4 x i32> @build_lower_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: build_lower_v4i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r0, #255
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%s = select <4 x i1> <i1 1, i1 1, i1 0, i1 0>, <4 x i32> %a, <4 x i32> %b
ret <4 x i32> %s
}


define arm_aapcs_vfpcc <8 x i16> @build_true_v8i1(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: build_true_v8i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: bx lr
entry:
%s = select <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i16> %a, <8 x i16> %b
ret <8 x i16> %s
}

define arm_aapcs_vfpcc <8 x i16> @build_false_v8i1(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: build_false_v8i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
%s = select <8 x i1> <i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0>, <8 x i16> %a, <8 x i16> %b
ret <8 x i16> %s
}

define arm_aapcs_vfpcc <8 x i16> @build_upper_v8i1(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: build_upper_v8i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mov.w r0, #65280
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%s = select <8 x i1> <i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1>, <8 x i16> %a, <8 x i16> %b
ret <8 x i16> %s
}

define arm_aapcs_vfpcc <8 x i16> @build_lower_v8i1(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: build_lower_v8i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r0, #255
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%s = select <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0>, <8 x i16> %a, <8 x i16> %b
ret <8 x i16> %s
}


define arm_aapcs_vfpcc <16 x i8> @build_true_v16i1(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: build_true_v16i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: bx lr
entry:
%s = select <16 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <16 x i8> %a, <16 x i8> %b
ret <16 x i8> %s
}

define arm_aapcs_vfpcc <16 x i8> @build_false_v16i1(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: build_false_v16i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
%s = select <16 x i1> <i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0>, <16 x i8> %a, <16 x i8> %b
ret <16 x i8> %s
}

define arm_aapcs_vfpcc <16 x i8> @build_upper_v16i1(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: build_upper_v16i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: mov.w r0, #65280
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%s = select <16 x i1> <i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <16 x i8> %a, <16 x i8> %b
ret <16 x i8> %s
}

define arm_aapcs_vfpcc <16 x i8> @build_lower_v16i1(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: build_lower_v16i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r0, #255
; CHECK-NEXT: vmsr p0, r0
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%s = select <16 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0>, <16 x i8> %a, <16 x i8> %b
ret <16 x i8> %s
}


define arm_aapcs_vfpcc <2 x i64> @build_true_v2i1(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: build_true_v2i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: bx lr
entry:
%s = select <2 x i1> <i1 1, i1 1>, <2 x i64> %a, <2 x i64> %b
ret <2 x i64> %s
}

define arm_aapcs_vfpcc <2 x i64> @build_false_v2i1(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: build_false_v2i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
%s = select <2 x i1> <i1 0, i1 0>, <2 x i64> %a, <2 x i64> %b
ret <2 x i64> %s
}

define arm_aapcs_vfpcc <2 x i64> @build_upper_v2i1(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: build_upper_v2i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r0, .LCPI14_0
; CHECK-NEXT: vldrw.u32 q2, [r0]
; CHECK-NEXT: vbic q1, q1, q2
; CHECK-NEXT: vand q0, q0, q2
; CHECK-NEXT: vorr q0, q0, q1
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI14_0:
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 4294967295 @ 0xffffffff
; CHECK-NEXT: .long 4294967295 @ 0xffffffff
entry:
%s = select <2 x i1> <i1 0, i1 1>, <2 x i64> %a, <2 x i64> %b
ret <2 x i64> %s
}

define arm_aapcs_vfpcc <2 x i64> @build_lower_v2i1(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: build_lower_v2i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adr r0, .LCPI15_0
; CHECK-NEXT: vldrw.u32 q2, [r0]
; CHECK-NEXT: vbic q1, q1, q2
; CHECK-NEXT: vand q0, q0, q2
; CHECK-NEXT: vorr q0, q0, q1
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI15_0:
; CHECK-NEXT: .long 4294967295 @ 0xffffffff
; CHECK-NEXT: .long 4294967295 @ 0xffffffff
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 0 @ 0x0
entry:
%s = select <2 x i1> <i1 1, i1 0>, <2 x i64> %a, <2 x i64> %b
ret <2 x i64> %s
}

0 comments on commit c7e55d4

Please sign in to comment.
You can’t perform that action at this time.