289 changes: 289 additions & 0 deletions llvm/test/CodeGen/Thumb2/mve-pred-build-var.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,289 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s


; Builds a <4 x i1> mask whose lane 0 is a runtime scalar compare result and
; whose remaining lanes are constant false, then uses it as a select mask.
; The expected output materialises the compare as 0/1, negates it to 0/-1,
; and inserts it into a zeroed GPR with a single bfi (lsb 0, width 4) before
; moving that GPR into p0 for the vpsel.
define arm_aapcs_vfpcc <4 x i32> @build_var0_v4i1(i32 %s, i32 %t, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: build_var0_v4i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: mov.w r0, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r0, #1
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: rsbs r0, r0, #0
; CHECK-NEXT: bfi r2, r0, #0, #4
; CHECK-NEXT: vmsr p0, r2
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
entry:
; Scalar unsigned compare that provides the one variable mask bit.
%c = icmp ult i32 %s, %t
; Lane 0 is variable; lanes 1-3 stay false from the zeroinitializer base.
%vc = insertelement <4 x i1> zeroinitializer, i1 %c, i64 0
%r = select <4 x i1> %vc, <4 x i32> %a, <4 x i32> %b
ret <4 x i32> %r
}

; Same as the lane-0 <4 x i1> case, but the variable bit goes into lane 3
; (the last lane). The expected output differs only in the bfi position:
; lsb 12, width 4.
define arm_aapcs_vfpcc <4 x i32> @build_var3_v4i1(i32 %s, i32 %t, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: build_var3_v4i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: mov.w r0, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r0, #1
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: rsbs r0, r0, #0
; CHECK-NEXT: bfi r2, r0, #12, #4
; CHECK-NEXT: vmsr p0, r2
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
entry:
; Scalar unsigned compare that provides the one variable mask bit.
%c = icmp ult i32 %s, %t
; Lane 3 is variable; lanes 0-2 stay false from the zeroinitializer base.
%vc = insertelement <4 x i1> zeroinitializer, i1 %c, i64 3
%r = select <4 x i1> %vc, <4 x i32> %a, <4 x i32> %b
ret <4 x i32> %r
}

; Splats one runtime i1 across all four lanes of a <4 x i1> select mask.
; The expected output inserts the negated compare result four times, one
; 4-bit bfi per lane at lsb 0, 4, 8 and 12, before moving the GPR into p0.
define arm_aapcs_vfpcc <4 x i32> @build_varN_v4i1(i32 %s, i32 %t, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: build_varN_v4i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: mov.w r0, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r0, #1
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: rsbs r0, r0, #0
; CHECK-NEXT: bfi r2, r0, #0, #4
; CHECK-NEXT: bfi r2, r0, #4, #4
; CHECK-NEXT: bfi r2, r0, #8, #4
; CHECK-NEXT: bfi r2, r0, #12, #4
; CHECK-NEXT: vmsr p0, r2
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%c = icmp ult i32 %s, %t
; Splat idiom: insert into lane 0, then shuffle with an all-zero index mask
; so every result lane reads lane 0.
%vc1 = insertelement <4 x i1> undef, i1 %c, i64 0
%vc4 = shufflevector <4 x i1> %vc1, <4 x i1> undef, <4 x i32> zeroinitializer
%r = select <4 x i1> %vc4, <4 x i32> %a, <4 x i32> %b
ret <4 x i32> %r
}


; <8 x i1> variant: lane 0 is a runtime compare result, the other lanes are
; constant false. The expected output uses a single bfi at lsb 0 with
; width 2 to place the lane in the GPR that is moved into p0.
define arm_aapcs_vfpcc <8 x i16> @build_var0_v8i1(i32 %s, i32 %t, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: build_var0_v8i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: mov.w r0, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r0, #1
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: rsbs r0, r0, #0
; CHECK-NEXT: bfi r2, r0, #0, #2
; CHECK-NEXT: vmsr p0, r2
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
entry:
; Scalar unsigned compare that provides the one variable mask bit.
%c = icmp ult i32 %s, %t
; Lane 0 is variable; lanes 1-7 stay false from the zeroinitializer base.
%vc = insertelement <8 x i1> zeroinitializer, i1 %c, i64 0
%r = select <8 x i1> %vc, <8 x i16> %a, <8 x i16> %b
ret <8 x i16> %r
}

; <8 x i1> variant with the variable bit in lane 3 (a middle lane for this
; type). The expected output places it with a bfi at lsb 6, width 2.
define arm_aapcs_vfpcc <8 x i16> @build_var3_v8i1(i32 %s, i32 %t, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: build_var3_v8i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: mov.w r0, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r0, #1
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: rsbs r0, r0, #0
; CHECK-NEXT: bfi r2, r0, #6, #2
; CHECK-NEXT: vmsr p0, r2
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
entry:
; Scalar unsigned compare that provides the one variable mask bit.
%c = icmp ult i32 %s, %t
; Lane 3 is variable; every other lane stays false.
%vc = insertelement <8 x i1> zeroinitializer, i1 %c, i64 3
%r = select <8 x i1> %vc, <8 x i16> %a, <8 x i16> %b
ret <8 x i16> %r
}

; Splats one runtime i1 across all eight lanes of an <8 x i1> select mask.
; The expected output emits eight 2-bit bfi instructions, at every even lsb
; from 0 to 14, before moving the GPR into p0.
define arm_aapcs_vfpcc <8 x i16> @build_varN_v8i1(i32 %s, i32 %t, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: build_varN_v8i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: mov.w r0, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r0, #1
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: rsbs r0, r0, #0
; CHECK-NEXT: bfi r2, r0, #0, #2
; CHECK-NEXT: bfi r2, r0, #2, #2
; CHECK-NEXT: bfi r2, r0, #4, #2
; CHECK-NEXT: bfi r2, r0, #6, #2
; CHECK-NEXT: bfi r2, r0, #8, #2
; CHECK-NEXT: bfi r2, r0, #10, #2
; CHECK-NEXT: bfi r2, r0, #12, #2
; CHECK-NEXT: bfi r2, r0, #14, #2
; CHECK-NEXT: vmsr p0, r2
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%c = icmp ult i32 %s, %t
; Splat idiom: insert into lane 0, then shuffle with an all-zero index mask
; so every result lane reads lane 0.
%vc1 = insertelement <8 x i1> undef, i1 %c, i64 0
%vc4 = shufflevector <8 x i1> %vc1, <8 x i1> undef, <8 x i32> zeroinitializer
%r = select <8 x i1> %vc4, <8 x i16> %a, <8 x i16> %b
ret <8 x i16> %r
}


; <16 x i1> variant: lane 0 is a runtime compare result, the other lanes are
; constant false. The expected output places the lane with a single bfi at
; lsb 0, width 1, in the GPR that is moved into p0.
define arm_aapcs_vfpcc <16 x i8> @build_var0_v16i1(i32 %s, i32 %t, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: build_var0_v16i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: mov.w r0, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r0, #1
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: rsbs r0, r0, #0
; CHECK-NEXT: bfi r2, r0, #0, #1
; CHECK-NEXT: vmsr p0, r2
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
entry:
; Scalar unsigned compare that provides the one variable mask bit.
%c = icmp ult i32 %s, %t
; Lane 0 is variable; lanes 1-15 stay false from the zeroinitializer base.
%vc = insertelement <16 x i1> zeroinitializer, i1 %c, i64 0
%r = select <16 x i1> %vc, <16 x i8> %a, <16 x i8> %b
ret <16 x i8> %r
}

; <16 x i1> variant with the variable bit in lane 3. The expected output
; places it with a bfi at lsb 3, width 1.
define arm_aapcs_vfpcc <16 x i8> @build_var3_v16i1(i32 %s, i32 %t, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: build_var3_v16i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: mov.w r0, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r0, #1
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: rsbs r0, r0, #0
; CHECK-NEXT: bfi r2, r0, #3, #1
; CHECK-NEXT: vmsr p0, r2
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
entry:
; Scalar unsigned compare that provides the one variable mask bit.
%c = icmp ult i32 %s, %t
; Lane 3 is variable; every other lane stays false.
%vc = insertelement <16 x i1> zeroinitializer, i1 %c, i64 3
%r = select <16 x i1> %vc, <16 x i8> %a, <16 x i8> %b
ret <16 x i8> %r
}

; Splats one runtime i1 across all sixteen lanes of a <16 x i1> select mask.
; The expected output emits sixteen 1-bit bfi instructions, one for each lsb
; from 0 to 15, before moving the GPR into p0.
define arm_aapcs_vfpcc <16 x i8> @build_varN_v16i1(i32 %s, i32 %t, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: build_varN_v16i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: mov.w r0, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r0, #1
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: rsbs r0, r0, #0
; CHECK-NEXT: bfi r2, r0, #0, #1
; CHECK-NEXT: bfi r2, r0, #1, #1
; CHECK-NEXT: bfi r2, r0, #2, #1
; CHECK-NEXT: bfi r2, r0, #3, #1
; CHECK-NEXT: bfi r2, r0, #4, #1
; CHECK-NEXT: bfi r2, r0, #5, #1
; CHECK-NEXT: bfi r2, r0, #6, #1
; CHECK-NEXT: bfi r2, r0, #7, #1
; CHECK-NEXT: bfi r2, r0, #8, #1
; CHECK-NEXT: bfi r2, r0, #9, #1
; CHECK-NEXT: bfi r2, r0, #10, #1
; CHECK-NEXT: bfi r2, r0, #11, #1
; CHECK-NEXT: bfi r2, r0, #12, #1
; CHECK-NEXT: bfi r2, r0, #13, #1
; CHECK-NEXT: bfi r2, r0, #14, #1
; CHECK-NEXT: bfi r2, r0, #15, #1
; CHECK-NEXT: vmsr p0, r2
; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%c = icmp ult i32 %s, %t
; Splat idiom: insert into lane 0, then shuffle with an all-zero index mask
; so every result lane reads lane 0.
%vc1 = insertelement <16 x i1> undef, i1 %c, i64 0
%vc4 = shufflevector <16 x i1> %vc1, <16 x i1> undef, <16 x i32> zeroinitializer
%r = select <16 x i1> %vc4, <16 x i8> %a, <16 x i8> %b
ret <16 x i8> %r
}


; <2 x i1> variant with the variable bit in lane 0. Unlike the 4/8/16-lane
; cases, the expected output does not use p0/vpsel at all: it builds a full
; vector mask in q2 (s8/s9 hold the 0/-1 compare result, s10/s11 are loaded
; as zero from the constant pool) and performs the select with
; vbic/vand/vorr.
define arm_aapcs_vfpcc <2 x i64> @build_var0_v2i1(i32 %s, i32 %t, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: build_var0_v2i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r2, #1
; CHECK-NEXT: rsbs r0, r2, #0
; CHECK-NEXT: vmov s8, r0
; CHECK-NEXT: vldr s10, .LCPI9_0
; CHECK-NEXT: vmov.f32 s9, s8
; CHECK-NEXT: vmov.f32 s11, s10
; CHECK-NEXT: vbic q1, q1, q2
; CHECK-NEXT: vand q0, q0, q2
; CHECK-NEXT: vorr q0, q0, q1
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI9_0:
; CHECK-NEXT: .long 0 @ float 0
entry:
; Scalar unsigned compare that provides the one variable mask bit.
%c = icmp ult i32 %s, %t
; Lane 0 is variable; lane 1 stays false from the zeroinitializer base.
%vc = insertelement <2 x i1> zeroinitializer, i1 %c, i64 0
%r = select <2 x i1> %vc, <2 x i64> %a, <2 x i64> %b
ret <2 x i64> %r
}

; <2 x i1> variant with the variable bit in lane 1. The expected output
; mirrors the lane-0 case with the halves of q2 swapped: s10/s11 hold the
; 0/-1 compare result and s8/s9 are loaded as zero from the constant pool.
; The select is again done with vbic/vand/vorr rather than p0/vpsel.
define arm_aapcs_vfpcc <2 x i64> @build_var1_v2i1(i32 %s, i32 %t, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: build_var1_v2i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r2, #1
; CHECK-NEXT: rsbs r0, r2, #0
; CHECK-NEXT: vmov s10, r0
; CHECK-NEXT: vldr s8, .LCPI10_0
; CHECK-NEXT: vmov.f32 s9, s8
; CHECK-NEXT: vmov.f32 s11, s10
; CHECK-NEXT: vbic q1, q1, q2
; CHECK-NEXT: vand q0, q0, q2
; CHECK-NEXT: vorr q0, q0, q1
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI10_0:
; CHECK-NEXT: .long 0 @ float 0
entry:
; Scalar unsigned compare that provides the one variable mask bit.
%c = icmp ult i32 %s, %t
; Lane 1 is variable; lane 0 stays false from the zeroinitializer base.
%vc = insertelement <2 x i1> zeroinitializer, i1 %c, i64 1
%r = select <2 x i1> %vc, <2 x i64> %a, <2 x i64> %b
ret <2 x i64> %r
}

; Splats one runtime i1 across both lanes of a <2 x i1> select mask. The
; expected output broadcasts the 0/-1 compare result to all of q2 with
; vdup.32 and performs the select with vbic/vand/vorr; no constant pool
; entry is needed here.
define arm_aapcs_vfpcc <2 x i64> @build_varN_v2i1(i32 %s, i32 %t, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: build_varN_v2i1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: cmp r0, r1
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r2, #1
; CHECK-NEXT: rsbs r0, r2, #0
; CHECK-NEXT: vdup.32 q2, r0
; CHECK-NEXT: vbic q1, q1, q2
; CHECK-NEXT: vand q0, q0, q2
; CHECK-NEXT: vorr q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%c = icmp ult i32 %s, %t
; Splat idiom: insert into lane 0, then shuffle with an all-zero index mask
; so every result lane reads lane 0.
%vc1 = insertelement <2 x i1> undef, i1 %c, i64 0
%vc4 = shufflevector <2 x i1> %vc1, <2 x i1> undef, <2 x i32> zeroinitializer
%r = select <2 x i1> %vc4, <2 x i64> %a, <2 x i64> %b
ret <2 x i64> %r
}
164 changes: 164 additions & 0 deletions llvm/test/CodeGen/Thumb2/mve-pred-ext.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s

; Sign-extends a <4 x i1> compare result to <4 x i32>. The expected output
; keeps the compare in the predicate (vcmp.s32) and produces the result with
; a vpsel between an all-ones vector and a zero vector.
define arm_aapcs_vfpcc <4 x i32> @sext_v4i1_v4i32(<4 x i32> %src) {
; CHECK-LABEL: sext_v4i1_v4i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcmp.s32 gt, q0, zr
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: vmov.i8 q1, #0xff
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
; Signed greater-than-zero compare produces the i1 vector to extend.
%c = icmp sgt <4 x i32> %src, zeroinitializer
%0 = sext <4 x i1> %c to <4 x i32>
ret <4 x i32> %0
}

; Sign-extends an <8 x i1> compare result to <8 x i16>. The expected output
; uses vcmp.s16 followed by a vpsel between an all-ones vector and a zero
; vector.
define arm_aapcs_vfpcc <8 x i16> @sext_v8i1_v8i16(<8 x i16> %src) {
; CHECK-LABEL: sext_v8i1_v8i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcmp.s16 gt, q0, zr
; CHECK-NEXT: vmov.i16 q0, #0x0
; CHECK-NEXT: vmov.i8 q1, #0xff
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
; Signed greater-than-zero compare produces the i1 vector to extend.
%c = icmp sgt <8 x i16> %src, zeroinitializer
%0 = sext <8 x i1> %c to <8 x i16>
ret <8 x i16> %0
}

; Sign-extends a <16 x i1> compare result to <16 x i8>. The expected output
; uses vcmp.s8 followed by a vpsel between an all-ones vector and a zero
; vector.
define arm_aapcs_vfpcc <16 x i8> @sext_v16i1_v16i8(<16 x i8> %src) {
; CHECK-LABEL: sext_v16i1_v16i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcmp.s8 gt, q0, zr
; CHECK-NEXT: vmov.i8 q0, #0x0
; CHECK-NEXT: vmov.i8 q1, #0xff
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
; Signed greater-than-zero compare produces the i1 vector to extend.
%c = icmp sgt <16 x i8> %src, zeroinitializer
%0 = sext <16 x i1> %c to <16 x i8>
ret <16 x i8> %0
}

; Sign-extends a <2 x i1> compare result to <2 x i64>. The expected output
; does not use vcmp/vpsel: each 64-bit lane is moved to core registers,
; compared there with an rsbs/sbcs pair, turned into 0 or -1, and written
; into both 32-bit halves of the corresponding lane of q1.
define arm_aapcs_vfpcc <2 x i64> @sext_v2i1_v2i64(<2 x i64> %src) {
; CHECK-LABEL: sext_v2i1_v2i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: rsbs r1, r1, #0
; CHECK-NEXT: sbcs.w r0, r2, r0
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: mov.w r0, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r0, #-1
; CHECK-NEXT: vmov.32 q1[0], r0
; CHECK-NEXT: vmov.32 q1[1], r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: rsbs r1, r1, #0
; CHECK-NEXT: sbcs.w r0, r2, r0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r2, #-1
; CHECK-NEXT: vmov.32 q1[2], r2
; CHECK-NEXT: vmov.32 q1[3], r2
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
; Signed greater-than-zero compare produces the i1 vector to extend.
%c = icmp sgt <2 x i64> %src, zeroinitializer
%0 = sext <2 x i1> %c to <2 x i64>
ret <2 x i64> %0
}


; Zero-extends a <4 x i1> compare result to <4 x i32>. The expected output
; uses vcmp.s32 followed by a vpsel between a vector of ones (#0x1 per i32
; lane) and a zero vector.
define arm_aapcs_vfpcc <4 x i32> @zext_v4i1_v4i32(<4 x i32> %src) {
; CHECK-LABEL: zext_v4i1_v4i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcmp.s32 gt, q0, zr
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: vmov.i32 q1, #0x1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
; Signed greater-than-zero compare produces the i1 vector to extend.
%c = icmp sgt <4 x i32> %src, zeroinitializer
%0 = zext <4 x i1> %c to <4 x i32>
ret <4 x i32> %0
}

; Zero-extends an <8 x i1> compare result to <8 x i16>. The expected output
; uses vcmp.s16 followed by a vpsel between a vector of ones (#0x1 per i16
; lane) and a zero vector.
define arm_aapcs_vfpcc <8 x i16> @zext_v8i1_v8i16(<8 x i16> %src) {
; CHECK-LABEL: zext_v8i1_v8i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcmp.s16 gt, q0, zr
; CHECK-NEXT: vmov.i16 q0, #0x0
; CHECK-NEXT: vmov.i16 q1, #0x1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
; Signed greater-than-zero compare produces the i1 vector to extend.
%c = icmp sgt <8 x i16> %src, zeroinitializer
%0 = zext <8 x i1> %c to <8 x i16>
ret <8 x i16> %0
}

; Zero-extends a <16 x i1> compare result to <16 x i8>. The expected output
; uses vcmp.s8 followed by a vpsel between a vector of ones (#0x1 per i8
; lane) and a zero vector.
define arm_aapcs_vfpcc <16 x i8> @zext_v16i1_v16i8(<16 x i8> %src) {
; CHECK-LABEL: zext_v16i1_v16i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcmp.s8 gt, q0, zr
; CHECK-NEXT: vmov.i8 q0, #0x0
; CHECK-NEXT: vmov.i8 q1, #0x1
; CHECK-NEXT: vpsel q0, q1, q0
; CHECK-NEXT: bx lr
entry:
; Signed greater-than-zero compare produces the i1 vector to extend.
%c = icmp sgt <16 x i8> %src, zeroinitializer
%0 = zext <16 x i1> %c to <16 x i8>
ret <16 x i8> %0
}

; Zero-extends a <2 x i1> compare result to <2 x i64>. The expected output
; does the 64-bit compares in core registers (rsbs/sbcs per lane), writes the
; resulting 0/-1 into the low 32-bit word of each lane of q1, and then ANDs
; q1 with a constant-pool vector of {1, 0, 1, 0} to leave 0 or 1 per i64
; lane.
define arm_aapcs_vfpcc <2 x i64> @zext_v2i1_v2i64(<2 x i64> %src) {
; CHECK-LABEL: zext_v2i1_v2i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: rsbs r1, r1, #0
; CHECK-NEXT: sbcs.w r0, r2, r0
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: mov.w r0, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r0, #-1
; CHECK-NEXT: vmov.32 q1[0], r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: rsbs r1, r1, #0
; CHECK-NEXT: sbcs.w r0, r2, r0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r2, #1
; CHECK-NEXT: adr r0, .LCPI7_0
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: vldrw.u32 q0, [r0]
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r2, #-1
; CHECK-NEXT: vmov.32 q1[2], r2
; CHECK-NEXT: vand q0, q1, q0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI7_0:
; CHECK-NEXT: .long 1 @ 0x1
; CHECK-NEXT: .long 0 @ 0x0
; CHECK-NEXT: .long 1 @ 0x1
; CHECK-NEXT: .long 0 @ 0x0
entry:
; Signed greater-than-zero compare produces the i1 vector to extend.
%c = icmp sgt <2 x i64> %src, zeroinitializer
%0 = zext <2 x i1> %c to <2 x i64>
ret <2 x i64> %0
}
564 changes: 564 additions & 0 deletions llvm/test/CodeGen/Thumb2/mve-pred-shuffle.ll

Large diffs are not rendered by default.

81 changes: 81 additions & 0 deletions llvm/test/CodeGen/Thumb2/mve-pred-spill.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s

declare arm_aapcs_vfpcc <4 x i32> @ext_i32()
declare arm_aapcs_vfpcc <8 x i16> @ext_i16()
declare arm_aapcs_vfpcc <16 x i8> @ext_i8()

; Keeps a <4 x i1> compare result live across a call so that the predicate
; has to be saved and restored. Despite the function name, the IR contains
; no shufflevector. The expected output stores p0 to the stack with vstr
; before the bl and reloads it with vldr afterwards; %a is copied into q4
; (d8/d9 are pushed in the prologue) so that it is still available for the
; vpsel after the call.
define arm_aapcs_vfpcc <4 x i32> @shuffle1_v4i32(<4 x i32> %src, <4 x i32> %a) {
; CHECK-LABEL: shuffle1_v4i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: vcmp.i32 eq, q0, zr
; CHECK-NEXT: vmov q4, q1
; CHECK-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: bl ext_i32
; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: vpsel q0, q4, q0
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r7, pc}
entry:
; The compare is computed before the call and consumed after it.
%c = icmp eq <4 x i32> %src, zeroinitializer
%ext = call arm_aapcs_vfpcc <4 x i32> @ext_i32()
%s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ext
ret <4 x i32> %s
}

; <8 x i16> version of the predicate-live-across-call test. Despite the
; function name, the IR contains no shufflevector. The expected output
; stores p0 to the stack with vstr before the bl and reloads it with vldr
; afterwards; %a is copied into q4 so that it is still available for the
; vpsel after the call.
define arm_aapcs_vfpcc <8 x i16> @shuffle1_v8i16(<8 x i16> %src, <8 x i16> %a) {
; CHECK-LABEL: shuffle1_v8i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: vcmp.i16 eq, q0, zr
; CHECK-NEXT: vmov q4, q1
; CHECK-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: bl ext_i16
; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: vpsel q0, q4, q0
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r7, pc}
entry:
; The compare is computed before the call and consumed after it.
%c = icmp eq <8 x i16> %src, zeroinitializer
%ext = call arm_aapcs_vfpcc <8 x i16> @ext_i16()
%s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ext
ret <8 x i16> %s
}

; <16 x i8> version of the predicate-live-across-call test. Despite the
; function name, the IR contains no shufflevector. The expected output
; stores p0 to the stack with vstr before the bl and reloads it with vldr
; afterwards; %a is copied into q4 so that it is still available for the
; vpsel after the call.
define arm_aapcs_vfpcc <16 x i8> @shuffle1_v16i8(<16 x i8> %src, <16 x i8> %a) {
; CHECK-LABEL: shuffle1_v16i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: vcmp.i8 eq, q0, zr
; CHECK-NEXT: vmov q4, q1
; CHECK-NEXT: vstr p0, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: bl ext_i8
; CHECK-NEXT: vldr p0, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: vpsel q0, q4, q0
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r7, pc}
entry:
; The compare is computed before the call and consumed after it.
%c = icmp eq <16 x i8> %src, zeroinitializer
%ext = call arm_aapcs_vfpcc <16 x i8> @ext_i8()
%s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ext
ret <16 x i8> %s
}
104 changes: 104 additions & 0 deletions llvm/test/CodeGen/Thumb2/mve-vcmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -447,3 +447,107 @@ entry:
%s = select <2 x i1> %c, <2 x i32> %a, <2 x i32> %b
ret <2 x i32> %s
}

; Chains several <2 x i1> values: a compare on <2 x i64> feeds a select, whose
; <2 x i32> result is then compared three ways; the resulting masks are ANDed
; together and drive a final select. The expected output contains no
; vcmp/vpsel: every compare is done per lane in core registers, each result
; is materialised as a 0/-1 vector mask (q3, q4, q5), and the mask
; combination and selects are done with vand/vbic/vorr.
; Note that this function has no explicit entry label, so the block comment
; in the expected output is a bare "%bb.0:".
define arm_aapcs_vfpcc <2 x i32> @vcmp_multi_v2i32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: vcmp_multi_v2i32:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: vmov r2, s8
; CHECK-NEXT: vmov lr, s10
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: clz r0, r0
; CHECK-NEXT: lsrs r0, r0, #5
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r0, #-1
; CHECK-NEXT: vmov.32 q3[0], r0
; CHECK-NEXT: vmov.32 q3[1], r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: clz r0, r0
; CHECK-NEXT: lsrs r0, r0, #5
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r0, #-1
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: vbic q0, q2, q3
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: subs r1, r0, r2
; CHECK-NEXT: asr.w r12, r0, #31
; CHECK-NEXT: sbcs.w r1, r12, r2, asr #31
; CHECK-NEXT: mov.w r1, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r1, #-1
; CHECK-NEXT: vmov.32 q3[0], r1
; CHECK-NEXT: vmov.32 q3[1], r1
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: subs.w r2, r1, lr
; CHECK-NEXT: asr.w r12, r1, #31
; CHECK-NEXT: sbcs.w r2, r12, lr, asr #31
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r3, #1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r3, #-1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r0, #-1
; CHECK-NEXT: vmov.32 q4[0], r0
; CHECK-NEXT: vmov.32 q4[1], r0
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r1, #-1
; CHECK-NEXT: vmov.32 q4[2], r1
; CHECK-NEXT: vmov.32 q3[2], r3
; CHECK-NEXT: vmov.32 q4[3], r1
; CHECK-NEXT: vmov.32 q3[3], r3
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r0, #-1
; CHECK-NEXT: vmov.32 q5[0], r0
; CHECK-NEXT: vmov.32 q5[1], r0
; CHECK-NEXT: vmov r0, s6
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r0, #-1
; CHECK-NEXT: vmov.32 q5[2], r0
; CHECK-NEXT: vmov.32 q5[3], r0
; CHECK-NEXT: vand q1, q5, q4
; CHECK-NEXT: vand q1, q3, q1
; CHECK-NEXT: vbic q0, q0, q1
; CHECK-NEXT: vand q1, q2, q1
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: pop {r7, pc}
; First stage: zero out the lanes of %c whose %a lane is zero.
%a4 = icmp eq <2 x i64> %a, zeroinitializer
%a5 = select <2 x i1> %a4, <2 x i32> zeroinitializer, <2 x i32> %c
; Three independent <2 x i1> conditions that are combined below.
%a6 = icmp ne <2 x i32> %b, zeroinitializer
%a7 = icmp slt <2 x i32> %a5, %c
%a8 = icmp ne <2 x i32> %a5, zeroinitializer
; %a10 = %a7 & %a6 & %a8 is the mask for the final select.
%a9 = and <2 x i1> %a6, %a8
%a10 = and <2 x i1> %a7, %a9
%a11 = select <2 x i1> %a10, <2 x i32> %c, <2 x i32> %a5
ret <2 x i32> %a11
}