308 changes: 142 additions & 166 deletions llvm/test/CodeGen/Thumb2/mve-vabdus.ll

Large diffs are not rendered by default.

172 changes: 77 additions & 95 deletions llvm/test/CodeGen/Thumb2/mve-vcmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -367,36 +367,31 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @vcmp_eq_v2i64(<2 x i64> %src, <2 x i64> %srcb, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: vcmp_eq_v2i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov r0, s5
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmov r1, s4
; CHECK-NEXT: eors r1, r2
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q4[0], r0
; CHECK-NEXT: vmov.32 q4[1], r0
; CHECK-NEXT: vmov r0, s7
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmov r1, s6
; CHECK-NEXT: eors r1, r2
; CHECK-NEXT: vmov r2, s1
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s5
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q4[2], r0
; CHECK-NEXT: vmov.32 q4[3], r0
; CHECK-NEXT: vbic q0, q3, q4
; CHECK-NEXT: vand q1, q2, q4
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: eors r1, r2
; CHECK-NEXT: vmov r2, s4
; CHECK-NEXT: eors r2, r3
; CHECK-NEXT: orrs r1, r2
; CHECK-NEXT: cset r1, eq
; CHECK-NEXT: tst.w r1, #1
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: vbic q1, q3, q0
; CHECK-NEXT: vand q0, q2, q0
; CHECK-NEXT: vorr q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%c = icmp eq <2 x i64> %src, %srcb
Expand All @@ -407,36 +402,31 @@ entry:
define arm_aapcs_vfpcc <2 x i32> @vcmp_eq_v2i32(<2 x i64> %src, <2 x i64> %srcb, <2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: vcmp_eq_v2i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov r0, s5
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmov r1, s4
; CHECK-NEXT: eors r1, r2
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q4[0], r0
; CHECK-NEXT: vmov.32 q4[1], r0
; CHECK-NEXT: vmov r0, s7
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmov r1, s6
; CHECK-NEXT: eors r1, r2
; CHECK-NEXT: vmov r2, s1
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s5
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q4[2], r0
; CHECK-NEXT: vmov.32 q4[3], r0
; CHECK-NEXT: vbic q0, q3, q4
; CHECK-NEXT: vand q1, q2, q4
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: eors r1, r2
; CHECK-NEXT: vmov r2, s4
; CHECK-NEXT: eors r2, r3
; CHECK-NEXT: orrs r1, r2
; CHECK-NEXT: cset r1, eq
; CHECK-NEXT: tst.w r1, #1
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: vbic q1, q3, q0
; CHECK-NEXT: vand q0, q2, q0
; CHECK-NEXT: vorr q0, q0, q1
; CHECK-NEXT: bx lr
entry:
%c = icmp eq <2 x i64> %src, %srcb
Expand All @@ -447,84 +437,76 @@ entry:
define arm_aapcs_vfpcc <2 x i32> @vcmp_multi_v2i32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: vcmp_multi_v2i32:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: vmov r2, s8
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q3[0], r0
; CHECK-NEXT: vmov.32 q3[1], r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: vbic q0, q2, q3
; CHECK-NEXT: vmov lr, s0
; CHECK-NEXT: subs.w r1, lr, r2
; CHECK-NEXT: asr.w r12, lr, #31
; CHECK-NEXT: orrs r1, r2
; CHECK-NEXT: vmov r2, s10
; CHECK-NEXT: cset r1, eq
; CHECK-NEXT: tst.w r1, #1
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: vbic q0, q2, q0
; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: subs r1, r0, r2
; CHECK-NEXT: asr.w r12, r0, #31
; CHECK-NEXT: sbcs.w r1, r12, r2, asr #31
; CHECK-NEXT: mov.w r1, #0
; CHECK-NEXT: vmov r2, s10
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov.32 q3[0], r1
; CHECK-NEXT: vmov.32 q3[1], r1
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: subs r0, r1, r2
; CHECK-NEXT: asr.w r12, r1, #31
; CHECK-NEXT: sbcs.w r0, r12, r2, asr #31
; CHECK-NEXT: vmov r1, s8
; CHECK-NEXT: csetm lr, ne
; CHECK-NEXT: asr.w r12, r2, #31
; CHECK-NEXT: subs r4, r2, r1
; CHECK-NEXT: sbcs.w r1, r12, r1, asr #31
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r3, #1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: cmp.w lr, #0
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: cset r0, ne
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: vmov.32 q4[0], r0
; CHECK-NEXT: vmov.32 q4[1], r0
; CHECK-NEXT: cset r0, ne
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q4[2], r0
; CHECK-NEXT: vmov.32 q4[3], r0
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: cset r0, ne
; CHECK-NEXT: vmov q3[2], q3[0], r1, lr
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: vmov q3[3], q3[1], r1, lr
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q5[0], r0
; CHECK-NEXT: vmov.32 q5[1], r0
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: cset r1, ne
; CHECK-NEXT: tst.w r1, #1
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q4[2], q4[0], r1, r0
; CHECK-NEXT: vmov q4[3], q4[1], r1, r0
; CHECK-NEXT: vmov r0, s6
; CHECK-NEXT: vmov r1, s4
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: cset r0, ne
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q5[2], r0
; CHECK-NEXT: vmov.32 q5[3], r0
; CHECK-NEXT: vand q1, q5, q4
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: cset r1, ne
; CHECK-NEXT: tst.w r1, #1
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
; CHECK-NEXT: vand q1, q1, q4
; CHECK-NEXT: vand q1, q3, q1
; CHECK-NEXT: vbic q0, q0, q1
; CHECK-NEXT: vand q1, q2, q1
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r4, pc}
%a4 = icmp eq <2 x i64> %a, zeroinitializer
%a5 = select <2 x i1> %a4, <2 x i32> zeroinitializer, <2 x i32> %c
%a6 = icmp ne <2 x i32> %b, zeroinitializer
Expand Down
284 changes: 130 additions & 154 deletions llvm/test/CodeGen/Thumb2/mve-vcmpr.ll

Large diffs are not rendered by default.

104 changes: 48 additions & 56 deletions llvm/test/CodeGen/Thumb2/mve-vcmpz.ll
Original file line number Diff line number Diff line change
Expand Up @@ -361,25 +361,23 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @vcmp_eqz_v2i64(<2 x i64> %src, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: vcmp_eqz_v2i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q3[0], r0
; CHECK-NEXT: vmov.32 q3[1], r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: vbic q0, q2, q3
; CHECK-NEXT: vand q1, q1, q3
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: orrs r1, r2
; CHECK-NEXT: cset r1, eq
; CHECK-NEXT: tst.w r1, #1
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: vbic q2, q2, q0
; CHECK-NEXT: vand q0, q1, q0
; CHECK-NEXT: vorr q0, q0, q2
; CHECK-NEXT: bx lr
entry:
%c = icmp eq <2 x i64> %src, zeroinitializer
Expand All @@ -390,25 +388,23 @@ entry:
define arm_aapcs_vfpcc <2 x i32> @vcmp_eqz_v2i32(<2 x i64> %src, <2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: vcmp_eqz_v2i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q3[0], r0
; CHECK-NEXT: vmov.32 q3[1], r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: vbic q0, q2, q3
; CHECK-NEXT: vand q1, q1, q3
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: orrs r1, r2
; CHECK-NEXT: cset r1, eq
; CHECK-NEXT: tst.w r1, #1
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: vbic q2, q2, q0
; CHECK-NEXT: vand q0, q1, q0
; CHECK-NEXT: vorr q0, q0, q2
; CHECK-NEXT: bx lr
entry:
%c = icmp eq <2 x i64> %src, zeroinitializer
Expand Down Expand Up @@ -779,25 +775,23 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @vcmp_r_eqz_v2i64(<2 x i64> %src, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: vcmp_r_eqz_v2i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q3[0], r0
; CHECK-NEXT: vmov.32 q3[1], r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: vbic q0, q2, q3
; CHECK-NEXT: vand q1, q1, q3
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: orrs r1, r2
; CHECK-NEXT: cset r1, eq
; CHECK-NEXT: tst.w r1, #1
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: vbic q2, q2, q0
; CHECK-NEXT: vand q0, q1, q0
; CHECK-NEXT: vorr q0, q0, q2
; CHECK-NEXT: bx lr
entry:
%c = icmp eq <2 x i64> zeroinitializer, %src
Expand All @@ -808,25 +802,23 @@ entry:
define arm_aapcs_vfpcc <2 x i32> @vcmp_r_eqz_v2i32(<2 x i64> %src, <2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: vcmp_r_eqz_v2i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q3[0], r0
; CHECK-NEXT: vmov.32 q3[1], r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: vbic q0, q2, q3
; CHECK-NEXT: vand q1, q1, q3
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: orrs r1, r2
; CHECK-NEXT: cset r1, eq
; CHECK-NEXT: tst.w r1, #1
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: vbic q2, q2, q0
; CHECK-NEXT: vand q0, q1, q0
; CHECK-NEXT: vorr q0, q0, q2
; CHECK-NEXT: bx lr
entry:
%c = icmp eq <2 x i64> %src, zeroinitializer
Expand Down
161 changes: 72 additions & 89 deletions llvm/test/CodeGen/Thumb2/mve-vcreate.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,8 @@
define arm_aapcs_vfpcc <4 x i32> @vcreate_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: vcreate_i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.32 q0[0], r1
; CHECK-NEXT: vmov.32 q0[1], r0
; CHECK-NEXT: vmov.32 q0[2], r3
; CHECK-NEXT: vmov.32 q0[3], r2
; CHECK-NEXT: vmov q0[2], q0[0], r1, r3
; CHECK-NEXT: vmov q0[3], q0[1], r0, r2
; CHECK-NEXT: bx lr
entry:
%conv = zext i32 %a to i64
Expand All @@ -27,10 +25,8 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @insert_0123(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_0123:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.32 q0[0], r0
; CHECK-NEXT: vmov.32 q0[1], r1
; CHECK-NEXT: vmov.32 q0[2], r2
; CHECK-NEXT: vmov.32 q0[3], r3
; CHECK-NEXT: vmov q0[2], q0[0], r0, r2
; CHECK-NEXT: vmov q0[3], q0[1], r1, r3
; CHECK-NEXT: bx lr
entry:
%v1 = insertelement <4 x i32> undef, i32 %a, i32 0
Expand All @@ -43,10 +39,8 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @insert_3210(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_3210:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.32 q0[0], r3
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: vmov.32 q0[2], r1
; CHECK-NEXT: vmov.32 q0[3], r0
; CHECK-NEXT: vmov q0[2], q0[0], r3, r1
; CHECK-NEXT: vmov q0[3], q0[1], r2, r0
; CHECK-NEXT: bx lr
entry:
%v1 = insertelement <4 x i32> undef, i32 %a, i32 3
Expand All @@ -59,10 +53,8 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @insert_0213(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_0213:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.32 q0[0], r0
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: vmov.32 q0[2], r1
; CHECK-NEXT: vmov.32 q0[3], r3
; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
; CHECK-NEXT: vmov q0[3], q0[1], r2, r3
; CHECK-NEXT: bx lr
entry:
%v1 = insertelement <4 x i32> undef, i32 %a, i32 0
Expand All @@ -75,8 +67,7 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @insert_0220(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_0220:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.32 q0[0], r3
; CHECK-NEXT: vmov.32 q0[2], r2
; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
; CHECK-NEXT: bx lr
entry:
%v1 = insertelement <4 x i32> undef, i32 %a, i32 0
Expand All @@ -89,9 +80,8 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @insert_321(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_321:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: vmov.32 q0[2], r1
; CHECK-NEXT: vmov.32 q0[3], r0
; CHECK-NEXT: vmov q0[3], q0[1], r2, r0
; CHECK-NEXT: bx lr
entry:
%v1 = insertelement <4 x i32> undef, i32 %a, i32 3
Expand All @@ -104,8 +94,7 @@ define arm_aapcs_vfpcc <4 x i32> @insert_310(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_310:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: vmov.32 q0[1], r1
; CHECK-NEXT: vmov.32 q0[3], r0
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: bx lr
entry:
%v1 = insertelement <4 x i32> undef, i32 %a, i32 3
Expand All @@ -117,8 +106,7 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @insert_320(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_320:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: vmov.32 q0[2], r1
; CHECK-NEXT: vmov q0[2], q0[0], r2, r1
; CHECK-NEXT: vmov.32 q0[3], r0
; CHECK-NEXT: bx lr
entry:
Expand All @@ -131,8 +119,7 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @insert_31(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_31:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.32 q0[1], r1
; CHECK-NEXT: vmov.32 q0[3], r0
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: bx lr
entry:
%v1 = insertelement <4 x i32> undef, i32 %a, i32 3
Expand Down Expand Up @@ -165,9 +152,8 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @insert_210(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_210:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: vmov.32 q0[1], r1
; CHECK-NEXT: vmov.32 q0[2], r0
; CHECK-NEXT: vmov q0[2], q0[0], r2, r0
; CHECK-NEXT: bx lr
entry:
%v1 = insertelement <4 x i32> undef, i32 %a, i32 2
Expand All @@ -179,8 +165,7 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @insert_20(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_20:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.32 q0[0], r1
; CHECK-NEXT: vmov.32 q0[2], r0
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
; CHECK-NEXT: bx lr
entry:
%v1 = insertelement <4 x i32> undef, i32 %a, i32 2
Expand Down Expand Up @@ -245,28 +230,26 @@ entry:
define hidden <8 x i16> @create_i16(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i16 zeroext %d, i16 zeroext %a2, i16 zeroext %b2, i16 zeroext %c2, i16 zeroext %d2) local_unnamed_addr #0 {
; CHECK-LABEL: create_i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: movs r5, #0
; CHECK-NEXT: orr.w r0, r1, r0, lsl #16
; CHECK-NEXT: lsll r2, r5, #16
; CHECK-NEXT: ldrd lr, r4, [sp, #16]
; CHECK-NEXT: orr.w r1, r2, r3
; CHECK-NEXT: .save {r5, r7, r9, lr}
; CHECK-NEXT: push.w {r5, r7, r9, lr}
; CHECK-NEXT: ldr.w r12, [sp, #24]
; CHECK-NEXT: orrs r0, r5
; CHECK-NEXT: vmov.32 q0[0], r1
; CHECK-NEXT: mov.w r9, #0
; CHECK-NEXT: movs r7, #0
; CHECK-NEXT: vmov.32 q0[1], r0
; CHECK-NEXT: ldr r0, [sp, #28]
; CHECK-NEXT: ldr r5, [sp, #28]
; CHECK-NEXT: lsll r2, r9, #16
; CHECK-NEXT: lsll r12, r7, #16
; CHECK-NEXT: orr.w r4, r4, lr, lsl #16
; CHECK-NEXT: orr.w r0, r0, r12
; CHECK-NEXT: orrs r7, r4
; CHECK-NEXT: vmov.32 q0[2], r0
; CHECK-NEXT: vmov.32 q0[3], r7
; CHECK-NEXT: orr.w r5, r5, r12
; CHECK-NEXT: orrs r2, r3
; CHECK-NEXT: vmov q0[2], q0[0], r2, r5
; CHECK-NEXT: ldrd r2, r3, [sp, #16]
; CHECK-NEXT: orr.w r0, r1, r0, lsl #16
; CHECK-NEXT: orr.w r0, r0, r9
; CHECK-NEXT: orr.w r2, r3, r2, lsl #16
; CHECK-NEXT: orrs r2, r7
; CHECK-NEXT: vmov q0[3], q0[1], r0, r2
; CHECK-NEXT: vmov r0, r1, d0
; CHECK-NEXT: vmov r2, r3, d1
; CHECK-NEXT: pop {r4, r5, r7, pc}
; CHECK-NEXT: pop.w {r5, r7, r9, pc}
entry:
%conv = zext i16 %a to i64
%shl = shl nuw i64 %conv, 48
Expand Down Expand Up @@ -325,59 +308,59 @@ entry:
define hidden <16 x i8> @create_i8(i8 zeroext %a1, i8 zeroext %b1, i8 zeroext %c1, i8 zeroext %d1, i8 zeroext %a2, i8 zeroext %b2, i8 zeroext %c2, i8 zeroext %d2, i8 zeroext %a3, i8 zeroext %b3, i8 zeroext %c3, i8 zeroext %d3, i8 zeroext %a4, i8 zeroext %b4, i8 zeroext %c4, i8 zeroext %d4) local_unnamed_addr #0 {
; CHECK-LABEL: create_i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r7, r9, r11, lr}
; CHECK-NEXT: push.w {r4, r5, r7, r9, r11, lr}
; CHECK-NEXT: ldr.w r12, [sp, #28]
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEXT: ldr r4, [sp, #68]
; CHECK-NEXT: mov.w r11, #0
; CHECK-NEXT: ldr r4, [sp, #24]
; CHECK-NEXT: ldr r6, [sp, #64]
; CHECK-NEXT: movs r5, #0
; CHECK-NEXT: lsll r12, r11, #16
; CHECK-NEXT: lsls r1, r1, #16
; CHECK-NEXT: lsll r4, r5, #24
; CHECK-NEXT: orr.w r0, r1, r0, lsl #22
; CHECK-NEXT: orr.w r12, r12, r4
; CHECK-NEXT: ldr r4, [sp, #32]
; CHECK-NEXT: lsll r4, r11, #16
; CHECK-NEXT: mov lr, r1
; CHECK-NEXT: lsll r6, r5, #24
; CHECK-NEXT: movs r7, #0
; CHECK-NEXT: orr.w r0, r0, r2, lsl #8
; CHECK-NEXT: orr.w r1, r6, r4
; CHECK-NEXT: ldr r4, [sp, #72]
; CHECK-NEXT: mov r12, r3
; CHECK-NEXT: ldr r3, [sp, #76]
; CHECK-NEXT: lsll r4, r7, #8
; CHECK-NEXT: add r0, r3
; CHECK-NEXT: orr.w r12, r12, r4
; CHECK-NEXT: ldr r4, [sp, #36]
; CHECK-NEXT: orrs r0, r5
; CHECK-NEXT: ldr r2, [sp, #56]
; CHECK-NEXT: orr.w r0, r0, r11
; CHECK-NEXT: orr.w r4, r4, r12
; CHECK-NEXT: vmov.32 q0[0], r4
; CHECK-NEXT: orrs r0, r7
; CHECK-NEXT: vmov.32 q0[1], r0
; CHECK-NEXT: ldr r0, [sp, #60]
; CHECK-NEXT: movs r1, #0
; CHECK-NEXT: ldr r6, [sp, #36]
; CHECK-NEXT: orrs r1, r4
; CHECK-NEXT: ldr r4, [sp, #32]
; CHECK-NEXT: orr.w r8, r1, r3
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: lsll r0, r1, #16
; CHECK-NEXT: lsll r2, r3, #24
; CHECK-NEXT: orrs r0, r2
; CHECK-NEXT: ldr r2, [sp, #64]
; CHECK-NEXT: movs r1, #0
; CHECK-NEXT: lsll r6, r3, #16
; CHECK-NEXT: lsll r4, r1, #24
; CHECK-NEXT: mov.w r9, #0
; CHECK-NEXT: lsll r2, r9, #8
; CHECK-NEXT: orrs r0, r2
; CHECK-NEXT: ldr r2, [sp, #68]
; CHECK-NEXT: orrs r0, r2
; CHECK-NEXT: ldr r2, [sp, #40]
; CHECK-NEXT: vmov.32 q0[2], r0
; CHECK-NEXT: ldr r0, [sp, #44]
; CHECK-NEXT: lsls r0, r0, #16
; CHECK-NEXT: orr.w r0, r0, r2, lsl #22
; CHECK-NEXT: ldr r2, [sp, #48]
; CHECK-NEXT: orrs r4, r6
; CHECK-NEXT: ldr r6, [sp, #40]
; CHECK-NEXT: lsll r6, r9, #8
; CHECK-NEXT: orrs r4, r6
; CHECK-NEXT: ldr r6, [sp, #44]
; CHECK-NEXT: orrs r4, r6
; CHECK-NEXT: ldr r6, [sp, #48]
; CHECK-NEXT: vmov q0[2], q0[0], r4, r8
; CHECK-NEXT: ldr r4, [sp, #52]
; CHECK-NEXT: lsls r4, r4, #16
; CHECK-NEXT: orr.w r4, r4, r6, lsl #22
; CHECK-NEXT: ldr r6, [sp, #56]
; CHECK-NEXT: orr.w r4, r4, r6, lsl #8
; CHECK-NEXT: ldr r6, [sp, #60]
; CHECK-NEXT: add r4, r6
; CHECK-NEXT: orrs r4, r5
; CHECK-NEXT: orr.w r4, r4, r11
; CHECK-NEXT: orrs r4, r7
; CHECK-NEXT: lsl.w r7, lr, #16
; CHECK-NEXT: orr.w r0, r7, r0, lsl #22
; CHECK-NEXT: orr.w r0, r0, r2, lsl #8
; CHECK-NEXT: ldr r2, [sp, #52]
; CHECK-NEXT: add r0, r2
; CHECK-NEXT: orrs r0, r3
; CHECK-NEXT: add r0, r12
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: orrs r0, r3
; CHECK-NEXT: orr.w r0, r0, r9
; CHECK-NEXT: vmov.32 q0[3], r0
; CHECK-NEXT: vmov q0[3], q0[1], r0, r4
; CHECK-NEXT: vmov r0, r1, d0
; CHECK-NEXT: vmov r2, r3, d1
; CHECK-NEXT: pop.w {r4, r5, r7, r9, r11, pc}
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r11, pc}
entry:
%conv = zext i8 %a1 to i64
%shl = shl nuw nsw i64 %conv, 54
Expand Down
74 changes: 32 additions & 42 deletions llvm/test/CodeGen/Thumb2/mve-vcvt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -43,18 +43,16 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @foo_int32_float(<4 x float> %src) {
; CHECK-MVE-LABEL: foo_int32_float:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vcvt.s32.f32 s4, s0
; CHECK-MVE-NEXT: vcvt.s32.f32 s6, s1
; CHECK-MVE-NEXT: vcvt.s32.f32 s10, s2
; CHECK-MVE-NEXT: vcvt.s32.f32 s4, s2
; CHECK-MVE-NEXT: vcvt.s32.f32 s6, s0
; CHECK-MVE-NEXT: vcvt.s32.f32 s8, s3
; CHECK-MVE-NEXT: vcvt.s32.f32 s10, s1
; CHECK-MVE-NEXT: vmov r0, s4
; CHECK-MVE-NEXT: vmov.32 q0[0], r0
; CHECK-MVE-NEXT: vmov r0, s6
; CHECK-MVE-NEXT: vmov.32 q0[1], r0
; CHECK-MVE-NEXT: vmov r0, s10
; CHECK-MVE-NEXT: vmov.32 q0[2], r0
; CHECK-MVE-NEXT: vmov r1, s6
; CHECK-MVE-NEXT: vmov q0[2], q0[0], r1, r0
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov.32 q0[3], r0
; CHECK-MVE-NEXT: vmov r1, s10
; CHECK-MVE-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: foo_int32_float:
Expand All @@ -69,18 +67,16 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @foo_uint32_float(<4 x float> %src) {
; CHECK-MVE-LABEL: foo_uint32_float:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vcvt.u32.f32 s4, s0
; CHECK-MVE-NEXT: vcvt.u32.f32 s6, s1
; CHECK-MVE-NEXT: vcvt.u32.f32 s10, s2
; CHECK-MVE-NEXT: vcvt.u32.f32 s4, s2
; CHECK-MVE-NEXT: vcvt.u32.f32 s6, s0
; CHECK-MVE-NEXT: vcvt.u32.f32 s8, s3
; CHECK-MVE-NEXT: vcvt.u32.f32 s10, s1
; CHECK-MVE-NEXT: vmov r0, s4
; CHECK-MVE-NEXT: vmov.32 q0[0], r0
; CHECK-MVE-NEXT: vmov r0, s6
; CHECK-MVE-NEXT: vmov.32 q0[1], r0
; CHECK-MVE-NEXT: vmov r0, s10
; CHECK-MVE-NEXT: vmov.32 q0[2], r0
; CHECK-MVE-NEXT: vmov r1, s6
; CHECK-MVE-NEXT: vmov q0[2], q0[0], r1, r0
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov.32 q0[3], r0
; CHECK-MVE-NEXT: vmov r1, s10
; CHECK-MVE-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: foo_uint32_float:
Expand Down Expand Up @@ -349,24 +345,21 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @foo_int64_float(<2 x double> %src) {
; CHECK-LABEL: foo_int64_float:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov r0, r1, d8
; CHECK-NEXT: vmov r0, r1, d9
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: vmov r2, r3, d9
; CHECK-NEXT: vmov.32 q4[0], r0
; CHECK-NEXT: vmov.32 q4[1], r1
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: mov r1, r3
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: vmov r0, r1, d8
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: vmov.32 q4[2], r0
; CHECK-NEXT: vmov.32 q4[3], r1
; CHECK-NEXT: vmov q0, q4
; CHECK-NEXT: vmov q0[2], q0[0], r0, r4
; CHECK-NEXT: vmov q0[3], q0[1], r1, r5
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: pop {r4, r5, r7, pc}
entry:
%out = fptosi <2 x double> %src to <2 x i64>
ret <2 x i64> %out
Expand All @@ -375,24 +368,21 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @foo_uint64_float(<2 x double> %src) {
; CHECK-LABEL: foo_uint64_float:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov r0, r1, d8
; CHECK-NEXT: vmov r0, r1, d9
; CHECK-NEXT: bl __aeabi_d2ulz
; CHECK-NEXT: vmov r2, r3, d9
; CHECK-NEXT: vmov.32 q4[0], r0
; CHECK-NEXT: vmov.32 q4[1], r1
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: mov r1, r3
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: vmov r0, r1, d8
; CHECK-NEXT: bl __aeabi_d2ulz
; CHECK-NEXT: vmov.32 q4[2], r0
; CHECK-NEXT: vmov.32 q4[3], r1
; CHECK-NEXT: vmov q0, q4
; CHECK-NEXT: vmov q0[2], q0[0], r0, r4
; CHECK-NEXT: vmov q0[3], q0[1], r1, r5
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: pop {r4, r5, r7, pc}
entry:
%out = fptoui <2 x double> %src to <2 x i64>
ret <2 x i64> %out
Expand Down
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/Thumb2/mve-vdup.ll
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,8 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @vdup_i64(i64 %src) {
; CHECK-LABEL: vdup_i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.32 q0[0], r0
; CHECK-NEXT: vmov.32 q0[1], r1
; CHECK-NEXT: vmov.32 q0[2], r0
; CHECK-NEXT: vmov.32 q0[3], r1
; CHECK-NEXT: vmov q0[2], q0[0], r0, r0
; CHECK-NEXT: vmov q0[3], q0[1], r1, r1
; CHECK-NEXT: bx lr
entry:
%0 = insertelement <2 x i64> undef, i64 %src, i32 0
Expand Down
660 changes: 298 additions & 362 deletions llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll

Large diffs are not rendered by default.

2,138 changes: 941 additions & 1,197 deletions llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll

Large diffs are not rendered by default.

998 changes: 458 additions & 540 deletions llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll

Large diffs are not rendered by default.

2,350 changes: 1,043 additions & 1,307 deletions llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll

Large diffs are not rendered by default.

6 changes: 2 additions & 4 deletions llvm/test/CodeGen/Thumb2/mve-vld2-post.ll
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,9 @@ define <4 x i64> *@vld2_v2i64(<4 x i64> *%src, <2 x i64> *%dst) {
; CHECK-NEXT: vmov r2, s1
; CHECK-NEXT: adc.w r12, r12, lr
; CHECK-NEXT: adds r5, r5, r4
; CHECK-NEXT: vmov.32 q0[0], r5
; CHECK-NEXT: vmov q0[2], q0[0], r5, r6
; CHECK-NEXT: adcs r2, r3
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: vmov.32 q0[2], r6
; CHECK-NEXT: vmov.32 q0[3], r12
; CHECK-NEXT: vmov q0[3], q0[1], r2, r12
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r4, r5, r6, pc}
entry:
Expand Down
98 changes: 46 additions & 52 deletions llvm/test/CodeGen/Thumb2/mve-vld2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -280,11 +280,9 @@ define void @vld2_v2i64(<4 x i64> *%src, <2 x i64> *%dst) {
; CHECK-NEXT: adc.w r12, r12, r2
; CHECK-NEXT: vmov r2, s1
; CHECK-NEXT: adds r0, r0, r4
; CHECK-NEXT: vmov.32 q0[0], r0
; CHECK-NEXT: vmov q0[2], q0[0], r0, lr
; CHECK-NEXT: adcs r2, r3
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: vmov.32 q0[2], lr
; CHECK-NEXT: vmov.32 q0[3], r12
; CHECK-NEXT: vmov q0[3], q0[1], r2, r12
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r4, pc}
entry:
Expand All @@ -299,62 +297,58 @@ entry:
define void @vld2_v4i64(<8 x i64> *%src, <4 x i64> *%dst) {
; CHECK-LABEL: vld2_v4i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: .save {r4, r5, r6, lr}
; CHECK-NEXT: push {r4, r5, r6, lr}
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vldrw.u32 q3, [r0, #32]
; CHECK-NEXT: vldrw.u32 q5, [r0, #48]
; CHECK-NEXT: vldrw.u32 q0, [r0]
; CHECK-NEXT: vldrw.u32 q2, [r0, #16]
; CHECK-NEXT: vmov.f64 d8, d7
; CHECK-NEXT: vmov.f32 s17, s15
; CHECK-NEXT: vmov.f32 s18, s22
; CHECK-NEXT: vmov.f32 s14, s20
; CHECK-NEXT: vmov.f32 s15, s21
; CHECK-NEXT: vmov.f32 s19, s23
; CHECK-NEXT: vmov r3, s18
; CHECK-NEXT: vmov r0, s14
; CHECK-NEXT: vmov r4, s12
; CHECK-NEXT: vmov.f64 d2, d1
; CHECK-NEXT: vmov r12, s19
; CHECK-NEXT: vmov r2, s15
; CHECK-NEXT: vmov.f32 s5, s3
; CHECK-NEXT: vmov.f32 s6, s10
; CHECK-NEXT: vmov.f32 s2, s8
; CHECK-NEXT: vmov.f32 s7, s11
; CHECK-NEXT: vmov.f32 s3, s9
; CHECK-NEXT: vldrw.u32 q5, [r0, #16]
; CHECK-NEXT: vldrw.u32 q1, [r0, #32]
; CHECK-NEXT: vldrw.u32 q4, [r0, #48]
; CHECK-NEXT: vmov.f64 d4, d1
; CHECK-NEXT: vmov.f32 s9, s3
; CHECK-NEXT: vmov.f32 s10, s22
; CHECK-NEXT: vmov.f32 s2, s20
; CHECK-NEXT: vmov.f32 s11, s23
; CHECK-NEXT: vmov.f32 s3, s21
; CHECK-NEXT: vmov r3, s10
; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: vmov.f64 d6, d3
; CHECK-NEXT: vmov r12, s11
; CHECK-NEXT: vmov r2, s3
; CHECK-NEXT: vmov.f32 s13, s7
; CHECK-NEXT: vmov.f32 s14, s18
; CHECK-NEXT: vmov.f32 s6, s16
; CHECK-NEXT: vmov.f32 s7, s17
; CHECK-NEXT: vmov.f32 s15, s19
; CHECK-NEXT: vmov r4, s6
; CHECK-NEXT: vmov r5, s12
; CHECK-NEXT: vmov r6, s4
; CHECK-NEXT: adds.w lr, r0, r3
; CHECK-NEXT: vmov r0, s16
; CHECK-NEXT: vmov r3, s17
; CHECK-NEXT: vmov r3, s14
; CHECK-NEXT: vmov r0, s7
; CHECK-NEXT: adc.w r12, r12, r2
; CHECK-NEXT: vmov r2, s15
; CHECK-NEXT: adds r3, r3, r4
; CHECK-NEXT: vmov r4, s5
; CHECK-NEXT: adcs r0, r2
; CHECK-NEXT: vmov r2, s13
; CHECK-NEXT: adds r0, r0, r4
; CHECK-NEXT: vmov r4, s2
; CHECK-NEXT: vmov.32 q3[0], r0
; CHECK-NEXT: vmov r0, s7
; CHECK-NEXT: adcs r2, r3
; CHECK-NEXT: vmov r3, s6
; CHECK-NEXT: vmov.32 q3[1], r2
; CHECK-NEXT: vmov r2, s3
; CHECK-NEXT: vmov.32 q3[2], lr
; CHECK-NEXT: vmov.32 q3[3], r12
; CHECK-NEXT: vstrw.32 q3, [r1, #16]
; CHECK-NEXT: adds.w lr, r4, r3
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: adc.w r12, r2, r0
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vmov r2, s5
; CHECK-NEXT: vmov r4, s1
; CHECK-NEXT: adds r0, r0, r3
; CHECK-NEXT: vmov.32 q0[0], r0
; CHECK-NEXT: adds r5, r5, r6
; CHECK-NEXT: vmov r6, s0
; CHECK-NEXT: vmov q3[2], q3[0], r5, r3
; CHECK-NEXT: adcs r2, r4
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: vmov.32 q0[2], lr
; CHECK-NEXT: vmov.32 q0[3], r12
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: vmov r4, s8
; CHECK-NEXT: vmov q3[3], q3[1], r2, r0
; CHECK-NEXT: vmov r0, s9
; CHECK-NEXT: vmov r2, s1
; CHECK-NEXT: vstrw.32 q3, [r1, #16]
; CHECK-NEXT: adds r4, r4, r6
; CHECK-NEXT: vmov q1[2], q1[0], r4, lr
; CHECK-NEXT: adcs r0, r2
; CHECK-NEXT: vmov q1[3], q1[1], r0, r12
; CHECK-NEXT: vstrw.32 q1, [r1]
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: pop {r4, pc}
; CHECK-NEXT: pop {r4, r5, r6, pc}
entry:
%l1 = load <8 x i64>, <8 x i64>* %src, align 4
%s1 = shufflevector <8 x i64> %l1, <8 x i64> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
Expand Down
1,130 changes: 542 additions & 588 deletions llvm/test/CodeGen/Thumb2/mve-vld3.ll

Large diffs are not rendered by default.

6 changes: 2 additions & 4 deletions llvm/test/CodeGen/Thumb2/mve-vld4-post.ll
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,8 @@ define <8 x i64> *@vld4_v2i64(<8 x i64> *%src, <2 x i64> *%dst) {
; CHECK-NEXT: adcs r4, r5
; CHECK-NEXT: adds r2, r2, r3
; CHECK-NEXT: adc.w r3, r4, r6
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: vmov.32 q0[1], r3
; CHECK-NEXT: vmov.32 q0[2], lr
; CHECK-NEXT: vmov.32 q0[3], r12
; CHECK-NEXT: vmov q0[2], q0[0], r2, lr
; CHECK-NEXT: vmov q0[3], q0[1], r3, r12
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: add sp, #4
Expand Down
233 changes: 108 additions & 125 deletions llvm/test/CodeGen/Thumb2/mve-vld4.ll
Original file line number Diff line number Diff line change
Expand Up @@ -229,41 +229,33 @@ define void @vld4_v4i16(<16 x i16> *%src, <4 x i16> *%dst) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vldrw.u32 q0, [r0, #16]
; CHECK-NEXT: vmov.u16 r2, q1[3]
; CHECK-NEXT: vmov.u16 r0, q0[3]
; CHECK-NEXT: vmov.32 q2[0], r2
; CHECK-NEXT: vmov.u16 r2, q1[7]
; CHECK-NEXT: vmov.32 q2[1], r2
; CHECK-NEXT: vmov.32 q2[2], r0
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vmov.u16 r2, q0[3]
; CHECK-NEXT: vmov.u16 r0, q1[3]
; CHECK-NEXT: vmov q2[2], q2[0], r0, r2
; CHECK-NEXT: vmov.u16 r0, q0[7]
; CHECK-NEXT: vmov.32 q2[3], r0
; CHECK-NEXT: vmov.u16 r0, q1[2]
; CHECK-NEXT: vmov.32 q3[0], r0
; CHECK-NEXT: vmov.u16 r0, q1[6]
; CHECK-NEXT: vmov.32 q3[1], r0
; CHECK-NEXT: vmov.u16 r2, q1[7]
; CHECK-NEXT: vmov q2[3], q2[1], r2, r0
; CHECK-NEXT: vmov.u16 r0, q0[2]
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.u16 r2, q1[2]
; CHECK-NEXT: vmov q3[2], q3[0], r2, r0
; CHECK-NEXT: vmov.u16 r0, q0[6]
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: vmov.u16 r0, q1[0]
; CHECK-NEXT: vadd.i32 q2, q3, q2
; CHECK-NEXT: vmov.32 q3[0], r0
; CHECK-NEXT: vmov.u16 r0, q1[4]
; CHECK-NEXT: vmov.32 q3[1], r0
; CHECK-NEXT: vmov.u16 r2, q1[6]
; CHECK-NEXT: vmov q3[3], q3[1], r2, r0
; CHECK-NEXT: vmov.u16 r0, q0[0]
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.u16 r0, q1[1]
; CHECK-NEXT: vmov.32 q4[0], r0
; CHECK-NEXT: vmov.u16 r0, q1[5]
; CHECK-NEXT: vmov.32 q4[1], r0
; CHECK-NEXT: vmov.u16 r2, q1[0]
; CHECK-NEXT: vadd.i32 q2, q3, q2
; CHECK-NEXT: vmov q3[2], q3[0], r2, r0
; CHECK-NEXT: vmov.u16 r0, q0[1]
; CHECK-NEXT: vmov.32 q4[2], r0
; CHECK-NEXT: vmov.u16 r2, q1[1]
; CHECK-NEXT: vmov q4[2], q4[0], r2, r0
; CHECK-NEXT: vmov.u16 r0, q0[5]
; CHECK-NEXT: vmov.32 q4[3], r0
; CHECK-NEXT: vmov.u16 r2, q1[5]
; CHECK-NEXT: vmov q4[3], q4[1], r2, r0
; CHECK-NEXT: vmov.u16 r0, q0[4]
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: vmov.u16 r2, q1[4]
; CHECK-NEXT: vmov q3[3], q3[1], r2, r0
; CHECK-NEXT: vadd.i32 q0, q3, q4
; CHECK-NEXT: vadd.i32 q0, q0, q2
; CHECK-NEXT: vstrh.32 q0, [r1]
Expand Down Expand Up @@ -389,15 +381,13 @@ define void @vld4_v4i8(<16 x i8> *%src, <4 x i8> *%dst) {
; CHECK-LABEL: vld4_v4i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r0]
; CHECK-NEXT: vmov.u8 r0, q0[2]
; CHECK-NEXT: vrev32.8 q2, q0
; CHECK-NEXT: vmov.32 q1[0], r0
; CHECK-NEXT: vmov.u8 r0, q0[6]
; CHECK-NEXT: vmov.32 q1[1], r0
; CHECK-NEXT: vmov.u8 r0, q0[10]
; CHECK-NEXT: vmov.32 q1[2], r0
; CHECK-NEXT: vmov.u8 r2, q0[2]
; CHECK-NEXT: vmov q1[2], q1[0], r2, r0
; CHECK-NEXT: vmov.u8 r0, q0[14]
; CHECK-NEXT: vmov.32 q1[3], r0
; CHECK-NEXT: vmov.u8 r2, q0[6]
; CHECK-NEXT: vrev32.8 q2, q0
; CHECK-NEXT: vmov q1[3], q1[1], r2, r0
; CHECK-NEXT: vadd.i32 q1, q1, q2
; CHECK-NEXT: vrev16.8 q2, q0
; CHECK-NEXT: vadd.i32 q0, q0, q2
Expand Down Expand Up @@ -589,10 +579,8 @@ define void @vld4_v2i64(<8 x i64> *%src, <2 x i64> *%dst) {
; CHECK-NEXT: adcs r3, r4
; CHECK-NEXT: adds r0, r0, r5
; CHECK-NEXT: adcs r2, r3
; CHECK-NEXT: vmov.32 q0[0], r0
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: vmov.32 q0[2], lr
; CHECK-NEXT: vmov.32 q0[3], r12
; CHECK-NEXT: vmov q0[2], q0[0], r0, lr
; CHECK-NEXT: vmov q0[3], q0[1], r2, r12
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: pop {r4, r5, r6, pc}
Expand All @@ -612,123 +600,118 @@ entry:
define void @vld4_v4i64(<16 x i64> *%src, <4 x i64> *%dst) {
; CHECK-LABEL: vld4_v4i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: .pad #72
; CHECK-NEXT: sub sp, #72
; CHECK-NEXT: .pad #64
; CHECK-NEXT: sub sp, #64
; CHECK-NEXT: vldrw.u32 q1, [r0, #64]
; CHECK-NEXT: vldrw.u32 q0, [r0, #96]
; CHECK-NEXT: vldrw.u32 q5, [r0, #80]
; CHECK-NEXT: vldrw.u32 q7, [r0, #16]
; CHECK-NEXT: vmov.f64 d8, d3
; CHECK-NEXT: vstrw.32 q0, [sp, #32] @ 16-byte Spill
; CHECK-NEXT: vstrw.32 q1, [sp, #48] @ 16-byte Spill
; CHECK-NEXT: vmov.f32 s17, s7
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vmov.f32 s18, s2
; CHECK-NEXT: vmov.f32 s19, s3
; CHECK-NEXT: vldrw.u32 q0, [r0, #112]
; CHECK-NEXT: vmov.f64 d12, d11
; CHECK-NEXT: vldrw.u32 q4, [r0, #80]
; CHECK-NEXT: vldrw.u32 q6, [r0, #16]
; CHECK-NEXT: vmov.f64 d4, d3
; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill
; CHECK-NEXT: vmov.f32 s25, s23
; CHECK-NEXT: vmov.f32 s26, s2
; CHECK-NEXT: vmov.f64 d6, d3
; CHECK-NEXT: vmov.f32 s27, s3
; CHECK-NEXT: vldrw.u32 q0, [r0, #32]
; CHECK-NEXT: vmov.f32 s13, s7
; CHECK-NEXT: vmov.f32 s14, s2
; CHECK-NEXT: vldrw.u32 q3, [r0]
; CHECK-NEXT: vstrw.32 q1, [sp, #32] @ 16-byte Spill
; CHECK-NEXT: vmov.f32 s9, s7
; CHECK-NEXT: vldrw.u32 q1, [r0, #32]
; CHECK-NEXT: vmov.f32 s10, s2
; CHECK-NEXT: vmov.f32 s11, s3
; CHECK-NEXT: vldrw.u32 q0, [r0, #112]
; CHECK-NEXT: vmov.f64 d14, d9
; CHECK-NEXT: vstrw.32 q2, [sp, #48] @ 16-byte Spill
; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill
; CHECK-NEXT: vmov.f64 d4, d15
; CHECK-NEXT: vmov.f32 s15, s3
; CHECK-NEXT: vmov.f32 s29, s19
; CHECK-NEXT: vmov.f32 s30, s2
; CHECK-NEXT: vmov.f64 d4, d13
; CHECK-NEXT: vmov.f32 s31, s3
; CHECK-NEXT: vldrw.u32 q0, [r0, #48]
; CHECK-NEXT: vmov.f32 s9, s31
; CHECK-NEXT: vmov.f32 s9, s27
; CHECK-NEXT: vmov.f32 s10, s2
; CHECK-NEXT: vmov.f32 s30, s0
; CHECK-NEXT: vmov.f32 s26, s0
; CHECK-NEXT: vmov.f32 s11, s3
; CHECK-NEXT: vmov.f32 s31, s1
; CHECK-NEXT: vmov.f32 s27, s1
; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload
; CHECK-NEXT: vmov r3, s10
; CHECK-NEXT: vmov r0, s30
; CHECK-NEXT: vmov.f32 s6, s0
; CHECK-NEXT: vmov.f32 s7, s1
; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload
; CHECK-NEXT: vmov r4, s6
; CHECK-NEXT: vstrw.32 q1, [sp] @ 16-byte Spill
; CHECK-NEXT: vmov r0, s26
; CHECK-NEXT: vmov.f64 d10, d7
; CHECK-NEXT: vmov r12, s11
; CHECK-NEXT: vmov r2, s31
; CHECK-NEXT: vmov.f32 s22, s0
; CHECK-NEXT: vmov.f32 s23, s1
; CHECK-NEXT: vmov r2, s27
; CHECK-NEXT: vmov.f32 s21, s15
; CHECK-NEXT: vmov.f32 s22, s6
; CHECK-NEXT: vmov.f32 s14, s4
; CHECK-NEXT: vmov.f32 s15, s5
; CHECK-NEXT: vmov.f32 s23, s7
; CHECK-NEXT: vldrw.u32 q1, [sp, #16] @ 16-byte Reload
; CHECK-NEXT: vmov r4, s14
; CHECK-NEXT: vmov.f32 s18, s0
; CHECK-NEXT: vmov.f32 s19, s1
; CHECK-NEXT: vldrw.u32 q0, [sp, #48] @ 16-byte Reload
; CHECK-NEXT: vmov r5, s18
; CHECK-NEXT: vmov r7, s16
; CHECK-NEXT: adds.w lr, r0, r3
; CHECK-NEXT: vmov r3, s14
; CHECK-NEXT: vmov r0, s7
; CHECK-NEXT: vldrw.u32 q1, [sp, #32] @ 16-byte Reload
; CHECK-NEXT: vmov.f32 s2, s4
; CHECK-NEXT: vmov.f32 s3, s5
; CHECK-NEXT: vldrw.u32 q1, [sp] @ 16-byte Reload
; CHECK-NEXT: vmov r3, s22
; CHECK-NEXT: vmov r0, s15
; CHECK-NEXT: adc.w r12, r12, r2
; CHECK-NEXT: vmov r2, s15
; CHECK-NEXT: vmov r6, s2
; CHECK-NEXT: vmov r2, s23
; CHECK-NEXT: adds r3, r3, r4
; CHECK-NEXT: vmov r4, s23
; CHECK-NEXT: vmov r4, s28
; CHECK-NEXT: adcs r0, r2
; CHECK-NEXT: adds.w lr, lr, r3
; CHECK-NEXT: vmov r2, s16
; CHECK-NEXT: adc.w r12, r12, r0
; CHECK-NEXT: vmov r0, s26
; CHECK-NEXT: vmov r2, s22
; CHECK-NEXT: vmov r3, s27
; CHECK-NEXT: adds r0, r0, r2
; CHECK-NEXT: adc.w r2, r4, r3
; CHECK-NEXT: vmov r3, s19
; CHECK-NEXT: vmov r4, s3
; CHECK-NEXT: vmov r0, s29
; CHECK-NEXT: vmov r3, s17
; CHECK-NEXT: adds r2, r2, r4
; CHECK-NEXT: adcs r3, r0
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: vldrw.u32 q0, [sp, #32] @ 16-byte Reload
; CHECK-NEXT: vmov.f32 s2, s4
; CHECK-NEXT: vmov.f32 s3, s5
; CHECK-NEXT: vldrw.u32 q1, [sp, #48] @ 16-byte Reload
; CHECK-NEXT: vmov r6, s0
; CHECK-NEXT: vmov r5, s4
; CHECK-NEXT: vmov r4, s1
; CHECK-NEXT: vmov r7, s6
; CHECK-NEXT: adds r5, r5, r6
; CHECK-NEXT: vmov r6, s20
; CHECK-NEXT: adcs r3, r4
; CHECK-NEXT: adds r0, r0, r5
; CHECK-NEXT: vmov r5, s24
; CHECK-NEXT: adc.w r8, r3, r2
; CHECK-NEXT: vmov r2, s25
; CHECK-NEXT: vmov r4, s21
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: vmov r6, s18
; CHECK-NEXT: adcs r4, r0
; CHECK-NEXT: adds.w r9, r5, r2
; CHECK-NEXT: vmov r5, s30
; CHECK-NEXT: adc.w r8, r4, r3
; CHECK-NEXT: vmov r2, s31
; CHECK-NEXT: vmov r4, s19
; CHECK-NEXT: vmov r3, s2
; CHECK-NEXT: vmov r0, s8
; CHECK-NEXT: adds r5, r5, r6
; CHECK-NEXT: vmov r6, s1
; CHECK-NEXT: vmov r6, s3
; CHECK-NEXT: adcs r2, r4
; CHECK-NEXT: vmov r4, s17
; CHECK-NEXT: vmov r4, s7
; CHECK-NEXT: adds r3, r3, r7
; CHECK-NEXT: vmov r7, s28
; CHECK-NEXT: vmov r7, s12
; CHECK-NEXT: adcs r4, r6
; CHECK-NEXT: adds r3, r3, r5
; CHECK-NEXT: vmov r6, s8
; CHECK-NEXT: adcs r2, r4
; CHECK-NEXT: vmov r4, s9
; CHECK-NEXT: vmov.32 q0[0], r3
; CHECK-NEXT: vmov r5, s29
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: vmov.32 q0[2], r0
; CHECK-NEXT: vmov r0, s12
; CHECK-NEXT: vmov r3, s13
; CHECK-NEXT: vmov.32 q0[3], r8
; CHECK-NEXT: vmov r2, s5
; CHECK-NEXT: vstrw.32 q0, [r1, #16]
; CHECK-NEXT: vmov r6, s20
; CHECK-NEXT: adc.w r10, r4, r2
; CHECK-NEXT: vmov r4, s21
; CHECK-NEXT: vmov q1[2], q1[0], r9, r3
; CHECK-NEXT: vmov r5, s13
; CHECK-NEXT: vmov q1[3], q1[1], r8, r10
; CHECK-NEXT: vmov r2, s24
; CHECK-NEXT: vstrw.32 q1, [r1, #16]
; CHECK-NEXT: adds r6, r6, r7
; CHECK-NEXT: vmov r7, s25
; CHECK-NEXT: adcs r4, r5
; CHECK-NEXT: vmov r5, s4
; CHECK-NEXT: adds r0, r0, r5
; CHECK-NEXT: adcs r2, r3
; CHECK-NEXT: vmov r5, s9
; CHECK-NEXT: adds r0, r0, r2
; CHECK-NEXT: adc.w r2, r7, r5
; CHECK-NEXT: adds r0, r0, r6
; CHECK-NEXT: adcs r2, r4
; CHECK-NEXT: vmov.32 q0[0], r0
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: vmov.32 q0[2], lr
; CHECK-NEXT: vmov.32 q0[3], r12
; CHECK-NEXT: vmov q0[2], q0[0], r0, lr
; CHECK-NEXT: adc.w r0, r4, r2
; CHECK-NEXT: vmov q0[3], q0[1], r0, r12
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: add sp, #72
; CHECK-NEXT: add sp, #64
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
entry:
%l1 = load <16 x i64>, <16 x i64>* %src, align 4
%s1 = shufflevector <16 x i64> %l1, <16 x i64> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
Expand Down
122 changes: 52 additions & 70 deletions llvm/test/CodeGen/Thumb2/mve-vmulh.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,12 @@ define arm_aapcs_vfpcc <2 x i32> @vmulhs_v2i32(<2 x i32> %s0, <2 x i32> %s1) {
; CHECK-LABEL: vmulhs_v2i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmullb.s32 q2, q0, q1
; CHECK-NEXT: vmov r0, s9
; CHECK-NEXT: vmov.32 q0[0], r0
; CHECK-NEXT: asrs r0, r0, #31
; CHECK-NEXT: vmov.32 q0[1], r0
; CHECK-NEXT: vmov r0, s11
; CHECK-NEXT: vmov.32 q0[2], r0
; CHECK-NEXT: vmov r1, s9
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
; CHECK-NEXT: asrs r0, r0, #31
; CHECK-NEXT: vmov.32 q0[3], r0
; CHECK-NEXT: asrs r1, r1, #31
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: bx lr
entry:
%s0s = sext <2 x i32> %s0 to <2 x i64>
Expand Down Expand Up @@ -48,28 +46,28 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @vmulhs_v4i32(<4 x i32> %s0, <4 x i32> %s1) {
; CHECK-LABEL: vmulhs_v4i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vmov.f32 s8, s4
; CHECK-NEXT: vmov.f32 s12, s0
; CHECK-NEXT: vmov.f32 s14, s1
; CHECK-NEXT: vmov.f32 s10, s5
; CHECK-NEXT: vmov r2, s12
; CHECK-NEXT: vmov r1, s14
; CHECK-NEXT: vmov r0, s10
; CHECK-NEXT: vmov.f32 s12, s6
; CHECK-NEXT: vmov.f32 s14, s7
; CHECK-NEXT: vmov.f32 s14, s1
; CHECK-NEXT: vmov r0, s8
; CHECK-NEXT: vmov r1, s12
; CHECK-NEXT: vmov.f32 s16, s6
; CHECK-NEXT: vmov.f32 s18, s7
; CHECK-NEXT: vmov.f32 s4, s2
; CHECK-NEXT: vmov.f32 s6, s3
; CHECK-NEXT: vmullb.s32 q0, q1, q3
; CHECK-NEXT: vmullb.s32 q5, q1, q4
; CHECK-NEXT: smmul r0, r1, r0
; CHECK-NEXT: vmov r1, s8
; CHECK-NEXT: smmul r1, r2, r1
; CHECK-NEXT: vmov.32 q2[0], r1
; CHECK-NEXT: vmov.32 q2[1], r0
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: vmov.32 q2[2], r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmov.32 q2[3], r0
; CHECK-NEXT: vmov q0, q2
; CHECK-NEXT: vmov r1, s21
; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
; CHECK-NEXT: vmov r0, s10
; CHECK-NEXT: vmov r1, s14
; CHECK-NEXT: smmul r0, r1, r0
; CHECK-NEXT: vmov r1, s23
; CHECK-NEXT: vmov q0[3], q0[1], r0, r1
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: bx lr
entry:
%s0s = sext <4 x i32> %s0 to <4 x i64>
Expand Down Expand Up @@ -141,22 +139,19 @@ entry:
define arm_aapcs_vfpcc <8 x i16> @vmulhs_v8i16(<8 x i16> %s0, <8 x i16> %s1) {
; CHECK-LABEL: vmulhs_v8i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.u16 r0, q1[0]
; CHECK-NEXT: vmov.32 q2[0], r0
; CHECK-NEXT: vmov.u16 r0, q1[1]
; CHECK-NEXT: vmov.32 q2[1], r0
; CHECK-NEXT: vmov.u16 r0, q1[2]
; CHECK-NEXT: vmov.32 q2[2], r0
; CHECK-NEXT: vmov.u16 r1, q1[0]
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
; CHECK-NEXT: vmov.u16 r0, q1[3]
; CHECK-NEXT: vmov.32 q2[3], r0
; CHECK-NEXT: vmov.u16 r0, q0[0]
; CHECK-NEXT: vmov.32 q3[0], r0
; CHECK-NEXT: vmov.u16 r0, q0[1]
; CHECK-NEXT: vmov.32 q3[1], r0
; CHECK-NEXT: vmov.u16 r1, q1[1]
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
; CHECK-NEXT: vmov.u16 r0, q0[2]
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.u16 r1, q0[0]
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
; CHECK-NEXT: vmov.u16 r0, q0[3]
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: vmov.u16 r1, q0[1]
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
; CHECK-NEXT: vmov.u16 r1, q1[4]
; CHECK-NEXT: vmullb.s16 q2, q3, q2
; CHECK-NEXT: vshr.s32 q3, q2, #16
; CHECK-NEXT: vmov r0, s12
Expand All @@ -167,22 +162,17 @@ define arm_aapcs_vfpcc <8 x i16> @vmulhs_v8i16(<8 x i16> %s0, <8 x i16> %s1) {
; CHECK-NEXT: vmov.16 q2[2], r0
; CHECK-NEXT: vmov r0, s15
; CHECK-NEXT: vmov.16 q2[3], r0
; CHECK-NEXT: vmov.u16 r0, q1[4]
; CHECK-NEXT: vmov.32 q3[0], r0
; CHECK-NEXT: vmov.u16 r0, q1[5]
; CHECK-NEXT: vmov.32 q3[1], r0
; CHECK-NEXT: vmov.u16 r0, q1[6]
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
; CHECK-NEXT: vmov.u16 r0, q1[7]
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: vmov.u16 r0, q0[4]
; CHECK-NEXT: vmov.32 q1[0], r0
; CHECK-NEXT: vmov.u16 r0, q0[5]
; CHECK-NEXT: vmov.32 q1[1], r0
; CHECK-NEXT: vmov.u16 r1, q1[5]
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
; CHECK-NEXT: vmov.u16 r0, q0[6]
; CHECK-NEXT: vmov.32 q1[2], r0
; CHECK-NEXT: vmov.u16 r1, q0[4]
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
; CHECK-NEXT: vmov.u16 r0, q0[7]
; CHECK-NEXT: vmov.32 q1[3], r0
; CHECK-NEXT: vmov.u16 r1, q0[5]
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
; CHECK-NEXT: vmullb.s16 q0, q1, q3
; CHECK-NEXT: vshr.s32 q0, q0, #16
; CHECK-NEXT: vmov r0, s0
Expand All @@ -207,22 +197,19 @@ entry:
define arm_aapcs_vfpcc <8 x i16> @vmulhu_v8i16(<8 x i16> %s0, <8 x i16> %s1) {
; CHECK-LABEL: vmulhu_v8i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.u16 r0, q1[0]
; CHECK-NEXT: vmov.32 q2[0], r0
; CHECK-NEXT: vmov.u16 r0, q1[1]
; CHECK-NEXT: vmov.32 q2[1], r0
; CHECK-NEXT: vmov.u16 r0, q1[2]
; CHECK-NEXT: vmov.32 q2[2], r0
; CHECK-NEXT: vmov.u16 r1, q1[0]
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
; CHECK-NEXT: vmov.u16 r0, q1[3]
; CHECK-NEXT: vmov.32 q2[3], r0
; CHECK-NEXT: vmov.u16 r0, q0[0]
; CHECK-NEXT: vmov.32 q3[0], r0
; CHECK-NEXT: vmov.u16 r0, q0[1]
; CHECK-NEXT: vmov.32 q3[1], r0
; CHECK-NEXT: vmov.u16 r1, q1[1]
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
; CHECK-NEXT: vmov.u16 r0, q0[2]
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.u16 r1, q0[0]
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
; CHECK-NEXT: vmov.u16 r0, q0[3]
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: vmov.u16 r1, q0[1]
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
; CHECK-NEXT: vmov.u16 r1, q1[4]
; CHECK-NEXT: vmullb.u16 q2, q3, q2
; CHECK-NEXT: vshr.u32 q3, q2, #16
; CHECK-NEXT: vmov r0, s12
Expand All @@ -233,22 +220,17 @@ define arm_aapcs_vfpcc <8 x i16> @vmulhu_v8i16(<8 x i16> %s0, <8 x i16> %s1) {
; CHECK-NEXT: vmov.16 q2[2], r0
; CHECK-NEXT: vmov r0, s15
; CHECK-NEXT: vmov.16 q2[3], r0
; CHECK-NEXT: vmov.u16 r0, q1[4]
; CHECK-NEXT: vmov.32 q3[0], r0
; CHECK-NEXT: vmov.u16 r0, q1[5]
; CHECK-NEXT: vmov.32 q3[1], r0
; CHECK-NEXT: vmov.u16 r0, q1[6]
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
; CHECK-NEXT: vmov.u16 r0, q1[7]
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: vmov.u16 r0, q0[4]
; CHECK-NEXT: vmov.32 q1[0], r0
; CHECK-NEXT: vmov.u16 r0, q0[5]
; CHECK-NEXT: vmov.32 q1[1], r0
; CHECK-NEXT: vmov.u16 r1, q1[5]
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
; CHECK-NEXT: vmov.u16 r0, q0[6]
; CHECK-NEXT: vmov.32 q1[2], r0
; CHECK-NEXT: vmov.u16 r1, q0[4]
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
; CHECK-NEXT: vmov.u16 r0, q0[7]
; CHECK-NEXT: vmov.32 q1[3], r0
; CHECK-NEXT: vmov.u16 r1, q0[5]
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
; CHECK-NEXT: vmullb.u16 q0, q1, q3
; CHECK-NEXT: vshr.u32 q0, q0, #16
; CHECK-NEXT: vmov r0, s0
Expand Down
41 changes: 18 additions & 23 deletions llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,38 +4,33 @@
define arm_aapcs_vfpcc void @test32(i32* noalias nocapture readonly %x, i32* noalias nocapture readonly %y, i32* nocapture %z, i32 %n) {
; CHECK-LABEL: test32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r5, lr}
; CHECK-NEXT: push {r5, lr}
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: cmp r3, #1
; CHECK-NEXT: it lt
; CHECK-NEXT: poplt {r5, pc}
; CHECK-NEXT: blt .LBB0_2
; CHECK-NEXT: .LBB0_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q1, [r0], #16
; CHECK-NEXT: vldrw.u32 q2, [r1], #16
; CHECK-NEXT: subs r3, #4
; CHECK-NEXT: vmullt.s32 q3, q2, q1
; CHECK-NEXT: vmov r5, s13
; CHECK-NEXT: vmov r12, s12
; CHECK-NEXT: lsrl r12, r5, #31
; CHECK-NEXT: vmov.32 q0[0], r12
; CHECK-NEXT: vmov r12, s14
; CHECK-NEXT: vmov.32 q0[1], r5
; CHECK-NEXT: vmov r5, s15
; CHECK-NEXT: lsrl r12, r5, #31
; CHECK-NEXT: vmullt.s32 q0, q2, q1
; CHECK-NEXT: vmullb.s32 q3, q2, q1
; CHECK-NEXT: vmov.32 q0[2], r12
; CHECK-NEXT: vmov r12, s12
; CHECK-NEXT: vmov.32 q0[3], r5
; CHECK-NEXT: vmov r5, s13
; CHECK-NEXT: vmov r5, s3
; CHECK-NEXT: vmov r12, s2
; CHECK-NEXT: vmov r7, s1
; CHECK-NEXT: lsrl r12, r5, #31
; CHECK-NEXT: vmov.32 q1[0], r12
; CHECK-NEXT: vmov r4, s0
; CHECK-NEXT: lsrl r4, r7, #31
; CHECK-NEXT: vmov q0[2], q0[0], r4, r12
; CHECK-NEXT: vmov r12, s14
; CHECK-NEXT: vmov.32 q1[1], r5
; CHECK-NEXT: vmov q0[3], q0[1], r7, r5
; CHECK-NEXT: vmov r5, s15
; CHECK-NEXT: vmov r7, s13
; CHECK-NEXT: lsrl r12, r5, #31
; CHECK-NEXT: vmov.32 q1[2], r12
; CHECK-NEXT: vmov.32 q1[3], r5
; CHECK-NEXT: vmov r4, s12
; CHECK-NEXT: lsrl r4, r7, #31
; CHECK-NEXT: vmov q1[2], q1[0], r4, r12
; CHECK-NEXT: vmov q1[3], q1[1], r7, r5
; CHECK-NEXT: vmov.f32 s8, s6
; CHECK-NEXT: vmov.f32 s9, s7
; CHECK-NEXT: vmov.f32 s6, s0
Expand All @@ -47,8 +42,8 @@ define arm_aapcs_vfpcc void @test32(i32* noalias nocapture readonly %x, i32* noa
; CHECK-NEXT: vmov.f32 s7, s10
; CHECK-NEXT: vstrb.8 q1, [r2], #16
; CHECK-NEXT: bne .LBB0_1
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: pop {r5, pc}
; CHECK-NEXT: .LBB0_2: @ %for.cond.cleanup
; CHECK-NEXT: pop {r4, r5, r7, pc}
entry:
%0 = and i32 %n, 3
%cmp = icmp eq i32 %0, 0
Expand Down
42 changes: 17 additions & 25 deletions llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll
Original file line number Diff line number Diff line change
Expand Up @@ -73,22 +73,19 @@ define arm_aapcs_vfpcc <8 x i16> @vqdmulh_i16_c(<8 x i16> %s0, <8 x i16> %s1) {
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov q2, q0
; CHECK-NEXT: vmov.u16 r0, q0[0]
; CHECK-NEXT: vmov.32 q0[0], r0
; CHECK-NEXT: vmov.u16 r0, q2[1]
; CHECK-NEXT: vmov.32 q0[1], r0
; CHECK-NEXT: vmov.u16 r0, q2[2]
; CHECK-NEXT: vmov.32 q0[2], r0
; CHECK-NEXT: vmov.u16 r0, q0[2]
; CHECK-NEXT: vmov.u16 r1, q0[0]
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
; CHECK-NEXT: vmov.u16 r0, q2[3]
; CHECK-NEXT: vmov.32 q0[3], r0
; CHECK-NEXT: vmov.u16 r0, q1[0]
; CHECK-NEXT: vmov.32 q3[0], r0
; CHECK-NEXT: vmov.u16 r0, q1[1]
; CHECK-NEXT: vmov.32 q3[1], r0
; CHECK-NEXT: vmov.u16 r1, q2[1]
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: vmov.u16 r0, q1[2]
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.u16 r1, q1[0]
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
; CHECK-NEXT: vmov.u16 r0, q1[3]
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: vmov.u16 r1, q1[1]
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
; CHECK-NEXT: vmov.u16 r1, q2[4]
; CHECK-NEXT: vmullb.s16 q0, q3, q0
; CHECK-NEXT: vmov.i32 q3, #0x7fff
; CHECK-NEXT: vshl.i32 q0, q0, #10
Expand All @@ -103,22 +100,17 @@ define arm_aapcs_vfpcc <8 x i16> @vqdmulh_i16_c(<8 x i16> %s0, <8 x i16> %s1) {
; CHECK-NEXT: vmov.16 q0[2], r0
; CHECK-NEXT: vmov r0, s19
; CHECK-NEXT: vmov.16 q0[3], r0
; CHECK-NEXT: vmov.u16 r0, q2[4]
; CHECK-NEXT: vmov.32 q4[0], r0
; CHECK-NEXT: vmov.u16 r0, q2[5]
; CHECK-NEXT: vmov.32 q4[1], r0
; CHECK-NEXT: vmov.u16 r0, q2[6]
; CHECK-NEXT: vmov.32 q4[2], r0
; CHECK-NEXT: vmov q4[2], q4[0], r1, r0
; CHECK-NEXT: vmov.u16 r0, q2[7]
; CHECK-NEXT: vmov.32 q4[3], r0
; CHECK-NEXT: vmov.u16 r0, q1[4]
; CHECK-NEXT: vmov.32 q2[0], r0
; CHECK-NEXT: vmov.u16 r0, q1[5]
; CHECK-NEXT: vmov.32 q2[1], r0
; CHECK-NEXT: vmov.u16 r1, q2[5]
; CHECK-NEXT: vmov q4[3], q4[1], r1, r0
; CHECK-NEXT: vmov.u16 r0, q1[6]
; CHECK-NEXT: vmov.32 q2[2], r0
; CHECK-NEXT: vmov.u16 r1, q1[4]
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
; CHECK-NEXT: vmov.u16 r0, q1[7]
; CHECK-NEXT: vmov.32 q2[3], r0
; CHECK-NEXT: vmov.u16 r1, q1[5]
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
; CHECK-NEXT: vmullb.s16 q1, q2, q4
; CHECK-NEXT: vshl.i32 q1, q1, #10
; CHECK-NEXT: vshr.s32 q1, q1, #10
Expand Down
144 changes: 66 additions & 78 deletions llvm/test/CodeGen/Thumb2/mve-vqmovn.ll
Original file line number Diff line number Diff line change
Expand Up @@ -164,57 +164,53 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @vqmovni64_smaxmin(<2 x i64> %s0) {
; CHECK-LABEL: vqmovni64_smaxmin:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: mvn r3, #-2147483648
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: subs r2, r2, r3
; CHECK-NEXT: sbcs r1, r1, #0
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: mov.w r1, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov.32 q1[0], r1
; CHECK-NEXT: vmov.32 q1[1], r1
; CHECK-NEXT: mvn r12, #-2147483648
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: subs r2, r2, r3
; CHECK-NEXT: mov.w r3, #-1
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: subs.w r2, r2, r12
; CHECK-NEXT: vmov r2, s1
; CHECK-NEXT: sbcs r1, r1, #0
; CHECK-NEXT: mov.w r1, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov.32 q1[2], r1
; CHECK-NEXT: vmov.32 q1[3], r1
; CHECK-NEXT: subs.w r3, r3, r12
; CHECK-NEXT: mov.w r12, #-1
; CHECK-NEXT: sbcs r2, r2, #0
; CHECK-NEXT: mov.w r2, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csetm r2, ne
; CHECK-NEXT: vmov q1[2], q1[0], r2, r1
; CHECK-NEXT: vmov q1[3], q1[1], r2, r1
; CHECK-NEXT: adr r1, .LCPI12_0
; CHECK-NEXT: vldrw.u32 q2, [r1]
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: vbic q2, q2, q1
; CHECK-NEXT: vorr q0, q0, q2
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: rsbs.w r2, r2, #-2147483648
; CHECK-NEXT: sbcs.w r1, r3, r1
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: rsbs.w r2, r2, #-2147483648
; CHECK-NEXT: vmov r2, s1
; CHECK-NEXT: sbcs.w r1, r12, r1
; CHECK-NEXT: mov.w r1, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov.32 q1[0], r1
; CHECK-NEXT: vmov.32 q1[1], r1
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: rsbs.w r2, r2, #-2147483648
; CHECK-NEXT: sbcs.w r1, r3, r1
; CHECK-NEXT: rsbs.w r3, r3, #-2147483648
; CHECK-NEXT: sbcs.w r2, r12, r2
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q1[2], r0
; CHECK-NEXT: vmov.32 q1[3], r0
; CHECK-NEXT: vmov q1[2], q1[0], r0, r1
; CHECK-NEXT: vmov q1[3], q1[1], r0, r1
; CHECK-NEXT: adr r0, .LCPI12_1
; CHECK-NEXT: vldrw.u32 q2, [r0]
; CHECK-NEXT: vand q0, q0, q1
Expand Down Expand Up @@ -244,57 +240,53 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @vqmovni64_sminmax(<2 x i64> %s0) {
; CHECK-LABEL: vqmovni64_sminmax:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: mov.w r3, #-1
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: rsbs.w r2, r2, #-2147483648
; CHECK-NEXT: sbcs.w r1, r3, r1
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: mov.w r1, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov.32 q1[0], r1
; CHECK-NEXT: vmov.32 q1[1], r1
; CHECK-NEXT: mov.w r12, #-1
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: rsbs.w r2, r2, #-2147483648
; CHECK-NEXT: sbcs.w r1, r3, r1
; CHECK-NEXT: mvn r3, #-2147483648
; CHECK-NEXT: vmov r2, s1
; CHECK-NEXT: sbcs.w r1, r12, r1
; CHECK-NEXT: mov.w r1, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov.32 q1[2], r1
; CHECK-NEXT: vmov.32 q1[3], r1
; CHECK-NEXT: rsbs.w r3, r3, #-2147483648
; CHECK-NEXT: sbcs.w r2, r12, r2
; CHECK-NEXT: mvn r12, #-2147483648
; CHECK-NEXT: mov.w r2, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csetm r2, ne
; CHECK-NEXT: vmov q1[2], q1[0], r2, r1
; CHECK-NEXT: vmov q1[3], q1[1], r2, r1
; CHECK-NEXT: adr r1, .LCPI13_0
; CHECK-NEXT: vldrw.u32 q2, [r1]
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: vbic q2, q2, q1
; CHECK-NEXT: vorr q0, q0, q2
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: subs r2, r2, r3
; CHECK-NEXT: sbcs r1, r1, #0
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: subs.w r2, r2, r12
; CHECK-NEXT: vmov r2, s1
; CHECK-NEXT: sbcs r1, r1, #0
; CHECK-NEXT: mov.w r1, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov.32 q1[0], r1
; CHECK-NEXT: vmov.32 q1[1], r1
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: subs r2, r2, r3
; CHECK-NEXT: sbcs r1, r1, #0
; CHECK-NEXT: subs.w r3, r3, r12
; CHECK-NEXT: sbcs r2, r2, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q1[2], r0
; CHECK-NEXT: vmov.32 q1[3], r0
; CHECK-NEXT: vmov q1[2], q1[0], r0, r1
; CHECK-NEXT: vmov q1[3], q1[1], r0, r1
; CHECK-NEXT: adr r0, .LCPI13_1
; CHECK-NEXT: vldrw.u32 q2, [r0]
; CHECK-NEXT: vand q0, q0, q1
Expand Down Expand Up @@ -324,29 +316,27 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @vqmovni64_umaxmin(<2 x i64> %s0) {
; CHECK-LABEL: vqmovni64_umaxmin:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmov.i64 q2, #0xffffffff
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: subs.w r1, r1, #-1
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: sbcs r0, r0, #0
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: mov.w r0, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q1[0], r0
; CHECK-NEXT: vmov.32 q1[1], r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: subs.w r1, r1, #-1
; CHECK-NEXT: sbcs r0, r0, #0
; CHECK-NEXT: subs.w r3, r3, #-1
; CHECK-NEXT: sbcs r1, r1, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q1[2], r0
; CHECK-NEXT: vmov.32 q1[3], r0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
; CHECK-NEXT: vbic q2, q2, q1
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: vorr q0, q0, q2
Expand All @@ -360,29 +350,27 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @vqmovni64_uminmax(<2 x i64> %s0) {
; CHECK-LABEL: vqmovni64_uminmax:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmov.i64 q2, #0xffffffff
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: subs.w r1, r1, #-1
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: sbcs r0, r0, #0
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: mov.w r0, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q1[0], r0
; CHECK-NEXT: vmov.32 q1[1], r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: subs.w r1, r1, #-1
; CHECK-NEXT: sbcs r0, r0, #0
; CHECK-NEXT: subs.w r3, r3, #-1
; CHECK-NEXT: sbcs r1, r1, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q1[2], r0
; CHECK-NEXT: vmov.32 q1[3], r0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
; CHECK-NEXT: vbic q2, q2, q1
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: vorr q0, q0, q2
Expand Down
292 changes: 136 additions & 156 deletions llvm/test/CodeGen/Thumb2/mve-vqshrn.ll
Original file line number Diff line number Diff line change
Expand Up @@ -180,71 +180,65 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @vqshrni64_smaxmin(<2 x i64> %so) {
; CHECK-LABEL: vqshrni64_smaxmin:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r5, lr}
; CHECK-NEXT: push {r5, lr}
; CHECK-NEXT: vmov r5, s1
; CHECK-NEXT: mvn lr, #-2147483648
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: asrl r2, r5, #3
; CHECK-NEXT: vmov r3, s3
; CHECK-NEXT: subs.w r0, r2, lr
; CHECK-NEXT: vmov.32 q2[0], r2
; CHECK-NEXT: sbcs r0, r5, #0
; CHECK-NEXT: vmov.32 q2[1], r5
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: mvn r12, #-2147483648
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: mov.w lr, #0
; CHECK-NEXT: asrl r2, r1, #3
; CHECK-NEXT: vmov r4, s0
; CHECK-NEXT: subs.w r3, r2, r12
; CHECK-NEXT: sbcs r3, r1, #0
; CHECK-NEXT: mov.w r3, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r3, #1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: vmov r3, s1
; CHECK-NEXT: csetm r5, ne
; CHECK-NEXT: asrl r4, r3, #3
; CHECK-NEXT: subs.w r0, r4, r12
; CHECK-NEXT: vmov q2[2], q2[0], r4, r2
; CHECK-NEXT: sbcs r0, r3, #0
; CHECK-NEXT: vmov q2[3], q2[1], r3, r1
; CHECK-NEXT: mov.w r0, #0
; CHECK-NEXT: mov.w r2, #-1
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q1[0], r0
; CHECK-NEXT: vmov.32 q1[1], r0
; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: asrl r0, r3, #3
; CHECK-NEXT: subs.w r1, r0, lr
; CHECK-NEXT: vmov.32 q2[2], r0
; CHECK-NEXT: sbcs r1, r3, #0
; CHECK-NEXT: vmov.32 q2[3], r3
; CHECK-NEXT: mov.w r1, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov.32 q1[2], r1
; CHECK-NEXT: vmov.32 q1[3], r1
; CHECK-NEXT: adr r1, .LCPI12_0
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vbic q0, q0, q1
; CHECK-NEXT: vand q1, q2, q1
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: vmov q0[2], q0[0], r0, r5
; CHECK-NEXT: vmov q0[3], q0[1], r0, r5
; CHECK-NEXT: adr r0, .LCPI12_0
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vbic q1, q1, q0
; CHECK-NEXT: vand q0, q2, q0
; CHECK-NEXT: vorr q0, q0, q1
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: rsbs.w r1, r1, #-2147483648
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: sbcs.w r0, r2, r0
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: mov.w r0, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q1[0], r0
; CHECK-NEXT: vmov.32 q1[1], r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: rsbs.w r1, r1, #-2147483648
; CHECK-NEXT: sbcs.w r0, r2, r0
; CHECK-NEXT: rsbs.w r3, r3, #-2147483648
; CHECK-NEXT: sbcs.w r1, r2, r1
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt.w r12, #1
; CHECK-NEXT: cmp.w r12, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q1[2], r0
; CHECK-NEXT: vmov.32 q1[3], r0
; CHECK-NEXT: movlt.w lr, #1
; CHECK-NEXT: cmp.w lr, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
; CHECK-NEXT: adr r0, .LCPI12_1
; CHECK-NEXT: vldrw.u32 q2, [r0]
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: vbic q2, q2, q1
; CHECK-NEXT: vorr q0, q0, q2
; CHECK-NEXT: pop {r5, pc}
; CHECK-NEXT: pop {r4, r5, r7, pc}
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI12_0:
Expand All @@ -269,71 +263,65 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @vqshrni64_sminmax(<2 x i64> %so) {
; CHECK-LABEL: vqshrni64_sminmax:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: mov.w lr, #-1
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: mov.w r12, #-1
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: mov.w lr, #0
; CHECK-NEXT: asrl r2, r1, #3
; CHECK-NEXT: vmov r3, s3
; CHECK-NEXT: rsbs.w r0, r2, #-2147483648
; CHECK-NEXT: vmov.32 q2[0], r2
; CHECK-NEXT: sbcs.w r0, lr, r1
; CHECK-NEXT: vmov.32 q2[1], r1
; CHECK-NEXT: mov.w r0, #0
; CHECK-NEXT: mvn r2, #-2147483648
; CHECK-NEXT: vmov r4, s0
; CHECK-NEXT: rsbs.w r3, r2, #-2147483648
; CHECK-NEXT: sbcs.w r3, r12, r1
; CHECK-NEXT: mov.w r3, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: movlt r3, #1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: vmov r3, s1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q1[0], r0
; CHECK-NEXT: vmov.32 q1[1], r0
; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: asrl r0, r3, #3
; CHECK-NEXT: rsbs.w r4, r0, #-2147483648
; CHECK-NEXT: vmov.32 q2[2], r0
; CHECK-NEXT: sbcs.w r4, lr, r3
; CHECK-NEXT: vmov.32 q2[3], r3
; CHECK-NEXT: mov.w r4, #0
; CHECK-NEXT: asrl r4, r3, #3
; CHECK-NEXT: rsbs.w r5, r4, #-2147483648
; CHECK-NEXT: vmov q2[2], q2[0], r4, r2
; CHECK-NEXT: sbcs.w r5, r12, r3
; CHECK-NEXT: vmov q2[3], q2[1], r3, r1
; CHECK-NEXT: mov.w r5, #0
; CHECK-NEXT: mvn r2, #-2147483648
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r4, #1
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: csetm r4, ne
; CHECK-NEXT: vmov.32 q1[2], r4
; CHECK-NEXT: vmov.32 q1[3], r4
; CHECK-NEXT: adr r4, .LCPI13_0
; CHECK-NEXT: vldrw.u32 q0, [r4]
; CHECK-NEXT: vbic q0, q0, q1
; CHECK-NEXT: vand q1, q2, q1
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: movlt r5, #1
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: csetm r5, ne
; CHECK-NEXT: vmov q0[2], q0[0], r5, r0
; CHECK-NEXT: vmov q0[3], q0[1], r5, r0
; CHECK-NEXT: adr r0, .LCPI13_0
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vbic q1, q1, q0
; CHECK-NEXT: vand q0, q2, q0
; CHECK-NEXT: vorr q0, q0, q1
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: subs r1, r1, r2
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: sbcs r0, r0, #0
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: mov.w r0, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q1[0], r0
; CHECK-NEXT: vmov.32 q1[1], r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: subs r1, r1, r2
; CHECK-NEXT: sbcs r0, r0, #0
; CHECK-NEXT: subs r2, r3, r2
; CHECK-NEXT: sbcs r1, r1, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt.w r12, #1
; CHECK-NEXT: cmp.w r12, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q1[2], r0
; CHECK-NEXT: vmov.32 q1[3], r0
; CHECK-NEXT: movlt.w lr, #1
; CHECK-NEXT: cmp.w lr, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
; CHECK-NEXT: adr r0, .LCPI13_1
; CHECK-NEXT: vldrw.u32 q2, [r0]
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: vbic q2, q2, q1
; CHECK-NEXT: vorr q0, q0, q2
; CHECK-NEXT: pop {r4, pc}
; CHECK-NEXT: pop {r4, r5, r7, pc}
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI13_0:
Expand All @@ -358,41 +346,37 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @vqshrni64_umaxmin(<2 x i64> %so) {
; CHECK-LABEL: vqshrni64_umaxmin:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r5, r6, r7, lr}
; CHECK-NEXT: push {r5, r6, r7, lr}
; CHECK-NEXT: vmov r7, s1
; CHECK-NEXT: movs r1, #0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: lsrl r0, r7, #3
; CHECK-NEXT: vmov r3, s3
; CHECK-NEXT: subs.w r2, r0, #-1
; CHECK-NEXT: vmov.32 q2[0], r0
; CHECK-NEXT: sbcs r2, r7, #0
; CHECK-NEXT: vmov.32 q2[1], r7
; CHECK-NEXT: mov.w r2, #0
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: vmov r5, s3
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: vmov.i64 q1, #0xffffffff
; CHECK-NEXT: lsrl r0, r5, #3
; CHECK-NEXT: vmov r4, s0
; CHECK-NEXT: subs.w r3, r0, #-1
; CHECK-NEXT: sbcs r3, r5, #0
; CHECK-NEXT: mov.w r3, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r3, #1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: vmov r3, s1
; CHECK-NEXT: csetm r12, ne
; CHECK-NEXT: lsrl r4, r3, #3
; CHECK-NEXT: subs.w r1, r4, #-1
; CHECK-NEXT: vmov q2[2], q2[0], r4, r0
; CHECK-NEXT: sbcs r1, r3, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csetm r2, ne
; CHECK-NEXT: vmov.32 q1[0], r2
; CHECK-NEXT: vmov.32 q1[1], r2
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: lsrl r2, r3, #3
; CHECK-NEXT: vmov.i64 q0, #0xffffffff
; CHECK-NEXT: subs.w r5, r2, #-1
; CHECK-NEXT: vmov.32 q2[2], r2
; CHECK-NEXT: sbcs r5, r3, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: vmov.32 q2[3], r3
; CHECK-NEXT: vmov q2[3], q2[1], r3, r5
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov.32 q1[2], r1
; CHECK-NEXT: vmov.32 q1[3], r1
; CHECK-NEXT: vbic q0, q0, q1
; CHECK-NEXT: vand q1, q2, q1
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: pop {r5, r6, r7, pc}
; CHECK-NEXT: vmov q0[2], q0[0], r1, r12
; CHECK-NEXT: vmov q0[3], q0[1], r1, r12
; CHECK-NEXT: vbic q1, q1, q0
; CHECK-NEXT: vand q0, q2, q0
; CHECK-NEXT: vorr q0, q0, q1
; CHECK-NEXT: pop {r4, r5, r7, pc}
entry:
%s0 = lshr <2 x i64> %so, <i64 3, i64 3>
%c1 = icmp ult <2 x i64> %s0, <i64 4294967295, i64 4294967295>
Expand All @@ -403,41 +387,37 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @vqshrni64_uminmax(<2 x i64> %so) {
; CHECK-LABEL: vqshrni64_uminmax:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r5, r6, r7, lr}
; CHECK-NEXT: push {r5, r6, r7, lr}
; CHECK-NEXT: vmov r7, s1
; CHECK-NEXT: movs r1, #0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: lsrl r0, r7, #3
; CHECK-NEXT: vmov r3, s3
; CHECK-NEXT: subs.w r2, r0, #-1
; CHECK-NEXT: vmov.32 q2[0], r0
; CHECK-NEXT: sbcs r2, r7, #0
; CHECK-NEXT: vmov.32 q2[1], r7
; CHECK-NEXT: mov.w r2, #0
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: vmov r5, s3
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: vmov.i64 q1, #0xffffffff
; CHECK-NEXT: lsrl r0, r5, #3
; CHECK-NEXT: vmov r4, s0
; CHECK-NEXT: subs.w r3, r0, #-1
; CHECK-NEXT: sbcs r3, r5, #0
; CHECK-NEXT: mov.w r3, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r3, #1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: vmov r3, s1
; CHECK-NEXT: csetm r12, ne
; CHECK-NEXT: lsrl r4, r3, #3
; CHECK-NEXT: subs.w r1, r4, #-1
; CHECK-NEXT: vmov q2[2], q2[0], r4, r0
; CHECK-NEXT: sbcs r1, r3, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csetm r2, ne
; CHECK-NEXT: vmov.32 q1[0], r2
; CHECK-NEXT: vmov.32 q1[1], r2
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: lsrl r2, r3, #3
; CHECK-NEXT: vmov.i64 q0, #0xffffffff
; CHECK-NEXT: subs.w r5, r2, #-1
; CHECK-NEXT: vmov.32 q2[2], r2
; CHECK-NEXT: sbcs r5, r3, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: vmov.32 q2[3], r3
; CHECK-NEXT: vmov q2[3], q2[1], r3, r5
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov.32 q1[2], r1
; CHECK-NEXT: vmov.32 q1[3], r1
; CHECK-NEXT: vbic q0, q0, q1
; CHECK-NEXT: vand q1, q2, q1
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: pop {r5, r6, r7, pc}
; CHECK-NEXT: vmov q0[2], q0[0], r1, r12
; CHECK-NEXT: vmov q0[3], q0[1], r1, r12
; CHECK-NEXT: vbic q1, q1, q0
; CHECK-NEXT: vand q0, q2, q0
; CHECK-NEXT: vorr q0, q0, q1
; CHECK-NEXT: pop {r4, r5, r7, pc}
entry:
%s0 = lshr <2 x i64> %so, <i64 3, i64 3>
%c2 = icmp ult <2 x i64> %s0, <i64 4294967295, i64 4294967295>
Expand Down
46 changes: 20 additions & 26 deletions llvm/test/CodeGen/Thumb2/mve-vst2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,19 @@
define void @vst2_v2i32(<2 x i32> *%src, <4 x i32> *%dst) {
; CHECK-LABEL: vst2_v2i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrd r2, r12, [r0]
; CHECK-NEXT: ldrd r3, r0, [r0, #8]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: vmov.32 q1[0], r3
; CHECK-NEXT: vmov.32 q0[2], r12
; CHECK-NEXT: vmov.f64 d4, d1
; CHECK-NEXT: vmov.32 q1[2], r0
; CHECK-NEXT: vmov.f32 s9, s3
; CHECK-NEXT: vmov.f32 s2, s4
; CHECK-NEXT: vmov.f32 s3, s5
; CHECK-NEXT: vmov.f32 s10, s6
; CHECK-NEXT: vmov.f32 s1, s2
; CHECK-NEXT: vmov.f32 s11, s7
; CHECK-NEXT: ldrd r12, r3, [r0]
; CHECK-NEXT: ldrd r2, r0, [r0, #8]
; CHECK-NEXT: vmov q0[2], q0[0], r12, r3
; CHECK-NEXT: vmov.f64 d2, d1
; CHECK-NEXT: vmov q2[2], q2[0], r2, r0
; CHECK-NEXT: vmov.f32 s5, s3
; CHECK-NEXT: vmov.f32 s2, s8
; CHECK-NEXT: vmov.f32 s3, s10
; CHECK-NEXT: vmov.f32 s3, s9
; CHECK-NEXT: vmov.f32 s6, s10
; CHECK-NEXT: vmov.f32 s1, s2
; CHECK-NEXT: vmov.f32 s7, s11
; CHECK-NEXT: vmov.f32 s2, s4
; CHECK-NEXT: vmov.f32 s3, s6
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
Expand Down Expand Up @@ -113,14 +111,12 @@ entry:
define void @vst2_v2i16(<2 x i16> *%src, <4 x i16> *%dst) {
; CHECK-LABEL: vst2_v2i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrh r2, [r0, #2]
; CHECK-NEXT: ldrh r3, [r0]
; CHECK-NEXT: ldrh r2, [r0, #4]
; CHECK-NEXT: vmov.32 q0[0], r3
; CHECK-NEXT: ldrh.w r12, [r0, #6]
; CHECK-NEXT: ldrh r0, [r0, #2]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: vmov.32 q0[2], r0
; CHECK-NEXT: vmov.32 q0[3], r12
; CHECK-NEXT: ldrh r0, [r0, #4]
; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
; CHECK-NEXT: vmov q0[3], q0[1], r0, r12
; CHECK-NEXT: vstrh.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
Expand Down Expand Up @@ -197,13 +193,11 @@ define void @vst2_v2i8(<2 x i8> *%src, <4 x i8> *%dst) {
; CHECK-LABEL: vst2_v2i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrb r2, [r0]
; CHECK-NEXT: ldrb r3, [r0, #2]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrb.w r12, [r0, #1]
; CHECK-NEXT: vmov.32 q0[1], r3
; CHECK-NEXT: ldrb r3, [r0, #1]
; CHECK-NEXT: ldrb.w r12, [r0, #2]
; CHECK-NEXT: ldrb r0, [r0, #3]
; CHECK-NEXT: vmov.32 q0[2], r12
; CHECK-NEXT: vmov.32 q0[3], r0
; CHECK-NEXT: vmov q0[2], q0[0], r2, r3
; CHECK-NEXT: vmov q0[3], q0[1], r12, r0
; CHECK-NEXT: vstrb.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
Expand Down
47 changes: 21 additions & 26 deletions llvm/test/CodeGen/Thumb2/mve-vst3.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,13 @@ define void @vst3_v2i32(<2 x i32> *%src, <6 x i32> *%dst) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: ldm.w r0, {r2, r3, r12, lr}
; CHECK-NEXT: ldrd lr, r12, [r0]
; CHECK-NEXT: ldrd r3, r2, [r0, #8]
; CHECK-NEXT: ldrd r4, r0, [r0, #16]
; CHECK-NEXT: vmov.32 q1[0], r2
; CHECK-NEXT: vmov.32 q1[1], r3
; CHECK-NEXT: vmov q1[2], q1[0], lr, r3
; CHECK-NEXT: vmov.32 q0[0], r4
; CHECK-NEXT: vmov.32 q1[2], r12
; CHECK-NEXT: vmov q1[3], q1[1], r12, r2
; CHECK-NEXT: vmov.32 q0[1], r0
; CHECK-NEXT: vmov.32 q1[3], lr
; CHECK-NEXT: vmov.f32 s8, s7
; CHECK-NEXT: vmov.f32 s10, s1
; CHECK-NEXT: vmov r2, s8
Expand Down Expand Up @@ -302,18 +301,16 @@ define void @vst3_v2i16(<2 x i16> *%src, <6 x i16> *%dst) {
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: ldrh r2, [r0, #6]
; CHECK-NEXT: ldrh r3, [r0, #4]
; CHECK-NEXT: ldrh.w lr, [r0, #4]
; CHECK-NEXT: ldrh.w r12, [r0, #8]
; CHECK-NEXT: vmov.16 q0[4], r2
; CHECK-NEXT: ldrh.w lr, [r0, #2]
; CHECK-NEXT: vmov.32 q1[0], r3
; CHECK-NEXT: ldrh r3, [r0, #2]
; CHECK-NEXT: vmov q1[2], q1[0], lr, r2
; CHECK-NEXT: ldrh r4, [r0]
; CHECK-NEXT: vmov.32 q1[2], r2
; CHECK-NEXT: ldrh r0, [r0, #10]
; CHECK-NEXT: vmov.16 q0[5], r0
; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: vmov.32 q0[0], r4
; CHECK-NEXT: vmov.32 q0[2], lr
; CHECK-NEXT: vmov q0[2], q0[0], r4, r3
; CHECK-NEXT: vmov.f32 s1, s4
; CHECK-NEXT: vdup.32 q1, r12
; CHECK-NEXT: vmov.f32 s3, s2
Expand Down Expand Up @@ -689,9 +686,8 @@ define void @vst3_v2i8(<2 x i8> *%src, <6 x i8> *%dst) {
; CHECK-NEXT: ldrb r2, [r0]
; CHECK-NEXT: movs r6, #0
; CHECK-NEXT: ldrb r3, [r0, #1]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrb.w r12, [r0, #2]
; CHECK-NEXT: vmov.32 q0[2], r3
; CHECK-NEXT: vmov q0[2], q0[0], r2, r3
; CHECK-NEXT: ldrb.w lr, [r0, #3]
; CHECK-NEXT: vmov r4, s0
; CHECK-NEXT: ldrb r5, [r0, #5]
Expand Down Expand Up @@ -1461,23 +1457,21 @@ entry:
define void @vst3_v4f16(<4 x half> *%src, <12 x half> *%dst) {
; CHECK-LABEL: vst3_v4f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: ldm.w r0, {r2, r3, r12, lr}
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: vmov.32 q0[1], r3
; CHECK-NEXT: vmov.32 q0[2], r12
; CHECK-NEXT: vmov.32 q0[3], lr
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: ldrd lr, r12, [r0]
; CHECK-NEXT: ldrd r3, r2, [r0, #8]
; CHECK-NEXT: ldrd r4, r0, [r0, #16]
; CHECK-NEXT: vmov q0[2], q0[0], lr, r3
; CHECK-NEXT: vmov q0[3], q0[1], r12, r2
; CHECK-NEXT: vmov.32 q1[0], r4
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: vmovx.f16 s12, s0
; CHECK-NEXT: vmov.32 q1[1], r0
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: vmov.16 q2[0], r3
; CHECK-NEXT: vmov.16 q2[1], r2
; CHECK-NEXT: ldrd r2, r0, [r0, #16]
; CHECK-NEXT: vmovx.f16 s0, s3
; CHECK-NEXT: vmov.32 q1[0], r2
; CHECK-NEXT: vmov.32 q1[1], r0
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vmovx.f16 s12, s0
; CHECK-NEXT: vmov.16 q2[2], r0
; CHECK-NEXT: vmov r0, s12
; CHECK-NEXT: vmovx.f16 s12, s2
Expand All @@ -1486,6 +1480,7 @@ define void @vst3_v4f16(<4 x half> *%src, <12 x half> *%dst) {
; CHECK-NEXT: vmovx.f16 s12, s4
; CHECK-NEXT: vmov.16 q2[4], r0
; CHECK-NEXT: vmov r0, s12
; CHECK-NEXT: vmovx.f16 s0, s3
; CHECK-NEXT: vmov.16 q2[5], r0
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: vmov.16 q2[6], r0
Expand All @@ -1505,7 +1500,7 @@ define void @vst3_v4f16(<4 x half> *%src, <12 x half> *%dst) {
; CHECK-NEXT: vmov r0, s9
; CHECK-NEXT: vmov r2, s8
; CHECK-NEXT: strd r2, r0, [r1, #16]
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: pop {r4, pc}
entry:
%s1 = getelementptr <4 x half>, <4 x half>* %src, i32 0
%l1 = load <4 x half>, <4 x half>* %s1, align 4
Expand Down
101 changes: 47 additions & 54 deletions llvm/test/CodeGen/Thumb2/mve-vst4.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,16 @@ define void @vst4_v2i32(<2 x i32> *%src, <8 x i32> *%dst) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: ldm.w r0, {r2, r3, r12, lr}
; CHECK-NEXT: ldrd lr, r12, [r0]
; CHECK-NEXT: ldrd r3, r2, [r0, #8]
; CHECK-NEXT: ldrd r4, r0, [r0, #16]
; CHECK-NEXT: vmov.32 q1[0], r4
; CHECK-NEXT: vmov.32 q1[2], r0
; CHECK-NEXT: vmov q1[2], q1[0], r4, r0
; CHECK-NEXT: vmov.f64 d0, d2
; CHECK-NEXT: vmov.f32 s1, s6
; CHECK-NEXT: vmov.f32 s2, s4
; CHECK-NEXT: vmov.f32 s3, s6
; CHECK-NEXT: vmov.32 q1[0], r2
; CHECK-NEXT: vmov.32 q1[1], r3
; CHECK-NEXT: vmov.32 q1[2], r12
; CHECK-NEXT: vmov.32 q1[3], lr
; CHECK-NEXT: vmov q1[2], q1[0], lr, r3
; CHECK-NEXT: vmov q1[3], q1[1], r12, r2
; CHECK-NEXT: vmov.f64 d4, d2
; CHECK-NEXT: vmov.f32 s9, s6
; CHECK-NEXT: vmov.f32 s10, s0
Expand Down Expand Up @@ -209,23 +207,22 @@ define void @vst4_v2i16(<2 x i16> *%src, <8 x i16> *%dst) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: ldrh r4, [r0]
; CHECK-NEXT: ldrh.w lr, [r0, #4]
; CHECK-NEXT: ldrh r2, [r0, #2]
; CHECK-NEXT: ldrh.w r12, [r0, #4]
; CHECK-NEXT: ldrh r3, [r0, #8]
; CHECK-NEXT: vmov.32 q0[0], r4
; CHECK-NEXT: ldrh.w r12, [r0, #6]
; CHECK-NEXT: ldrh r2, [r0, #10]
; CHECK-NEXT: ldrh r0, [r0, #2]
; CHECK-NEXT: vmov.32 q0[2], r0
; CHECK-NEXT: vmov r4, s0
; CHECK-NEXT: vmov.16 q0[0], r4
; CHECK-NEXT: vmov.16 q0[1], lr
; CHECK-NEXT: ldrh.w lr, [r0, #6]
; CHECK-NEXT: ldrh r4, [r0, #10]
; CHECK-NEXT: ldrh r0, [r0]
; CHECK-NEXT: vmov q0[2], q0[0], r0, r2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: vmov.16 q0[0], r0
; CHECK-NEXT: vmov.16 q0[1], r12
; CHECK-NEXT: vmov.16 q0[2], r3
; CHECK-NEXT: vmov.16 q0[3], r3
; CHECK-NEXT: vmov.16 q0[4], r0
; CHECK-NEXT: vmov.16 q0[5], r12
; CHECK-NEXT: vmov.16 q0[6], r2
; CHECK-NEXT: vmov.16 q0[7], r2
; CHECK-NEXT: vmov.16 q0[4], r2
; CHECK-NEXT: vmov.16 q0[5], lr
; CHECK-NEXT: vmov.16 q0[6], r4
; CHECK-NEXT: vmov.16 q0[7], r4
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r4, pc}
entry:
Expand Down Expand Up @@ -376,9 +373,8 @@ define void @vst4_v2i8(<2 x i8> *%src, <8 x i8> *%dst) {
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: ldrb r2, [r0]
; CHECK-NEXT: ldrb r3, [r0, #1]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrb.w r12, [r0, #2]
; CHECK-NEXT: vmov.32 q0[2], r3
; CHECK-NEXT: vmov q0[2], q0[0], r2, r3
; CHECK-NEXT: ldrb.w lr, [r0, #3]
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: ldrb r4, [r0, #5]
Expand Down Expand Up @@ -911,61 +907,58 @@ entry:
define void @vst4_v4f16(<4 x half> *%src, <16 x half> *%dst) {
; CHECK-LABEL: vst4_v4f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: ldm.w r0, {r2, r3, r12, lr}
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: vmov.32 q0[1], r3
; CHECK-NEXT: vmov.32 q0[2], r12
; CHECK-NEXT: vmov.32 q0[3], lr
; CHECK-NEXT: vmov r3, s1
; CHECK-NEXT: vmovx.f16 s12, s1
; CHECK-NEXT: vmov r2, s3
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: ldrd lr, r12, [r0]
; CHECK-NEXT: ldrd r3, r2, [r0, #8]
; CHECK-NEXT: ldrd r4, r0, [r0, #16]
; CHECK-NEXT: vmov q1[2], q1[0], lr, r3
; CHECK-NEXT: vmov q1[3], q1[1], r12, r2
; CHECK-NEXT: vmov q0[2], q0[0], r4, r4
; CHECK-NEXT: vmov r3, s5
; CHECK-NEXT: vmov q0[3], q0[1], r0, r0
; CHECK-NEXT: vmov r2, s7
; CHECK-NEXT: vmov.16 q2[0], r3
; CHECK-NEXT: vmov.16 q2[1], r2
; CHECK-NEXT: ldrd r2, r0, [r0, #16]
; CHECK-NEXT: vmov.32 q1[0], r2
; CHECK-NEXT: vmov.32 q1[1], r0
; CHECK-NEXT: vmov.32 q1[2], r2
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: vmov.32 q1[3], r0
; CHECK-NEXT: vmov r0, s5
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: vmov.16 q2[2], r0
; CHECK-NEXT: vmov r0, s7
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmovx.f16 s12, s5
; CHECK-NEXT: vmov.16 q2[3], r0
; CHECK-NEXT: vmov r0, s12
; CHECK-NEXT: vmovx.f16 s12, s3
; CHECK-NEXT: vmovx.f16 s12, s7
; CHECK-NEXT: vmov.16 q2[4], r0
; CHECK-NEXT: vmov r0, s12
; CHECK-NEXT: vmovx.f16 s12, s5
; CHECK-NEXT: vmovx.f16 s12, s1
; CHECK-NEXT: vmov.16 q2[5], r0
; CHECK-NEXT: vmov r0, s12
; CHECK-NEXT: vmovx.f16 s12, s7
; CHECK-NEXT: vmovx.f16 s12, s3
; CHECK-NEXT: vmov.16 q2[6], r0
; CHECK-NEXT: vmov r0, s12
; CHECK-NEXT: vmovx.f16 s12, s0
; CHECK-NEXT: vmov.16 q2[7], r0
; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: vmov r2, s4
; CHECK-NEXT: vstrw.32 q2, [r1, #16]
; CHECK-NEXT: vmov r0, s6
; CHECK-NEXT: vmov.16 q2[0], r2
; CHECK-NEXT: vmovx.f16 s12, s4
; CHECK-NEXT: vmov.16 q2[1], r0
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: vmov.16 q2[2], r0
; CHECK-NEXT: vmov r0, s6
; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: vmov.16 q2[3], r0
; CHECK-NEXT: vmov r0, s12
; CHECK-NEXT: vmovx.f16 s0, s2
; CHECK-NEXT: vmovx.f16 s4, s6
; CHECK-NEXT: vmov.16 q2[4], r0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: vmovx.f16 s0, s4
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vmovx.f16 s4, s0
; CHECK-NEXT: vmov.16 q2[5], r0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: vmovx.f16 s0, s6
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vmovx.f16 s0, s2
; CHECK-NEXT: vmov.16 q2[6], r0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: vmov.16 q2[7], r0
; CHECK-NEXT: vstrw.32 q2, [r1]
; CHECK-NEXT: pop {r7, pc}
; CHECK-NEXT: pop {r4, pc}
entry:
%s1 = getelementptr <4 x half>, <4 x half>* %src, i32 0
%l1 = load <4 x half>, <4 x half>* %s1, align 4
Expand Down
46 changes: 19 additions & 27 deletions llvm/test/CodeGen/Thumb2/mve-widen-narrow.ll
Original file line number Diff line number Diff line change
Expand Up @@ -275,45 +275,37 @@ define void @foo_int32_int8_both(<16 x i32>* %dest, <16 x i8>* readonly %src, i3
; CHECK-LABEL: foo_int32_int8_both:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.s16 q1, [r1, #8]
; CHECK-NEXT: vmov.u16 r2, q1[4]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: vmov.u16 r2, q1[5]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: vmov.u16 r2, q1[6]
; CHECK-NEXT: vmov.32 q0[2], r2
; CHECK-NEXT: vmov.u16 r3, q1[4]
; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
; CHECK-NEXT: vmov.u16 r2, q1[7]
; CHECK-NEXT: vmov.32 q0[3], r2
; CHECK-NEXT: vmov.u16 r3, q1[5]
; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
; CHECK-NEXT: vmov.u16 r2, q1[0]
; CHECK-NEXT: vmovlb.u16 q2, q0
; CHECK-NEXT: vldrb.s16 q0, [r1]
; CHECK-NEXT: vmov.u16 r1, q1[0]
; CHECK-NEXT: vstrw.32 q2, [r0, #48]
; CHECK-NEXT: vmov.32 q2[0], r1
; CHECK-NEXT: vmov.u16 r1, q1[1]
; CHECK-NEXT: vmov.32 q2[1], r1
; CHECK-NEXT: vmov.u16 r1, q1[2]
; CHECK-NEXT: vmov.32 q2[2], r1
; CHECK-NEXT: vstrw.32 q2, [r0, #48]
; CHECK-NEXT: vmov q2[2], q2[0], r2, r1
; CHECK-NEXT: vmov.u16 r1, q1[3]
; CHECK-NEXT: vmov.32 q2[3], r1
; CHECK-NEXT: vmov.u16 r1, q0[4]
; CHECK-NEXT: vmov.u16 r2, q1[1]
; CHECK-NEXT: vmov q2[3], q2[1], r2, r1
; CHECK-NEXT: vmov.u16 r1, q0[6]
; CHECK-NEXT: vmovlb.u16 q1, q2
; CHECK-NEXT: vmov.u16 r2, q0[4]
; CHECK-NEXT: vstrw.32 q1, [r0, #32]
; CHECK-NEXT: vmov.32 q1[0], r1
; CHECK-NEXT: vmov.u16 r1, q0[5]
; CHECK-NEXT: vmov.32 q1[1], r1
; CHECK-NEXT: vmov.u16 r1, q0[6]
; CHECK-NEXT: vmov.32 q1[2], r1
; CHECK-NEXT: vmov q1[2], q1[0], r2, r1
; CHECK-NEXT: vmov.u16 r1, q0[7]
; CHECK-NEXT: vmov.32 q1[3], r1
; CHECK-NEXT: vmov.u16 r1, q0[0]
; CHECK-NEXT: vmov.u16 r2, q0[5]
; CHECK-NEXT: vmov q1[3], q1[1], r2, r1
; CHECK-NEXT: vmov.u16 r1, q0[2]
; CHECK-NEXT: vmovlb.u16 q1, q1
; CHECK-NEXT: vmov.u16 r2, q0[0]
; CHECK-NEXT: vstrw.32 q1, [r0, #16]
; CHECK-NEXT: vmov.32 q1[0], r1
; CHECK-NEXT: vmov.u16 r1, q0[1]
; CHECK-NEXT: vmov.32 q1[1], r1
; CHECK-NEXT: vmov.u16 r1, q0[2]
; CHECK-NEXT: vmov.32 q1[2], r1
; CHECK-NEXT: vmov q1[2], q1[0], r2, r1
; CHECK-NEXT: vmov.u16 r1, q0[3]
; CHECK-NEXT: vmov.32 q1[3], r1
; CHECK-NEXT: vmov.u16 r2, q0[1]
; CHECK-NEXT: vmov q1[3], q1[1], r2, r1
; CHECK-NEXT: vmovlb.u16 q0, q1
; CHECK-NEXT: vstrw.32 q0, [r0]
; CHECK-NEXT: bx lr
Expand Down