291 changes: 158 additions & 133 deletions llvm/test/CodeGen/Thumb2/mve-vabdus.ll

Large diffs are not rendered by default.

160 changes: 89 additions & 71 deletions llvm/test/CodeGen/Thumb2/mve-vcmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -367,31 +367,36 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @vcmp_eq_v2i64(<2 x i64> %src, <2 x i64> %srcb, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: vcmp_eq_v2i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov r0, s5
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: vmov r3, s2
; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmov r1, s4
; CHECK-NEXT: eors r1, r2
; CHECK-NEXT: vmov r2, s3
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s7
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q4[0], r0
; CHECK-NEXT: vmov.32 q4[1], r0
; CHECK-NEXT: vmov r0, s7
; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmov r1, s6
; CHECK-NEXT: eors r1, r2
; CHECK-NEXT: vmov r2, s6
; CHECK-NEXT: eors r2, r3
; CHECK-NEXT: orrs r1, r2
; CHECK-NEXT: cset r1, eq
; CHECK-NEXT: tst.w r1, #1
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: vbic q1, q3, q0
; CHECK-NEXT: vand q0, q2, q0
; CHECK-NEXT: vorr q0, q0, q1
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q4[2], r0
; CHECK-NEXT: vmov.32 q4[3], r0
; CHECK-NEXT: vbic q0, q3, q4
; CHECK-NEXT: vand q1, q2, q4
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: bx lr
entry:
%c = icmp eq <2 x i64> %src, %srcb
Expand All @@ -402,31 +407,36 @@ entry:
define arm_aapcs_vfpcc <2 x i32> @vcmp_eq_v2i32(<2 x i64> %src, <2 x i64> %srcb, <2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: vcmp_eq_v2i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov r0, s5
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: vmov r3, s2
; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmov r1, s4
; CHECK-NEXT: eors r1, r2
; CHECK-NEXT: vmov r2, s3
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s7
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q4[0], r0
; CHECK-NEXT: vmov.32 q4[1], r0
; CHECK-NEXT: vmov r0, s7
; CHECK-NEXT: eors r0, r1
; CHECK-NEXT: vmov r1, s6
; CHECK-NEXT: eors r1, r2
; CHECK-NEXT: vmov r2, s6
; CHECK-NEXT: eors r2, r3
; CHECK-NEXT: orrs r1, r2
; CHECK-NEXT: cset r1, eq
; CHECK-NEXT: tst.w r1, #1
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: vbic q1, q3, q0
; CHECK-NEXT: vand q0, q2, q0
; CHECK-NEXT: vorr q0, q0, q1
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q4[2], r0
; CHECK-NEXT: vmov.32 q4[3], r0
; CHECK-NEXT: vbic q0, q3, q4
; CHECK-NEXT: vand q1, q2, q4
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: bx lr
entry:
%c = icmp eq <2 x i64> %src, %srcb
Expand All @@ -437,76 +447,84 @@ entry:
define arm_aapcs_vfpcc <2 x i32> @vcmp_multi_v2i32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: vcmp_multi_v2i32:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: vmov r2, s8
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: orrs r1, r2
; CHECK-NEXT: vmov r2, s8
; CHECK-NEXT: cset r1, eq
; CHECK-NEXT: tst.w r1, #1
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: vbic q0, q2, q0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: subs r1, r0, r2
; CHECK-NEXT: asr.w r12, r0, #31
; CHECK-NEXT: vmov.32 q3[0], r0
; CHECK-NEXT: vmov.32 q3[1], r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: vbic q0, q2, q3
; CHECK-NEXT: vmov lr, s0
; CHECK-NEXT: subs.w r1, lr, r2
; CHECK-NEXT: asr.w r12, lr, #31
; CHECK-NEXT: sbcs.w r1, r12, r2, asr #31
; CHECK-NEXT: mov.w r1, #0
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: vmov r2, s10
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: vmov r1, s10
; CHECK-NEXT: csetm lr, ne
; CHECK-NEXT: asr.w r12, r2, #31
; CHECK-NEXT: subs r4, r2, r1
; CHECK-NEXT: sbcs.w r1, r12, r1, asr #31
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov.32 q3[0], r1
; CHECK-NEXT: vmov.32 q3[1], r1
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: subs r0, r1, r2
; CHECK-NEXT: asr.w r12, r1, #31
; CHECK-NEXT: sbcs.w r0, r12, r2, asr #31
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r3, #1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: cmp.w lr, #0
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: cset r0, ne
; CHECK-NEXT: vmov q3[2], q3[0], r1, lr
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: vmov q3[3], q3[1], r1, lr
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: cset r1, ne
; CHECK-NEXT: tst.w r1, #1
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q4[2], q4[0], r1, r0
; CHECK-NEXT: vmov q4[3], q4[1], r1, r0
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: vmov.32 q4[0], r0
; CHECK-NEXT: vmov.32 q4[1], r0
; CHECK-NEXT: cset r0, ne
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q4[2], r0
; CHECK-NEXT: vmov.32 q4[3], r0
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vmov r1, s6
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: cset r0, ne
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: cset r1, ne
; CHECK-NEXT: tst.w r1, #1
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
; CHECK-NEXT: vand q1, q1, q4
; CHECK-NEXT: vmov.32 q5[0], r0
; CHECK-NEXT: vmov.32 q5[1], r0
; CHECK-NEXT: vmov r0, s6
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: cset r0, ne
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q5[2], r0
; CHECK-NEXT: vmov.32 q5[3], r0
; CHECK-NEXT: vand q1, q5, q4
; CHECK-NEXT: vand q1, q3, q1
; CHECK-NEXT: vbic q0, q0, q1
; CHECK-NEXT: vand q1, q2, q1
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r4, pc}
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: pop {r7, pc}
%a4 = icmp eq <2 x i64> %a, zeroinitializer
%a5 = select <2 x i1> %a4, <2 x i32> zeroinitializer, <2 x i32> %c
%a6 = icmp ne <2 x i32> %b, zeroinitializer
Expand Down
268 changes: 146 additions & 122 deletions llvm/test/CodeGen/Thumb2/mve-vcmpr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -438,22 +438,24 @@ define arm_aapcs_vfpcc <2 x i64> @vcmp_eq_v2i64(<2 x i64> %src, i64 %src2, <2 x
; CHECK-NEXT: eors r2, r1
; CHECK-NEXT: eors r3, r0
; CHECK-NEXT: orrs r2, r3
; CHECK-NEXT: vmov r3, s3
; CHECK-NEXT: cset r2, eq
; CHECK-NEXT: tst.w r2, #1
; CHECK-NEXT: csetm r2, ne
; CHECK-NEXT: eors r1, r3
; CHECK-NEXT: vmov r3, s2
; CHECK-NEXT: eors r0, r3
; CHECK-NEXT: vmov.32 q3[0], r2
; CHECK-NEXT: vmov.32 q3[1], r2
; CHECK-NEXT: vmov r2, s3
; CHECK-NEXT: eors r1, r2
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: eors r0, r2
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov q0[2], q0[0], r0, r2
; CHECK-NEXT: vmov q0[3], q0[1], r0, r2
; CHECK-NEXT: vbic q2, q2, q0
; CHECK-NEXT: vand q0, q1, q0
; CHECK-NEXT: vorr q0, q0, q2
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: vbic q0, q2, q3
; CHECK-NEXT: vand q1, q1, q3
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%i = insertelement <2 x i64> undef, i64 %src2, i32 0
Expand All @@ -471,22 +473,24 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_eq_v2i32(<2 x i64> %src, i64 %src2, <2 x
; CHECK-NEXT: eors r2, r1
; CHECK-NEXT: eors r3, r0
; CHECK-NEXT: orrs r2, r3
; CHECK-NEXT: vmov r3, s3
; CHECK-NEXT: cset r2, eq
; CHECK-NEXT: tst.w r2, #1
; CHECK-NEXT: csetm r2, ne
; CHECK-NEXT: eors r1, r3
; CHECK-NEXT: vmov r3, s2
; CHECK-NEXT: eors r0, r3
; CHECK-NEXT: vmov.32 q3[0], r2
; CHECK-NEXT: vmov.32 q3[1], r2
; CHECK-NEXT: vmov r2, s3
; CHECK-NEXT: eors r1, r2
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: eors r0, r2
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov q0[2], q0[0], r0, r2
; CHECK-NEXT: vmov q0[3], q0[1], r0, r2
; CHECK-NEXT: vbic q2, q2, q0
; CHECK-NEXT: vand q0, q1, q0
; CHECK-NEXT: vorr q0, q0, q2
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: vbic q0, q2, q3
; CHECK-NEXT: vand q1, q1, q3
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%i = insertelement <2 x i64> undef, i64 %src2, i32 0
Expand All @@ -499,76 +503,84 @@ entry:
define arm_aapcs_vfpcc <2 x i32> @vcmp_multi_v2i32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: vcmp_multi_v2i32:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: vmov r2, s8
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: orrs r1, r2
; CHECK-NEXT: vmov r2, s8
; CHECK-NEXT: cset r1, eq
; CHECK-NEXT: tst.w r1, #1
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: vbic q0, q2, q0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: subs r1, r0, r2
; CHECK-NEXT: asr.w r12, r0, #31
; CHECK-NEXT: vmov.32 q3[0], r0
; CHECK-NEXT: vmov.32 q3[1], r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: vbic q0, q2, q3
; CHECK-NEXT: vmov lr, s0
; CHECK-NEXT: subs.w r1, lr, r2
; CHECK-NEXT: asr.w r12, lr, #31
; CHECK-NEXT: sbcs.w r1, r12, r2, asr #31
; CHECK-NEXT: mov.w r1, #0
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: vmov r2, s10
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: vmov r1, s10
; CHECK-NEXT: csetm lr, ne
; CHECK-NEXT: asr.w r12, r2, #31
; CHECK-NEXT: subs r4, r2, r1
; CHECK-NEXT: sbcs.w r1, r12, r1, asr #31
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov.32 q3[0], r1
; CHECK-NEXT: vmov.32 q3[1], r1
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: subs r0, r1, r2
; CHECK-NEXT: asr.w r12, r1, #31
; CHECK-NEXT: sbcs.w r0, r12, r2, asr #31
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r3, #1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: cmp.w lr, #0
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: cset r0, ne
; CHECK-NEXT: vmov q3[2], q3[0], r1, lr
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: vmov q3[3], q3[1], r1, lr
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: cset r1, ne
; CHECK-NEXT: tst.w r1, #1
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q4[2], q4[0], r1, r0
; CHECK-NEXT: vmov q4[3], q4[1], r1, r0
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: vmov.32 q4[0], r0
; CHECK-NEXT: vmov.32 q4[1], r0
; CHECK-NEXT: cset r0, ne
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q4[2], r0
; CHECK-NEXT: vmov.32 q4[3], r0
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vmov r1, s6
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: cset r0, ne
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: cset r1, ne
; CHECK-NEXT: tst.w r1, #1
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
; CHECK-NEXT: vand q1, q1, q4
; CHECK-NEXT: vmov.32 q5[0], r0
; CHECK-NEXT: vmov.32 q5[1], r0
; CHECK-NEXT: vmov r0, s6
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: cset r0, ne
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q5[2], r0
; CHECK-NEXT: vmov.32 q5[3], r0
; CHECK-NEXT: vand q1, q5, q4
; CHECK-NEXT: vand q1, q3, q1
; CHECK-NEXT: vbic q0, q0, q1
; CHECK-NEXT: vand q1, q2, q1
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r4, pc}
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: pop {r7, pc}
%a4 = icmp eq <2 x i64> %a, zeroinitializer
%a5 = select <2 x i1> %a4, <2 x i32> zeroinitializer, <2 x i32> %c
%a6 = icmp ne <2 x i32> %b, zeroinitializer
Expand Down Expand Up @@ -1019,22 +1031,24 @@ define arm_aapcs_vfpcc <2 x i64> @vcmp_r_eq_v2i64(<2 x i64> %src, i64 %src2, <2
; CHECK-NEXT: eors r2, r1
; CHECK-NEXT: eors r3, r0
; CHECK-NEXT: orrs r2, r3
; CHECK-NEXT: vmov r3, s3
; CHECK-NEXT: cset r2, eq
; CHECK-NEXT: tst.w r2, #1
; CHECK-NEXT: csetm r2, ne
; CHECK-NEXT: eors r1, r3
; CHECK-NEXT: vmov r3, s2
; CHECK-NEXT: eors r0, r3
; CHECK-NEXT: vmov.32 q3[0], r2
; CHECK-NEXT: vmov.32 q3[1], r2
; CHECK-NEXT: vmov r2, s3
; CHECK-NEXT: eors r1, r2
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: eors r0, r2
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov q0[2], q0[0], r0, r2
; CHECK-NEXT: vmov q0[3], q0[1], r0, r2
; CHECK-NEXT: vbic q2, q2, q0
; CHECK-NEXT: vand q0, q1, q0
; CHECK-NEXT: vorr q0, q0, q2
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: vbic q0, q2, q3
; CHECK-NEXT: vand q1, q1, q3
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%i = insertelement <2 x i64> undef, i64 %src2, i32 0
Expand All @@ -1052,22 +1066,24 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_r_eq_v2i32(<2 x i64> %src, i64 %src2, <2
; CHECK-NEXT: eors r2, r1
; CHECK-NEXT: eors r3, r0
; CHECK-NEXT: orrs r2, r3
; CHECK-NEXT: vmov r3, s3
; CHECK-NEXT: cset r2, eq
; CHECK-NEXT: tst.w r2, #1
; CHECK-NEXT: csetm r2, ne
; CHECK-NEXT: eors r1, r3
; CHECK-NEXT: vmov r3, s2
; CHECK-NEXT: eors r0, r3
; CHECK-NEXT: vmov.32 q3[0], r2
; CHECK-NEXT: vmov.32 q3[1], r2
; CHECK-NEXT: vmov r2, s3
; CHECK-NEXT: eors r1, r2
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: eors r0, r2
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov q0[2], q0[0], r0, r2
; CHECK-NEXT: vmov q0[3], q0[1], r0, r2
; CHECK-NEXT: vbic q2, q2, q0
; CHECK-NEXT: vand q0, q1, q0
; CHECK-NEXT: vorr q0, q0, q2
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: vbic q0, q2, q3
; CHECK-NEXT: vand q1, q1, q3
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%i = insertelement <2 x i64> undef, i64 %src2, i32 0
Expand All @@ -1080,76 +1096,84 @@ entry:
define arm_aapcs_vfpcc <2 x i32> @vcmp_r_multi_v2i32(<2 x i64> %a, <2 x i32> %b, <2 x i32> %c) {
; CHECK-LABEL: vcmp_r_multi_v2i32:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: vmov r2, s8
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: orrs r1, r2
; CHECK-NEXT: vmov r2, s8
; CHECK-NEXT: cset r1, eq
; CHECK-NEXT: tst.w r1, #1
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: vbic q0, q2, q0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: subs r1, r0, r2
; CHECK-NEXT: asr.w r12, r0, #31
; CHECK-NEXT: vmov.32 q3[0], r0
; CHECK-NEXT: vmov.32 q3[1], r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: vbic q0, q2, q3
; CHECK-NEXT: vmov lr, s0
; CHECK-NEXT: subs.w r1, lr, r2
; CHECK-NEXT: asr.w r12, lr, #31
; CHECK-NEXT: sbcs.w r1, r12, r2, asr #31
; CHECK-NEXT: mov.w r1, #0
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: vmov r2, s10
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: vmov r1, s10
; CHECK-NEXT: csetm lr, ne
; CHECK-NEXT: asr.w r12, r2, #31
; CHECK-NEXT: subs r4, r2, r1
; CHECK-NEXT: sbcs.w r1, r12, r1, asr #31
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov.32 q3[0], r1
; CHECK-NEXT: vmov.32 q3[1], r1
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: subs r0, r1, r2
; CHECK-NEXT: asr.w r12, r1, #31
; CHECK-NEXT: sbcs.w r0, r12, r2, asr #31
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r3, #1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: cmp.w lr, #0
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: cset r0, ne
; CHECK-NEXT: vmov q3[2], q3[0], r1, lr
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: vmov q3[3], q3[1], r1, lr
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: cset r1, ne
; CHECK-NEXT: tst.w r1, #1
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q4[2], q4[0], r1, r0
; CHECK-NEXT: vmov q4[3], q4[1], r1, r0
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: vmov.32 q4[0], r0
; CHECK-NEXT: vmov.32 q4[1], r0
; CHECK-NEXT: cset r0, ne
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q4[2], r0
; CHECK-NEXT: vmov.32 q4[3], r0
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vmov r1, s6
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: cset r0, ne
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: cset r1, ne
; CHECK-NEXT: tst.w r1, #1
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
; CHECK-NEXT: vand q1, q1, q4
; CHECK-NEXT: vmov.32 q5[0], r0
; CHECK-NEXT: vmov.32 q5[1], r0
; CHECK-NEXT: vmov r0, s6
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: cset r0, ne
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q5[2], r0
; CHECK-NEXT: vmov.32 q5[3], r0
; CHECK-NEXT: vand q1, q5, q4
; CHECK-NEXT: vand q1, q3, q1
; CHECK-NEXT: vbic q0, q0, q1
; CHECK-NEXT: vand q1, q2, q1
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r4, pc}
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: pop {r7, pc}
%a4 = icmp eq <2 x i64> %a, zeroinitializer
%a5 = select <2 x i1> %a4, <2 x i32> zeroinitializer, <2 x i32> %c
%a6 = icmp ne <2 x i32> %b, zeroinitializer
Expand Down
96 changes: 52 additions & 44 deletions llvm/test/CodeGen/Thumb2/mve-vcmpz.ll
Original file line number Diff line number Diff line change
Expand Up @@ -363,21 +363,23 @@ define arm_aapcs_vfpcc <2 x i64> @vcmp_eqz_v2i64(<2 x i64> %src, <2 x i64> %a, <
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: orrs r1, r2
; CHECK-NEXT: cset r1, eq
; CHECK-NEXT: tst.w r1, #1
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: vbic q2, q2, q0
; CHECK-NEXT: vand q0, q1, q0
; CHECK-NEXT: vorr q0, q0, q2
; CHECK-NEXT: vmov.32 q3[0], r0
; CHECK-NEXT: vmov.32 q3[1], r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: vbic q0, q2, q3
; CHECK-NEXT: vand q1, q1, q3
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c = icmp eq <2 x i64> %src, zeroinitializer
Expand All @@ -390,21 +392,23 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_eqz_v2i32(<2 x i64> %src, <2 x i32> %a, <
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q3[0], r0
; CHECK-NEXT: vmov.32 q3[1], r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: orrs r1, r2
; CHECK-NEXT: cset r1, eq
; CHECK-NEXT: tst.w r1, #1
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: vbic q2, q2, q0
; CHECK-NEXT: vand q0, q1, q0
; CHECK-NEXT: vorr q0, q0, q2
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: vbic q0, q2, q3
; CHECK-NEXT: vand q1, q1, q3
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c = icmp eq <2 x i64> %src, zeroinitializer
Expand Down Expand Up @@ -777,21 +781,23 @@ define arm_aapcs_vfpcc <2 x i64> @vcmp_r_eqz_v2i64(<2 x i64> %src, <2 x i64> %a,
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: orrs r1, r2
; CHECK-NEXT: cset r1, eq
; CHECK-NEXT: tst.w r1, #1
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: vbic q2, q2, q0
; CHECK-NEXT: vand q0, q1, q0
; CHECK-NEXT: vorr q0, q0, q2
; CHECK-NEXT: vmov.32 q3[0], r0
; CHECK-NEXT: vmov.32 q3[1], r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: vbic q0, q2, q3
; CHECK-NEXT: vand q1, q1, q3
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c = icmp eq <2 x i64> zeroinitializer, %src
Expand All @@ -804,21 +810,23 @@ define arm_aapcs_vfpcc <2 x i32> @vcmp_r_eqz_v2i32(<2 x i64> %src, <2 x i32> %a,
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q3[0], r0
; CHECK-NEXT: vmov.32 q3[1], r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: cset r0, eq
; CHECK-NEXT: tst.w r0, #1
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: orrs r1, r2
; CHECK-NEXT: cset r1, eq
; CHECK-NEXT: tst.w r1, #1
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: vbic q2, q2, q0
; CHECK-NEXT: vand q0, q1, q0
; CHECK-NEXT: vorr q0, q0, q2
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: vbic q0, q2, q3
; CHECK-NEXT: vand q1, q1, q3
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: bx lr
entry:
%c = icmp eq <2 x i64> %src, zeroinitializer
Expand Down
165 changes: 91 additions & 74 deletions llvm/test/CodeGen/Thumb2/mve-vcreate.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
define arm_aapcs_vfpcc <4 x i32> @vcreate_i32(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: vcreate_i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov q0[2], q0[0], r3, r1
; CHECK-NEXT: vmov q0[3], q0[1], r2, r0
; CHECK-NEXT: vmov.32 q0[0], r1
; CHECK-NEXT: vmov.32 q0[1], r0
; CHECK-NEXT: vmov.32 q0[2], r3
; CHECK-NEXT: vmov.32 q0[3], r2
; CHECK-NEXT: bx lr
entry:
%conv = zext i32 %a to i64
Expand All @@ -25,8 +27,10 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @insert_0123(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_0123:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov q0[2], q0[0], r2, r0
; CHECK-NEXT: vmov q0[3], q0[1], r3, r1
; CHECK-NEXT: vmov.32 q0[0], r0
; CHECK-NEXT: vmov.32 q0[1], r1
; CHECK-NEXT: vmov.32 q0[2], r2
; CHECK-NEXT: vmov.32 q0[3], r3
; CHECK-NEXT: bx lr
entry:
%v1 = insertelement <4 x i32> undef, i32 %a, i32 0
Expand All @@ -39,8 +43,10 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @insert_3210(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_3210:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov q0[2], q0[0], r1, r3
; CHECK-NEXT: vmov q0[3], q0[1], r0, r2
; CHECK-NEXT: vmov.32 q0[0], r3
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: vmov.32 q0[2], r1
; CHECK-NEXT: vmov.32 q0[3], r0
; CHECK-NEXT: bx lr
entry:
%v1 = insertelement <4 x i32> undef, i32 %a, i32 3
Expand All @@ -53,8 +59,10 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @insert_0213(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_0213:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
; CHECK-NEXT: vmov.32 q0[0], r0
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: vmov.32 q0[2], r1
; CHECK-NEXT: vmov.32 q0[3], r3
; CHECK-NEXT: bx lr
entry:
%v1 = insertelement <4 x i32> undef, i32 %a, i32 0
Expand All @@ -67,7 +75,8 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @insert_0220(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_0220:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov q0[2], q0[0], r2, r3
; CHECK-NEXT: vmov.32 q0[0], r3
; CHECK-NEXT: vmov.32 q0[2], r2
; CHECK-NEXT: bx lr
entry:
%v1 = insertelement <4 x i32> undef, i32 %a, i32 0
Expand All @@ -80,8 +89,9 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @insert_321(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_321:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: vmov.32 q0[2], r1
; CHECK-NEXT: vmov q0[3], q0[1], r0, r2
; CHECK-NEXT: vmov.32 q0[3], r0
; CHECK-NEXT: bx lr
entry:
%v1 = insertelement <4 x i32> undef, i32 %a, i32 3
Expand All @@ -94,7 +104,8 @@ define arm_aapcs_vfpcc <4 x i32> @insert_310(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_310:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: vmov q0[3], q0[1], r0, r1
; CHECK-NEXT: vmov.32 q0[1], r1
; CHECK-NEXT: vmov.32 q0[3], r0
; CHECK-NEXT: bx lr
entry:
%v1 = insertelement <4 x i32> undef, i32 %a, i32 3
Expand All @@ -106,7 +117,8 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @insert_320(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_320:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov q0[2], q0[0], r1, r2
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: vmov.32 q0[2], r1
; CHECK-NEXT: vmov.32 q0[3], r0
; CHECK-NEXT: bx lr
entry:
Expand All @@ -119,7 +131,8 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @insert_31(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_31:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov q0[3], q0[1], r0, r1
; CHECK-NEXT: vmov.32 q0[1], r1
; CHECK-NEXT: vmov.32 q0[3], r0
; CHECK-NEXT: bx lr
entry:
%v1 = insertelement <4 x i32> undef, i32 %a, i32 3
Expand Down Expand Up @@ -152,8 +165,9 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @insert_210(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_210:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: vmov.32 q0[1], r1
; CHECK-NEXT: vmov q0[2], q0[0], r0, r2
; CHECK-NEXT: vmov.32 q0[2], r0
; CHECK-NEXT: bx lr
entry:
%v1 = insertelement <4 x i32> undef, i32 %a, i32 2
Expand All @@ -165,7 +179,8 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @insert_20(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: insert_20:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
; CHECK-NEXT: vmov.32 q0[0], r1
; CHECK-NEXT: vmov.32 q0[2], r0
; CHECK-NEXT: bx lr
entry:
%v1 = insertelement <4 x i32> undef, i32 %a, i32 2
Expand Down Expand Up @@ -230,26 +245,28 @@ entry:
define hidden <8 x i16> @create_i16(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i16 zeroext %d, i16 zeroext %a2, i16 zeroext %b2, i16 zeroext %c2, i16 zeroext %d2) local_unnamed_addr #0 {
; CHECK-LABEL: create_i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r5, r6, r7, lr}
; CHECK-NEXT: push {r5, r6, r7, lr}
; CHECK-NEXT: movs r7, #0
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: movs r5, #0
; CHECK-NEXT: lsll r2, r7, #16
; CHECK-NEXT: orr.w r0, r1, r0, lsl #16
; CHECK-NEXT: orr.w r12, r2, r3
; CHECK-NEXT: ldr r2, [sp, #24]
; CHECK-NEXT: ldr r3, [sp, #28]
; CHECK-NEXT: orrs r0, r7
; CHECK-NEXT: lsll r2, r5, #16
; CHECK-NEXT: orrs r2, r3
; CHECK-NEXT: vmov q0[2], q0[0], r2, r12
; CHECK-NEXT: ldrd r1, r2, [sp, #16]
; CHECK-NEXT: orr.w r1, r2, r1, lsl #16
; CHECK-NEXT: orrs r1, r5
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: ldrd lr, r4, [sp, #16]
; CHECK-NEXT: orr.w r1, r2, r3
; CHECK-NEXT: ldr.w r12, [sp, #24]
; CHECK-NEXT: orrs r0, r5
; CHECK-NEXT: vmov.32 q0[0], r1
; CHECK-NEXT: movs r7, #0
; CHECK-NEXT: vmov.32 q0[1], r0
; CHECK-NEXT: ldr r0, [sp, #28]
; CHECK-NEXT: lsll r12, r7, #16
; CHECK-NEXT: orr.w r4, r4, lr, lsl #16
; CHECK-NEXT: orr.w r0, r0, r12
; CHECK-NEXT: orrs r7, r4
; CHECK-NEXT: vmov.32 q0[2], r0
; CHECK-NEXT: vmov.32 q0[3], r7
; CHECK-NEXT: vmov r0, r1, d0
; CHECK-NEXT: vmov r2, r3, d1
; CHECK-NEXT: pop {r5, r6, r7, pc}
; CHECK-NEXT: pop {r4, r5, r7, pc}
entry:
%conv = zext i16 %a to i64
%shl = shl nuw i64 %conv, 48
Expand Down Expand Up @@ -308,59 +325,59 @@ entry:
define hidden <16 x i8> @create_i8(i8 zeroext %a1, i8 zeroext %b1, i8 zeroext %c1, i8 zeroext %d1, i8 zeroext %a2, i8 zeroext %b2, i8 zeroext %c2, i8 zeroext %d2, i8 zeroext %a3, i8 zeroext %b3, i8 zeroext %c3, i8 zeroext %d3, i8 zeroext %a4, i8 zeroext %b4, i8 zeroext %c4, i8 zeroext %d4) local_unnamed_addr #0 {
; CHECK-LABEL: create_i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEXT: ldr r4, [sp, #36]
; CHECK-NEXT: .save {r4, r5, r7, r9, r11, lr}
; CHECK-NEXT: push.w {r4, r5, r7, r9, r11, lr}
; CHECK-NEXT: ldr.w r12, [sp, #28]
; CHECK-NEXT: mov.w r11, #0
; CHECK-NEXT: ldr r6, [sp, #32]
; CHECK-NEXT: ldr r4, [sp, #24]
; CHECK-NEXT: movs r5, #0
; CHECK-NEXT: lsll r12, r11, #16
; CHECK-NEXT: lsls r1, r1, #16
; CHECK-NEXT: lsll r4, r5, #24
; CHECK-NEXT: orr.w r0, r1, r0, lsl #22
; CHECK-NEXT: orr.w r12, r12, r4
; CHECK-NEXT: ldr r4, [sp, #32]
; CHECK-NEXT: movs r7, #0
; CHECK-NEXT: lsll r4, r11, #16
; CHECK-NEXT: mov lr, r1
; CHECK-NEXT: lsll r6, r7, #24
; CHECK-NEXT: mov r12, r3
; CHECK-NEXT: orr.w r1, r6, r4
; CHECK-NEXT: ldr r4, [sp, #40]
; CHECK-NEXT: orr.w r0, r0, r2, lsl #8
; CHECK-NEXT: lsll r4, r7, #8
; CHECK-NEXT: add r0, r3
; CHECK-NEXT: orr.w r12, r12, r4
; CHECK-NEXT: ldr r4, [sp, #36]
; CHECK-NEXT: orrs r0, r5
; CHECK-NEXT: ldr r2, [sp, #56]
; CHECK-NEXT: orr.w r0, r0, r11
; CHECK-NEXT: orr.w r4, r4, r12
; CHECK-NEXT: vmov.32 q0[0], r4
; CHECK-NEXT: orrs r0, r7
; CHECK-NEXT: vmov.32 q0[1], r0
; CHECK-NEXT: ldr r0, [sp, #60]
; CHECK-NEXT: movs r1, #0
; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: ldr r6, [sp, #68]
; CHECK-NEXT: lsll r4, r3, #8
; CHECK-NEXT: movs r5, #0
; CHECK-NEXT: orrs r1, r4
; CHECK-NEXT: ldr r4, [sp, #44]
; CHECK-NEXT: lsll r6, r5, #16
; CHECK-NEXT: lsll r0, r1, #16
; CHECK-NEXT: lsll r2, r3, #24
; CHECK-NEXT: orrs r0, r2
; CHECK-NEXT: ldr r2, [sp, #64]
; CHECK-NEXT: mov.w r9, #0
; CHECK-NEXT: orr.w r8, r1, r4
; CHECK-NEXT: ldr r4, [sp, #64]
; CHECK-NEXT: movs r1, #0
; CHECK-NEXT: lsll r4, r1, #24
; CHECK-NEXT: orrs r4, r6
; CHECK-NEXT: ldr r6, [sp, #72]
; CHECK-NEXT: lsll r6, r9, #8
; CHECK-NEXT: orrs r4, r6
; CHECK-NEXT: ldr r6, [sp, #76]
; CHECK-NEXT: orrs r4, r6
; CHECK-NEXT: lsl.w r6, lr, #16
; CHECK-NEXT: orr.w r0, r6, r0, lsl #22
; CHECK-NEXT: vmov q0[2], q0[0], r4, r8
; CHECK-NEXT: lsll r2, r9, #8
; CHECK-NEXT: orrs r0, r2
; CHECK-NEXT: ldr r2, [sp, #68]
; CHECK-NEXT: orrs r0, r2
; CHECK-NEXT: ldr r2, [sp, #40]
; CHECK-NEXT: vmov.32 q0[2], r0
; CHECK-NEXT: ldr r0, [sp, #44]
; CHECK-NEXT: lsls r0, r0, #16
; CHECK-NEXT: orr.w r0, r0, r2, lsl #22
; CHECK-NEXT: ldr r2, [sp, #48]
; CHECK-NEXT: orr.w r0, r0, r2, lsl #8
; CHECK-NEXT: ldr r2, [sp, #52]
; CHECK-NEXT: add r0, r12
; CHECK-NEXT: orrs r0, r7
; CHECK-NEXT: orr.w r0, r0, r11
; CHECK-NEXT: lsls r2, r2, #16
; CHECK-NEXT: add r0, r2
; CHECK-NEXT: orrs r0, r3
; CHECK-NEXT: ldr r3, [sp, #48]
; CHECK-NEXT: orr.w r2, r2, r3, lsl #22
; CHECK-NEXT: ldr r3, [sp, #56]
; CHECK-NEXT: orr.w r2, r2, r3, lsl #8
; CHECK-NEXT: ldr r3, [sp, #60]
; CHECK-NEXT: add r2, r3
; CHECK-NEXT: orrs r1, r2
; CHECK-NEXT: orrs r1, r5
; CHECK-NEXT: orr.w r1, r1, r9
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: orrs r0, r1
; CHECK-NEXT: orr.w r0, r0, r9
; CHECK-NEXT: vmov.32 q0[3], r0
; CHECK-NEXT: vmov r0, r1, d0
; CHECK-NEXT: vmov r2, r3, d1
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r11, pc}
; CHECK-NEXT: pop.w {r4, r5, r7, r9, r11, pc}
entry:
%conv = zext i8 %a1 to i64
%shl = shl nuw nsw i64 %conv, 54
Expand Down
70 changes: 40 additions & 30 deletions llvm/test/CodeGen/Thumb2/mve-vcvt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,17 @@ define arm_aapcs_vfpcc <4 x i32> @foo_int32_float(<4 x float> %src) {
; CHECK-MVE-LABEL: foo_int32_float:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vcvt.s32.f32 s4, s0
; CHECK-MVE-NEXT: vcvt.s32.f32 s6, s2
; CHECK-MVE-NEXT: vcvt.s32.f32 s8, s1
; CHECK-MVE-NEXT: vcvt.s32.f32 s10, s3
; CHECK-MVE-NEXT: vcvt.s32.f32 s6, s1
; CHECK-MVE-NEXT: vcvt.s32.f32 s10, s2
; CHECK-MVE-NEXT: vcvt.s32.f32 s8, s3
; CHECK-MVE-NEXT: vmov r0, s4
; CHECK-MVE-NEXT: vmov r1, s6
; CHECK-MVE-NEXT: vmov q0[2], q0[0], r1, r0
; CHECK-MVE-NEXT: vmov.32 q0[0], r0
; CHECK-MVE-NEXT: vmov r0, s6
; CHECK-MVE-NEXT: vmov.32 q0[1], r0
; CHECK-MVE-NEXT: vmov r0, s10
; CHECK-MVE-NEXT: vmov.32 q0[2], r0
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov r1, s10
; CHECK-MVE-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-MVE-NEXT: vmov.32 q0[3], r0
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: foo_int32_float:
Expand All @@ -68,15 +70,17 @@ define arm_aapcs_vfpcc <4 x i32> @foo_uint32_float(<4 x float> %src) {
; CHECK-MVE-LABEL: foo_uint32_float:
; CHECK-MVE: @ %bb.0: @ %entry
; CHECK-MVE-NEXT: vcvt.u32.f32 s4, s0
; CHECK-MVE-NEXT: vcvt.u32.f32 s6, s2
; CHECK-MVE-NEXT: vcvt.u32.f32 s8, s1
; CHECK-MVE-NEXT: vcvt.u32.f32 s10, s3
; CHECK-MVE-NEXT: vcvt.u32.f32 s6, s1
; CHECK-MVE-NEXT: vcvt.u32.f32 s10, s2
; CHECK-MVE-NEXT: vcvt.u32.f32 s8, s3
; CHECK-MVE-NEXT: vmov r0, s4
; CHECK-MVE-NEXT: vmov r1, s6
; CHECK-MVE-NEXT: vmov q0[2], q0[0], r1, r0
; CHECK-MVE-NEXT: vmov.32 q0[0], r0
; CHECK-MVE-NEXT: vmov r0, s6
; CHECK-MVE-NEXT: vmov.32 q0[1], r0
; CHECK-MVE-NEXT: vmov r0, s10
; CHECK-MVE-NEXT: vmov.32 q0[2], r0
; CHECK-MVE-NEXT: vmov r0, s8
; CHECK-MVE-NEXT: vmov r1, s10
; CHECK-MVE-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-MVE-NEXT: vmov.32 q0[3], r0
; CHECK-MVE-NEXT: bx lr
;
; CHECK-MVEFP-LABEL: foo_uint32_float:
Expand Down Expand Up @@ -345,21 +349,24 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @foo_int64_float(<2 x double> %src) {
; CHECK-LABEL: foo_int64_float:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov r0, r1, d8
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: vmov r0, r1, d9
; CHECK-NEXT: vmov r2, r3, d9
; CHECK-NEXT: vmov.32 q4[0], r0
; CHECK-NEXT: vmov.32 q4[1], r1
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: mov r1, r3
; CHECK-NEXT: bl __aeabi_d2lz
; CHECK-NEXT: vmov q0[2], q0[0], r0, r4
; CHECK-NEXT: vmov q0[3], q0[1], r1, r5
; CHECK-NEXT: vmov.32 q4[2], r0
; CHECK-NEXT: vmov.32 q4[3], r1
; CHECK-NEXT: vmov q0, q4
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r4, r5, r7, pc}
; CHECK-NEXT: pop {r7, pc}
entry:
%out = fptosi <2 x double> %src to <2 x i64>
ret <2 x i64> %out
Expand All @@ -368,21 +375,24 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @foo_uint64_float(<2 x double> %src) {
; CHECK-LABEL: foo_uint64_float:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov r0, r1, d8
; CHECK-NEXT: bl __aeabi_d2ulz
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: vmov r0, r1, d9
; CHECK-NEXT: vmov r2, r3, d9
; CHECK-NEXT: vmov.32 q4[0], r0
; CHECK-NEXT: vmov.32 q4[1], r1
; CHECK-NEXT: mov r0, r2
; CHECK-NEXT: mov r1, r3
; CHECK-NEXT: bl __aeabi_d2ulz
; CHECK-NEXT: vmov q0[2], q0[0], r0, r4
; CHECK-NEXT: vmov q0[3], q0[1], r1, r5
; CHECK-NEXT: vmov.32 q4[2], r0
; CHECK-NEXT: vmov.32 q4[3], r1
; CHECK-NEXT: vmov q0, q4
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: pop {r4, r5, r7, pc}
; CHECK-NEXT: pop {r7, pc}
entry:
%out = fptoui <2 x double> %src to <2 x i64>
ret <2 x i64> %out
Expand Down
6 changes: 4 additions & 2 deletions llvm/test/CodeGen/Thumb2/mve-vdup.ll
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,10 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @vdup_i64(i64 %src) {
; CHECK-LABEL: vdup_i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov q0[2], q0[0], r0, r0
; CHECK-NEXT: vmov q0[3], q0[1], r1, r1
; CHECK-NEXT: vmov.32 q0[0], r0
; CHECK-NEXT: vmov.32 q0[1], r1
; CHECK-NEXT: vmov.32 q0[2], r0
; CHECK-NEXT: vmov.32 q0[3], r1
; CHECK-NEXT: bx lr
entry:
%0 = insertelement <2 x i64> undef, i64 %src, i32 0
Expand Down
734 changes: 399 additions & 335 deletions llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll

Large diffs are not rendered by default.

2,078 changes: 1,167 additions & 911 deletions llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll

Large diffs are not rendered by default.

1,018 changes: 550 additions & 468 deletions llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll

Large diffs are not rendered by default.

2,326 changes: 1,295 additions & 1,031 deletions llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll

Large diffs are not rendered by default.

22 changes: 12 additions & 10 deletions llvm/test/CodeGen/Thumb2/mve-vld2-post.ll
Original file line number Diff line number Diff line change
Expand Up @@ -76,20 +76,22 @@ define <4 x i64> *@vld2_v2i64(<4 x i64> *%src, <2 x i64> *%dst) {
; CHECK-NEXT: vmov.f32 s2, s8
; CHECK-NEXT: vmov.f32 s7, s11
; CHECK-NEXT: vmov.f32 s3, s9
; CHECK-NEXT: vmov r2, s4
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: vmov r12, s5
; CHECK-NEXT: vmov lr, s1
; CHECK-NEXT: vmov r4, s6
; CHECK-NEXT: vmov r5, s2
; CHECK-NEXT: vmov r2, s6
; CHECK-NEXT: vmov r3, s2
; CHECK-NEXT: vmov r4, s4
; CHECK-NEXT: vmov r5, s0
; CHECK-NEXT: vmov r12, s7
; CHECK-NEXT: vmov lr, s3
; CHECK-NEXT: adds r6, r3, r2
; CHECK-NEXT: vmov r3, s7
; CHECK-NEXT: vmov r2, s3
; CHECK-NEXT: vmov r3, s5
; CHECK-NEXT: vmov r2, s1
; CHECK-NEXT: adc.w r12, r12, lr
; CHECK-NEXT: adds r5, r5, r4
; CHECK-NEXT: vmov q0[2], q0[0], r5, r6
; CHECK-NEXT: vmov.32 q0[0], r5
; CHECK-NEXT: adcs r2, r3
; CHECK-NEXT: vmov q0[3], q0[1], r2, r12
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: vmov.32 q0[2], r6
; CHECK-NEXT: vmov.32 q0[3], r12
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r4, r5, r6, pc}
entry:
Expand Down
112 changes: 59 additions & 53 deletions llvm/test/CodeGen/Thumb2/mve-vld2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -269,20 +269,22 @@ define void @vld2_v2i64(<4 x i64> *%src, <2 x i64> *%dst) {
; CHECK-NEXT: vmov.f32 s2, s8
; CHECK-NEXT: vmov.f32 s3, s9
; CHECK-NEXT: vmov.f32 s7, s11
; CHECK-NEXT: vmov r3, s4
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: vmov r12, s5
; CHECK-NEXT: vmov r2, s1
; CHECK-NEXT: vmov r4, s2
; CHECK-NEXT: vmov r3, s6
; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: vmov r4, s0
; CHECK-NEXT: vmov r2, s3
; CHECK-NEXT: vmov r12, s7
; CHECK-NEXT: adds.w lr, r0, r3
; CHECK-NEXT: vmov r0, s6
; CHECK-NEXT: vmov r3, s7
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vmov r3, s5
; CHECK-NEXT: adc.w r12, r12, r2
; CHECK-NEXT: vmov r2, s3
; CHECK-NEXT: vmov r2, s1
; CHECK-NEXT: adds r0, r0, r4
; CHECK-NEXT: vmov q0[2], q0[0], r0, lr
; CHECK-NEXT: vmov.32 q0[0], r0
; CHECK-NEXT: adcs r2, r3
; CHECK-NEXT: vmov q0[3], q0[1], r2, r12
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: vmov.32 q0[2], lr
; CHECK-NEXT: vmov.32 q0[3], r12
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: pop {r4, pc}
entry:
Expand All @@ -297,58 +299,62 @@ entry:
define void @vld2_v4i64(<8 x i64> *%src, <4 x i64> *%dst) {
; CHECK-LABEL: vld2_v4i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, lr}
; CHECK-NEXT: push {r4, r5, r6, lr}
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vldrw.u32 q3, [r0, #32]
; CHECK-NEXT: vldrw.u32 q5, [r0, #48]
; CHECK-NEXT: vldrw.u32 q0, [r0]
; CHECK-NEXT: vldrw.u32 q5, [r0, #16]
; CHECK-NEXT: vldrw.u32 q1, [r0, #32]
; CHECK-NEXT: vldrw.u32 q4, [r0, #48]
; CHECK-NEXT: vmov.f64 d4, d1
; CHECK-NEXT: vmov.f32 s9, s3
; CHECK-NEXT: vmov.f32 s10, s22
; CHECK-NEXT: vmov.f32 s2, s20
; CHECK-NEXT: vmov.f32 s11, s23
; CHECK-NEXT: vmov.f32 s3, s21
; CHECK-NEXT: vmov r3, s8
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: vmov r12, s9
; CHECK-NEXT: vmov r2, s1
; CHECK-NEXT: vmov.f64 d6, d3
; CHECK-NEXT: vmov.f32 s13, s7
; CHECK-NEXT: vmov.f32 s14, s18
; CHECK-NEXT: vmov.f32 s6, s16
; CHECK-NEXT: vmov.f32 s7, s17
; CHECK-NEXT: vmov.f32 s15, s19
; CHECK-NEXT: vmov r4, s4
; CHECK-NEXT: vmov r5, s14
; CHECK-NEXT: vmov r6, s6
; CHECK-NEXT: vldrw.u32 q2, [r0, #16]
; CHECK-NEXT: vmov.f64 d8, d7
; CHECK-NEXT: vmov.f32 s17, s15
; CHECK-NEXT: vmov.f32 s18, s22
; CHECK-NEXT: vmov.f32 s14, s20
; CHECK-NEXT: vmov.f32 s15, s21
; CHECK-NEXT: vmov.f32 s19, s23
; CHECK-NEXT: vmov r3, s18
; CHECK-NEXT: vmov r0, s14
; CHECK-NEXT: vmov r4, s12
; CHECK-NEXT: vmov.f64 d2, d1
; CHECK-NEXT: vmov r12, s19
; CHECK-NEXT: vmov r2, s15
; CHECK-NEXT: vmov.f32 s5, s3
; CHECK-NEXT: vmov.f32 s6, s10
; CHECK-NEXT: vmov.f32 s2, s8
; CHECK-NEXT: vmov.f32 s7, s11
; CHECK-NEXT: vmov.f32 s3, s9
; CHECK-NEXT: adds.w lr, r0, r3
; CHECK-NEXT: vmov r3, s12
; CHECK-NEXT: vmov r0, s5
; CHECK-NEXT: vmov r0, s16
; CHECK-NEXT: vmov r3, s17
; CHECK-NEXT: adc.w r12, r12, r2
; CHECK-NEXT: vmov r2, s13
; CHECK-NEXT: adds r3, r3, r4
; CHECK-NEXT: vmov r4, s7
; CHECK-NEXT: adcs r0, r2
; CHECK-NEXT: vmov r2, s15
; CHECK-NEXT: adds r5, r5, r6
; CHECK-NEXT: vmov r6, s2
; CHECK-NEXT: vmov q3[2], q3[0], r5, r3
; CHECK-NEXT: adcs r2, r4
; CHECK-NEXT: vmov r4, s10
; CHECK-NEXT: vmov q3[3], q3[1], r2, r0
; CHECK-NEXT: vmov r0, s11
; CHECK-NEXT: adds r0, r0, r4
; CHECK-NEXT: vmov r4, s2
; CHECK-NEXT: vmov.32 q3[0], r0
; CHECK-NEXT: vmov r0, s7
; CHECK-NEXT: adcs r2, r3
; CHECK-NEXT: vmov r3, s6
; CHECK-NEXT: vmov.32 q3[1], r2
; CHECK-NEXT: vmov r2, s3
; CHECK-NEXT: vmov.32 q3[2], lr
; CHECK-NEXT: vmov.32 q3[3], r12
; CHECK-NEXT: vstrw.32 q3, [r1, #16]
; CHECK-NEXT: adds r4, r4, r6
; CHECK-NEXT: vmov q1[2], q1[0], r4, lr
; CHECK-NEXT: adcs r0, r2
; CHECK-NEXT: vmov q1[3], q1[1], r0, r12
; CHECK-NEXT: vstrw.32 q1, [r1]
; CHECK-NEXT: adds.w lr, r4, r3
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: adc.w r12, r2, r0
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vmov r2, s5
; CHECK-NEXT: vmov r4, s1
; CHECK-NEXT: adds r0, r0, r3
; CHECK-NEXT: vmov.32 q0[0], r0
; CHECK-NEXT: adcs r2, r4
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: vmov.32 q0[2], lr
; CHECK-NEXT: vmov.32 q0[3], r12
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: pop {r4, r5, r6, pc}
; CHECK-NEXT: pop {r4, pc}
entry:
%l1 = load <8 x i64>, <8 x i64>* %src, align 4
%s1 = shufflevector <8 x i64> %l1, <8 x i64> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
Expand Down
1,158 changes: 602 additions & 556 deletions llvm/test/CodeGen/Thumb2/mve-vld3.ll

Large diffs are not rendered by default.

38 changes: 20 additions & 18 deletions llvm/test/CodeGen/Thumb2/mve-vld4-post.ll
Original file line number Diff line number Diff line change
Expand Up @@ -119,41 +119,43 @@ define <8 x i64> *@vld4_v2i64(<8 x i64> *%src, <2 x i64> *%dst) {
; CHECK-NEXT: vmov.f32 s14, s20
; CHECK-NEXT: vmov.f32 s19, s23
; CHECK-NEXT: vmov.f32 s15, s21
; CHECK-NEXT: vmov r2, s16
; CHECK-NEXT: vmov r3, s12
; CHECK-NEXT: vmov r2, s18
; CHECK-NEXT: vmov r3, s14
; CHECK-NEXT: vmov.f32 s5, s3
; CHECK-NEXT: vmov.f32 s6, s10
; CHECK-NEXT: vmov.f32 s2, s8
; CHECK-NEXT: vmov.f32 s3, s9
; CHECK-NEXT: vmov.f32 s7, s11
; CHECK-NEXT: vmov r12, s17
; CHECK-NEXT: vmov lr, s13
; CHECK-NEXT: vmov r4, s4
; CHECK-NEXT: vmov r5, s0
; CHECK-NEXT: vmov r7, s2
; CHECK-NEXT: vmov r12, s19
; CHECK-NEXT: vmov lr, s15
; CHECK-NEXT: vmov r4, s6
; CHECK-NEXT: vmov r5, s2
; CHECK-NEXT: vmov r7, s0
; CHECK-NEXT: adds r6, r3, r2
; CHECK-NEXT: vmov r2, s5
; CHECK-NEXT: vmov r3, s1
; CHECK-NEXT: vmov r2, s7
; CHECK-NEXT: vmov r3, s3
; CHECK-NEXT: adc.w r12, r12, lr
; CHECK-NEXT: adds r5, r5, r4
; CHECK-NEXT: vmov r4, s18
; CHECK-NEXT: vmov r4, s16
; CHECK-NEXT: adcs r2, r3
; CHECK-NEXT: adds.w lr, r5, r6
; CHECK-NEXT: adc.w r12, r12, r2
; CHECK-NEXT: vmov r2, s14
; CHECK-NEXT: vmov r6, s19
; CHECK-NEXT: vmov r5, s15
; CHECK-NEXT: vmov r3, s6
; CHECK-NEXT: vmov r2, s12
; CHECK-NEXT: vmov r6, s17
; CHECK-NEXT: vmov r5, s13
; CHECK-NEXT: vmov r3, s4
; CHECK-NEXT: adds r2, r2, r4
; CHECK-NEXT: vmov r4, s3
; CHECK-NEXT: vmov r4, s1
; CHECK-NEXT: adcs r6, r5
; CHECK-NEXT: vmov r5, s7
; CHECK-NEXT: vmov r5, s5
; CHECK-NEXT: adds r3, r3, r7
; CHECK-NEXT: adcs r4, r5
; CHECK-NEXT: adds r2, r2, r3
; CHECK-NEXT: adc.w r3, r4, r6
; CHECK-NEXT: vmov q0[2], q0[0], r2, lr
; CHECK-NEXT: vmov q0[3], q0[1], r3, r12
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: vmov.32 q0[1], r3
; CHECK-NEXT: vmov.32 q0[2], lr
; CHECK-NEXT: vmov.32 q0[3], r12
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: add sp, #4
Expand Down
263 changes: 140 additions & 123 deletions llvm/test/CodeGen/Thumb2/mve-vld4.ll
Original file line number Diff line number Diff line change
Expand Up @@ -229,33 +229,41 @@ define void @vld4_v4i16(<16 x i16> *%src, <4 x i16> *%dst) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vldrw.u32 q0, [r0]
; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
; CHECK-NEXT: vmov.u16 r2, q0[3]
; CHECK-NEXT: vmov.u16 r0, q1[3]
; CHECK-NEXT: vmov q2[2], q2[0], r0, r2
; CHECK-NEXT: vmov.u16 r0, q0[7]
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vldrw.u32 q0, [r0, #16]
; CHECK-NEXT: vmov.u16 r2, q1[3]
; CHECK-NEXT: vmov.u16 r0, q0[3]
; CHECK-NEXT: vmov.32 q2[0], r2
; CHECK-NEXT: vmov.u16 r2, q1[7]
; CHECK-NEXT: vmov q2[3], q2[1], r2, r0
; CHECK-NEXT: vmov.32 q2[1], r2
; CHECK-NEXT: vmov.32 q2[2], r0
; CHECK-NEXT: vmov.u16 r0, q0[7]
; CHECK-NEXT: vmov.32 q2[3], r0
; CHECK-NEXT: vmov.u16 r0, q1[2]
; CHECK-NEXT: vmov.32 q3[0], r0
; CHECK-NEXT: vmov.u16 r0, q1[6]
; CHECK-NEXT: vmov.32 q3[1], r0
; CHECK-NEXT: vmov.u16 r0, q0[2]
; CHECK-NEXT: vmov.u16 r2, q1[2]
; CHECK-NEXT: vmov q3[2], q3[0], r2, r0
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.u16 r0, q0[6]
; CHECK-NEXT: vmov.u16 r2, q1[6]
; CHECK-NEXT: vmov q3[3], q3[1], r2, r0
; CHECK-NEXT: vmov.u16 r0, q0[0]
; CHECK-NEXT: vmov.u16 r2, q1[0]
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: vmov.u16 r0, q1[0]
; CHECK-NEXT: vadd.i32 q2, q3, q2
; CHECK-NEXT: vmov q3[2], q3[0], r2, r0
; CHECK-NEXT: vmov.32 q3[0], r0
; CHECK-NEXT: vmov.u16 r0, q1[4]
; CHECK-NEXT: vmov.32 q3[1], r0
; CHECK-NEXT: vmov.u16 r0, q0[0]
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.u16 r0, q1[1]
; CHECK-NEXT: vmov.32 q4[0], r0
; CHECK-NEXT: vmov.u16 r0, q1[5]
; CHECK-NEXT: vmov.32 q4[1], r0
; CHECK-NEXT: vmov.u16 r0, q0[1]
; CHECK-NEXT: vmov.u16 r2, q1[1]
; CHECK-NEXT: vmov q4[2], q4[0], r2, r0
; CHECK-NEXT: vmov.32 q4[2], r0
; CHECK-NEXT: vmov.u16 r0, q0[5]
; CHECK-NEXT: vmov.u16 r2, q1[5]
; CHECK-NEXT: vmov q4[3], q4[1], r2, r0
; CHECK-NEXT: vmov.32 q4[3], r0
; CHECK-NEXT: vmov.u16 r0, q0[4]
; CHECK-NEXT: vmov.u16 r2, q1[4]
; CHECK-NEXT: vmov q3[3], q3[1], r2, r0
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: vadd.i32 q0, q3, q4
; CHECK-NEXT: vadd.i32 q0, q0, q2
; CHECK-NEXT: vstrh.32 q0, [r1]
Expand Down Expand Up @@ -382,12 +390,14 @@ define void @vld4_v4i8(<16 x i8> *%src, <4 x i8> *%dst) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r0]
; CHECK-NEXT: vmov.u8 r0, q0[2]
; CHECK-NEXT: vmov.u8 r2, q0[10]
; CHECK-NEXT: vmov q1[2], q1[0], r2, r0
; CHECK-NEXT: vmov.u8 r0, q0[6]
; CHECK-NEXT: vmov.u8 r2, q0[14]
; CHECK-NEXT: vrev32.8 q2, q0
; CHECK-NEXT: vmov q1[3], q1[1], r2, r0
; CHECK-NEXT: vmov.32 q1[0], r0
; CHECK-NEXT: vmov.u8 r0, q0[6]
; CHECK-NEXT: vmov.32 q1[1], r0
; CHECK-NEXT: vmov.u8 r0, q0[10]
; CHECK-NEXT: vmov.32 q1[2], r0
; CHECK-NEXT: vmov.u8 r0, q0[14]
; CHECK-NEXT: vmov.32 q1[3], r0
; CHECK-NEXT: vadd.i32 q1, q1, q2
; CHECK-NEXT: vrev16.8 q2, q0
; CHECK-NEXT: vadd.i32 q0, q0, q2
Expand Down Expand Up @@ -545,42 +555,44 @@ define void @vld4_v2i64(<8 x i64> *%src, <2 x i64> *%dst) {
; CHECK-NEXT: vmov.f32 s14, s20
; CHECK-NEXT: vmov.f32 s19, s23
; CHECK-NEXT: vmov.f32 s15, s21
; CHECK-NEXT: vmov r3, s16
; CHECK-NEXT: vmov r0, s12
; CHECK-NEXT: vmov r12, s17
; CHECK-NEXT: vmov r2, s13
; CHECK-NEXT: vmov r3, s18
; CHECK-NEXT: vmov r0, s14
; CHECK-NEXT: vmov.f64 d2, d1
; CHECK-NEXT: vmov r12, s19
; CHECK-NEXT: vmov r2, s15
; CHECK-NEXT: vmov.f32 s5, s3
; CHECK-NEXT: vmov.f32 s6, s10
; CHECK-NEXT: vmov.f32 s2, s8
; CHECK-NEXT: vmov.f32 s3, s9
; CHECK-NEXT: vmov.f32 s7, s11
; CHECK-NEXT: vmov r4, s0
; CHECK-NEXT: vmov r5, s6
; CHECK-NEXT: vmov r6, s2
; CHECK-NEXT: vmov r4, s2
; CHECK-NEXT: vmov r5, s4
; CHECK-NEXT: vmov r6, s0
; CHECK-NEXT: adds.w lr, r0, r3
; CHECK-NEXT: vmov r3, s5
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: vmov r3, s7
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: adc.w r12, r12, r2
; CHECK-NEXT: vmov r2, s4
; CHECK-NEXT: vmov r2, s6
; CHECK-NEXT: adds r2, r2, r4
; CHECK-NEXT: vmov r4, s15
; CHECK-NEXT: vmov r4, s13
; CHECK-NEXT: adcs r0, r3
; CHECK-NEXT: adds.w lr, lr, r2
; CHECK-NEXT: adc.w r12, r12, r0
; CHECK-NEXT: vmov r0, s18
; CHECK-NEXT: vmov r2, s14
; CHECK-NEXT: vmov r3, s19
; CHECK-NEXT: vmov r0, s16
; CHECK-NEXT: vmov r2, s12
; CHECK-NEXT: vmov r3, s17
; CHECK-NEXT: adds r0, r0, r2
; CHECK-NEXT: adc.w r2, r4, r3
; CHECK-NEXT: vmov r3, s7
; CHECK-NEXT: vmov r4, s3
; CHECK-NEXT: vmov r3, s5
; CHECK-NEXT: vmov r4, s1
; CHECK-NEXT: adds r5, r5, r6
; CHECK-NEXT: adcs r3, r4
; CHECK-NEXT: adds r0, r0, r5
; CHECK-NEXT: adcs r2, r3
; CHECK-NEXT: vmov q0[2], q0[0], r0, lr
; CHECK-NEXT: vmov q0[3], q0[1], r2, r12
; CHECK-NEXT: vmov.32 q0[0], r0
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: vmov.32 q0[2], lr
; CHECK-NEXT: vmov.32 q0[3], r12
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: pop {r4, r5, r6, pc}
Expand All @@ -600,118 +612,123 @@ entry:
define void @vld4_v4i64(<16 x i64> *%src, <4 x i64> *%dst) {
; CHECK-LABEL: vld4_v4i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr}
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: .pad #64
; CHECK-NEXT: sub sp, #64
; CHECK-NEXT: .pad #72
; CHECK-NEXT: sub sp, #72
; CHECK-NEXT: vldrw.u32 q1, [r0, #64]
; CHECK-NEXT: vldrw.u32 q0, [r0, #96]
; CHECK-NEXT: vldrw.u32 q4, [r0, #80]
; CHECK-NEXT: vldrw.u32 q6, [r0, #16]
; CHECK-NEXT: vmov.f64 d4, d3
; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill
; CHECK-NEXT: vldrw.u32 q3, [r0]
; CHECK-NEXT: vstrw.32 q1, [sp, #32] @ 16-byte Spill
; CHECK-NEXT: vmov.f32 s9, s7
; CHECK-NEXT: vldrw.u32 q1, [r0, #32]
; CHECK-NEXT: vmov.f32 s10, s2
; CHECK-NEXT: vmov.f32 s11, s3
; CHECK-NEXT: vldrw.u32 q5, [r0, #80]
; CHECK-NEXT: vldrw.u32 q7, [r0, #16]
; CHECK-NEXT: vmov.f64 d8, d3
; CHECK-NEXT: vstrw.32 q0, [sp, #32] @ 16-byte Spill
; CHECK-NEXT: vstrw.32 q1, [sp, #48] @ 16-byte Spill
; CHECK-NEXT: vmov.f32 s17, s7
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vmov.f32 s18, s2
; CHECK-NEXT: vmov.f32 s19, s3
; CHECK-NEXT: vldrw.u32 q0, [r0, #112]
; CHECK-NEXT: vmov.f64 d14, d9
; CHECK-NEXT: vstrw.32 q2, [sp, #48] @ 16-byte Spill
; CHECK-NEXT: vmov.f64 d12, d11
; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill
; CHECK-NEXT: vmov.f32 s25, s23
; CHECK-NEXT: vmov.f32 s26, s2
; CHECK-NEXT: vmov.f64 d6, d3
; CHECK-NEXT: vmov.f32 s27, s3
; CHECK-NEXT: vldrw.u32 q0, [r0, #32]
; CHECK-NEXT: vmov.f32 s13, s7
; CHECK-NEXT: vmov.f32 s14, s2
; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill
; CHECK-NEXT: vmov.f32 s29, s19
; CHECK-NEXT: vmov.f32 s30, s2
; CHECK-NEXT: vmov.f64 d4, d13
; CHECK-NEXT: vmov.f32 s31, s3
; CHECK-NEXT: vmov.f64 d4, d15
; CHECK-NEXT: vmov.f32 s15, s3
; CHECK-NEXT: vldrw.u32 q0, [r0, #48]
; CHECK-NEXT: vmov.f32 s9, s27
; CHECK-NEXT: vmov.f32 s9, s31
; CHECK-NEXT: vmov.f32 s10, s2
; CHECK-NEXT: vmov.f32 s26, s0
; CHECK-NEXT: vmov.f32 s30, s0
; CHECK-NEXT: vmov.f32 s11, s3
; CHECK-NEXT: vmov.f32 s27, s1
; CHECK-NEXT: vmov.f32 s31, s1
; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload
; CHECK-NEXT: vmov r3, s8
; CHECK-NEXT: vmov r0, s24
; CHECK-NEXT: vmov r12, s9
; CHECK-NEXT: vmov r2, s25
; CHECK-NEXT: vmov.f64 d10, d7
; CHECK-NEXT: vmov.f32 s21, s15
; CHECK-NEXT: vmov.f32 s22, s6
; CHECK-NEXT: vmov.f32 s14, s4
; CHECK-NEXT: vmov.f32 s15, s5
; CHECK-NEXT: vmov.f32 s23, s7
; CHECK-NEXT: vldrw.u32 q1, [sp, #16] @ 16-byte Reload
; CHECK-NEXT: vmov r4, s12
; CHECK-NEXT: vmov.f32 s18, s0
; CHECK-NEXT: vmov.f32 s19, s1
; CHECK-NEXT: vmov r3, s10
; CHECK-NEXT: vmov r0, s30
; CHECK-NEXT: vmov.f32 s6, s0
; CHECK-NEXT: vmov.f32 s7, s1
; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload
; CHECK-NEXT: vmov r4, s6
; CHECK-NEXT: vstrw.32 q1, [sp] @ 16-byte Spill
; CHECK-NEXT: vmov r12, s11
; CHECK-NEXT: vmov r2, s31
; CHECK-NEXT: vmov.f32 s22, s0
; CHECK-NEXT: vmov.f32 s23, s1
; CHECK-NEXT: vldrw.u32 q0, [sp, #48] @ 16-byte Reload
; CHECK-NEXT: vmov r5, s18
; CHECK-NEXT: vmov r7, s16
; CHECK-NEXT: adds.w lr, r0, r3
; CHECK-NEXT: vmov r3, s20
; CHECK-NEXT: vmov r0, s13
; CHECK-NEXT: vmov r3, s14
; CHECK-NEXT: vmov r0, s7
; CHECK-NEXT: vldrw.u32 q1, [sp, #32] @ 16-byte Reload
; CHECK-NEXT: vmov.f32 s2, s4
; CHECK-NEXT: vmov.f32 s3, s5
; CHECK-NEXT: vldrw.u32 q1, [sp] @ 16-byte Reload
; CHECK-NEXT: adc.w r12, r12, r2
; CHECK-NEXT: vmov r2, s21
; CHECK-NEXT: vmov r2, s15
; CHECK-NEXT: vmov r6, s2
; CHECK-NEXT: adds r3, r3, r4
; CHECK-NEXT: vmov r4, s30
; CHECK-NEXT: vmov r4, s23
; CHECK-NEXT: adcs r0, r2
; CHECK-NEXT: adds.w lr, lr, r3
; CHECK-NEXT: vmov r2, s18
; CHECK-NEXT: adc.w r12, r12, r0
; CHECK-NEXT: vmov r0, s31
; CHECK-NEXT: vmov r0, s26
; CHECK-NEXT: vmov r2, s22
; CHECK-NEXT: vmov r3, s27
; CHECK-NEXT: adds r0, r0, r2
; CHECK-NEXT: adc.w r2, r4, r3
; CHECK-NEXT: vmov r3, s19
; CHECK-NEXT: adds r2, r2, r4
; CHECK-NEXT: adcs r3, r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vldrw.u32 q0, [sp, #32] @ 16-byte Reload
; CHECK-NEXT: vmov.f32 s2, s4
; CHECK-NEXT: vmov.f32 s3, s5
; CHECK-NEXT: vldrw.u32 q1, [sp, #48] @ 16-byte Reload
; CHECK-NEXT: vmov r6, s2
; CHECK-NEXT: vmov r5, s6
; CHECK-NEXT: vmov r7, s4
; CHECK-NEXT: vmov r4, s3
; CHECK-NEXT: adds r5, r5, r6
; CHECK-NEXT: vmov r6, s16
; CHECK-NEXT: adcs r4, r0
; CHECK-NEXT: adds.w r9, r5, r2
; CHECK-NEXT: vmov r5, s28
; CHECK-NEXT: adc.w r8, r4, r3
; CHECK-NEXT: vmov r2, s29
; CHECK-NEXT: vmov r4, s17
; CHECK-NEXT: vmov r6, s20
; CHECK-NEXT: adcs r3, r4
; CHECK-NEXT: adds r0, r0, r5
; CHECK-NEXT: vmov r5, s24
; CHECK-NEXT: adc.w r8, r3, r2
; CHECK-NEXT: vmov r2, s25
; CHECK-NEXT: vmov r4, s21
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: vmov r0, s10
; CHECK-NEXT: adds r5, r5, r6
; CHECK-NEXT: vmov r6, s1
; CHECK-NEXT: adcs r2, r4
; CHECK-NEXT: vmov r4, s5
; CHECK-NEXT: vmov r4, s17
; CHECK-NEXT: adds r3, r3, r7
; CHECK-NEXT: vmov r7, s14
; CHECK-NEXT: vmov r7, s28
; CHECK-NEXT: adcs r4, r6
; CHECK-NEXT: adds r3, r3, r5
; CHECK-NEXT: vmov r6, s22
; CHECK-NEXT: adc.w r10, r4, r2
; CHECK-NEXT: vmov r4, s23
; CHECK-NEXT: vmov q1[2], q1[0], r9, r3
; CHECK-NEXT: vmov r5, s15
; CHECK-NEXT: vmov q1[3], q1[1], r8, r10
; CHECK-NEXT: vmov r2, s26
; CHECK-NEXT: vstrw.32 q1, [r1, #16]
; CHECK-NEXT: vmov r6, s8
; CHECK-NEXT: adcs r2, r4
; CHECK-NEXT: vmov r4, s9
; CHECK-NEXT: vmov.32 q0[0], r3
; CHECK-NEXT: vmov r5, s29
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: vmov.32 q0[2], r0
; CHECK-NEXT: vmov r0, s12
; CHECK-NEXT: vmov r3, s13
; CHECK-NEXT: vmov.32 q0[3], r8
; CHECK-NEXT: vmov r2, s5
; CHECK-NEXT: vstrw.32 q0, [r1, #16]
; CHECK-NEXT: adds r6, r6, r7
; CHECK-NEXT: vmov r7, s27
; CHECK-NEXT: adcs r4, r5
; CHECK-NEXT: vmov r5, s11
; CHECK-NEXT: adds r0, r0, r2
; CHECK-NEXT: adc.w r2, r7, r5
; CHECK-NEXT: vmov r5, s4
; CHECK-NEXT: adds r0, r0, r5
; CHECK-NEXT: adcs r2, r3
; CHECK-NEXT: adds r0, r0, r6
; CHECK-NEXT: vmov q0[2], q0[0], r0, lr
; CHECK-NEXT: adc.w r0, r4, r2
; CHECK-NEXT: vmov q0[3], q0[1], r0, r12
; CHECK-NEXT: adcs r2, r4
; CHECK-NEXT: vmov.32 q0[0], r0
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: vmov.32 q0[2], lr
; CHECK-NEXT: vmov.32 q0[3], r12
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: add sp, #64
; CHECK-NEXT: add sp, #72
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc}
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc}
entry:
%l1 = load <16 x i64>, <16 x i64>* %src, align 4
%s1 = shufflevector <16 x i64> %l1, <16 x i64> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
Expand Down
122 changes: 70 additions & 52 deletions llvm/test/CodeGen/Thumb2/mve-vmulh.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@ define arm_aapcs_vfpcc <2 x i32> @vmulhs_v2i32(<2 x i32> %s0, <2 x i32> %s1) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmullb.s32 q2, q0, q1
; CHECK-NEXT: vmov r0, s9
; CHECK-NEXT: vmov r1, s11
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
; CHECK-NEXT: vmov.32 q0[0], r0
; CHECK-NEXT: asrs r0, r0, #31
; CHECK-NEXT: asrs r1, r1, #31
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: vmov.32 q0[1], r0
; CHECK-NEXT: vmov r0, s11
; CHECK-NEXT: vmov.32 q0[2], r0
; CHECK-NEXT: asrs r0, r0, #31
; CHECK-NEXT: vmov.32 q0[3], r0
; CHECK-NEXT: bx lr
entry:
%s0s = sext <2 x i32> %s0 to <2 x i64>
Expand Down Expand Up @@ -46,28 +48,28 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @vmulhs_v4i32(<4 x i32> %s0, <4 x i32> %s1) {
; CHECK-LABEL: vmulhs_v4i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: vmov.f32 s8, s4
; CHECK-NEXT: vmov.f32 s12, s0
; CHECK-NEXT: vmov.f32 s10, s5
; CHECK-NEXT: vmov.f32 s14, s1
; CHECK-NEXT: vmov r0, s8
; CHECK-NEXT: vmov r1, s12
; CHECK-NEXT: vmov.f32 s16, s6
; CHECK-NEXT: vmov.f32 s18, s7
; CHECK-NEXT: vmov.f32 s10, s5
; CHECK-NEXT: vmov r2, s12
; CHECK-NEXT: vmov r1, s14
; CHECK-NEXT: vmov r0, s10
; CHECK-NEXT: vmov.f32 s12, s6
; CHECK-NEXT: vmov.f32 s14, s7
; CHECK-NEXT: vmov.f32 s4, s2
; CHECK-NEXT: vmov.f32 s6, s3
; CHECK-NEXT: vmullb.s32 q5, q1, q4
; CHECK-NEXT: vmullb.s32 q0, q1, q3
; CHECK-NEXT: smmul r0, r1, r0
; CHECK-NEXT: vmov r1, s21
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
; CHECK-NEXT: vmov r0, s10
; CHECK-NEXT: vmov r1, s14
; CHECK-NEXT: smmul r0, r1, r0
; CHECK-NEXT: vmov r1, s23
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: vmov r1, s8
; CHECK-NEXT: smmul r1, r2, r1
; CHECK-NEXT: vmov.32 q2[0], r1
; CHECK-NEXT: vmov.32 q2[1], r0
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: vmov.32 q2[2], r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: vmov.32 q2[3], r0
; CHECK-NEXT: vmov q0, q2
; CHECK-NEXT: bx lr
entry:
%s0s = sext <4 x i32> %s0 to <4 x i64>
Expand Down Expand Up @@ -140,18 +142,21 @@ define arm_aapcs_vfpcc <8 x i16> @vmulhs_v8i16(<8 x i16> %s0, <8 x i16> %s1) {
; CHECK-LABEL: vmulhs_v8i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.u16 r0, q1[0]
; CHECK-NEXT: vmov.u16 r1, q1[2]
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
; CHECK-NEXT: vmov.32 q2[0], r0
; CHECK-NEXT: vmov.u16 r0, q1[1]
; CHECK-NEXT: vmov.u16 r1, q1[3]
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
; CHECK-NEXT: vmov.32 q2[1], r0
; CHECK-NEXT: vmov.u16 r0, q1[2]
; CHECK-NEXT: vmov.32 q2[2], r0
; CHECK-NEXT: vmov.u16 r0, q1[3]
; CHECK-NEXT: vmov.32 q2[3], r0
; CHECK-NEXT: vmov.u16 r0, q0[0]
; CHECK-NEXT: vmov.u16 r1, q0[2]
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
; CHECK-NEXT: vmov.32 q3[0], r0
; CHECK-NEXT: vmov.u16 r0, q0[1]
; CHECK-NEXT: vmov.u16 r1, q0[3]
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
; CHECK-NEXT: vmov.u16 r1, q1[6]
; CHECK-NEXT: vmov.32 q3[1], r0
; CHECK-NEXT: vmov.u16 r0, q0[2]
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.u16 r0, q0[3]
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: vmullb.s16 q2, q3, q2
; CHECK-NEXT: vshr.s32 q3, q2, #16
; CHECK-NEXT: vmov r0, s12
Expand All @@ -163,16 +168,21 @@ define arm_aapcs_vfpcc <8 x i16> @vmulhs_v8i16(<8 x i16> %s0, <8 x i16> %s1) {
; CHECK-NEXT: vmov r0, s15
; CHECK-NEXT: vmov.16 q2[3], r0
; CHECK-NEXT: vmov.u16 r0, q1[4]
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
; CHECK-NEXT: vmov.32 q3[0], r0
; CHECK-NEXT: vmov.u16 r0, q1[5]
; CHECK-NEXT: vmov.u16 r1, q1[7]
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
; CHECK-NEXT: vmov.32 q3[1], r0
; CHECK-NEXT: vmov.u16 r0, q1[6]
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.u16 r0, q1[7]
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: vmov.u16 r0, q0[4]
; CHECK-NEXT: vmov.u16 r1, q0[6]
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
; CHECK-NEXT: vmov.32 q1[0], r0
; CHECK-NEXT: vmov.u16 r0, q0[5]
; CHECK-NEXT: vmov.u16 r1, q0[7]
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
; CHECK-NEXT: vmov.32 q1[1], r0
; CHECK-NEXT: vmov.u16 r0, q0[6]
; CHECK-NEXT: vmov.32 q1[2], r0
; CHECK-NEXT: vmov.u16 r0, q0[7]
; CHECK-NEXT: vmov.32 q1[3], r0
; CHECK-NEXT: vmullb.s16 q0, q1, q3
; CHECK-NEXT: vshr.s32 q0, q0, #16
; CHECK-NEXT: vmov r0, s0
Expand All @@ -198,18 +208,21 @@ define arm_aapcs_vfpcc <8 x i16> @vmulhu_v8i16(<8 x i16> %s0, <8 x i16> %s1) {
; CHECK-LABEL: vmulhu_v8i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.u16 r0, q1[0]
; CHECK-NEXT: vmov.u16 r1, q1[2]
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
; CHECK-NEXT: vmov.32 q2[0], r0
; CHECK-NEXT: vmov.u16 r0, q1[1]
; CHECK-NEXT: vmov.u16 r1, q1[3]
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
; CHECK-NEXT: vmov.32 q2[1], r0
; CHECK-NEXT: vmov.u16 r0, q1[2]
; CHECK-NEXT: vmov.32 q2[2], r0
; CHECK-NEXT: vmov.u16 r0, q1[3]
; CHECK-NEXT: vmov.32 q2[3], r0
; CHECK-NEXT: vmov.u16 r0, q0[0]
; CHECK-NEXT: vmov.u16 r1, q0[2]
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
; CHECK-NEXT: vmov.32 q3[0], r0
; CHECK-NEXT: vmov.u16 r0, q0[1]
; CHECK-NEXT: vmov.u16 r1, q0[3]
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
; CHECK-NEXT: vmov.u16 r1, q1[6]
; CHECK-NEXT: vmov.32 q3[1], r0
; CHECK-NEXT: vmov.u16 r0, q0[2]
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.u16 r0, q0[3]
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: vmullb.u16 q2, q3, q2
; CHECK-NEXT: vshr.u32 q3, q2, #16
; CHECK-NEXT: vmov r0, s12
Expand All @@ -221,16 +234,21 @@ define arm_aapcs_vfpcc <8 x i16> @vmulhu_v8i16(<8 x i16> %s0, <8 x i16> %s1) {
; CHECK-NEXT: vmov r0, s15
; CHECK-NEXT: vmov.16 q2[3], r0
; CHECK-NEXT: vmov.u16 r0, q1[4]
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
; CHECK-NEXT: vmov.32 q3[0], r0
; CHECK-NEXT: vmov.u16 r0, q1[5]
; CHECK-NEXT: vmov.u16 r1, q1[7]
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
; CHECK-NEXT: vmov.32 q3[1], r0
; CHECK-NEXT: vmov.u16 r0, q1[6]
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.u16 r0, q1[7]
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: vmov.u16 r0, q0[4]
; CHECK-NEXT: vmov.u16 r1, q0[6]
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
; CHECK-NEXT: vmov.32 q1[0], r0
; CHECK-NEXT: vmov.u16 r0, q0[5]
; CHECK-NEXT: vmov.u16 r1, q0[7]
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
; CHECK-NEXT: vmov.32 q1[1], r0
; CHECK-NEXT: vmov.u16 r0, q0[6]
; CHECK-NEXT: vmov.32 q1[2], r0
; CHECK-NEXT: vmov.u16 r0, q0[7]
; CHECK-NEXT: vmov.32 q1[3], r0
; CHECK-NEXT: vmullb.u16 q0, q1, q3
; CHECK-NEXT: vshr.u32 q0, q0, #16
; CHECK-NEXT: vmov r0, s0
Expand Down
43 changes: 24 additions & 19 deletions llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,33 +4,38 @@
define arm_aapcs_vfpcc void @test32(i32* noalias nocapture readonly %x, i32* noalias nocapture readonly %y, i32* nocapture %z, i32 %n) {
; CHECK-LABEL: test32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: .save {r5, lr}
; CHECK-NEXT: push {r5, lr}
; CHECK-NEXT: cmp r3, #1
; CHECK-NEXT: blt .LBB0_2
; CHECK-NEXT: it lt
; CHECK-NEXT: poplt {r5, pc}
; CHECK-NEXT: .LBB0_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q1, [r0], #16
; CHECK-NEXT: vldrw.u32 q2, [r1], #16
; CHECK-NEXT: subs r3, #4
; CHECK-NEXT: vmullt.s32 q0, q2, q1
; CHECK-NEXT: vmullb.s32 q3, q2, q1
; CHECK-NEXT: vmov r5, s1
; CHECK-NEXT: vmov r12, s0
; CHECK-NEXT: vmov r7, s3
; CHECK-NEXT: vmullt.s32 q3, q2, q1
; CHECK-NEXT: vmov r5, s13
; CHECK-NEXT: vmov r12, s12
; CHECK-NEXT: lsrl r12, r5, #31
; CHECK-NEXT: vmov r4, s2
; CHECK-NEXT: lsrl r4, r7, #31
; CHECK-NEXT: vmov q0[2], q0[0], r4, r12
; CHECK-NEXT: vmov.32 q0[0], r12
; CHECK-NEXT: vmov r12, s14
; CHECK-NEXT: vmov.32 q0[1], r5
; CHECK-NEXT: vmov r5, s15
; CHECK-NEXT: lsrl r12, r5, #31
; CHECK-NEXT: vmullb.s32 q3, q2, q1
; CHECK-NEXT: vmov.32 q0[2], r12
; CHECK-NEXT: vmov r12, s12
; CHECK-NEXT: vmov q0[3], q0[1], r7, r5
; CHECK-NEXT: vmov.32 q0[3], r5
; CHECK-NEXT: vmov r5, s13
; CHECK-NEXT: vmov r7, s15
; CHECK-NEXT: lsrl r12, r5, #31
; CHECK-NEXT: vmov r4, s14
; CHECK-NEXT: lsrl r4, r7, #31
; CHECK-NEXT: vmov q1[2], q1[0], r4, r12
; CHECK-NEXT: vmov q1[3], q1[1], r7, r5
; CHECK-NEXT: vmov.32 q1[0], r12
; CHECK-NEXT: vmov r12, s14
; CHECK-NEXT: vmov.32 q1[1], r5
; CHECK-NEXT: vmov r5, s15
; CHECK-NEXT: lsrl r12, r5, #31
; CHECK-NEXT: vmov.32 q1[2], r12
; CHECK-NEXT: vmov.32 q1[3], r5
; CHECK-NEXT: vmov.f32 s8, s6
; CHECK-NEXT: vmov.f32 s9, s7
; CHECK-NEXT: vmov.f32 s6, s0
Expand All @@ -42,8 +47,8 @@ define arm_aapcs_vfpcc void @test32(i32* noalias nocapture readonly %x, i32* noa
; CHECK-NEXT: vmov.f32 s7, s10
; CHECK-NEXT: vstrb.8 q1, [r2], #16
; CHECK-NEXT: bne .LBB0_1
; CHECK-NEXT: .LBB0_2: @ %for.cond.cleanup
; CHECK-NEXT: pop {r4, r5, r7, pc}
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
; CHECK-NEXT: pop {r5, pc}
entry:
%0 = and i32 %n, 3
%cmp = icmp eq i32 %0, 0
Expand Down
40 changes: 24 additions & 16 deletions llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll
Original file line number Diff line number Diff line change
Expand Up @@ -74,18 +74,21 @@ define arm_aapcs_vfpcc <8 x i16> @vqdmulh_i16_c(<8 x i16> %s0, <8 x i16> %s1) {
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov q2, q0
; CHECK-NEXT: vmov.u16 r0, q0[0]
; CHECK-NEXT: vmov.u16 r1, q0[2]
; CHECK-NEXT: vmov q0[2], q0[0], r1, r0
; CHECK-NEXT: vmov.32 q0[0], r0
; CHECK-NEXT: vmov.u16 r0, q2[1]
; CHECK-NEXT: vmov.u16 r1, q2[3]
; CHECK-NEXT: vmov q0[3], q0[1], r1, r0
; CHECK-NEXT: vmov.32 q0[1], r0
; CHECK-NEXT: vmov.u16 r0, q2[2]
; CHECK-NEXT: vmov.32 q0[2], r0
; CHECK-NEXT: vmov.u16 r0, q2[3]
; CHECK-NEXT: vmov.32 q0[3], r0
; CHECK-NEXT: vmov.u16 r0, q1[0]
; CHECK-NEXT: vmov.u16 r1, q1[2]
; CHECK-NEXT: vmov q3[2], q3[0], r1, r0
; CHECK-NEXT: vmov.32 q3[0], r0
; CHECK-NEXT: vmov.u16 r0, q1[1]
; CHECK-NEXT: vmov.u16 r1, q1[3]
; CHECK-NEXT: vmov q3[3], q3[1], r1, r0
; CHECK-NEXT: vmov.u16 r1, q2[6]
; CHECK-NEXT: vmov.32 q3[1], r0
; CHECK-NEXT: vmov.u16 r0, q1[2]
; CHECK-NEXT: vmov.32 q3[2], r0
; CHECK-NEXT: vmov.u16 r0, q1[3]
; CHECK-NEXT: vmov.32 q3[3], r0
; CHECK-NEXT: vmullb.s16 q0, q3, q0
; CHECK-NEXT: vmov.i32 q3, #0x7fff
; CHECK-NEXT: vshl.i32 q0, q0, #10
Expand All @@ -101,16 +104,21 @@ define arm_aapcs_vfpcc <8 x i16> @vqdmulh_i16_c(<8 x i16> %s0, <8 x i16> %s1) {
; CHECK-NEXT: vmov r0, s19
; CHECK-NEXT: vmov.16 q0[3], r0
; CHECK-NEXT: vmov.u16 r0, q2[4]
; CHECK-NEXT: vmov q4[2], q4[0], r1, r0
; CHECK-NEXT: vmov.32 q4[0], r0
; CHECK-NEXT: vmov.u16 r0, q2[5]
; CHECK-NEXT: vmov.u16 r1, q2[7]
; CHECK-NEXT: vmov q4[3], q4[1], r1, r0
; CHECK-NEXT: vmov.32 q4[1], r0
; CHECK-NEXT: vmov.u16 r0, q2[6]
; CHECK-NEXT: vmov.32 q4[2], r0
; CHECK-NEXT: vmov.u16 r0, q2[7]
; CHECK-NEXT: vmov.32 q4[3], r0
; CHECK-NEXT: vmov.u16 r0, q1[4]
; CHECK-NEXT: vmov.u16 r1, q1[6]
; CHECK-NEXT: vmov q2[2], q2[0], r1, r0
; CHECK-NEXT: vmov.32 q2[0], r0
; CHECK-NEXT: vmov.u16 r0, q1[5]
; CHECK-NEXT: vmov.u16 r1, q1[7]
; CHECK-NEXT: vmov q2[3], q2[1], r1, r0
; CHECK-NEXT: vmov.32 q2[1], r0
; CHECK-NEXT: vmov.u16 r0, q1[6]
; CHECK-NEXT: vmov.32 q2[2], r0
; CHECK-NEXT: vmov.u16 r0, q1[7]
; CHECK-NEXT: vmov.32 q2[3], r0
; CHECK-NEXT: vmullb.s16 q1, q2, q4
; CHECK-NEXT: vshl.i32 q1, q1, #10
; CHECK-NEXT: vshr.s32 q1, q1, #10
Expand Down
120 changes: 66 additions & 54 deletions llvm/test/CodeGen/Thumb2/mve-vqmovn.ll
Original file line number Diff line number Diff line change
Expand Up @@ -165,52 +165,56 @@ define arm_aapcs_vfpcc <2 x i64> @vqmovni64_smaxmin(<2 x i64> %s0) {
; CHECK-LABEL: vqmovni64_smaxmin:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: mvn r12, #-2147483648
; CHECK-NEXT: mvn r3, #-2147483648
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: vmov r3, s2
; CHECK-NEXT: subs.w r2, r2, r12
; CHECK-NEXT: vmov r2, s3
; CHECK-NEXT: subs r2, r2, r3
; CHECK-NEXT: sbcs r1, r1, #0
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: mov.w r1, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: subs.w r3, r3, r12
; CHECK-NEXT: mov.w r12, #-1
; CHECK-NEXT: sbcs r2, r2, #0
; CHECK-NEXT: mov.w r2, #0
; CHECK-NEXT: vmov.32 q1[0], r1
; CHECK-NEXT: vmov.32 q1[1], r1
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: subs r2, r2, r3
; CHECK-NEXT: mov.w r3, #-1
; CHECK-NEXT: sbcs r1, r1, #0
; CHECK-NEXT: mov.w r1, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csetm r2, ne
; CHECK-NEXT: vmov q1[2], q1[0], r2, r1
; CHECK-NEXT: vmov q1[3], q1[1], r2, r1
; CHECK-NEXT: movlt r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov.32 q1[2], r1
; CHECK-NEXT: vmov.32 q1[3], r1
; CHECK-NEXT: adr r1, .LCPI12_0
; CHECK-NEXT: vldrw.u32 q2, [r1]
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: vbic q2, q2, q1
; CHECK-NEXT: vorr q0, q0, q2
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: vmov r3, s2
; CHECK-NEXT: rsbs.w r2, r2, #-2147483648
; CHECK-NEXT: vmov r2, s3
; CHECK-NEXT: sbcs.w r1, r12, r1
; CHECK-NEXT: sbcs.w r1, r3, r1
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: mov.w r1, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: rsbs.w r3, r3, #-2147483648
; CHECK-NEXT: sbcs.w r2, r12, r2
; CHECK-NEXT: vmov.32 q1[0], r1
; CHECK-NEXT: vmov.32 q1[1], r1
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: rsbs.w r2, r2, #-2147483648
; CHECK-NEXT: sbcs.w r1, r3, r1
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov q1[2], q1[0], r0, r1
; CHECK-NEXT: vmov q1[3], q1[1], r0, r1
; CHECK-NEXT: vmov.32 q1[2], r0
; CHECK-NEXT: vmov.32 q1[3], r0
; CHECK-NEXT: adr r0, .LCPI12_1
; CHECK-NEXT: vldrw.u32 q2, [r0]
; CHECK-NEXT: vand q0, q0, q1
Expand Down Expand Up @@ -241,52 +245,56 @@ define arm_aapcs_vfpcc <2 x i64> @vqmovni64_sminmax(<2 x i64> %s0) {
; CHECK-LABEL: vqmovni64_sminmax:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: mov.w r12, #-1
; CHECK-NEXT: mov.w r3, #-1
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: movs r0, #0
; CHECK-NEXT: vmov r3, s2
; CHECK-NEXT: rsbs.w r2, r2, #-2147483648
; CHECK-NEXT: vmov r2, s3
; CHECK-NEXT: sbcs.w r1, r12, r1
; CHECK-NEXT: sbcs.w r1, r3, r1
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: mov.w r1, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: rsbs.w r3, r3, #-2147483648
; CHECK-NEXT: sbcs.w r2, r12, r2
; CHECK-NEXT: mvn r12, #-2147483648
; CHECK-NEXT: mov.w r2, #0
; CHECK-NEXT: vmov.32 q1[0], r1
; CHECK-NEXT: vmov.32 q1[1], r1
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: rsbs.w r2, r2, #-2147483648
; CHECK-NEXT: sbcs.w r1, r3, r1
; CHECK-NEXT: mvn r3, #-2147483648
; CHECK-NEXT: mov.w r1, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csetm r2, ne
; CHECK-NEXT: vmov q1[2], q1[0], r2, r1
; CHECK-NEXT: vmov q1[3], q1[1], r2, r1
; CHECK-NEXT: movlt r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov.32 q1[2], r1
; CHECK-NEXT: vmov.32 q1[3], r1
; CHECK-NEXT: adr r1, .LCPI13_0
; CHECK-NEXT: vldrw.u32 q2, [r1]
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: vbic q2, q2, q1
; CHECK-NEXT: vorr q0, q0, q2
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: vmov r3, s2
; CHECK-NEXT: subs.w r2, r2, r12
; CHECK-NEXT: vmov r2, s3
; CHECK-NEXT: subs r2, r2, r3
; CHECK-NEXT: sbcs r1, r1, #0
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: mov.w r1, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: subs.w r3, r3, r12
; CHECK-NEXT: sbcs r2, r2, #0
; CHECK-NEXT: vmov.32 q1[0], r1
; CHECK-NEXT: vmov.32 q1[1], r1
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: subs r2, r2, r3
; CHECK-NEXT: sbcs r1, r1, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov q1[2], q1[0], r0, r1
; CHECK-NEXT: vmov q1[3], q1[1], r0, r1
; CHECK-NEXT: vmov.32 q1[2], r0
; CHECK-NEXT: vmov.32 q1[3], r0
; CHECK-NEXT: adr r0, .LCPI13_1
; CHECK-NEXT: vldrw.u32 q2, [r0]
; CHECK-NEXT: vand q0, q0, q1
Expand Down Expand Up @@ -320,23 +328,25 @@ define arm_aapcs_vfpcc <2 x i64> @vqmovni64_umaxmin(<2 x i64> %s0) {
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: vmov.i64 q2, #0xffffffff
; CHECK-NEXT: vmov r3, s2
; CHECK-NEXT: subs.w r1, r1, #-1
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: sbcs r0, r0, #0
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: mov.w r0, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: subs.w r3, r3, #-1
; CHECK-NEXT: sbcs r1, r1, #0
; CHECK-NEXT: vmov.32 q1[0], r0
; CHECK-NEXT: vmov.32 q1[1], r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: subs.w r1, r1, #-1
; CHECK-NEXT: sbcs r0, r0, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q1[2], r0
; CHECK-NEXT: vmov.32 q1[3], r0
; CHECK-NEXT: vbic q2, q2, q1
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: vorr q0, q0, q2
Expand All @@ -354,23 +364,25 @@ define arm_aapcs_vfpcc <2 x i64> @vqmovni64_uminmax(<2 x i64> %s0) {
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: vmov.i64 q2, #0xffffffff
; CHECK-NEXT: vmov r3, s2
; CHECK-NEXT: subs.w r1, r1, #-1
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: sbcs r0, r0, #0
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: mov.w r0, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: subs.w r3, r3, #-1
; CHECK-NEXT: sbcs r1, r1, #0
; CHECK-NEXT: vmov.32 q1[0], r0
; CHECK-NEXT: vmov.32 q1[1], r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: subs.w r1, r1, #-1
; CHECK-NEXT: sbcs r0, r0, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q1[2], r0
; CHECK-NEXT: vmov.32 q1[3], r0
; CHECK-NEXT: vbic q2, q2, q1
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: vorr q0, q0, q2
Expand Down
268 changes: 144 additions & 124 deletions llvm/test/CodeGen/Thumb2/mve-vqshrn.ll
Original file line number Diff line number Diff line change
Expand Up @@ -180,65 +180,71 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @vqshrni64_smaxmin(<2 x i64> %so) {
; CHECK-LABEL: vqshrni64_smaxmin:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: mvn r12, #-2147483648
; CHECK-NEXT: .save {r5, lr}
; CHECK-NEXT: push {r5, lr}
; CHECK-NEXT: vmov r5, s1
; CHECK-NEXT: mvn lr, #-2147483648
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: mov.w lr, #0
; CHECK-NEXT: asrl r2, r1, #3
; CHECK-NEXT: vmov r4, s2
; CHECK-NEXT: subs.w r3, r2, r12
; CHECK-NEXT: sbcs r3, r1, #0
; CHECK-NEXT: mov.w r3, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r3, #1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: asrl r2, r5, #3
; CHECK-NEXT: vmov r3, s3
; CHECK-NEXT: csetm r5, ne
; CHECK-NEXT: asrl r4, r3, #3
; CHECK-NEXT: subs.w r0, r4, r12
; CHECK-NEXT: vmov q2[2], q2[0], r4, r2
; CHECK-NEXT: sbcs r0, r3, #0
; CHECK-NEXT: vmov q2[3], q2[1], r3, r1
; CHECK-NEXT: subs.w r0, r2, lr
; CHECK-NEXT: vmov.32 q2[0], r2
; CHECK-NEXT: sbcs r0, r5, #0
; CHECK-NEXT: vmov.32 q2[1], r5
; CHECK-NEXT: mov.w r0, #0
; CHECK-NEXT: mov.w r2, #-1
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov q0[2], q0[0], r0, r5
; CHECK-NEXT: vmov q0[3], q0[1], r0, r5
; CHECK-NEXT: adr r0, .LCPI12_0
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vbic q1, q1, q0
; CHECK-NEXT: vand q0, q2, q0
; CHECK-NEXT: vorr q0, q0, q1
; CHECK-NEXT: vmov.32 q1[0], r0
; CHECK-NEXT: vmov.32 q1[1], r0
; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: asrl r0, r3, #3
; CHECK-NEXT: subs.w r1, r0, lr
; CHECK-NEXT: vmov.32 q2[2], r0
; CHECK-NEXT: sbcs r1, r3, #0
; CHECK-NEXT: vmov.32 q2[3], r3
; CHECK-NEXT: mov.w r1, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov.32 q1[2], r1
; CHECK-NEXT: vmov.32 q1[3], r1
; CHECK-NEXT: adr r1, .LCPI12_0
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vbic q0, q0, q1
; CHECK-NEXT: vand q1, q2, q1
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: vmov r3, s2
; CHECK-NEXT: rsbs.w r1, r1, #-2147483648
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: sbcs.w r0, r2, r0
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: mov.w r0, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: rsbs.w r3, r3, #-2147483648
; CHECK-NEXT: sbcs.w r1, r2, r1
; CHECK-NEXT: vmov.32 q1[0], r0
; CHECK-NEXT: vmov.32 q1[1], r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: rsbs.w r1, r1, #-2147483648
; CHECK-NEXT: sbcs.w r0, r2, r0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt.w lr, #1
; CHECK-NEXT: cmp.w lr, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
; CHECK-NEXT: movlt.w r12, #1
; CHECK-NEXT: cmp.w r12, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q1[2], r0
; CHECK-NEXT: vmov.32 q1[3], r0
; CHECK-NEXT: adr r0, .LCPI12_1
; CHECK-NEXT: vldrw.u32 q2, [r0]
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: vbic q2, q2, q1
; CHECK-NEXT: vorr q0, q0, q2
; CHECK-NEXT: pop {r4, r5, r7, pc}
; CHECK-NEXT: pop {r5, pc}
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI12_0:
Expand All @@ -263,65 +269,71 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @vqshrni64_sminmax(<2 x i64> %so) {
; CHECK-LABEL: vqshrni64_sminmax:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: mov.w r12, #-1
; CHECK-NEXT: mov.w lr, #-1
; CHECK-NEXT: vmov r2, s0
; CHECK-NEXT: mov.w lr, #0
; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: asrl r2, r1, #3
; CHECK-NEXT: vmov r4, s2
; CHECK-NEXT: rsbs.w r3, r2, #-2147483648
; CHECK-NEXT: sbcs.w r3, r12, r1
; CHECK-NEXT: mov.w r3, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r3, #1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: vmov r3, s3
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: asrl r4, r3, #3
; CHECK-NEXT: rsbs.w r5, r4, #-2147483648
; CHECK-NEXT: vmov q2[2], q2[0], r4, r2
; CHECK-NEXT: sbcs.w r5, r12, r3
; CHECK-NEXT: vmov q2[3], q2[1], r3, r1
; CHECK-NEXT: mov.w r5, #0
; CHECK-NEXT: rsbs.w r0, r2, #-2147483648
; CHECK-NEXT: vmov.32 q2[0], r2
; CHECK-NEXT: sbcs.w r0, lr, r1
; CHECK-NEXT: vmov.32 q2[1], r1
; CHECK-NEXT: mov.w r0, #0
; CHECK-NEXT: mvn r2, #-2147483648
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r5, #1
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: csetm r5, ne
; CHECK-NEXT: vmov q0[2], q0[0], r5, r0
; CHECK-NEXT: vmov q0[3], q0[1], r5, r0
; CHECK-NEXT: adr r0, .LCPI13_0
; CHECK-NEXT: vldrw.u32 q1, [r0]
; CHECK-NEXT: vbic q1, q1, q0
; CHECK-NEXT: vand q0, q2, q0
; CHECK-NEXT: vorr q0, q0, q1
; CHECK-NEXT: movlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q1[0], r0
; CHECK-NEXT: vmov.32 q1[1], r0
; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: asrl r0, r3, #3
; CHECK-NEXT: rsbs.w r4, r0, #-2147483648
; CHECK-NEXT: vmov.32 q2[2], r0
; CHECK-NEXT: sbcs.w r4, lr, r3
; CHECK-NEXT: vmov.32 q2[3], r3
; CHECK-NEXT: mov.w r4, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r4, #1
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: csetm r4, ne
; CHECK-NEXT: vmov.32 q1[2], r4
; CHECK-NEXT: vmov.32 q1[3], r4
; CHECK-NEXT: adr r4, .LCPI13_0
; CHECK-NEXT: vldrw.u32 q0, [r4]
; CHECK-NEXT: vbic q0, q0, q1
; CHECK-NEXT: vand q1, q2, q1
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: vmov r1, s0
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: vmov r3, s2
; CHECK-NEXT: subs r1, r1, r2
; CHECK-NEXT: vmov r1, s3
; CHECK-NEXT: sbcs r0, r0, #0
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: mov.w r0, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: subs r2, r3, r2
; CHECK-NEXT: sbcs r1, r1, #0
; CHECK-NEXT: vmov.32 q1[0], r0
; CHECK-NEXT: vmov.32 q1[1], r0
; CHECK-NEXT: vmov r0, s3
; CHECK-NEXT: subs r1, r1, r2
; CHECK-NEXT: sbcs r0, r0, #0
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt.w lr, #1
; CHECK-NEXT: cmp.w lr, #0
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q1[2], q1[0], r1, r0
; CHECK-NEXT: vmov q1[3], q1[1], r1, r0
; CHECK-NEXT: movlt.w r12, #1
; CHECK-NEXT: cmp.w r12, #0
; CHECK-NEXT: csetm r0, ne
; CHECK-NEXT: vmov.32 q1[2], r0
; CHECK-NEXT: vmov.32 q1[3], r0
; CHECK-NEXT: adr r0, .LCPI13_1
; CHECK-NEXT: vldrw.u32 q2, [r0]
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: vbic q2, q2, q1
; CHECK-NEXT: vorr q0, q0, q2
; CHECK-NEXT: pop {r4, r5, r7, pc}
; CHECK-NEXT: pop {r4, pc}
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI13_0:
Expand All @@ -346,37 +358,41 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @vqshrni64_umaxmin(<2 x i64> %so) {
; CHECK-LABEL: vqshrni64_umaxmin:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: vmov r5, s1
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: .save {r5, r6, r7, lr}
; CHECK-NEXT: push {r5, r6, r7, lr}
; CHECK-NEXT: vmov r7, s1
; CHECK-NEXT: movs r1, #0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: vmov.i64 q1, #0xffffffff
; CHECK-NEXT: lsrl r0, r5, #3
; CHECK-NEXT: vmov r4, s2
; CHECK-NEXT: subs.w r3, r0, #-1
; CHECK-NEXT: sbcs r3, r5, #0
; CHECK-NEXT: mov.w r3, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r3, #1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: lsrl r0, r7, #3
; CHECK-NEXT: vmov r3, s3
; CHECK-NEXT: csetm r12, ne
; CHECK-NEXT: lsrl r4, r3, #3
; CHECK-NEXT: subs.w r1, r4, #-1
; CHECK-NEXT: vmov q2[2], q2[0], r4, r0
; CHECK-NEXT: sbcs r1, r3, #0
; CHECK-NEXT: subs.w r2, r0, #-1
; CHECK-NEXT: vmov.32 q2[0], r0
; CHECK-NEXT: sbcs r2, r7, #0
; CHECK-NEXT: vmov.32 q2[1], r7
; CHECK-NEXT: mov.w r2, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: vmov q2[3], q2[1], r3, r5
; CHECK-NEXT: csetm r2, ne
; CHECK-NEXT: vmov.32 q1[0], r2
; CHECK-NEXT: vmov.32 q1[1], r2
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: lsrl r2, r3, #3
; CHECK-NEXT: vmov.i64 q0, #0xffffffff
; CHECK-NEXT: subs.w r5, r2, #-1
; CHECK-NEXT: vmov.32 q2[2], r2
; CHECK-NEXT: sbcs r5, r3, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: vmov.32 q2[3], r3
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q0[2], q0[0], r1, r12
; CHECK-NEXT: vmov q0[3], q0[1], r1, r12
; CHECK-NEXT: vbic q1, q1, q0
; CHECK-NEXT: vand q0, q2, q0
; CHECK-NEXT: vorr q0, q0, q1
; CHECK-NEXT: pop {r4, r5, r7, pc}
; CHECK-NEXT: vmov.32 q1[2], r1
; CHECK-NEXT: vmov.32 q1[3], r1
; CHECK-NEXT: vbic q0, q0, q1
; CHECK-NEXT: vand q1, q2, q1
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: pop {r5, r6, r7, pc}
entry:
%s0 = lshr <2 x i64> %so, <i64 3, i64 3>
%c1 = icmp ult <2 x i64> %s0, <i64 4294967295, i64 4294967295>
Expand All @@ -387,37 +403,41 @@ entry:
define arm_aapcs_vfpcc <2 x i64> @vqshrni64_uminmax(<2 x i64> %so) {
; CHECK-LABEL: vqshrni64_uminmax:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: vmov r5, s1
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: .save {r5, r6, r7, lr}
; CHECK-NEXT: push {r5, r6, r7, lr}
; CHECK-NEXT: vmov r7, s1
; CHECK-NEXT: movs r1, #0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: vmov.i64 q1, #0xffffffff
; CHECK-NEXT: lsrl r0, r5, #3
; CHECK-NEXT: vmov r4, s2
; CHECK-NEXT: subs.w r3, r0, #-1
; CHECK-NEXT: sbcs r3, r5, #0
; CHECK-NEXT: mov.w r3, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r3, #1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: lsrl r0, r7, #3
; CHECK-NEXT: vmov r3, s3
; CHECK-NEXT: csetm r12, ne
; CHECK-NEXT: lsrl r4, r3, #3
; CHECK-NEXT: subs.w r1, r4, #-1
; CHECK-NEXT: vmov q2[2], q2[0], r4, r0
; CHECK-NEXT: sbcs r1, r3, #0
; CHECK-NEXT: subs.w r2, r0, #-1
; CHECK-NEXT: vmov.32 q2[0], r0
; CHECK-NEXT: sbcs r2, r7, #0
; CHECK-NEXT: vmov.32 q2[1], r7
; CHECK-NEXT: mov.w r2, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: vmov q2[3], q2[1], r3, r5
; CHECK-NEXT: csetm r2, ne
; CHECK-NEXT: vmov.32 q1[0], r2
; CHECK-NEXT: vmov.32 q1[1], r2
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: lsrl r2, r3, #3
; CHECK-NEXT: vmov.i64 q0, #0xffffffff
; CHECK-NEXT: subs.w r5, r2, #-1
; CHECK-NEXT: vmov.32 q2[2], r2
; CHECK-NEXT: sbcs r5, r3, #0
; CHECK-NEXT: it lo
; CHECK-NEXT: movlo r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: vmov.32 q2[3], r3
; CHECK-NEXT: csetm r1, ne
; CHECK-NEXT: vmov q0[2], q0[0], r1, r12
; CHECK-NEXT: vmov q0[3], q0[1], r1, r12
; CHECK-NEXT: vbic q1, q1, q0
; CHECK-NEXT: vand q0, q2, q0
; CHECK-NEXT: vorr q0, q0, q1
; CHECK-NEXT: pop {r4, r5, r7, pc}
; CHECK-NEXT: vmov.32 q1[2], r1
; CHECK-NEXT: vmov.32 q1[3], r1
; CHECK-NEXT: vbic q0, q0, q1
; CHECK-NEXT: vand q1, q2, q1
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: pop {r5, r6, r7, pc}
entry:
%s0 = lshr <2 x i64> %so, <i64 3, i64 3>
%c2 = icmp ult <2 x i64> %s0, <i64 4294967295, i64 4294967295>
Expand Down
50 changes: 28 additions & 22 deletions llvm/test/CodeGen/Thumb2/mve-vst2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,21 @@
define void @vst2_v2i32(<2 x i32> *%src, <4 x i32> *%dst) {
; CHECK-LABEL: vst2_v2i32:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrd r12, r3, [r0]
; CHECK-NEXT: ldrd r2, r0, [r0, #8]
; CHECK-NEXT: vmov q0[2], q0[0], r3, r12
; CHECK-NEXT: vmov.f64 d2, d1
; CHECK-NEXT: vmov q2[2], q2[0], r0, r2
; CHECK-NEXT: vmov.f32 s5, s3
; CHECK-NEXT: vmov.f32 s2, s8
; CHECK-NEXT: vmov.f32 s3, s9
; CHECK-NEXT: vmov.f32 s6, s10
; CHECK-NEXT: vmov.f32 s1, s2
; CHECK-NEXT: vmov.f32 s7, s11
; CHECK-NEXT: ldrd r2, r12, [r0]
; CHECK-NEXT: ldrd r3, r0, [r0, #8]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: vmov.32 q1[0], r3
; CHECK-NEXT: vmov.32 q0[2], r12
; CHECK-NEXT: vmov.f64 d4, d1
; CHECK-NEXT: vmov.32 q1[2], r0
; CHECK-NEXT: vmov.f32 s9, s3
; CHECK-NEXT: vmov.f32 s2, s4
; CHECK-NEXT: vmov.f32 s3, s6
; CHECK-NEXT: vmov.f32 s3, s5
; CHECK-NEXT: vmov.f32 s10, s6
; CHECK-NEXT: vmov.f32 s1, s2
; CHECK-NEXT: vmov.f32 s11, s7
; CHECK-NEXT: vmov.f32 s2, s8
; CHECK-NEXT: vmov.f32 s3, s10
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
Expand Down Expand Up @@ -111,12 +113,14 @@ entry:
define void @vst2_v2i16(<2 x i16> *%src, <4 x i16> *%dst) {
; CHECK-LABEL: vst2_v2i16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrh r2, [r0]
; CHECK-NEXT: ldrh r3, [r0, #2]
; CHECK-NEXT: ldrh.w r12, [r0, #4]
; CHECK-NEXT: ldrh r0, [r0, #6]
; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
; CHECK-NEXT: vmov q0[3], q0[1], r0, r12
; CHECK-NEXT: ldrh r3, [r0]
; CHECK-NEXT: ldrh r2, [r0, #4]
; CHECK-NEXT: vmov.32 q0[0], r3
; CHECK-NEXT: ldrh.w r12, [r0, #6]
; CHECK-NEXT: ldrh r0, [r0, #2]
; CHECK-NEXT: vmov.32 q0[1], r2
; CHECK-NEXT: vmov.32 q0[2], r0
; CHECK-NEXT: vmov.32 q0[3], r12
; CHECK-NEXT: vstrh.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
Expand Down Expand Up @@ -193,11 +197,13 @@ define void @vst2_v2i8(<2 x i8> *%src, <4 x i8> *%dst) {
; CHECK-LABEL: vst2_v2i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrb r2, [r0]
; CHECK-NEXT: ldrb r3, [r0, #1]
; CHECK-NEXT: ldrb.w r12, [r0, #2]
; CHECK-NEXT: ldrb r3, [r0, #2]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrb.w r12, [r0, #1]
; CHECK-NEXT: vmov.32 q0[1], r3
; CHECK-NEXT: ldrb r0, [r0, #3]
; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
; CHECK-NEXT: vmov q0[3], q0[1], r0, r12
; CHECK-NEXT: vmov.32 q0[2], r12
; CHECK-NEXT: vmov.32 q0[3], r0
; CHECK-NEXT: vstrb.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
Expand Down
47 changes: 26 additions & 21 deletions llvm/test/CodeGen/Thumb2/mve-vst3.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@ define void @vst3_v2i32(<2 x i32> *%src, <6 x i32> *%dst) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: ldrd lr, r12, [r0]
; CHECK-NEXT: ldrd r3, r2, [r0, #8]
; CHECK-NEXT: ldm.w r0, {r2, r3, r12, lr}
; CHECK-NEXT: ldrd r4, r0, [r0, #16]
; CHECK-NEXT: vmov q1[2], q1[0], r3, lr
; CHECK-NEXT: vmov.32 q1[0], r2
; CHECK-NEXT: vmov.32 q1[1], r3
; CHECK-NEXT: vmov.32 q0[0], r4
; CHECK-NEXT: vmov q1[3], q1[1], r2, r12
; CHECK-NEXT: vmov.32 q1[2], r12
; CHECK-NEXT: vmov.32 q0[1], r0
; CHECK-NEXT: vmov.32 q1[3], lr
; CHECK-NEXT: vmov.f32 s8, s7
; CHECK-NEXT: vmov.f32 s10, s1
; CHECK-NEXT: vmov r2, s8
Expand Down Expand Up @@ -301,16 +302,18 @@ define void @vst3_v2i16(<2 x i16> *%src, <6 x i16> *%dst) {
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: ldrh r2, [r0, #6]
; CHECK-NEXT: ldrh.w lr, [r0, #4]
; CHECK-NEXT: ldrh r3, [r0, #4]
; CHECK-NEXT: ldrh.w r12, [r0, #8]
; CHECK-NEXT: vmov.16 q0[4], r2
; CHECK-NEXT: ldrh r3, [r0, #2]
; CHECK-NEXT: vmov q1[2], q1[0], r2, lr
; CHECK-NEXT: ldrh.w lr, [r0, #2]
; CHECK-NEXT: vmov.32 q1[0], r3
; CHECK-NEXT: ldrh r4, [r0]
; CHECK-NEXT: vmov.32 q1[2], r2
; CHECK-NEXT: ldrh r0, [r0, #10]
; CHECK-NEXT: vmov.16 q0[5], r0
; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: vmov q0[2], q0[0], r3, r4
; CHECK-NEXT: vmov.32 q0[0], r4
; CHECK-NEXT: vmov.32 q0[2], lr
; CHECK-NEXT: vmov.f32 s1, s4
; CHECK-NEXT: vdup.32 q1, r12
; CHECK-NEXT: vmov.f32 s3, s2
Expand Down Expand Up @@ -686,8 +689,9 @@ define void @vst3_v2i8(<2 x i8> *%src, <6 x i8> *%dst) {
; CHECK-NEXT: ldrb r2, [r0]
; CHECK-NEXT: movs r6, #0
; CHECK-NEXT: ldrb r3, [r0, #1]
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: ldrb.w r12, [r0, #2]
; CHECK-NEXT: vmov q0[2], q0[0], r3, r2
; CHECK-NEXT: vmov.32 q0[2], r3
; CHECK-NEXT: ldrb.w lr, [r0, #3]
; CHECK-NEXT: vmov r4, s0
; CHECK-NEXT: ldrb r5, [r0, #5]
Expand Down Expand Up @@ -1457,21 +1461,23 @@ entry:
define void @vst3_v4f16(<4 x half> *%src, <12 x half> *%dst) {
; CHECK-LABEL: vst3_v4f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: ldrd lr, r12, [r0]
; CHECK-NEXT: ldrd r3, r2, [r0, #8]
; CHECK-NEXT: ldrd r4, r0, [r0, #16]
; CHECK-NEXT: vmov q0[2], q0[0], r3, lr
; CHECK-NEXT: vmov q0[3], q0[1], r2, r12
; CHECK-NEXT: vmov.32 q1[0], r4
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
; CHECK-NEXT: ldm.w r0, {r2, r3, r12, lr}
; CHECK-NEXT: vmov.32 q0[0], r2
; CHECK-NEXT: vmov.32 q0[1], r3
; CHECK-NEXT: vmov.32 q0[2], r12
; CHECK-NEXT: vmov.32 q0[3], lr
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: vmov.32 q1[1], r0
; CHECK-NEXT: vmovx.f16 s12, s0
; CHECK-NEXT: vmov r2, s2
; CHECK-NEXT: vmov.16 q2[0], r3
; CHECK-NEXT: vmov.16 q2[1], r2
; CHECK-NEXT: ldrd r2, r0, [r0, #16]
; CHECK-NEXT: vmovx.f16 s0, s3
; CHECK-NEXT: vmov.32 q1[0], r2
; CHECK-NEXT: vmov.32 q1[1], r0
; CHECK-NEXT: vmov r0, s4
; CHECK-NEXT: vmovx.f16 s12, s0
; CHECK-NEXT: vmov.16 q2[2], r0
; CHECK-NEXT: vmov r0, s12
; CHECK-NEXT: vmovx.f16 s12, s2
Expand All @@ -1480,7 +1486,6 @@ define void @vst3_v4f16(<4 x half> *%src, <12 x half> *%dst) {
; CHECK-NEXT: vmovx.f16 s12, s4
; CHECK-NEXT: vmov.16 q2[4], r0
; CHECK-NEXT: vmov r0, s12
; CHECK-NEXT: vmovx.f16 s0, s3
; CHECK-NEXT: vmov.16 q2[5], r0
; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: vmov.16 q2[6], r0
Expand All @@ -1500,7 +1505,7 @@ define void @vst3_v4f16(<4 x half> *%src, <12 x half> *%dst) {
; CHECK-NEXT: vmov r0, s9
; CHECK-NEXT: vmov r2, s8
; CHECK-NEXT: strd r2, r0, [r1, #16]
; CHECK-NEXT: pop {r4, pc}
; CHECK-NEXT: pop {r7, pc}
entry:
%s1 = getelementptr <4 x half>, <4 x half>* %src, i32 0
%l1 = load <4 x half>, <4 x half>* %s1, align 4
Expand Down
Loading