Skip to content

Commit f953b3c

Browse files
committed
[ARM] Testing for stored extracted values. NFC
1 parent 183fe9d commit f953b3c

File tree

1 file changed

+216
-0
lines changed

1 file changed

+216
-0
lines changed
Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s
3+
4+
; Adds two <8 x half> vectors, extracts lane 1 of the sum, stores that half to
; %p and also returns it as a scalar. The define carries no calling-convention
; attribute (the "_sf" suffix presumably stands for the soft-float ABI -
; TODO confirm); the expected code moves the result into r0 before returning.
; The expected prologue sets up r7 as frame pointer and clears the low four
; bits of sp (bfc r4, #0, #4); r0/r1 are then stored to the stack and reloaded
; as a q register. Lane 1 is obtained with vmovx.f16 and stored with vstr.16.
define half @extret1_f16_sf(<8 x half> %a, <8 x half> %b, half* nocapture %p) {
5+
; CHECK-LABEL: extret1_f16_sf:
6+
; CHECK: @ %bb.0:
7+
; CHECK-NEXT: .save {r4, r6, r7, lr}
8+
; CHECK-NEXT: push {r4, r6, r7, lr}
9+
; CHECK-NEXT: .setfp r7, sp, #8
10+
; CHECK-NEXT: add r7, sp, #8
11+
; CHECK-NEXT: .pad #16
12+
; CHECK-NEXT: sub sp, #16
13+
; CHECK-NEXT: mov r4, sp
14+
; CHECK-NEXT: bfc r4, #0, #4
15+
; CHECK-NEXT: mov sp, r4
16+
; CHECK-NEXT: strd r0, r1, [sp]
17+
; CHECK-NEXT: add.w r0, r7, #8
18+
; CHECK-NEXT: mov r1, sp
19+
; CHECK-NEXT: vldrw.u32 q0, [r0]
20+
; CHECK-NEXT: vldrw.u32 q1, [r1]
21+
; CHECK-NEXT: sub.w r4, r7, #8
22+
; CHECK-NEXT: ldr r0, [r7, #24]
23+
; CHECK-NEXT: vadd.f16 q0, q1, q0
24+
; CHECK-NEXT: vmovx.f16 s0, s0
25+
; CHECK-NEXT: vstr.16 s0, [r0]
26+
; CHECK-NEXT: vmov r0, s0
27+
; CHECK-NEXT: mov sp, r4
28+
; CHECK-NEXT: pop {r4, r6, r7, pc}
29+
%c = fadd <8 x half> %a, %b
30+
%e = extractelement <8 x half> %c, i32 1
31+
store half %e, half* %p, align 2
32+
ret half %e
33+
}
34+
35+
; Adds two <8 x half> vectors, extracts lane 4 of the sum, stores that half to
; %p and also returns it as a scalar. The define carries no calling-convention
; attribute (the "_sf" suffix presumably stands for the soft-float ABI -
; TODO confirm). In the expected code d1 is built from r2/r3, the other
; operand is loaded from [sp], and the store address is loaded from
; [sp, #16]. Lane 4 is stored straight from s2 with vstr.16 and then moved to
; r0 for the return; no frame setup is expected here.
define half @extret4_f16_sf(<8 x half> %a, <8 x half> %b, half* nocapture %p) {
36+
; CHECK-LABEL: extret4_f16_sf:
37+
; CHECK: @ %bb.0:
38+
; CHECK-NEXT: mov r0, sp
39+
; CHECK-NEXT: vmov d1, r2, r3
40+
; CHECK-NEXT: vldrw.u32 q1, [r0]
41+
; CHECK-NEXT: ldr r0, [sp, #16]
42+
; CHECK-NEXT: vadd.f16 q0, q0, q1
43+
; CHECK-NEXT: vstr.16 s2, [r0]
44+
; CHECK-NEXT: vmov r0, s2
45+
; CHECK-NEXT: bx lr
46+
%c = fadd <8 x half> %a, %b
47+
%e = extractelement <8 x half> %c, i32 4
48+
store half %e, half* %p, align 2
49+
ret half %e
50+
}
51+
52+
; arm_aapcs_vfpcc variant: adds two <8 x half> vectors, extracts lane 1 of the
; sum, stores that half to %p and also returns it as a scalar.
; The expected code operates directly on q0/q1, extracts lane 1 into s0 with
; vmovx.f16 and stores from s0 to [r0]; no further move precedes bx lr.
define arm_aapcs_vfpcc half @extret1_f16_hf(<8 x half> %a, <8 x half> %b, half* nocapture %p) {
53+
; CHECK-LABEL: extret1_f16_hf:
54+
; CHECK: @ %bb.0:
55+
; CHECK-NEXT: vadd.f16 q0, q0, q1
56+
; CHECK-NEXT: vmovx.f16 s0, s0
57+
; CHECK-NEXT: vstr.16 s0, [r0]
58+
; CHECK-NEXT: bx lr
59+
%c = fadd <8 x half> %a, %b
60+
%e = extractelement <8 x half> %c, i32 1
61+
store half %e, half* %p, align 2
62+
ret half %e
63+
}
64+
65+
; arm_aapcs_vfpcc variant: adds two <8 x half> vectors, extracts lane 4 of the
; sum, stores that half to %p and also returns it as a scalar.
; The expected code copies s2 into s0 with vmov.f32 and stores lane 4 from s2
; with vstr.16, so the store and the returned value both come from s2.
define arm_aapcs_vfpcc half @extret4_f16_hf(<8 x half> %a, <8 x half> %b, half* nocapture %p) {
66+
; CHECK-LABEL: extret4_f16_hf:
67+
; CHECK: @ %bb.0:
68+
; CHECK-NEXT: vadd.f16 q0, q0, q1
69+
; CHECK-NEXT: vmov.f32 s0, s2
70+
; CHECK-NEXT: vstr.16 s2, [r0]
71+
; CHECK-NEXT: bx lr
72+
%c = fadd <8 x half> %a, %b
73+
%e = extractelement <8 x half> %c, i32 4
74+
store half %e, half* %p, align 2
75+
ret half %e
76+
}
77+
78+
; arm_aapcs_vfpcc variant returning a vector: adds two <8 x half> vectors,
; extracts lane 1 of the sum, stores that half to %p, then splats it to all
; eight lanes (insertelement into lane 0 followed by a shufflevector with an
; all-zero mask) and returns the splat.
; The expected code extracts the lane twice: vmovx.f16 into s4 for the
; vstr.16, and vmov.u16 r0, q0[1] to feed vdup.16.
define arm_aapcs_vfpcc <8 x half> @extret1_v8f16_hf(<8 x half> %a, <8 x half> %b, half* nocapture %p) {
79+
; CHECK-LABEL: extret1_v8f16_hf:
80+
; CHECK: @ %bb.0:
81+
; CHECK-NEXT: vadd.f16 q0, q0, q1
82+
; CHECK-NEXT: vmovx.f16 s4, s0
83+
; CHECK-NEXT: vstr.16 s4, [r0]
84+
; CHECK-NEXT: vmov.u16 r0, q0[1]
85+
; CHECK-NEXT: vdup.16 q0, r0
86+
; CHECK-NEXT: bx lr
87+
%c = fadd <8 x half> %a, %b
88+
%e = extractelement <8 x half> %c, i32 1
89+
store half %e, half* %p, align 2
90+
%i = insertelement <8 x half> undef, half %e, i32 0
91+
%s = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
92+
ret <8 x half> %s
93+
}
94+
95+
; arm_aapcs_vfpcc variant returning a vector: adds two <8 x half> vectors,
; extracts lane 4 of the sum, stores that half to %p, then splats it to all
; eight lanes (insertelement into lane 0 followed by a shufflevector with an
; all-zero mask) and returns the splat.
; The expected code stores lane 4 directly from s2 with vstr.16, then reads
; the same lane with vmov.u16 r0, q0[4] to feed vdup.16.
define arm_aapcs_vfpcc <8 x half> @extret4_v8f16_hf(<8 x half> %a, <8 x half> %b, half* nocapture %p) {
96+
; CHECK-LABEL: extret4_v8f16_hf:
97+
; CHECK: @ %bb.0:
98+
; CHECK-NEXT: vadd.f16 q0, q0, q1
99+
; CHECK-NEXT: vstr.16 s2, [r0]
100+
; CHECK-NEXT: vmov.u16 r0, q0[4]
101+
; CHECK-NEXT: vdup.16 q0, r0
102+
; CHECK-NEXT: bx lr
103+
%c = fadd <8 x half> %a, %b
104+
%e = extractelement <8 x half> %c, i32 4
105+
store half %e, half* %p, align 2
106+
%i = insertelement <8 x half> undef, half %e, i32 0
107+
%s = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
108+
ret <8 x half> %s
109+
}
110+
111+
112+
; Adds two <4 x float> vectors, extracts lane 1 of the sum, stores that float
; to %p and also returns it as a scalar. The define carries no
; calling-convention attribute (the "_sf" suffix presumably stands for the
; soft-float ABI - TODO confirm); the expected code moves s1 into r0 for the
; return and stores s1 through r1.
; The expected prologue sets up r7 as frame pointer and clears the low four
; bits of sp (bfc r4, #0, #4); r0/r1 are then stored to the stack and reloaded
; as a q register.
define float @extret1_f32_sf(<4 x float> %a, <4 x float> %b, float* nocapture %p) {
113+
; CHECK-LABEL: extret1_f32_sf:
114+
; CHECK: @ %bb.0:
115+
; CHECK-NEXT: .save {r4, r6, r7, lr}
116+
; CHECK-NEXT: push {r4, r6, r7, lr}
117+
; CHECK-NEXT: .setfp r7, sp, #8
118+
; CHECK-NEXT: add r7, sp, #8
119+
; CHECK-NEXT: .pad #16
120+
; CHECK-NEXT: sub sp, #16
121+
; CHECK-NEXT: mov r4, sp
122+
; CHECK-NEXT: bfc r4, #0, #4
123+
; CHECK-NEXT: mov sp, r4
124+
; CHECK-NEXT: strd r0, r1, [sp]
125+
; CHECK-NEXT: add.w r0, r7, #8
126+
; CHECK-NEXT: mov r1, sp
127+
; CHECK-NEXT: vldrw.u32 q0, [r0]
128+
; CHECK-NEXT: vldrw.u32 q1, [r1]
129+
; CHECK-NEXT: ldr r1, [r7, #24]
130+
; CHECK-NEXT: sub.w r4, r7, #8
131+
; CHECK-NEXT: vadd.f32 q0, q1, q0
132+
; CHECK-NEXT: vmov r0, s1
133+
; CHECK-NEXT: vstr s1, [r1]
134+
; CHECK-NEXT: mov sp, r4
135+
; CHECK-NEXT: pop {r4, r6, r7, pc}
136+
%c = fadd <4 x float> %a, %b
137+
%e = extractelement <4 x float> %c, i32 1
138+
store float %e, float* %p, align 4
139+
ret float %e
140+
}
141+
142+
; Adds two <4 x float> vectors, extracts lane 2 of the sum, stores that float
; to %p and also returns it as a scalar. The define carries no
; calling-convention attribute (the "_sf" suffix presumably stands for the
; soft-float ABI - TODO confirm). In the expected code d1 is built from
; r2/r3, the other operand is loaded from [sp], and the store address is
; loaded from [sp, #16]. Lane 2 (s2) is moved to r0 for the return and stored
; with vstr; no frame setup is expected here.
define float @extret2_f32_sf(<4 x float> %a, <4 x float> %b, float* nocapture %p) {
143+
; CHECK-LABEL: extret2_f32_sf:
144+
; CHECK: @ %bb.0:
145+
; CHECK-NEXT: mov r0, sp
146+
; CHECK-NEXT: vmov d1, r2, r3
147+
; CHECK-NEXT: vldrw.u32 q1, [r0]
148+
; CHECK-NEXT: ldr r1, [sp, #16]
149+
; CHECK-NEXT: vadd.f32 q0, q0, q1
150+
; CHECK-NEXT: vmov r0, s2
151+
; CHECK-NEXT: vstr s2, [r1]
152+
; CHECK-NEXT: bx lr
153+
%c = fadd <4 x float> %a, %b
154+
%e = extractelement <4 x float> %c, i32 2
155+
store float %e, float* %p, align 4
156+
ret float %e
157+
}
158+
159+
; arm_aapcs_vfpcc variant: adds two <4 x float> vectors, extracts lane 1 of
; the sum, stores that float to %p and also returns it as a scalar.
; The expected code copies s1 into s0 with vmov.f32 and stores lane 1 from s1
; with vstr, so the store and the returned value both come from s1.
define arm_aapcs_vfpcc float @extret1_f32_hf(<4 x float> %a, <4 x float> %b, float* nocapture %p) {
160+
; CHECK-LABEL: extret1_f32_hf:
161+
; CHECK: @ %bb.0:
162+
; CHECK-NEXT: vadd.f32 q0, q0, q1
163+
; CHECK-NEXT: vmov.f32 s0, s1
164+
; CHECK-NEXT: vstr s1, [r0]
165+
; CHECK-NEXT: bx lr
166+
%c = fadd <4 x float> %a, %b
167+
%e = extractelement <4 x float> %c, i32 1
168+
store float %e, float* %p, align 4
169+
ret float %e
170+
}
171+
172+
173+
; arm_aapcs_vfpcc variant: adds two <4 x float> vectors, extracts lane 2 of
; the sum, stores that float to %p and also returns it as a scalar.
; The expected code copies s2 into s0 with vmov.f32 and stores lane 2 from s2
; with vstr, so the store and the returned value both come from s2.
define arm_aapcs_vfpcc float @extret2_f32_hf(<4 x float> %a, <4 x float> %b, float* nocapture %p) {
174+
; CHECK-LABEL: extret2_f32_hf:
175+
; CHECK: @ %bb.0:
176+
; CHECK-NEXT: vadd.f32 q0, q0, q1
177+
; CHECK-NEXT: vmov.f32 s0, s2
178+
; CHECK-NEXT: vstr s2, [r0]
179+
; CHECK-NEXT: bx lr
180+
%c = fadd <4 x float> %a, %b
181+
%e = extractelement <4 x float> %c, i32 2
182+
store float %e, float* %p, align 4
183+
ret float %e
184+
}
185+
186+
; arm_aapcs_vfpcc variant returning a vector: adds two <4 x float> vectors,
; extracts lane 1 of the sum, stores that float to %p, then splats it to all
; four lanes (insertelement into lane 0 followed by a shufflevector with an
; all-zero mask) and returns the splat.
; The expected code computes the sum into q1, moves s5 to r1 for vdup.32 into
; q0, and stores s5 through r0.
define arm_aapcs_vfpcc <4 x float> @extret1_v4f32_hf(<4 x float> %a, <4 x float> %b, float* nocapture %p) {
187+
; CHECK-LABEL: extret1_v4f32_hf:
188+
; CHECK: @ %bb.0:
189+
; CHECK-NEXT: vadd.f32 q1, q0, q1
190+
; CHECK-NEXT: vmov r1, s5
191+
; CHECK-NEXT: vstr s5, [r0]
192+
; CHECK-NEXT: vdup.32 q0, r1
193+
; CHECK-NEXT: bx lr
194+
%c = fadd <4 x float> %a, %b
195+
%e = extractelement <4 x float> %c, i32 1
196+
store float %e, float* %p, align 4
197+
%i = insertelement <4 x float> undef, float %e, i32 0
198+
%s = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
199+
ret <4 x float> %s
200+
}
201+
202+
; arm_aapcs_vfpcc variant returning a vector: adds two <4 x float> vectors,
; extracts lane 2 of the sum, stores that float to %p, then splats it to all
; four lanes (insertelement into lane 0 followed by a shufflevector with an
; all-zero mask) and returns the splat.
; The expected code computes the sum into q1, moves s6 to r1 for vdup.32 into
; q0, and stores s6 through r0.
define arm_aapcs_vfpcc <4 x float> @extret2_v4f32_hf(<4 x float> %a, <4 x float> %b, float* nocapture %p) {
203+
; CHECK-LABEL: extret2_v4f32_hf:
204+
; CHECK: @ %bb.0:
205+
; CHECK-NEXT: vadd.f32 q1, q0, q1
206+
; CHECK-NEXT: vmov r1, s6
207+
; CHECK-NEXT: vstr s6, [r0]
208+
; CHECK-NEXT: vdup.32 q0, r1
209+
; CHECK-NEXT: bx lr
210+
%c = fadd <4 x float> %a, %b
211+
%e = extractelement <4 x float> %c, i32 2
212+
store float %e, float* %p, align 4
213+
%i = insertelement <4 x float> undef, float %e, i32 0
214+
%s = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
215+
ret <4 x float> %s
216+
}

0 commit comments

Comments
 (0)