; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2,+fma | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fma | FileCheck %s --check-prefix=X64

; Generic FMA intrinsics used by the tests below (target-independent
; llvm.fma.* forms rather than the legacy llvm.x86.fma.* intrinsics).
declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare float @llvm.fma.f32(float, float, float)
declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)

; This test checks combinations of FNEG and FMA intrinsics
; fma(a, b, -c) should combine to a single vfmsub.
define <8 x float> @test1(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; X32-LABEL: test1:
; X32:       # %bb.0:
; X32-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
; X32-NEXT:    retl
;
; X64-LABEL: test1:
; X64:       # %bb.0:
; X64-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
; X64-NEXT:    retq
  %sub.i = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %c
  %r = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %sub.i) #2
  ret <8 x float> %r
}
; -(fma(a, b, c)) should combine to a single vfnmsub.
define <4 x float> @test2(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; X32-LABEL: test2:
; X32:       # %bb.0:
; X32-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; X32-NEXT:    retl
;
; X64-LABEL: test2:
; X64:       # %bb.0:
; X64-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; X64-NEXT:    retq
  %t0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2
  %sub.i = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %t0
  ret <4 x float> %sub.i
}
; Scalar fma(a0, -b0, c0) becomes vfnmadd*ss; the outer vector negation
; of the insert cannot fold into the scalar op, so an xor remains.
define <4 x float> @test3(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; X32-LABEL: test3:
; X32:       # %bb.0:
; X32-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
; X32-NEXT:    vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X32-NEXT:    vxorps %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test3:
; X64:       # %bb.0:
; X64-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
; X64-NEXT:    vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X64-NEXT:    vxorps %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %a0 = extractelement <4 x float> %a, i64 0
  %b0 = extractelement <4 x float> %b, i64 0
  %c0 = extractelement <4 x float> %c, i64 0
  %negb0 = fneg float %b0
  %t0 = tail call float @llvm.fma.f32(float %a0, float %negb0, float %c0) #2
  %i = insertelement <4 x float> %a, float %t0, i64 0
  %sub.i = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %i
  ret <4 x float> %sub.i
}
; -(fma(a, b, -c)) == -(a*b) + c should combine to a single vfnmadd.
define <8 x float> @test4(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; X32-LABEL: test4:
; X32:       # %bb.0:
; X32-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
; X32-NEXT:    retl
;
; X64-LABEL: test4:
; X64:       # %bb.0:
; X64-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
; X64-NEXT:    retq
  %negc = fneg <8 x float> %c
  %t0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %negc) #2
  %sub.i = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %t0
  ret <8 x float> %sub.i
}
; The double negation of %c (fsub from -0.0 then fneg) cancels out,
; so this should combine to a plain vfmadd.
define <8 x float> @test5(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; X32-LABEL: test5:
; X32:       # %bb.0:
; X32-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
; X32-NEXT:    retl
;
; X64-LABEL: test5:
; X64:       # %bb.0:
; X64-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
; X64-NEXT:    retq
  %sub.c = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %c
  %negsubc = fneg <8 x float> %sub.c
  %t0 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %negsubc) #2
  ret <8 x float> %t0
}
; Same as test2 but for <2 x double>: -(fma(a, b, c)) -> vfnmsub pd.
define <2 x double> @test6(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; X32-LABEL: test6:
; X32:       # %bb.0:
; X32-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; X32-NEXT:    retl
;
; X64-LABEL: test6:
; X64:       # %bb.0:
; X64-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; X64-NEXT:    retq
  %t0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) #2
  %sub.i = fsub <2 x double> <double -0.0, double -0.0>, %t0
  ret <2 x double> %sub.i
}
; Negation applied before the splat (insert -> fsub -> shuffle) should
; still fold into the multiplicand, giving a broadcast plus vfnmadd.
define <8 x float> @test7(float %a, <8 x float> %b, <8 x float> %c) {
; X32-LABEL: test7:
; X32:       # %bb.0:
; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm2
; X32-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm1
; X32-NEXT:    retl
;
; X64-LABEL: test7:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
; X64-NEXT:    retq
  %t0 = insertelement <8 x float> undef, float %a, i32 0
  %t1 = fsub <8 x float> <float -0.0, float undef, float undef, float undef, float undef, float undef, float undef, float undef>, %t0
  %t2 = shufflevector <8 x float> %t1, <8 x float> undef, <8 x i32> zeroinitializer
  %t3 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %t2, <8 x float> %b, <8 x float> %c)
  ret <8 x float> %t3

}
; Same as test7 but the scalar is negated before insertion; should also
; fold to a broadcast plus vfnmadd.
define <8 x float> @test8(float %a, <8 x float> %b, <8 x float> %c) {
; X32-LABEL: test8:
; X32:       # %bb.0:
; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm2
; X32-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm1
; X32-NEXT:    retl
;
; X64-LABEL: test8:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
; X64-NEXT:    retq
  %t0 = fsub float -0.0, %a
  %t1 = insertelement <8 x float> undef, float %t0, i32 0
  %t2 = shufflevector <8 x float> %t1, <8 x float> undef, <8 x i32> zeroinitializer
  %t3 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %t2, <8 x float> %b, <8 x float> %c)
  ret <8 x float> %t3
}