@@ -271,10 +271,38 @@ LIBC_INLINE constexpr simd<T, N> select(simd<bool, N> m, simd<T, N> x,
271
271
}
272
272
273
273
namespace internal {
274
// Shuffle-based helper used by the width-driven extend() overload below.
// Produces a vector with sizeof...(I) lanes from `x` by shuffling `x` with
// itself: output lane I uses shuffle index I when I < O, and index -1
// otherwise.
//   T - element type of the vector.
//   N - lane count of the input `x`.
//   O - threshold applied to every index in I (see the ternary below).
//   I - index pack; its length is the lane count of the result.
// NOTE(review): per Clang's __builtin_shufflevector documentation, -1 marks a
// "don't care" lane and indices >= N address the second operand (here `x`
// again) -- confirm against the toolchain in use.
+ template <typename T, size_t N, size_t O, size_t ... I>
275
+ LIBC_INLINE constexpr static cpp::simd<T, sizeof ...(I)>
276
+ extend (cpp::simd<T, N> x, cpp::index_sequence<I...>) {
277
+ return __builtin_shufflevector (x, x, (I < O ? static_cast <int >(I) : -1 )...);
278
+ }
279
// Widens `x` from N lanes towards M lanes.
//   - N == M:      returns `x` unchanged.
//   - M <= 2 * N:  one call to the index_sequence overload with M indices.
//   - otherwise:   first widens to 2 * N lanes, then recurses with the
//                  doubled vector until one of the cases above applies.
// The return type is deduced because each branch yields a different width.
// NOTE(review): both calls to the index_sequence overload pass the target
// width (M, resp. 2 * N) in that overload's `O` slot. Assuming
// cpp::make_index_sequence<K> yields 0..K-1 like its std counterpart, the
// `I < O` test there is then true for every index, so -1 is never selected
// and the extra lanes repeat lanes of `x`. This template's own `O` is only
// forwarded to the recursive call. Confirm whether that is intended.
+ template <typename T, size_t N, size_t M, size_t O>
280
+ LIBC_INLINE constexpr static auto extend (cpp::simd<T, N> x) {
281
+ if constexpr (N == M)
282
+ return x;
283
+ else if constexpr (M <= 2 * N)
284
+ return extend<T, N, M>(x, cpp::make_index_sequence<M>{});
285
+ else
286
+ return extend<T, 2 * N, M, O>(
287
+ extend<T, N, 2 * N>(x, cpp::make_index_sequence<2 * N>{}));
288
+ }
274
289
// Builds an (N + M)-lane vector from `x` (N lanes) and `y` (M lanes) with a
// single __builtin_shufflevector call driven by the index pack I.
// Removed ('-') lines: shuffled `x` and `y` directly with I... as indices.
// Added ('+') lines: first widen both inputs to L = max(N, M) lanes with
// extend(), then shuffle the widened vectors with remapped indices.
// NOTE(review): the widening is presumably needed because the builtin wants
// both operands to have the same vector type -- confirm in the Clang docs.
template <typename T, size_t N, size_t M, size_t ... I>
275
290
LIBC_INLINE constexpr static cpp::simd<T, N + M>
276
- concat (cpp::simd<T, N> x, cpp::simd<T, M> y, index_sequence<I...>) {
277
- return __builtin_shufflevector (x, y, I...);
291
+ concat (cpp::simd<T, N> x, cpp::simd<T, M> y, cpp::index_sequence<I...>) {
292
// Common lane count both operands are widened to.
+ constexpr size_t L = (N > M ? N : M);
293
+
294
+ auto x_ext = extend<T, N, L, N>(x);
295
+ auto y_ext = extend<T, M, L, M>(y);
296
+
297
// Translates an index into the logical concatenation [x | y] into an index
// into the widened pair [x_ext | y_ext]:
//   idx < N       -> idx             (lane idx of x_ext)
//   idx < N + M   -> (idx - N) + L   (lane idx - N of y_ext, which starts at
//                                     position L of the shuffle's index space)
//   otherwise     -> -1
+ auto remap = [](size_t idx) -> int {
298
+ if (idx < N)
299
+ return static_cast <int >(idx);
300
+ if (idx < N + M)
301
+ return static_cast <int >((idx - N) + L);
302
+ return -1 ;
303
+ };
304
+
305
+ return __builtin_shufflevector (x_ext, y_ext, remap (I)...);
278
306
}
279
307
280
308
template <typename T, size_t N, size_t Count, size_t Offset, size_t ... I>
@@ -295,10 +323,18 @@ LIBC_INLINE constexpr static auto split(cpp::simd<T, N> x) {
295
323
} // namespace internal
296
324
297
325
// Shuffling helpers.
298
// Concatenates two vectors into one whose lanes are those of `x` followed by
// those of `y`.
// Removed ('-') lines: both inputs had to share the lane count N and the
// result type was spelled cpp::simd<T, N + N>.
// Added ('+') lines: inputs may have different lane counts N and M; the work
// is forwarded to internal::concat with an index sequence of N + M entries
// and the return type is deduced from that call.
- template <typename T, size_t N>
299
- LIBC_INLINE constexpr static cpp::simd<T, N + N> concat (cpp::simd<T, N> x,
300
- cpp::simd<T, N> y) {
301
- return internal::concat (x, y, make_index_sequence<N + N>{});
326
+ template <typename T, size_t N, size_t M>
327
+ LIBC_INLINE constexpr static auto concat (cpp::simd<T, N> x, cpp::simd<T, M> y) {
328
+ return internal::concat (x, y, make_index_sequence<N + M>{});
329
+ }
330
// Variadic concat: joins `x` and `y` first, then, if further arguments were
// supplied, concatenates that intermediate result with the remaining
// arguments by calling concat again; with no further arguments the
// intermediate result is returned as is.
// NOTE(review): `Rest` is an unconstrained type pack taken by value;
// presumably every element is a cpp::simd<T, K> -- nothing here enforces it
// beyond the nested concat calls failing to resolve.
+ template <typename T, size_t N, size_t M, typename ... Rest>
331
+ LIBC_INLINE constexpr static auto concat (cpp::simd<T, N> x, cpp::simd<T, M> y,
332
+ Rest... rest) {
333
+ auto xy = concat (x, y);
334
// sizeof...(Rest) is the count of trailing arguments; non-zero selects the
// recursive branch at compile time.
+ if constexpr (sizeof ...(Rest))
335
+ return concat (xy, rest...);
336
+ else
337
+ return xy;
302
338
}
303
339
template <size_t ... Sizes, typename T, size_t N> auto split (cpp::simd<T, N> x) {
304
340
static_assert ((... + Sizes) == N, " split sizes must sum to vector size" );
0 commit comments