-
Notifications
You must be signed in to change notification settings - Fork 233
/
broadcast.c
3730 lines (3586 loc) · 290 KB
/
broadcast.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Hidayat Khan <huk2209@gmail.com>
* 2020 Christopher Moore <moore@free.fr>
*/
#define SIMDE_TEST_X86_AVX512_INSN broadcast
#include <test/x86/avx512/test-avx512.h>
#include <simde/x86/avx512/broadcast.h>
static int
test_simde_mm256_broadcast_f32x2 (SIMDE_MUNIT_TEST_ARGS) {
static const struct {
const simde_float32 a[4];
const simde_float32 r[8];
} test_vec[] = {
{ { SIMDE_FLOAT32_C( -751.49), SIMDE_FLOAT32_C( -275.35), SIMDE_FLOAT32_C( 260.73), SIMDE_FLOAT32_C( 40.02) },
{ SIMDE_FLOAT32_C( -751.49), SIMDE_FLOAT32_C( -275.35), SIMDE_FLOAT32_C( -751.49), SIMDE_FLOAT32_C( -275.35),
SIMDE_FLOAT32_C( -751.49), SIMDE_FLOAT32_C( -275.35), SIMDE_FLOAT32_C( -751.49), SIMDE_FLOAT32_C( -275.35) } },
{ { SIMDE_FLOAT32_C( 629.63), SIMDE_FLOAT32_C( 163.39), SIMDE_FLOAT32_C( 167.23), SIMDE_FLOAT32_C( 652.38) },
{ SIMDE_FLOAT32_C( 629.63), SIMDE_FLOAT32_C( 163.39), SIMDE_FLOAT32_C( 629.63), SIMDE_FLOAT32_C( 163.39),
SIMDE_FLOAT32_C( 629.63), SIMDE_FLOAT32_C( 163.39), SIMDE_FLOAT32_C( 629.63), SIMDE_FLOAT32_C( 163.39) } },
{ { SIMDE_FLOAT32_C( 574.73), SIMDE_FLOAT32_C( -529.99), SIMDE_FLOAT32_C( 389.79), SIMDE_FLOAT32_C( -875.04) },
{ SIMDE_FLOAT32_C( 574.73), SIMDE_FLOAT32_C( -529.99), SIMDE_FLOAT32_C( 574.73), SIMDE_FLOAT32_C( -529.99),
SIMDE_FLOAT32_C( 574.73), SIMDE_FLOAT32_C( -529.99), SIMDE_FLOAT32_C( 574.73), SIMDE_FLOAT32_C( -529.99) } },
{ { SIMDE_FLOAT32_C( -790.15), SIMDE_FLOAT32_C( 7.90), SIMDE_FLOAT32_C( 834.33), SIMDE_FLOAT32_C( 549.92) },
{ SIMDE_FLOAT32_C( -790.15), SIMDE_FLOAT32_C( 7.90), SIMDE_FLOAT32_C( -790.15), SIMDE_FLOAT32_C( 7.90),
SIMDE_FLOAT32_C( -790.15), SIMDE_FLOAT32_C( 7.90), SIMDE_FLOAT32_C( -790.15), SIMDE_FLOAT32_C( 7.90) } },
{ { SIMDE_FLOAT32_C( 494.62), SIMDE_FLOAT32_C( -875.96), SIMDE_FLOAT32_C( -221.96), SIMDE_FLOAT32_C( -519.70) },
{ SIMDE_FLOAT32_C( 494.62), SIMDE_FLOAT32_C( -875.96), SIMDE_FLOAT32_C( 494.62), SIMDE_FLOAT32_C( -875.96),
SIMDE_FLOAT32_C( 494.62), SIMDE_FLOAT32_C( -875.96), SIMDE_FLOAT32_C( 494.62), SIMDE_FLOAT32_C( -875.96) } },
{ { SIMDE_FLOAT32_C( -583.03), SIMDE_FLOAT32_C( -938.00), SIMDE_FLOAT32_C( 973.38), SIMDE_FLOAT32_C( -468.70) },
{ SIMDE_FLOAT32_C( -583.03), SIMDE_FLOAT32_C( -938.00), SIMDE_FLOAT32_C( -583.03), SIMDE_FLOAT32_C( -938.00),
SIMDE_FLOAT32_C( -583.03), SIMDE_FLOAT32_C( -938.00), SIMDE_FLOAT32_C( -583.03), SIMDE_FLOAT32_C( -938.00) } },
{ { SIMDE_FLOAT32_C( 521.04), SIMDE_FLOAT32_C( -960.21), SIMDE_FLOAT32_C( -215.76), SIMDE_FLOAT32_C( -218.82) },
{ SIMDE_FLOAT32_C( 521.04), SIMDE_FLOAT32_C( -960.21), SIMDE_FLOAT32_C( 521.04), SIMDE_FLOAT32_C( -960.21),
SIMDE_FLOAT32_C( 521.04), SIMDE_FLOAT32_C( -960.21), SIMDE_FLOAT32_C( 521.04), SIMDE_FLOAT32_C( -960.21) } },
{ { SIMDE_FLOAT32_C( 315.04), SIMDE_FLOAT32_C( 872.51), SIMDE_FLOAT32_C( 318.60), SIMDE_FLOAT32_C( 720.27) },
{ SIMDE_FLOAT32_C( 315.04), SIMDE_FLOAT32_C( 872.51), SIMDE_FLOAT32_C( 315.04), SIMDE_FLOAT32_C( 872.51),
SIMDE_FLOAT32_C( 315.04), SIMDE_FLOAT32_C( 872.51), SIMDE_FLOAT32_C( 315.04), SIMDE_FLOAT32_C( 872.51) } }
};
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
simde__m128 a = simde_mm_loadu_ps(test_vec[i].a);
simde__m256 r = simde_mm256_broadcast_f32x2(a);
simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1);
}
return 0;
}
/* Table-driven check of simde_mm256_mask_broadcast_f32x2.
 *
 * Each case supplies a fallback vector `src`, an 8-bit mask `k`, a 4-float
 * input `a`, and the expected 8-float result `r`.  In the table, lane j of
 * `r` equals a[j % 2] where bit j of `k` is set and src[j] where it is
 * clear.
 *
 * Returns 0 once every case has been passed to the assert macro.  What
 * happens on a mismatch is decided by simde_test_x86_assert_equal_f32x8,
 * which is defined outside this file. */
static int
test_simde_mm256_mask_broadcast_f32x2 (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    const simde_float32 src[8];
    const simde__mmask8 k;
    const simde_float32 a[4];
    const simde_float32 r[8];
  } test_vec[] = {
    { { SIMDE_FLOAT32_C( -155.94), SIMDE_FLOAT32_C( -965.17), SIMDE_FLOAT32_C( 378.08), SIMDE_FLOAT32_C( 365.29),
        SIMDE_FLOAT32_C( -495.97), SIMDE_FLOAT32_C( 311.10), SIMDE_FLOAT32_C( 575.79), SIMDE_FLOAT32_C( -655.57) },
      UINT8_C( 85),
      { SIMDE_FLOAT32_C( 963.37), SIMDE_FLOAT32_C( -596.05), SIMDE_FLOAT32_C( 183.95), SIMDE_FLOAT32_C( -410.87) },
      { SIMDE_FLOAT32_C( 963.37), SIMDE_FLOAT32_C( -965.17), SIMDE_FLOAT32_C( 963.37), SIMDE_FLOAT32_C( 365.29),
        SIMDE_FLOAT32_C( 963.37), SIMDE_FLOAT32_C( 311.10), SIMDE_FLOAT32_C( 963.37), SIMDE_FLOAT32_C( -655.57) } },
    { { SIMDE_FLOAT32_C( 431.64), SIMDE_FLOAT32_C( 613.27), SIMDE_FLOAT32_C( -834.97), SIMDE_FLOAT32_C( 711.68),
        SIMDE_FLOAT32_C( -862.98), SIMDE_FLOAT32_C( -74.52), SIMDE_FLOAT32_C( -451.05), SIMDE_FLOAT32_C( -751.41) },
      UINT8_C(193),
      { SIMDE_FLOAT32_C( -39.01), SIMDE_FLOAT32_C( 325.90), SIMDE_FLOAT32_C( -543.82), SIMDE_FLOAT32_C( 50.30) },
      { SIMDE_FLOAT32_C( -39.01), SIMDE_FLOAT32_C( 613.27), SIMDE_FLOAT32_C( -834.97), SIMDE_FLOAT32_C( 711.68),
        SIMDE_FLOAT32_C( -862.98), SIMDE_FLOAT32_C( -74.52), SIMDE_FLOAT32_C( -39.01), SIMDE_FLOAT32_C( 325.90) } },
    { { SIMDE_FLOAT32_C( -570.27), SIMDE_FLOAT32_C( -600.03), SIMDE_FLOAT32_C( -713.28), SIMDE_FLOAT32_C( -16.45),
        SIMDE_FLOAT32_C( -512.72), SIMDE_FLOAT32_C( 640.13), SIMDE_FLOAT32_C( 632.82), SIMDE_FLOAT32_C( -156.53) },
      UINT8_C(110),
      { SIMDE_FLOAT32_C( 351.05), SIMDE_FLOAT32_C( 39.68), SIMDE_FLOAT32_C( 822.74), SIMDE_FLOAT32_C( -140.05) },
      { SIMDE_FLOAT32_C( -570.27), SIMDE_FLOAT32_C( 39.68), SIMDE_FLOAT32_C( 351.05), SIMDE_FLOAT32_C( 39.68),
        SIMDE_FLOAT32_C( -512.72), SIMDE_FLOAT32_C( 39.68), SIMDE_FLOAT32_C( 351.05), SIMDE_FLOAT32_C( -156.53) } },
    { { SIMDE_FLOAT32_C( 219.95), SIMDE_FLOAT32_C( 765.90), SIMDE_FLOAT32_C( 464.19), SIMDE_FLOAT32_C( -363.72),
        SIMDE_FLOAT32_C( 978.16), SIMDE_FLOAT32_C( -55.83), SIMDE_FLOAT32_C( -268.61), SIMDE_FLOAT32_C( -471.94) },
      UINT8_C(194),
      { SIMDE_FLOAT32_C( 300.83), SIMDE_FLOAT32_C( 122.56), SIMDE_FLOAT32_C( -137.37), SIMDE_FLOAT32_C( -830.55) },
      { SIMDE_FLOAT32_C( 219.95), SIMDE_FLOAT32_C( 122.56), SIMDE_FLOAT32_C( 464.19), SIMDE_FLOAT32_C( -363.72),
        SIMDE_FLOAT32_C( 978.16), SIMDE_FLOAT32_C( -55.83), SIMDE_FLOAT32_C( 300.83), SIMDE_FLOAT32_C( 122.56) } },
    { { SIMDE_FLOAT32_C( -993.95), SIMDE_FLOAT32_C( 735.37), SIMDE_FLOAT32_C( -715.04), SIMDE_FLOAT32_C( 363.48),
        SIMDE_FLOAT32_C( 997.38), SIMDE_FLOAT32_C( 957.48), SIMDE_FLOAT32_C( 411.04), SIMDE_FLOAT32_C( 318.40) },
      UINT8_C( 0),
      { SIMDE_FLOAT32_C( 944.29), SIMDE_FLOAT32_C( 688.98), SIMDE_FLOAT32_C( -319.61), SIMDE_FLOAT32_C( 391.33) },
      { SIMDE_FLOAT32_C( -993.95), SIMDE_FLOAT32_C( 735.37), SIMDE_FLOAT32_C( -715.04), SIMDE_FLOAT32_C( 363.48),
        SIMDE_FLOAT32_C( 997.38), SIMDE_FLOAT32_C( 957.48), SIMDE_FLOAT32_C( 411.04), SIMDE_FLOAT32_C( 318.40) } },
    { { SIMDE_FLOAT32_C( -917.62), SIMDE_FLOAT32_C( -406.65), SIMDE_FLOAT32_C( -532.97), SIMDE_FLOAT32_C( 298.17),
        SIMDE_FLOAT32_C( -598.91), SIMDE_FLOAT32_C( 107.47), SIMDE_FLOAT32_C( 214.95), SIMDE_FLOAT32_C( 587.62) },
      UINT8_C(159),
      { SIMDE_FLOAT32_C( -173.39), SIMDE_FLOAT32_C( -170.67), SIMDE_FLOAT32_C( -483.21), SIMDE_FLOAT32_C( 718.07) },
      { SIMDE_FLOAT32_C( -173.39), SIMDE_FLOAT32_C( -170.67), SIMDE_FLOAT32_C( -173.39), SIMDE_FLOAT32_C( -170.67),
        SIMDE_FLOAT32_C( -173.39), SIMDE_FLOAT32_C( 107.47), SIMDE_FLOAT32_C( 214.95), SIMDE_FLOAT32_C( -170.67) } },
    { { SIMDE_FLOAT32_C( 526.28), SIMDE_FLOAT32_C( -786.80), SIMDE_FLOAT32_C( 286.87), SIMDE_FLOAT32_C( -560.33),
        SIMDE_FLOAT32_C( 596.72), SIMDE_FLOAT32_C( 991.58), SIMDE_FLOAT32_C( -572.23), SIMDE_FLOAT32_C( 587.29) },
      UINT8_C( 79),
      { SIMDE_FLOAT32_C( 221.82), SIMDE_FLOAT32_C( 117.18), SIMDE_FLOAT32_C( -624.10), SIMDE_FLOAT32_C( 727.41) },
      { SIMDE_FLOAT32_C( 221.82), SIMDE_FLOAT32_C( 117.18), SIMDE_FLOAT32_C( 221.82), SIMDE_FLOAT32_C( 117.18),
        SIMDE_FLOAT32_C( 596.72), SIMDE_FLOAT32_C( 991.58), SIMDE_FLOAT32_C( 221.82), SIMDE_FLOAT32_C( 587.29) } },
    { { SIMDE_FLOAT32_C( -473.57), SIMDE_FLOAT32_C( 647.70), SIMDE_FLOAT32_C( -174.14), SIMDE_FLOAT32_C( -701.99),
        SIMDE_FLOAT32_C( -317.30), SIMDE_FLOAT32_C( -833.25), SIMDE_FLOAT32_C( -470.85), SIMDE_FLOAT32_C( 426.74) },
      UINT8_C(169),
      { SIMDE_FLOAT32_C( -800.29), SIMDE_FLOAT32_C( -506.53), SIMDE_FLOAT32_C( 682.63), SIMDE_FLOAT32_C( 942.35) },
      { SIMDE_FLOAT32_C( -800.29), SIMDE_FLOAT32_C( 647.70), SIMDE_FLOAT32_C( -174.14), SIMDE_FLOAT32_C( -506.53),
        SIMDE_FLOAT32_C( -317.30), SIMDE_FLOAT32_C( -506.53), SIMDE_FLOAT32_C( -470.85), SIMDE_FLOAT32_C( -506.53) } }
  };
  /* Number of rows in the table, computed once up front. */
  const size_t n_cases = sizeof(test_vec) / sizeof(*test_vec);

  for (size_t i = 0 ; i < n_cases ; ++i) {
    /* The two loads are independent of each other. */
    const simde__m128 a = simde_mm_loadu_ps(test_vec[i].a);
    const simde__m256 src = simde_mm256_loadu_ps(test_vec[i].src);
    const simde__m256 r = simde_mm256_mask_broadcast_f32x2(src, test_vec[i].k, a);

    /* Third argument is passed straight to the assert macro; presumably a
     * comparison precision -- confirm in the test header. */
    simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1);
  }

  return 0;
}
/* Table-driven check of simde_mm256_maskz_broadcast_f32x2.
 *
 * Each case supplies an 8-bit mask `k`, a 4-float input `a`, and the
 * expected 8-float result `r`.  In the table, lane j of `r` equals
 * a[j % 2] where bit j of `k` is set and 0.00 where it is clear.
 *
 * Returns 0 once every case has been passed to the assert macro.  What
 * happens on a mismatch is decided by simde_test_x86_assert_equal_f32x8,
 * which is defined outside this file. */
static int
test_simde_mm256_maskz_broadcast_f32x2 (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    const simde__mmask8 k;
    const simde_float32 a[4];
    const simde_float32 r[8];
  } test_vec[] = {
    { UINT8_C(167),
      { SIMDE_FLOAT32_C( -73.48), SIMDE_FLOAT32_C( -950.66), SIMDE_FLOAT32_C( 265.90), SIMDE_FLOAT32_C( -988.50) },
      { SIMDE_FLOAT32_C( -73.48), SIMDE_FLOAT32_C( -950.66), SIMDE_FLOAT32_C( -73.48), SIMDE_FLOAT32_C( 0.00),
        SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -950.66), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -950.66) } },
    { UINT8_C(122),
      { SIMDE_FLOAT32_C( 490.14), SIMDE_FLOAT32_C( -286.45), SIMDE_FLOAT32_C( -424.27), SIMDE_FLOAT32_C( -754.18) },
      { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -286.45), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -286.45),
        SIMDE_FLOAT32_C( 490.14), SIMDE_FLOAT32_C( -286.45), SIMDE_FLOAT32_C( 490.14), SIMDE_FLOAT32_C( 0.00) } },
    { UINT8_C( 66),
      { SIMDE_FLOAT32_C( -622.52), SIMDE_FLOAT32_C( -691.02), SIMDE_FLOAT32_C( 48.53), SIMDE_FLOAT32_C( -368.74) },
      { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -691.02), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00),
        SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -622.52), SIMDE_FLOAT32_C( 0.00) } },
    { UINT8_C(140),
      { SIMDE_FLOAT32_C( 336.37), SIMDE_FLOAT32_C( -709.34), SIMDE_FLOAT32_C( 65.79), SIMDE_FLOAT32_C( -200.10) },
      { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 336.37), SIMDE_FLOAT32_C( -709.34),
        SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -709.34) } },
    { UINT8_C(151),
      { SIMDE_FLOAT32_C( 450.42), SIMDE_FLOAT32_C( 257.72), SIMDE_FLOAT32_C( -507.45), SIMDE_FLOAT32_C( -644.25) },
      { SIMDE_FLOAT32_C( 450.42), SIMDE_FLOAT32_C( 257.72), SIMDE_FLOAT32_C( 450.42), SIMDE_FLOAT32_C( 0.00),
        SIMDE_FLOAT32_C( 450.42), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 257.72) } },
    { UINT8_C( 11),
      { SIMDE_FLOAT32_C( -161.31), SIMDE_FLOAT32_C( 845.16), SIMDE_FLOAT32_C( 584.32), SIMDE_FLOAT32_C( 641.28) },
      { SIMDE_FLOAT32_C( -161.31), SIMDE_FLOAT32_C( 845.16), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 845.16),
        SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } },
    { UINT8_C( 82),
      { SIMDE_FLOAT32_C( 565.26), SIMDE_FLOAT32_C( 325.20), SIMDE_FLOAT32_C( -344.79), SIMDE_FLOAT32_C( -940.47) },
      { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 325.20), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00),
        SIMDE_FLOAT32_C( 565.26), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 565.26), SIMDE_FLOAT32_C( 0.00) } },
    { UINT8_C(152),
      { SIMDE_FLOAT32_C( 715.85), SIMDE_FLOAT32_C( -726.67), SIMDE_FLOAT32_C( 812.36), SIMDE_FLOAT32_C( -643.19) },
      { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -726.67),
        SIMDE_FLOAT32_C( 715.85), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -726.67) } }
  };
  /* Number of rows in the table, computed once up front. */
  const size_t n_cases = sizeof(test_vec) / sizeof(*test_vec);

  for (size_t i = 0 ; i < n_cases ; ++i) {
    const simde__m128 a = simde_mm_loadu_ps(test_vec[i].a);
    const simde__m256 r = simde_mm256_maskz_broadcast_f32x2(test_vec[i].k, a);

    /* Third argument is passed straight to the assert macro; presumably a
     * comparison precision -- confirm in the test header. */
    simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1);
  }

  return 0;
}
static int
test_simde_mm512_broadcast_f32x2 (SIMDE_MUNIT_TEST_ARGS) {
static const struct {
const simde_float32 a[4];
const simde_float32 r[16];
} test_vec[] = {
{ { SIMDE_FLOAT32_C( -125.63), SIMDE_FLOAT32_C( 601.06), SIMDE_FLOAT32_C( 20.21), SIMDE_FLOAT32_C( -317.28) },
{ SIMDE_FLOAT32_C( -125.63), SIMDE_FLOAT32_C( 601.06), SIMDE_FLOAT32_C( -125.63), SIMDE_FLOAT32_C( 601.06),
SIMDE_FLOAT32_C( -125.63), SIMDE_FLOAT32_C( 601.06), SIMDE_FLOAT32_C( -125.63), SIMDE_FLOAT32_C( 601.06),
SIMDE_FLOAT32_C( -125.63), SIMDE_FLOAT32_C( 601.06), SIMDE_FLOAT32_C( -125.63), SIMDE_FLOAT32_C( 601.06),
SIMDE_FLOAT32_C( -125.63), SIMDE_FLOAT32_C( 601.06), SIMDE_FLOAT32_C( -125.63), SIMDE_FLOAT32_C( 601.06) } },
{ { SIMDE_FLOAT32_C( -590.78), SIMDE_FLOAT32_C( 832.90), SIMDE_FLOAT32_C( 590.84), SIMDE_FLOAT32_C( 180.72) },
{ SIMDE_FLOAT32_C( -590.78), SIMDE_FLOAT32_C( 832.90), SIMDE_FLOAT32_C( -590.78), SIMDE_FLOAT32_C( 832.90),
SIMDE_FLOAT32_C( -590.78), SIMDE_FLOAT32_C( 832.90), SIMDE_FLOAT32_C( -590.78), SIMDE_FLOAT32_C( 832.90),
SIMDE_FLOAT32_C( -590.78), SIMDE_FLOAT32_C( 832.90), SIMDE_FLOAT32_C( -590.78), SIMDE_FLOAT32_C( 832.90),
SIMDE_FLOAT32_C( -590.78), SIMDE_FLOAT32_C( 832.90), SIMDE_FLOAT32_C( -590.78), SIMDE_FLOAT32_C( 832.90) } },
{ { SIMDE_FLOAT32_C( -605.74), SIMDE_FLOAT32_C( -713.02), SIMDE_FLOAT32_C( 218.93), SIMDE_FLOAT32_C( -470.99) },
{ SIMDE_FLOAT32_C( -605.74), SIMDE_FLOAT32_C( -713.02), SIMDE_FLOAT32_C( -605.74), SIMDE_FLOAT32_C( -713.02),
SIMDE_FLOAT32_C( -605.74), SIMDE_FLOAT32_C( -713.02), SIMDE_FLOAT32_C( -605.74), SIMDE_FLOAT32_C( -713.02),
SIMDE_FLOAT32_C( -605.74), SIMDE_FLOAT32_C( -713.02), SIMDE_FLOAT32_C( -605.74), SIMDE_FLOAT32_C( -713.02),
SIMDE_FLOAT32_C( -605.74), SIMDE_FLOAT32_C( -713.02), SIMDE_FLOAT32_C( -605.74), SIMDE_FLOAT32_C( -713.02) } },
{ { SIMDE_FLOAT32_C( 61.13), SIMDE_FLOAT32_C( -592.59), SIMDE_FLOAT32_C( 423.81), SIMDE_FLOAT32_C( 987.29) },
{ SIMDE_FLOAT32_C( 61.13), SIMDE_FLOAT32_C( -592.59), SIMDE_FLOAT32_C( 61.13), SIMDE_FLOAT32_C( -592.59),
SIMDE_FLOAT32_C( 61.13), SIMDE_FLOAT32_C( -592.59), SIMDE_FLOAT32_C( 61.13), SIMDE_FLOAT32_C( -592.59),
SIMDE_FLOAT32_C( 61.13), SIMDE_FLOAT32_C( -592.59), SIMDE_FLOAT32_C( 61.13), SIMDE_FLOAT32_C( -592.59),
SIMDE_FLOAT32_C( 61.13), SIMDE_FLOAT32_C( -592.59), SIMDE_FLOAT32_C( 61.13), SIMDE_FLOAT32_C( -592.59) } },
{ { SIMDE_FLOAT32_C( -608.09), SIMDE_FLOAT32_C( -99.23), SIMDE_FLOAT32_C( 300.10), SIMDE_FLOAT32_C( -254.94) },
{ SIMDE_FLOAT32_C( -608.09), SIMDE_FLOAT32_C( -99.23), SIMDE_FLOAT32_C( -608.09), SIMDE_FLOAT32_C( -99.23),
SIMDE_FLOAT32_C( -608.09), SIMDE_FLOAT32_C( -99.23), SIMDE_FLOAT32_C( -608.09), SIMDE_FLOAT32_C( -99.23),
SIMDE_FLOAT32_C( -608.09), SIMDE_FLOAT32_C( -99.23), SIMDE_FLOAT32_C( -608.09), SIMDE_FLOAT32_C( -99.23),
SIMDE_FLOAT32_C( -608.09), SIMDE_FLOAT32_C( -99.23), SIMDE_FLOAT32_C( -608.09), SIMDE_FLOAT32_C( -99.23) } },
{ { SIMDE_FLOAT32_C( -727.78), SIMDE_FLOAT32_C( 285.14), SIMDE_FLOAT32_C( 318.61), SIMDE_FLOAT32_C( 956.19) },
{ SIMDE_FLOAT32_C( -727.78), SIMDE_FLOAT32_C( 285.14), SIMDE_FLOAT32_C( -727.78), SIMDE_FLOAT32_C( 285.14),
SIMDE_FLOAT32_C( -727.78), SIMDE_FLOAT32_C( 285.14), SIMDE_FLOAT32_C( -727.78), SIMDE_FLOAT32_C( 285.14),
SIMDE_FLOAT32_C( -727.78), SIMDE_FLOAT32_C( 285.14), SIMDE_FLOAT32_C( -727.78), SIMDE_FLOAT32_C( 285.14),
SIMDE_FLOAT32_C( -727.78), SIMDE_FLOAT32_C( 285.14), SIMDE_FLOAT32_C( -727.78), SIMDE_FLOAT32_C( 285.14) } },
{ { SIMDE_FLOAT32_C( 704.27), SIMDE_FLOAT32_C( 738.40), SIMDE_FLOAT32_C( 301.28), SIMDE_FLOAT32_C( -459.90) },
{ SIMDE_FLOAT32_C( 704.27), SIMDE_FLOAT32_C( 738.40), SIMDE_FLOAT32_C( 704.27), SIMDE_FLOAT32_C( 738.40),
SIMDE_FLOAT32_C( 704.27), SIMDE_FLOAT32_C( 738.40), SIMDE_FLOAT32_C( 704.27), SIMDE_FLOAT32_C( 738.40),
SIMDE_FLOAT32_C( 704.27), SIMDE_FLOAT32_C( 738.40), SIMDE_FLOAT32_C( 704.27), SIMDE_FLOAT32_C( 738.40),
SIMDE_FLOAT32_C( 704.27), SIMDE_FLOAT32_C( 738.40), SIMDE_FLOAT32_C( 704.27), SIMDE_FLOAT32_C( 738.40) } },
{ { SIMDE_FLOAT32_C( 379.79), SIMDE_FLOAT32_C( -819.17), SIMDE_FLOAT32_C( 172.39), SIMDE_FLOAT32_C( -722.17) },
{ SIMDE_FLOAT32_C( 379.79), SIMDE_FLOAT32_C( -819.17), SIMDE_FLOAT32_C( 379.79), SIMDE_FLOAT32_C( -819.17),
SIMDE_FLOAT32_C( 379.79), SIMDE_FLOAT32_C( -819.17), SIMDE_FLOAT32_C( 379.79), SIMDE_FLOAT32_C( -819.17),
SIMDE_FLOAT32_C( 379.79), SIMDE_FLOAT32_C( -819.17), SIMDE_FLOAT32_C( 379.79), SIMDE_FLOAT32_C( -819.17),
SIMDE_FLOAT32_C( 379.79), SIMDE_FLOAT32_C( -819.17), SIMDE_FLOAT32_C( 379.79), SIMDE_FLOAT32_C( -819.17) } }
};
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
simde__m128 a = simde_mm_loadu_ps(test_vec[i].a);
simde__m512 r = simde_mm512_broadcast_f32x2(a);
simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1);
}
return 0;
}
/* Table-driven check of simde_mm512_mask_broadcast_f32x2.
 *
 * Each case supplies a fallback vector `src`, a 16-bit mask `k`, a 4-float
 * input `a`, and the expected 16-float result `r`.  In the table, lane j of
 * `r` equals a[j % 2] where bit j of `k` is set and src[j] where it is
 * clear.
 *
 * Returns 0 once every case has been passed to the assert macro.  What
 * happens on a mismatch is decided by simde_test_x86_assert_equal_f32x16,
 * which is defined outside this file. */
static int
test_simde_mm512_mask_broadcast_f32x2 (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    const simde_float32 src[16];
    const simde__mmask16 k;
    const simde_float32 a[4];
    const simde_float32 r[16];
  } test_vec[] = {
    { { SIMDE_FLOAT32_C( 16.97), SIMDE_FLOAT32_C( -724.36), SIMDE_FLOAT32_C( -251.03), SIMDE_FLOAT32_C( 955.86),
        SIMDE_FLOAT32_C( -884.86), SIMDE_FLOAT32_C( 79.30), SIMDE_FLOAT32_C( 805.27), SIMDE_FLOAT32_C( 217.58),
        SIMDE_FLOAT32_C( 919.33), SIMDE_FLOAT32_C( -770.42), SIMDE_FLOAT32_C( -363.93), SIMDE_FLOAT32_C( -528.80),
        SIMDE_FLOAT32_C( 387.46), SIMDE_FLOAT32_C( 8.94), SIMDE_FLOAT32_C( 238.55), SIMDE_FLOAT32_C( -769.11) },
      UINT16_C(26495),
      { SIMDE_FLOAT32_C( -701.54), SIMDE_FLOAT32_C( -832.82), SIMDE_FLOAT32_C( 858.15), SIMDE_FLOAT32_C( 988.45) },
      { SIMDE_FLOAT32_C( -701.54), SIMDE_FLOAT32_C( -832.82), SIMDE_FLOAT32_C( -701.54), SIMDE_FLOAT32_C( -832.82),
        SIMDE_FLOAT32_C( -701.54), SIMDE_FLOAT32_C( -832.82), SIMDE_FLOAT32_C( -701.54), SIMDE_FLOAT32_C( 217.58),
        SIMDE_FLOAT32_C( -701.54), SIMDE_FLOAT32_C( -832.82), SIMDE_FLOAT32_C( -701.54), SIMDE_FLOAT32_C( -528.80),
        SIMDE_FLOAT32_C( 387.46), SIMDE_FLOAT32_C( -832.82), SIMDE_FLOAT32_C( -701.54), SIMDE_FLOAT32_C( -769.11) } },
    { { SIMDE_FLOAT32_C( 886.30), SIMDE_FLOAT32_C( 115.75), SIMDE_FLOAT32_C( -627.06), SIMDE_FLOAT32_C( -987.33),
        SIMDE_FLOAT32_C( -126.79), SIMDE_FLOAT32_C( 964.00), SIMDE_FLOAT32_C( -128.64), SIMDE_FLOAT32_C( -75.15),
        SIMDE_FLOAT32_C( 949.72), SIMDE_FLOAT32_C( -114.82), SIMDE_FLOAT32_C( 286.01), SIMDE_FLOAT32_C( -995.38),
        SIMDE_FLOAT32_C( 721.81), SIMDE_FLOAT32_C( -531.94), SIMDE_FLOAT32_C( -379.35), SIMDE_FLOAT32_C( 301.40) },
      UINT16_C(55066),
      { SIMDE_FLOAT32_C( -112.99), SIMDE_FLOAT32_C( 933.42), SIMDE_FLOAT32_C( -66.18), SIMDE_FLOAT32_C( -307.32) },
      { SIMDE_FLOAT32_C( 886.30), SIMDE_FLOAT32_C( 933.42), SIMDE_FLOAT32_C( -627.06), SIMDE_FLOAT32_C( 933.42),
        SIMDE_FLOAT32_C( -112.99), SIMDE_FLOAT32_C( 964.00), SIMDE_FLOAT32_C( -128.64), SIMDE_FLOAT32_C( -75.15),
        SIMDE_FLOAT32_C( -112.99), SIMDE_FLOAT32_C( 933.42), SIMDE_FLOAT32_C( -112.99), SIMDE_FLOAT32_C( -995.38),
        SIMDE_FLOAT32_C( -112.99), SIMDE_FLOAT32_C( -531.94), SIMDE_FLOAT32_C( -112.99), SIMDE_FLOAT32_C( 933.42) } },
    { { SIMDE_FLOAT32_C( 858.06), SIMDE_FLOAT32_C( -630.09), SIMDE_FLOAT32_C( 82.49), SIMDE_FLOAT32_C( 401.49),
        SIMDE_FLOAT32_C( -226.24), SIMDE_FLOAT32_C( -448.63), SIMDE_FLOAT32_C( -200.28), SIMDE_FLOAT32_C( -144.91),
        SIMDE_FLOAT32_C( 574.72), SIMDE_FLOAT32_C( -647.66), SIMDE_FLOAT32_C( 850.68), SIMDE_FLOAT32_C( -645.45),
        SIMDE_FLOAT32_C( -136.23), SIMDE_FLOAT32_C( 385.26), SIMDE_FLOAT32_C( -998.08), SIMDE_FLOAT32_C( -718.84) },
      UINT16_C(39639),
      { SIMDE_FLOAT32_C( -394.96), SIMDE_FLOAT32_C( -89.93), SIMDE_FLOAT32_C( 511.24), SIMDE_FLOAT32_C( 328.98) },
      { SIMDE_FLOAT32_C( -394.96), SIMDE_FLOAT32_C( -89.93), SIMDE_FLOAT32_C( -394.96), SIMDE_FLOAT32_C( 401.49),
        SIMDE_FLOAT32_C( -394.96), SIMDE_FLOAT32_C( -448.63), SIMDE_FLOAT32_C( -394.96), SIMDE_FLOAT32_C( -89.93),
        SIMDE_FLOAT32_C( 574.72), SIMDE_FLOAT32_C( -89.93), SIMDE_FLOAT32_C( 850.68), SIMDE_FLOAT32_C( -89.93),
        SIMDE_FLOAT32_C( -394.96), SIMDE_FLOAT32_C( 385.26), SIMDE_FLOAT32_C( -998.08), SIMDE_FLOAT32_C( -89.93) } },
    { { SIMDE_FLOAT32_C( -783.73), SIMDE_FLOAT32_C( -210.92), SIMDE_FLOAT32_C( -991.67), SIMDE_FLOAT32_C( 979.95),
        SIMDE_FLOAT32_C( 49.71), SIMDE_FLOAT32_C( -489.71), SIMDE_FLOAT32_C( -591.16), SIMDE_FLOAT32_C( 388.37),
        SIMDE_FLOAT32_C( -622.36), SIMDE_FLOAT32_C( 45.42), SIMDE_FLOAT32_C( -553.07), SIMDE_FLOAT32_C( 498.54),
        SIMDE_FLOAT32_C( 904.46), SIMDE_FLOAT32_C( -795.68), SIMDE_FLOAT32_C( -943.60), SIMDE_FLOAT32_C( 933.59) },
      UINT16_C(44422),
      { SIMDE_FLOAT32_C( 213.33), SIMDE_FLOAT32_C( -541.90), SIMDE_FLOAT32_C( 310.55), SIMDE_FLOAT32_C( -596.77) },
      { SIMDE_FLOAT32_C( -783.73), SIMDE_FLOAT32_C( -541.90), SIMDE_FLOAT32_C( 213.33), SIMDE_FLOAT32_C( 979.95),
        SIMDE_FLOAT32_C( 49.71), SIMDE_FLOAT32_C( -489.71), SIMDE_FLOAT32_C( -591.16), SIMDE_FLOAT32_C( -541.90),
        SIMDE_FLOAT32_C( 213.33), SIMDE_FLOAT32_C( 45.42), SIMDE_FLOAT32_C( 213.33), SIMDE_FLOAT32_C( -541.90),
        SIMDE_FLOAT32_C( 904.46), SIMDE_FLOAT32_C( -541.90), SIMDE_FLOAT32_C( -943.60), SIMDE_FLOAT32_C( -541.90) } },
    { { SIMDE_FLOAT32_C( 4.43), SIMDE_FLOAT32_C( 378.61), SIMDE_FLOAT32_C( -660.44), SIMDE_FLOAT32_C( -60.44),
        SIMDE_FLOAT32_C( 265.90), SIMDE_FLOAT32_C( 922.57), SIMDE_FLOAT32_C( -447.45), SIMDE_FLOAT32_C( -208.75),
        SIMDE_FLOAT32_C( -386.55), SIMDE_FLOAT32_C( -791.16), SIMDE_FLOAT32_C( 993.63), SIMDE_FLOAT32_C( -107.89),
        SIMDE_FLOAT32_C( 758.84), SIMDE_FLOAT32_C( -215.37), SIMDE_FLOAT32_C( 198.46), SIMDE_FLOAT32_C( -486.35) },
      UINT16_C(19819),
      { SIMDE_FLOAT32_C( 413.19), SIMDE_FLOAT32_C( 527.77), SIMDE_FLOAT32_C( 286.90), SIMDE_FLOAT32_C( -50.52) },
      { SIMDE_FLOAT32_C( 413.19), SIMDE_FLOAT32_C( 527.77), SIMDE_FLOAT32_C( -660.44), SIMDE_FLOAT32_C( 527.77),
        SIMDE_FLOAT32_C( 265.90), SIMDE_FLOAT32_C( 527.77), SIMDE_FLOAT32_C( 413.19), SIMDE_FLOAT32_C( -208.75),
        SIMDE_FLOAT32_C( 413.19), SIMDE_FLOAT32_C( -791.16), SIMDE_FLOAT32_C( 413.19), SIMDE_FLOAT32_C( 527.77),
        SIMDE_FLOAT32_C( 758.84), SIMDE_FLOAT32_C( -215.37), SIMDE_FLOAT32_C( 413.19), SIMDE_FLOAT32_C( -486.35) } },
    { { SIMDE_FLOAT32_C( 968.23), SIMDE_FLOAT32_C( -877.74), SIMDE_FLOAT32_C( -102.63), SIMDE_FLOAT32_C( -954.86),
        SIMDE_FLOAT32_C( -411.69), SIMDE_FLOAT32_C( 708.12), SIMDE_FLOAT32_C( -635.17), SIMDE_FLOAT32_C( 743.77),
        SIMDE_FLOAT32_C( 622.65), SIMDE_FLOAT32_C( 851.75), SIMDE_FLOAT32_C( -569.83), SIMDE_FLOAT32_C( 908.51),
        SIMDE_FLOAT32_C( -674.71), SIMDE_FLOAT32_C( 173.61), SIMDE_FLOAT32_C( -162.66), SIMDE_FLOAT32_C( 200.03) },
      UINT16_C(57825),
      { SIMDE_FLOAT32_C( -696.94), SIMDE_FLOAT32_C( 529.84), SIMDE_FLOAT32_C( -942.89), SIMDE_FLOAT32_C( 880.87) },
      { SIMDE_FLOAT32_C( -696.94), SIMDE_FLOAT32_C( -877.74), SIMDE_FLOAT32_C( -102.63), SIMDE_FLOAT32_C( -954.86),
        SIMDE_FLOAT32_C( -411.69), SIMDE_FLOAT32_C( 529.84), SIMDE_FLOAT32_C( -696.94), SIMDE_FLOAT32_C( 529.84),
        SIMDE_FLOAT32_C( -696.94), SIMDE_FLOAT32_C( 851.75), SIMDE_FLOAT32_C( -569.83), SIMDE_FLOAT32_C( 908.51),
        SIMDE_FLOAT32_C( -674.71), SIMDE_FLOAT32_C( 529.84), SIMDE_FLOAT32_C( -696.94), SIMDE_FLOAT32_C( 529.84) } },
    { { SIMDE_FLOAT32_C( 733.15), SIMDE_FLOAT32_C( 63.36), SIMDE_FLOAT32_C( 903.02), SIMDE_FLOAT32_C( -977.76),
        SIMDE_FLOAT32_C( 704.77), SIMDE_FLOAT32_C( 985.75), SIMDE_FLOAT32_C( -492.96), SIMDE_FLOAT32_C( 872.57),
        SIMDE_FLOAT32_C( -697.69), SIMDE_FLOAT32_C( -32.06), SIMDE_FLOAT32_C( -826.65), SIMDE_FLOAT32_C( 423.95),
        SIMDE_FLOAT32_C( -668.70), SIMDE_FLOAT32_C( -777.46), SIMDE_FLOAT32_C( -794.02), SIMDE_FLOAT32_C( 931.91) },
      UINT16_C(22885),
      { SIMDE_FLOAT32_C( 241.78), SIMDE_FLOAT32_C( -340.95), SIMDE_FLOAT32_C( -411.67), SIMDE_FLOAT32_C( -904.01) },
      { SIMDE_FLOAT32_C( 241.78), SIMDE_FLOAT32_C( 63.36), SIMDE_FLOAT32_C( 241.78), SIMDE_FLOAT32_C( -977.76),
        SIMDE_FLOAT32_C( 704.77), SIMDE_FLOAT32_C( -340.95), SIMDE_FLOAT32_C( 241.78), SIMDE_FLOAT32_C( 872.57),
        SIMDE_FLOAT32_C( 241.78), SIMDE_FLOAT32_C( -32.06), SIMDE_FLOAT32_C( -826.65), SIMDE_FLOAT32_C( -340.95),
        SIMDE_FLOAT32_C( 241.78), SIMDE_FLOAT32_C( -777.46), SIMDE_FLOAT32_C( 241.78), SIMDE_FLOAT32_C( 931.91) } },
    { { SIMDE_FLOAT32_C( 377.61), SIMDE_FLOAT32_C( 543.54), SIMDE_FLOAT32_C( -676.81), SIMDE_FLOAT32_C( 796.04),
        SIMDE_FLOAT32_C( -952.55), SIMDE_FLOAT32_C( 439.69), SIMDE_FLOAT32_C( -139.34), SIMDE_FLOAT32_C( 103.48),
        SIMDE_FLOAT32_C( -782.74), SIMDE_FLOAT32_C( 562.99), SIMDE_FLOAT32_C( 161.99), SIMDE_FLOAT32_C( 620.38),
        SIMDE_FLOAT32_C( 696.86), SIMDE_FLOAT32_C( 88.47), SIMDE_FLOAT32_C( 998.69), SIMDE_FLOAT32_C( -955.66) },
      UINT16_C(13591),
      { SIMDE_FLOAT32_C( -395.69), SIMDE_FLOAT32_C( -372.87), SIMDE_FLOAT32_C( -839.61), SIMDE_FLOAT32_C( 668.17) },
      { SIMDE_FLOAT32_C( -395.69), SIMDE_FLOAT32_C( -372.87), SIMDE_FLOAT32_C( -395.69), SIMDE_FLOAT32_C( 796.04),
        SIMDE_FLOAT32_C( -395.69), SIMDE_FLOAT32_C( 439.69), SIMDE_FLOAT32_C( -139.34), SIMDE_FLOAT32_C( 103.48),
        SIMDE_FLOAT32_C( -395.69), SIMDE_FLOAT32_C( 562.99), SIMDE_FLOAT32_C( -395.69), SIMDE_FLOAT32_C( 620.38),
        SIMDE_FLOAT32_C( -395.69), SIMDE_FLOAT32_C( -372.87), SIMDE_FLOAT32_C( 998.69), SIMDE_FLOAT32_C( -955.66) } }
  };
  /* Number of rows in the table, computed once up front. */
  const size_t n_cases = sizeof(test_vec) / sizeof(*test_vec);

  for (size_t i = 0 ; i < n_cases ; ++i) {
    /* The two loads are independent of each other. */
    const simde__m128 a = simde_mm_loadu_ps(test_vec[i].a);
    const simde__m512 src = simde_mm512_loadu_ps(test_vec[i].src);
    const simde__m512 r = simde_mm512_mask_broadcast_f32x2(src, test_vec[i].k, a);

    /* Third argument is passed straight to the assert macro; presumably a
     * comparison precision -- confirm in the test header. */
    simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1);
  }

  return 0;
}
/* Table-driven check of simde_mm512_maskz_broadcast_f32x2.
 *
 * Every table entry carries a mask `k`, a four-float input `a`, and the
 * sixteen floats `r` that the call is expected to yield.  The function
 * returns 0 once all entries have been run through the comparison; how a
 * mismatch is reported is decided by simde_test_x86_assert_equal_f32x16,
 * which is defined outside this part of the file. */
static int
test_simde_mm512_maskz_broadcast_f32x2 (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    const simde__mmask16 k;
    const simde_float32 a[4];
    const simde_float32 r[16];
  } test_vec[] = {
    { UINT16_C(18884),
      { SIMDE_FLOAT32_C( 545.10), SIMDE_FLOAT32_C( -550.17), SIMDE_FLOAT32_C( -710.41), SIMDE_FLOAT32_C( 204.85) },
      { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 545.10), SIMDE_FLOAT32_C( 0.00),
        SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 545.10), SIMDE_FLOAT32_C( -550.17),
        SIMDE_FLOAT32_C( 545.10), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -550.17),
        SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 545.10), SIMDE_FLOAT32_C( 0.00) } },
    { UINT16_C(16968),
      { SIMDE_FLOAT32_C( 51.85), SIMDE_FLOAT32_C( -493.14), SIMDE_FLOAT32_C( -214.52), SIMDE_FLOAT32_C( 484.86) },
      { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -493.14),
        SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 51.85), SIMDE_FLOAT32_C( 0.00),
        SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -493.14), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00),
        SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 51.85), SIMDE_FLOAT32_C( 0.00) } },
    { UINT16_C(55493),
      { SIMDE_FLOAT32_C( -547.31), SIMDE_FLOAT32_C( -681.83), SIMDE_FLOAT32_C( 567.76), SIMDE_FLOAT32_C( 376.14) },
      { SIMDE_FLOAT32_C( -547.31), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -547.31), SIMDE_FLOAT32_C( 0.00),
        SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -547.31), SIMDE_FLOAT32_C( -681.83),
        SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -681.83),
        SIMDE_FLOAT32_C( -547.31), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -547.31), SIMDE_FLOAT32_C( -681.83) } },
    { UINT16_C( 1280),
      { SIMDE_FLOAT32_C( 358.99), SIMDE_FLOAT32_C( -507.35), SIMDE_FLOAT32_C( -959.80), SIMDE_FLOAT32_C( 688.48) },
      { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00),
        SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00),
        SIMDE_FLOAT32_C( 358.99), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 358.99), SIMDE_FLOAT32_C( 0.00),
        SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } },
    { UINT16_C(16569),
      { SIMDE_FLOAT32_C( -988.71), SIMDE_FLOAT32_C( 789.03), SIMDE_FLOAT32_C( -740.57), SIMDE_FLOAT32_C( -739.46) },
      { SIMDE_FLOAT32_C( -988.71), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 789.03),
        SIMDE_FLOAT32_C( -988.71), SIMDE_FLOAT32_C( 789.03), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 789.03),
        SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00),
        SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -988.71), SIMDE_FLOAT32_C( 0.00) } },
    { UINT16_C(26242),
      { SIMDE_FLOAT32_C( -555.34), SIMDE_FLOAT32_C( 402.79), SIMDE_FLOAT32_C( -274.64), SIMDE_FLOAT32_C( 159.53) },
      { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 402.79), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00),
        SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 402.79),
        SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 402.79), SIMDE_FLOAT32_C( -555.34), SIMDE_FLOAT32_C( 0.00),
        SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 402.79), SIMDE_FLOAT32_C( -555.34), SIMDE_FLOAT32_C( 0.00) } },
    { UINT16_C(39055),
      { SIMDE_FLOAT32_C( -25.84), SIMDE_FLOAT32_C( -228.90), SIMDE_FLOAT32_C( 813.40), SIMDE_FLOAT32_C( 762.90) },
      { SIMDE_FLOAT32_C( -25.84), SIMDE_FLOAT32_C( -228.90), SIMDE_FLOAT32_C( -25.84), SIMDE_FLOAT32_C( -228.90),
        SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -228.90),
        SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -228.90),
        SIMDE_FLOAT32_C( -25.84), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -228.90) } },
    { UINT16_C(53187),
      { SIMDE_FLOAT32_C( -400.08), SIMDE_FLOAT32_C( -173.64), SIMDE_FLOAT32_C( -349.66), SIMDE_FLOAT32_C( -663.64) },
      { SIMDE_FLOAT32_C( -400.08), SIMDE_FLOAT32_C( -173.64), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00),
        SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -400.08), SIMDE_FLOAT32_C( -173.64),
        SIMDE_FLOAT32_C( -400.08), SIMDE_FLOAT32_C( -173.64), SIMDE_FLOAT32_C( -400.08), SIMDE_FLOAT32_C( -173.64),
        SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -400.08), SIMDE_FLOAT32_C( -173.64) } }
  };
  /* Number of entries in the table above. */
  const size_t n_vec = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_vec ; ++i) {
    simde__m128 input = simde_mm_loadu_ps(test_vec[i].a);
    simde__m512 r = simde_mm512_maskz_broadcast_f32x2(test_vec[i].k, input);
    simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1);
  }

  return 0;
}
/* Table-driven check of simde_mm512_broadcast_f32x8.
 *
 * Every table entry carries an eight-float input `a` and the sixteen floats
 * `r` that the call is expected to yield (in this table `r` is `a` written
 * out twice).  The function returns 0 once all entries have been run through
 * the comparison; how a mismatch is reported is decided by
 * simde_test_x86_assert_equal_f32x16, which is defined outside this part of
 * the file. */
static int
test_simde_mm512_broadcast_f32x8 (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    const simde_float32 a[8];
    const simde_float32 r[16];
  } test_vec[] = {
    { { SIMDE_FLOAT32_C( -416.45), SIMDE_FLOAT32_C( 711.04), SIMDE_FLOAT32_C( 494.08), SIMDE_FLOAT32_C( 55.06),
        SIMDE_FLOAT32_C( -527.80), SIMDE_FLOAT32_C( -810.11), SIMDE_FLOAT32_C( 486.30), SIMDE_FLOAT32_C( -695.23) },
      { SIMDE_FLOAT32_C( -416.45), SIMDE_FLOAT32_C( 711.04), SIMDE_FLOAT32_C( 494.08), SIMDE_FLOAT32_C( 55.06),
        SIMDE_FLOAT32_C( -527.80), SIMDE_FLOAT32_C( -810.11), SIMDE_FLOAT32_C( 486.30), SIMDE_FLOAT32_C( -695.23),
        SIMDE_FLOAT32_C( -416.45), SIMDE_FLOAT32_C( 711.04), SIMDE_FLOAT32_C( 494.08), SIMDE_FLOAT32_C( 55.06),
        SIMDE_FLOAT32_C( -527.80), SIMDE_FLOAT32_C( -810.11), SIMDE_FLOAT32_C( 486.30), SIMDE_FLOAT32_C( -695.23) } },
    { { SIMDE_FLOAT32_C( -800.88), SIMDE_FLOAT32_C( -452.72), SIMDE_FLOAT32_C( -904.66), SIMDE_FLOAT32_C( -614.99),
        SIMDE_FLOAT32_C( -172.17), SIMDE_FLOAT32_C( 311.84), SIMDE_FLOAT32_C( -833.25), SIMDE_FLOAT32_C( -503.53) },
      { SIMDE_FLOAT32_C( -800.88), SIMDE_FLOAT32_C( -452.72), SIMDE_FLOAT32_C( -904.66), SIMDE_FLOAT32_C( -614.99),
        SIMDE_FLOAT32_C( -172.17), SIMDE_FLOAT32_C( 311.84), SIMDE_FLOAT32_C( -833.25), SIMDE_FLOAT32_C( -503.53),
        SIMDE_FLOAT32_C( -800.88), SIMDE_FLOAT32_C( -452.72), SIMDE_FLOAT32_C( -904.66), SIMDE_FLOAT32_C( -614.99),
        SIMDE_FLOAT32_C( -172.17), SIMDE_FLOAT32_C( 311.84), SIMDE_FLOAT32_C( -833.25), SIMDE_FLOAT32_C( -503.53) } },
    { { SIMDE_FLOAT32_C( -875.06), SIMDE_FLOAT32_C( 874.51), SIMDE_FLOAT32_C( -123.24), SIMDE_FLOAT32_C( 657.48),
        SIMDE_FLOAT32_C( 309.07), SIMDE_FLOAT32_C( 484.03), SIMDE_FLOAT32_C( -839.17), SIMDE_FLOAT32_C( 10.32) },
      { SIMDE_FLOAT32_C( -875.06), SIMDE_FLOAT32_C( 874.51), SIMDE_FLOAT32_C( -123.24), SIMDE_FLOAT32_C( 657.48),
        SIMDE_FLOAT32_C( 309.07), SIMDE_FLOAT32_C( 484.03), SIMDE_FLOAT32_C( -839.17), SIMDE_FLOAT32_C( 10.32),
        SIMDE_FLOAT32_C( -875.06), SIMDE_FLOAT32_C( 874.51), SIMDE_FLOAT32_C( -123.24), SIMDE_FLOAT32_C( 657.48),
        SIMDE_FLOAT32_C( 309.07), SIMDE_FLOAT32_C( 484.03), SIMDE_FLOAT32_C( -839.17), SIMDE_FLOAT32_C( 10.32) } },
    { { SIMDE_FLOAT32_C( -515.09), SIMDE_FLOAT32_C( 924.58), SIMDE_FLOAT32_C( -659.21), SIMDE_FLOAT32_C( 676.36),
        SIMDE_FLOAT32_C( -421.41), SIMDE_FLOAT32_C( -682.12), SIMDE_FLOAT32_C( -306.00), SIMDE_FLOAT32_C( -939.89) },
      { SIMDE_FLOAT32_C( -515.09), SIMDE_FLOAT32_C( 924.58), SIMDE_FLOAT32_C( -659.21), SIMDE_FLOAT32_C( 676.36),
        SIMDE_FLOAT32_C( -421.41), SIMDE_FLOAT32_C( -682.12), SIMDE_FLOAT32_C( -306.00), SIMDE_FLOAT32_C( -939.89),
        SIMDE_FLOAT32_C( -515.09), SIMDE_FLOAT32_C( 924.58), SIMDE_FLOAT32_C( -659.21), SIMDE_FLOAT32_C( 676.36),
        SIMDE_FLOAT32_C( -421.41), SIMDE_FLOAT32_C( -682.12), SIMDE_FLOAT32_C( -306.00), SIMDE_FLOAT32_C( -939.89) } },
    { { SIMDE_FLOAT32_C( -812.70), SIMDE_FLOAT32_C( 906.23), SIMDE_FLOAT32_C( -979.37), SIMDE_FLOAT32_C( -275.20),
        SIMDE_FLOAT32_C( 664.08), SIMDE_FLOAT32_C( -809.85), SIMDE_FLOAT32_C( 934.39), SIMDE_FLOAT32_C( 280.51) },
      { SIMDE_FLOAT32_C( -812.70), SIMDE_FLOAT32_C( 906.23), SIMDE_FLOAT32_C( -979.37), SIMDE_FLOAT32_C( -275.20),
        SIMDE_FLOAT32_C( 664.08), SIMDE_FLOAT32_C( -809.85), SIMDE_FLOAT32_C( 934.39), SIMDE_FLOAT32_C( 280.51),
        SIMDE_FLOAT32_C( -812.70), SIMDE_FLOAT32_C( 906.23), SIMDE_FLOAT32_C( -979.37), SIMDE_FLOAT32_C( -275.20),
        SIMDE_FLOAT32_C( 664.08), SIMDE_FLOAT32_C( -809.85), SIMDE_FLOAT32_C( 934.39), SIMDE_FLOAT32_C( 280.51) } },
    { { SIMDE_FLOAT32_C( 461.56), SIMDE_FLOAT32_C( -484.84), SIMDE_FLOAT32_C( -776.35), SIMDE_FLOAT32_C( -37.28),
        SIMDE_FLOAT32_C( -552.72), SIMDE_FLOAT32_C( 358.22), SIMDE_FLOAT32_C( 561.82), SIMDE_FLOAT32_C( 465.10) },
      { SIMDE_FLOAT32_C( 461.56), SIMDE_FLOAT32_C( -484.84), SIMDE_FLOAT32_C( -776.35), SIMDE_FLOAT32_C( -37.28),
        SIMDE_FLOAT32_C( -552.72), SIMDE_FLOAT32_C( 358.22), SIMDE_FLOAT32_C( 561.82), SIMDE_FLOAT32_C( 465.10),
        SIMDE_FLOAT32_C( 461.56), SIMDE_FLOAT32_C( -484.84), SIMDE_FLOAT32_C( -776.35), SIMDE_FLOAT32_C( -37.28),
        SIMDE_FLOAT32_C( -552.72), SIMDE_FLOAT32_C( 358.22), SIMDE_FLOAT32_C( 561.82), SIMDE_FLOAT32_C( 465.10) } },
    { { SIMDE_FLOAT32_C( 996.67), SIMDE_FLOAT32_C( -908.09), SIMDE_FLOAT32_C( -292.64), SIMDE_FLOAT32_C( -421.79),
        SIMDE_FLOAT32_C( -984.50), SIMDE_FLOAT32_C( -529.88), SIMDE_FLOAT32_C( 228.67), SIMDE_FLOAT32_C( -756.34) },
      { SIMDE_FLOAT32_C( 996.67), SIMDE_FLOAT32_C( -908.09), SIMDE_FLOAT32_C( -292.64), SIMDE_FLOAT32_C( -421.79),
        SIMDE_FLOAT32_C( -984.50), SIMDE_FLOAT32_C( -529.88), SIMDE_FLOAT32_C( 228.67), SIMDE_FLOAT32_C( -756.34),
        SIMDE_FLOAT32_C( 996.67), SIMDE_FLOAT32_C( -908.09), SIMDE_FLOAT32_C( -292.64), SIMDE_FLOAT32_C( -421.79),
        SIMDE_FLOAT32_C( -984.50), SIMDE_FLOAT32_C( -529.88), SIMDE_FLOAT32_C( 228.67), SIMDE_FLOAT32_C( -756.34) } },
    { { SIMDE_FLOAT32_C( 236.36), SIMDE_FLOAT32_C( 442.90), SIMDE_FLOAT32_C( -175.57), SIMDE_FLOAT32_C( -799.66),
        SIMDE_FLOAT32_C( 97.65), SIMDE_FLOAT32_C( -822.08), SIMDE_FLOAT32_C( -738.45), SIMDE_FLOAT32_C( 923.13) },
      { SIMDE_FLOAT32_C( 236.36), SIMDE_FLOAT32_C( 442.90), SIMDE_FLOAT32_C( -175.57), SIMDE_FLOAT32_C( -799.66),
        SIMDE_FLOAT32_C( 97.65), SIMDE_FLOAT32_C( -822.08), SIMDE_FLOAT32_C( -738.45), SIMDE_FLOAT32_C( 923.13),
        SIMDE_FLOAT32_C( 236.36), SIMDE_FLOAT32_C( 442.90), SIMDE_FLOAT32_C( -175.57), SIMDE_FLOAT32_C( -799.66),
        SIMDE_FLOAT32_C( 97.65), SIMDE_FLOAT32_C( -822.08), SIMDE_FLOAT32_C( -738.45), SIMDE_FLOAT32_C( 923.13) } }
  };
  /* Number of entries in the table above. */
  const size_t n_vec = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_vec ; ++i) {
    simde__m256 input = simde_mm256_loadu_ps(test_vec[i].a);
    simde__m512 r = simde_mm512_broadcast_f32x8(input);
    simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1);
  }

  return 0;
}
/* Table-driven check of simde_mm512_mask_broadcast_f32x8.
 *
 * Every table entry carries sixteen floats `src`, a mask `k`, an eight-float
 * input `a`, and the sixteen floats `r` that the call
 * simde_mm512_mask_broadcast_f32x8(src, k, a) is expected to yield.  The
 * function returns 0 once all entries have been run through the comparison;
 * how a mismatch is reported is decided by
 * simde_test_x86_assert_equal_f32x16, which is defined outside this part of
 * the file. */
static int
test_simde_mm512_mask_broadcast_f32x8 (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    const simde_float32 src[16];
    const simde__mmask16 k;
    const simde_float32 a[8];
    const simde_float32 r[16];
  } test_vec[] = {
    { { SIMDE_FLOAT32_C( 280.29), SIMDE_FLOAT32_C( 838.38), SIMDE_FLOAT32_C( 622.29), SIMDE_FLOAT32_C( 762.17),
        SIMDE_FLOAT32_C( -281.25), SIMDE_FLOAT32_C( 985.78), SIMDE_FLOAT32_C( 78.74), SIMDE_FLOAT32_C( -555.08),
        SIMDE_FLOAT32_C( 759.89), SIMDE_FLOAT32_C( -557.22), SIMDE_FLOAT32_C( 754.50), SIMDE_FLOAT32_C( 954.59),
        SIMDE_FLOAT32_C( -153.57), SIMDE_FLOAT32_C( 932.38), SIMDE_FLOAT32_C( 449.83), SIMDE_FLOAT32_C( 378.57) },
      UINT16_C(36924),
      { SIMDE_FLOAT32_C( 598.39), SIMDE_FLOAT32_C( -917.42), SIMDE_FLOAT32_C( 853.85), SIMDE_FLOAT32_C( 635.72),
        SIMDE_FLOAT32_C( 497.82), SIMDE_FLOAT32_C( 880.65), SIMDE_FLOAT32_C( -930.36), SIMDE_FLOAT32_C( -512.19) },
      { SIMDE_FLOAT32_C( 280.29), SIMDE_FLOAT32_C( 838.38), SIMDE_FLOAT32_C( 853.85), SIMDE_FLOAT32_C( 635.72),
        SIMDE_FLOAT32_C( 497.82), SIMDE_FLOAT32_C( 880.65), SIMDE_FLOAT32_C( 78.74), SIMDE_FLOAT32_C( -555.08),
        SIMDE_FLOAT32_C( 759.89), SIMDE_FLOAT32_C( -557.22), SIMDE_FLOAT32_C( 754.50), SIMDE_FLOAT32_C( 954.59),
        SIMDE_FLOAT32_C( 497.82), SIMDE_FLOAT32_C( 932.38), SIMDE_FLOAT32_C( 449.83), SIMDE_FLOAT32_C( -512.19) } },
    { { SIMDE_FLOAT32_C( -437.09), SIMDE_FLOAT32_C( -187.22), SIMDE_FLOAT32_C( -573.53), SIMDE_FLOAT32_C( 628.55),
        SIMDE_FLOAT32_C( 16.28), SIMDE_FLOAT32_C( -343.67), SIMDE_FLOAT32_C( 13.33), SIMDE_FLOAT32_C( 92.74),
        SIMDE_FLOAT32_C( 617.88), SIMDE_FLOAT32_C( 659.02), SIMDE_FLOAT32_C( 114.72), SIMDE_FLOAT32_C( 86.74),
        SIMDE_FLOAT32_C( -78.46), SIMDE_FLOAT32_C( -669.19), SIMDE_FLOAT32_C( 913.81), SIMDE_FLOAT32_C( 480.88) },
      UINT16_C(25166),
      { SIMDE_FLOAT32_C( -761.34), SIMDE_FLOAT32_C( 162.88), SIMDE_FLOAT32_C( -410.95), SIMDE_FLOAT32_C( -918.77),
        SIMDE_FLOAT32_C( 294.07), SIMDE_FLOAT32_C( 489.11), SIMDE_FLOAT32_C( 466.01), SIMDE_FLOAT32_C( 281.28) },
      { SIMDE_FLOAT32_C( -437.09), SIMDE_FLOAT32_C( 162.88), SIMDE_FLOAT32_C( -410.95), SIMDE_FLOAT32_C( -918.77),
        SIMDE_FLOAT32_C( 16.28), SIMDE_FLOAT32_C( -343.67), SIMDE_FLOAT32_C( 466.01), SIMDE_FLOAT32_C( 92.74),
        SIMDE_FLOAT32_C( 617.88), SIMDE_FLOAT32_C( 162.88), SIMDE_FLOAT32_C( 114.72), SIMDE_FLOAT32_C( 86.74),
        SIMDE_FLOAT32_C( -78.46), SIMDE_FLOAT32_C( 489.11), SIMDE_FLOAT32_C( 466.01), SIMDE_FLOAT32_C( 480.88) } },
    { { SIMDE_FLOAT32_C( -606.28), SIMDE_FLOAT32_C( 188.60), SIMDE_FLOAT32_C( -142.85), SIMDE_FLOAT32_C( -814.99),
        SIMDE_FLOAT32_C( 440.56), SIMDE_FLOAT32_C( 576.44), SIMDE_FLOAT32_C( 238.85), SIMDE_FLOAT32_C( 303.69),
        SIMDE_FLOAT32_C( 150.34), SIMDE_FLOAT32_C( 808.69), SIMDE_FLOAT32_C( -362.83), SIMDE_FLOAT32_C( -158.08),
        SIMDE_FLOAT32_C( -803.96), SIMDE_FLOAT32_C( -196.75), SIMDE_FLOAT32_C( -727.89), SIMDE_FLOAT32_C( 308.53) },
      UINT16_C(23787),
      { SIMDE_FLOAT32_C( 944.26), SIMDE_FLOAT32_C( 110.45), SIMDE_FLOAT32_C( 407.09), SIMDE_FLOAT32_C( 45.91),
        SIMDE_FLOAT32_C( -335.37), SIMDE_FLOAT32_C( -560.84), SIMDE_FLOAT32_C( 3.97), SIMDE_FLOAT32_C( 760.14) },
      { SIMDE_FLOAT32_C( 944.26), SIMDE_FLOAT32_C( 110.45), SIMDE_FLOAT32_C( -142.85), SIMDE_FLOAT32_C( 45.91),
        SIMDE_FLOAT32_C( 440.56), SIMDE_FLOAT32_C( -560.84), SIMDE_FLOAT32_C( 3.97), SIMDE_FLOAT32_C( 760.14),
        SIMDE_FLOAT32_C( 150.34), SIMDE_FLOAT32_C( 808.69), SIMDE_FLOAT32_C( 407.09), SIMDE_FLOAT32_C( 45.91),
        SIMDE_FLOAT32_C( -335.37), SIMDE_FLOAT32_C( -196.75), SIMDE_FLOAT32_C( 3.97), SIMDE_FLOAT32_C( 308.53) } },
    { { SIMDE_FLOAT32_C( -278.78), SIMDE_FLOAT32_C( 517.15), SIMDE_FLOAT32_C( -283.92), SIMDE_FLOAT32_C( 114.05),
        SIMDE_FLOAT32_C( 798.05), SIMDE_FLOAT32_C( 868.23), SIMDE_FLOAT32_C( 258.92), SIMDE_FLOAT32_C( -367.27),
        SIMDE_FLOAT32_C( -720.23), SIMDE_FLOAT32_C( -836.19), SIMDE_FLOAT32_C( 163.28), SIMDE_FLOAT32_C( 201.97),
        SIMDE_FLOAT32_C( 461.48), SIMDE_FLOAT32_C( 33.48), SIMDE_FLOAT32_C( 752.68), SIMDE_FLOAT32_C( 274.33) },
      UINT16_C( 9614),
      { SIMDE_FLOAT32_C( -353.42), SIMDE_FLOAT32_C( 72.45), SIMDE_FLOAT32_C( -313.79), SIMDE_FLOAT32_C( 54.95),
        SIMDE_FLOAT32_C( -482.32), SIMDE_FLOAT32_C( -268.09), SIMDE_FLOAT32_C( 146.77), SIMDE_FLOAT32_C( 772.72) },
      { SIMDE_FLOAT32_C( -278.78), SIMDE_FLOAT32_C( 72.45), SIMDE_FLOAT32_C( -313.79), SIMDE_FLOAT32_C( 54.95),
        SIMDE_FLOAT32_C( 798.05), SIMDE_FLOAT32_C( 868.23), SIMDE_FLOAT32_C( 258.92), SIMDE_FLOAT32_C( 772.72),
        SIMDE_FLOAT32_C( -353.42), SIMDE_FLOAT32_C( -836.19), SIMDE_FLOAT32_C( -313.79), SIMDE_FLOAT32_C( 201.97),
        SIMDE_FLOAT32_C( 461.48), SIMDE_FLOAT32_C( -268.09), SIMDE_FLOAT32_C( 752.68), SIMDE_FLOAT32_C( 274.33) } },
    { { SIMDE_FLOAT32_C( -894.15), SIMDE_FLOAT32_C( -6.16), SIMDE_FLOAT32_C( 455.15), SIMDE_FLOAT32_C( -216.19),
        SIMDE_FLOAT32_C( 419.21), SIMDE_FLOAT32_C( -283.83), SIMDE_FLOAT32_C( -341.07), SIMDE_FLOAT32_C( -431.79),
        SIMDE_FLOAT32_C( 825.19), SIMDE_FLOAT32_C( -956.94), SIMDE_FLOAT32_C( 688.79), SIMDE_FLOAT32_C( 509.40),
        SIMDE_FLOAT32_C( -511.22), SIMDE_FLOAT32_C( -14.80), SIMDE_FLOAT32_C( -763.30), SIMDE_FLOAT32_C( -769.02) },
      UINT16_C(57357),
      { SIMDE_FLOAT32_C( -152.14), SIMDE_FLOAT32_C( -951.21), SIMDE_FLOAT32_C( 936.35), SIMDE_FLOAT32_C( -713.46),
        SIMDE_FLOAT32_C( 933.97), SIMDE_FLOAT32_C( -738.03), SIMDE_FLOAT32_C( 3.91), SIMDE_FLOAT32_C( -225.68) },
      { SIMDE_FLOAT32_C( -152.14), SIMDE_FLOAT32_C( -6.16), SIMDE_FLOAT32_C( 936.35), SIMDE_FLOAT32_C( -713.46),
        SIMDE_FLOAT32_C( 419.21), SIMDE_FLOAT32_C( -283.83), SIMDE_FLOAT32_C( -341.07), SIMDE_FLOAT32_C( -431.79),
        SIMDE_FLOAT32_C( 825.19), SIMDE_FLOAT32_C( -956.94), SIMDE_FLOAT32_C( 688.79), SIMDE_FLOAT32_C( 509.40),
        SIMDE_FLOAT32_C( -511.22), SIMDE_FLOAT32_C( -738.03), SIMDE_FLOAT32_C( 3.91), SIMDE_FLOAT32_C( -225.68) } },
    { { SIMDE_FLOAT32_C( 958.35), SIMDE_FLOAT32_C( 959.55), SIMDE_FLOAT32_C( -771.84), SIMDE_FLOAT32_C( -312.71),
        SIMDE_FLOAT32_C( 261.02), SIMDE_FLOAT32_C( -965.72), SIMDE_FLOAT32_C( -898.55), SIMDE_FLOAT32_C( 98.86),
        SIMDE_FLOAT32_C( -506.78), SIMDE_FLOAT32_C( 475.13), SIMDE_FLOAT32_C( -561.78), SIMDE_FLOAT32_C( 145.04),
        SIMDE_FLOAT32_C( -310.71), SIMDE_FLOAT32_C( -100.99), SIMDE_FLOAT32_C( 656.93), SIMDE_FLOAT32_C( 955.62) },
      UINT16_C(55637),
      { SIMDE_FLOAT32_C( 64.66), SIMDE_FLOAT32_C( 704.14), SIMDE_FLOAT32_C( 421.81), SIMDE_FLOAT32_C( -620.94),
        SIMDE_FLOAT32_C( -124.06), SIMDE_FLOAT32_C( 858.04), SIMDE_FLOAT32_C( -855.91), SIMDE_FLOAT32_C( 691.15) },
      { SIMDE_FLOAT32_C( 64.66), SIMDE_FLOAT32_C( 959.55), SIMDE_FLOAT32_C( 421.81), SIMDE_FLOAT32_C( -312.71),
        SIMDE_FLOAT32_C( -124.06), SIMDE_FLOAT32_C( -965.72), SIMDE_FLOAT32_C( -855.91), SIMDE_FLOAT32_C( 98.86),
        SIMDE_FLOAT32_C( 64.66), SIMDE_FLOAT32_C( 475.13), SIMDE_FLOAT32_C( -561.78), SIMDE_FLOAT32_C( -620.94),
        SIMDE_FLOAT32_C( -124.06), SIMDE_FLOAT32_C( -100.99), SIMDE_FLOAT32_C( -855.91), SIMDE_FLOAT32_C( 691.15) } },
    { { SIMDE_FLOAT32_C( 165.52), SIMDE_FLOAT32_C( -117.15), SIMDE_FLOAT32_C( -914.50), SIMDE_FLOAT32_C( -48.64),
        SIMDE_FLOAT32_C( 429.74), SIMDE_FLOAT32_C( 612.18), SIMDE_FLOAT32_C( 933.85), SIMDE_FLOAT32_C( -778.14),
        SIMDE_FLOAT32_C( -214.40), SIMDE_FLOAT32_C( 623.77), SIMDE_FLOAT32_C( -288.84), SIMDE_FLOAT32_C( -541.76),
        SIMDE_FLOAT32_C( 699.14), SIMDE_FLOAT32_C( 473.09), SIMDE_FLOAT32_C( -762.45), SIMDE_FLOAT32_C( -518.42) },
      UINT16_C(63181),
      { SIMDE_FLOAT32_C( 188.68), SIMDE_FLOAT32_C( -923.58), SIMDE_FLOAT32_C( -542.98), SIMDE_FLOAT32_C( 193.71),
        SIMDE_FLOAT32_C( -319.51), SIMDE_FLOAT32_C( 46.76), SIMDE_FLOAT32_C( -44.67), SIMDE_FLOAT32_C( -768.90) },
      { SIMDE_FLOAT32_C( 188.68), SIMDE_FLOAT32_C( -117.15), SIMDE_FLOAT32_C( -542.98), SIMDE_FLOAT32_C( 193.71),
        SIMDE_FLOAT32_C( 429.74), SIMDE_FLOAT32_C( 612.18), SIMDE_FLOAT32_C( -44.67), SIMDE_FLOAT32_C( -768.90),
        SIMDE_FLOAT32_C( -214.40), SIMDE_FLOAT32_C( -923.58), SIMDE_FLOAT32_C( -542.98), SIMDE_FLOAT32_C( -541.76),
        SIMDE_FLOAT32_C( -319.51), SIMDE_FLOAT32_C( 46.76), SIMDE_FLOAT32_C( -44.67), SIMDE_FLOAT32_C( -768.90) } },
    { { SIMDE_FLOAT32_C( -857.07), SIMDE_FLOAT32_C( -775.77), SIMDE_FLOAT32_C( -351.82), SIMDE_FLOAT32_C( 984.69),
        SIMDE_FLOAT32_C( -320.14), SIMDE_FLOAT32_C( -636.62), SIMDE_FLOAT32_C( 297.63), SIMDE_FLOAT32_C( 186.04),
        SIMDE_FLOAT32_C( 780.35), SIMDE_FLOAT32_C( -693.20), SIMDE_FLOAT32_C( -589.12), SIMDE_FLOAT32_C( 731.33),
        SIMDE_FLOAT32_C( -601.90), SIMDE_FLOAT32_C( -195.41), SIMDE_FLOAT32_C( -239.98), SIMDE_FLOAT32_C( 675.16) },
      UINT16_C(63687),
      { SIMDE_FLOAT32_C( 751.41), SIMDE_FLOAT32_C( 926.41), SIMDE_FLOAT32_C( 149.18), SIMDE_FLOAT32_C( -662.14),
        SIMDE_FLOAT32_C( -649.07), SIMDE_FLOAT32_C( -858.90), SIMDE_FLOAT32_C( 465.33), SIMDE_FLOAT32_C( 831.66) },
      { SIMDE_FLOAT32_C( 751.41), SIMDE_FLOAT32_C( 926.41), SIMDE_FLOAT32_C( 149.18), SIMDE_FLOAT32_C( 984.69),
        SIMDE_FLOAT32_C( -320.14), SIMDE_FLOAT32_C( -636.62), SIMDE_FLOAT32_C( 465.33), SIMDE_FLOAT32_C( 831.66),
        SIMDE_FLOAT32_C( 780.35), SIMDE_FLOAT32_C( -693.20), SIMDE_FLOAT32_C( -589.12), SIMDE_FLOAT32_C( -662.14),
        SIMDE_FLOAT32_C( -649.07), SIMDE_FLOAT32_C( -858.90), SIMDE_FLOAT32_C( 465.33), SIMDE_FLOAT32_C( 831.66) } }
  };
  /* Number of entries in the table above. */
  const size_t n_vec = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_vec ; ++i) {
    simde__m512 passthru = simde_mm512_loadu_ps(test_vec[i].src);
    simde__m256 input = simde_mm256_loadu_ps(test_vec[i].a);
    simde__m512 r = simde_mm512_mask_broadcast_f32x8(passthru, test_vec[i].k, input);
    simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1);
  }

  return 0;
}
/* Table-driven check of simde_mm512_maskz_broadcast_f32x8.
 *
 * Every table entry carries a mask `k`, an eight-float input `a`, and the
 * sixteen floats `r` that the call is expected to yield.  The function
 * returns 0 once all entries have been run through the comparison; how a
 * mismatch is reported is decided by simde_test_x86_assert_equal_f32x16,
 * which is defined outside this part of the file. */
static int
test_simde_mm512_maskz_broadcast_f32x8 (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    const simde__mmask16 k;
    const simde_float32 a[8];
    const simde_float32 r[16];
  } test_vec[] = {
    { UINT16_C(49062),
      { SIMDE_FLOAT32_C( -67.12), SIMDE_FLOAT32_C( -144.98), SIMDE_FLOAT32_C( -693.09), SIMDE_FLOAT32_C( -717.03),
        SIMDE_FLOAT32_C( 833.33), SIMDE_FLOAT32_C( -297.62), SIMDE_FLOAT32_C( -166.55), SIMDE_FLOAT32_C( 748.74) },
      { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -144.98), SIMDE_FLOAT32_C( -693.09), SIMDE_FLOAT32_C( 0.00),
        SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -297.62), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 748.74),
        SIMDE_FLOAT32_C( -67.12), SIMDE_FLOAT32_C( -144.98), SIMDE_FLOAT32_C( -693.09), SIMDE_FLOAT32_C( -717.03),
        SIMDE_FLOAT32_C( 833.33), SIMDE_FLOAT32_C( -297.62), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 748.74) } },
    { UINT16_C( 6586),
      { SIMDE_FLOAT32_C( -140.91), SIMDE_FLOAT32_C( -189.72), SIMDE_FLOAT32_C( -663.50), SIMDE_FLOAT32_C( 613.12),
        SIMDE_FLOAT32_C( 772.89), SIMDE_FLOAT32_C( -76.35), SIMDE_FLOAT32_C( 859.08), SIMDE_FLOAT32_C( 595.36) },
      { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -189.72), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 613.12),
        SIMDE_FLOAT32_C( 772.89), SIMDE_FLOAT32_C( -76.35), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 595.36),
        SIMDE_FLOAT32_C( -140.91), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 613.12),
        SIMDE_FLOAT32_C( 772.89), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } },
    { UINT16_C(41380),
      { SIMDE_FLOAT32_C( -218.39), SIMDE_FLOAT32_C( -397.45), SIMDE_FLOAT32_C( 20.87), SIMDE_FLOAT32_C( 703.15),
        SIMDE_FLOAT32_C( -126.69), SIMDE_FLOAT32_C( 776.77), SIMDE_FLOAT32_C( -820.00), SIMDE_FLOAT32_C( 252.00) },
      { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 20.87), SIMDE_FLOAT32_C( 0.00),
        SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 776.77), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 252.00),
        SIMDE_FLOAT32_C( -218.39), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00),
        SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 776.77), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 252.00) } },
    { UINT16_C(14746),
      { SIMDE_FLOAT32_C( 488.59), SIMDE_FLOAT32_C( -333.19), SIMDE_FLOAT32_C( 82.99), SIMDE_FLOAT32_C( 818.76),
        SIMDE_FLOAT32_C( 927.98), SIMDE_FLOAT32_C( 586.60), SIMDE_FLOAT32_C( 933.90), SIMDE_FLOAT32_C( 84.47) },
      { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -333.19), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 818.76),
        SIMDE_FLOAT32_C( 927.98), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 84.47),
        SIMDE_FLOAT32_C( 488.59), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 818.76),
        SIMDE_FLOAT32_C( 927.98), SIMDE_FLOAT32_C( 586.60), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00) } },
    { UINT16_C(22430),
      { SIMDE_FLOAT32_C( -788.60), SIMDE_FLOAT32_C( -2.38), SIMDE_FLOAT32_C( -57.26), SIMDE_FLOAT32_C( -363.40),
        SIMDE_FLOAT32_C( 348.91), SIMDE_FLOAT32_C( 172.83), SIMDE_FLOAT32_C( 816.49), SIMDE_FLOAT32_C( 677.29) },
      { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -2.38), SIMDE_FLOAT32_C( -57.26), SIMDE_FLOAT32_C( -363.40),
        SIMDE_FLOAT32_C( 348.91), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 677.29),
        SIMDE_FLOAT32_C( -788.60), SIMDE_FLOAT32_C( -2.38), SIMDE_FLOAT32_C( -57.26), SIMDE_FLOAT32_C( 0.00),
        SIMDE_FLOAT32_C( 348.91), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 816.49), SIMDE_FLOAT32_C( 0.00) } },
    { UINT16_C(53747),
      { SIMDE_FLOAT32_C( -74.10), SIMDE_FLOAT32_C( 628.20), SIMDE_FLOAT32_C( 176.40), SIMDE_FLOAT32_C( 789.58),
        SIMDE_FLOAT32_C( 434.02), SIMDE_FLOAT32_C( 537.30), SIMDE_FLOAT32_C( 360.66), SIMDE_FLOAT32_C( -306.64) },
      { SIMDE_FLOAT32_C( -74.10), SIMDE_FLOAT32_C( 628.20), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00),
        SIMDE_FLOAT32_C( 434.02), SIMDE_FLOAT32_C( 537.30), SIMDE_FLOAT32_C( 360.66), SIMDE_FLOAT32_C( -306.64),
        SIMDE_FLOAT32_C( -74.10), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00),
        SIMDE_FLOAT32_C( 434.02), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 360.66), SIMDE_FLOAT32_C( -306.64) } },
    { UINT16_C(57660),
      { SIMDE_FLOAT32_C( 529.43), SIMDE_FLOAT32_C( 185.72), SIMDE_FLOAT32_C( -666.37), SIMDE_FLOAT32_C( 372.37),
        SIMDE_FLOAT32_C( 420.53), SIMDE_FLOAT32_C( -76.09), SIMDE_FLOAT32_C( -764.18), SIMDE_FLOAT32_C( 472.62) },
      { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -666.37), SIMDE_FLOAT32_C( 372.37),
        SIMDE_FLOAT32_C( 420.53), SIMDE_FLOAT32_C( -76.09), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00),
        SIMDE_FLOAT32_C( 529.43), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00),
        SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -76.09), SIMDE_FLOAT32_C( -764.18), SIMDE_FLOAT32_C( 472.62) } },
    { UINT16_C(60506),
      { SIMDE_FLOAT32_C( -796.21), SIMDE_FLOAT32_C( 148.32), SIMDE_FLOAT32_C( 781.59), SIMDE_FLOAT32_C( 218.77),
        SIMDE_FLOAT32_C( 802.35), SIMDE_FLOAT32_C( -915.03), SIMDE_FLOAT32_C( -953.21), SIMDE_FLOAT32_C( -530.25) },
      { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 148.32), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 218.77),
        SIMDE_FLOAT32_C( 802.35), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -953.21), SIMDE_FLOAT32_C( 0.00),
        SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 781.59), SIMDE_FLOAT32_C( 218.77),
        SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -915.03), SIMDE_FLOAT32_C( -953.21), SIMDE_FLOAT32_C( -530.25) } }
  };
  /* Number of entries in the table above. */
  const size_t n_vec = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_vec ; ++i) {
    simde__m256 input = simde_mm256_loadu_ps(test_vec[i].a);
    simde__m512 r = simde_mm512_maskz_broadcast_f32x8(test_vec[i].k, input);
    simde_test_x86_assert_equal_f32x16(r, simde_mm512_loadu_ps(test_vec[i].r), 1);
  }

  return 0;
}
/* Table-driven check of simde_mm512_broadcast_f64x2.
 *
 * Every table entry carries a two-double input `a` and the eight doubles `r`
 * that the call is expected to yield (in this table `r` is `a` written out
 * four times).  The function returns 0 once all entries have been run through
 * the comparison; how a mismatch is reported is decided by
 * simde_test_x86_assert_equal_f64x8, which is defined outside this part of
 * the file. */
static int
test_simde_mm512_broadcast_f64x2 (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    const simde_float64 a[2];
    const simde_float64 r[8];
  } test_vec[] = {
    { { SIMDE_FLOAT64_C( 715.32), SIMDE_FLOAT64_C( 317.58) },
      { SIMDE_FLOAT64_C( 715.32), SIMDE_FLOAT64_C( 317.58), SIMDE_FLOAT64_C( 715.32), SIMDE_FLOAT64_C( 317.58),
        SIMDE_FLOAT64_C( 715.32), SIMDE_FLOAT64_C( 317.58), SIMDE_FLOAT64_C( 715.32), SIMDE_FLOAT64_C( 317.58) } },
    { { SIMDE_FLOAT64_C( -404.76), SIMDE_FLOAT64_C( -835.80) },
      { SIMDE_FLOAT64_C( -404.76), SIMDE_FLOAT64_C( -835.80), SIMDE_FLOAT64_C( -404.76), SIMDE_FLOAT64_C( -835.80),
        SIMDE_FLOAT64_C( -404.76), SIMDE_FLOAT64_C( -835.80), SIMDE_FLOAT64_C( -404.76), SIMDE_FLOAT64_C( -835.80) } },
    { { SIMDE_FLOAT64_C( 653.97), SIMDE_FLOAT64_C( -774.97) },
      { SIMDE_FLOAT64_C( 653.97), SIMDE_FLOAT64_C( -774.97), SIMDE_FLOAT64_C( 653.97), SIMDE_FLOAT64_C( -774.97),
        SIMDE_FLOAT64_C( 653.97), SIMDE_FLOAT64_C( -774.97), SIMDE_FLOAT64_C( 653.97), SIMDE_FLOAT64_C( -774.97) } },
    { { SIMDE_FLOAT64_C( -843.04), SIMDE_FLOAT64_C( -900.71) },
      { SIMDE_FLOAT64_C( -843.04), SIMDE_FLOAT64_C( -900.71), SIMDE_FLOAT64_C( -843.04), SIMDE_FLOAT64_C( -900.71),
        SIMDE_FLOAT64_C( -843.04), SIMDE_FLOAT64_C( -900.71), SIMDE_FLOAT64_C( -843.04), SIMDE_FLOAT64_C( -900.71) } },
    { { SIMDE_FLOAT64_C( -197.71), SIMDE_FLOAT64_C( -989.91) },
      { SIMDE_FLOAT64_C( -197.71), SIMDE_FLOAT64_C( -989.91), SIMDE_FLOAT64_C( -197.71), SIMDE_FLOAT64_C( -989.91),
        SIMDE_FLOAT64_C( -197.71), SIMDE_FLOAT64_C( -989.91), SIMDE_FLOAT64_C( -197.71), SIMDE_FLOAT64_C( -989.91) } },
    { { SIMDE_FLOAT64_C( 515.43), SIMDE_FLOAT64_C( 879.19) },
      { SIMDE_FLOAT64_C( 515.43), SIMDE_FLOAT64_C( 879.19), SIMDE_FLOAT64_C( 515.43), SIMDE_FLOAT64_C( 879.19),
        SIMDE_FLOAT64_C( 515.43), SIMDE_FLOAT64_C( 879.19), SIMDE_FLOAT64_C( 515.43), SIMDE_FLOAT64_C( 879.19) } },
    { { SIMDE_FLOAT64_C( 610.61), SIMDE_FLOAT64_C( 540.00) },
      { SIMDE_FLOAT64_C( 610.61), SIMDE_FLOAT64_C( 540.00), SIMDE_FLOAT64_C( 610.61), SIMDE_FLOAT64_C( 540.00),
        SIMDE_FLOAT64_C( 610.61), SIMDE_FLOAT64_C( 540.00), SIMDE_FLOAT64_C( 610.61), SIMDE_FLOAT64_C( 540.00) } },
    { { SIMDE_FLOAT64_C( -234.86), SIMDE_FLOAT64_C( 751.29) },
      { SIMDE_FLOAT64_C( -234.86), SIMDE_FLOAT64_C( 751.29), SIMDE_FLOAT64_C( -234.86), SIMDE_FLOAT64_C( 751.29),
        SIMDE_FLOAT64_C( -234.86), SIMDE_FLOAT64_C( 751.29), SIMDE_FLOAT64_C( -234.86), SIMDE_FLOAT64_C( 751.29) } }
  };
  /* Number of entries in the table above. */
  const size_t n_vec = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_vec ; ++i) {
    simde__m128d input = simde_mm_loadu_pd(test_vec[i].a);
    simde__m512d r = simde_mm512_broadcast_f64x2(input);
    simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1);
  }

  return 0;
}
/* Table-driven check of simde_mm512_mask_broadcast_f64x2.
 *
 * Every table entry carries eight doubles `src`, a mask `k`, a two-double
 * input `a`, and the eight doubles `r` that the call
 * simde_mm512_mask_broadcast_f64x2(src, k, a) is expected to yield.  The
 * function returns 0 once all entries have been run through the comparison;
 * how a mismatch is reported is decided by
 * simde_test_x86_assert_equal_f64x8, which is defined outside this part of
 * the file. */
static int
test_simde_mm512_mask_broadcast_f64x2 (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    const simde_float64 src[8];
    const simde__mmask8 k;
    const simde_float64 a[2];
    const simde_float64 r[8];
  } test_vec[] = {
    { { SIMDE_FLOAT64_C( 749.13), SIMDE_FLOAT64_C( 675.39), SIMDE_FLOAT64_C( -739.63), SIMDE_FLOAT64_C( -916.53),
        SIMDE_FLOAT64_C( -70.94), SIMDE_FLOAT64_C( -224.46), SIMDE_FLOAT64_C( -485.72), SIMDE_FLOAT64_C( 433.96) },
      UINT8_C(250),
      { SIMDE_FLOAT64_C( 424.19), SIMDE_FLOAT64_C( -720.98) },
      { SIMDE_FLOAT64_C( 749.13), SIMDE_FLOAT64_C( -720.98), SIMDE_FLOAT64_C( -739.63), SIMDE_FLOAT64_C( -720.98),
        SIMDE_FLOAT64_C( 424.19), SIMDE_FLOAT64_C( -720.98), SIMDE_FLOAT64_C( 424.19), SIMDE_FLOAT64_C( -720.98) } },
    { { SIMDE_FLOAT64_C( 461.33), SIMDE_FLOAT64_C( -402.24), SIMDE_FLOAT64_C( -437.75), SIMDE_FLOAT64_C( 785.96),
        SIMDE_FLOAT64_C( -372.46), SIMDE_FLOAT64_C( 110.74), SIMDE_FLOAT64_C( -831.39), SIMDE_FLOAT64_C( 846.99) },
      UINT8_C( 78),
      { SIMDE_FLOAT64_C( -572.48), SIMDE_FLOAT64_C( 394.61) },
      { SIMDE_FLOAT64_C( 461.33), SIMDE_FLOAT64_C( 394.61), SIMDE_FLOAT64_C( -572.48), SIMDE_FLOAT64_C( 394.61),
        SIMDE_FLOAT64_C( -372.46), SIMDE_FLOAT64_C( 110.74), SIMDE_FLOAT64_C( -572.48), SIMDE_FLOAT64_C( 846.99) } },
    { { SIMDE_FLOAT64_C( 215.35), SIMDE_FLOAT64_C( -616.54), SIMDE_FLOAT64_C( -262.30), SIMDE_FLOAT64_C( -426.39),
        SIMDE_FLOAT64_C( -336.22), SIMDE_FLOAT64_C( -839.02), SIMDE_FLOAT64_C( 672.49), SIMDE_FLOAT64_C( 589.70) },
      UINT8_C(163),
      { SIMDE_FLOAT64_C( -982.23), SIMDE_FLOAT64_C( -416.77) },
      { SIMDE_FLOAT64_C( -982.23), SIMDE_FLOAT64_C( -416.77), SIMDE_FLOAT64_C( -262.30), SIMDE_FLOAT64_C( -426.39),
        SIMDE_FLOAT64_C( -336.22), SIMDE_FLOAT64_C( -416.77), SIMDE_FLOAT64_C( 672.49), SIMDE_FLOAT64_C( -416.77) } },
    { { SIMDE_FLOAT64_C( -578.35), SIMDE_FLOAT64_C( -267.73), SIMDE_FLOAT64_C( 242.90), SIMDE_FLOAT64_C( 449.74),
        SIMDE_FLOAT64_C( 714.62), SIMDE_FLOAT64_C( 671.90), SIMDE_FLOAT64_C( 577.25), SIMDE_FLOAT64_C( -88.86) },
      UINT8_C(222),
      { SIMDE_FLOAT64_C( 379.16), SIMDE_FLOAT64_C( 573.95) },
      { SIMDE_FLOAT64_C( -578.35), SIMDE_FLOAT64_C( 573.95), SIMDE_FLOAT64_C( 379.16), SIMDE_FLOAT64_C( 573.95),
        SIMDE_FLOAT64_C( 379.16), SIMDE_FLOAT64_C( 671.90), SIMDE_FLOAT64_C( 379.16), SIMDE_FLOAT64_C( 573.95) } },
    { { SIMDE_FLOAT64_C( 428.10), SIMDE_FLOAT64_C( -969.60), SIMDE_FLOAT64_C( -117.58), SIMDE_FLOAT64_C( -121.88),
        SIMDE_FLOAT64_C( -513.12), SIMDE_FLOAT64_C( -67.52), SIMDE_FLOAT64_C( -880.81), SIMDE_FLOAT64_C( 257.25) },
      UINT8_C( 35),
      { SIMDE_FLOAT64_C( -71.92), SIMDE_FLOAT64_C( -682.64) },
      { SIMDE_FLOAT64_C( -71.92), SIMDE_FLOAT64_C( -682.64), SIMDE_FLOAT64_C( -117.58), SIMDE_FLOAT64_C( -121.88),
        SIMDE_FLOAT64_C( -513.12), SIMDE_FLOAT64_C( -682.64), SIMDE_FLOAT64_C( -880.81), SIMDE_FLOAT64_C( 257.25) } },
    { { SIMDE_FLOAT64_C( 858.06), SIMDE_FLOAT64_C( -576.56), SIMDE_FLOAT64_C( -199.04), SIMDE_FLOAT64_C( 741.89),
        SIMDE_FLOAT64_C( 940.66), SIMDE_FLOAT64_C( -320.73), SIMDE_FLOAT64_C( -519.45), SIMDE_FLOAT64_C( -359.73) },
      UINT8_C( 14),
      { SIMDE_FLOAT64_C( -260.24), SIMDE_FLOAT64_C( 150.09) },
      { SIMDE_FLOAT64_C( 858.06), SIMDE_FLOAT64_C( 150.09), SIMDE_FLOAT64_C( -260.24), SIMDE_FLOAT64_C( 150.09),
        SIMDE_FLOAT64_C( 940.66), SIMDE_FLOAT64_C( -320.73), SIMDE_FLOAT64_C( -519.45), SIMDE_FLOAT64_C( -359.73) } },
    { { SIMDE_FLOAT64_C( 508.76), SIMDE_FLOAT64_C( 671.76), SIMDE_FLOAT64_C( 188.22), SIMDE_FLOAT64_C( -524.84),
        SIMDE_FLOAT64_C( 958.74), SIMDE_FLOAT64_C( -408.21), SIMDE_FLOAT64_C( -756.34), SIMDE_FLOAT64_C( 260.63) },
      UINT8_C( 48),
      { SIMDE_FLOAT64_C( -287.86), SIMDE_FLOAT64_C( -66.95) },
      { SIMDE_FLOAT64_C( 508.76), SIMDE_FLOAT64_C( 671.76), SIMDE_FLOAT64_C( 188.22), SIMDE_FLOAT64_C( -524.84),
        SIMDE_FLOAT64_C( -287.86), SIMDE_FLOAT64_C( -66.95), SIMDE_FLOAT64_C( -756.34), SIMDE_FLOAT64_C( 260.63) } },
    { { SIMDE_FLOAT64_C( 741.62), SIMDE_FLOAT64_C( 389.31), SIMDE_FLOAT64_C( -806.05), SIMDE_FLOAT64_C( 761.48),
        SIMDE_FLOAT64_C( 242.55), SIMDE_FLOAT64_C( 550.14), SIMDE_FLOAT64_C( 214.54), SIMDE_FLOAT64_C( -176.03) },
      UINT8_C( 79),
      { SIMDE_FLOAT64_C( 639.90), SIMDE_FLOAT64_C( 881.52) },
      { SIMDE_FLOAT64_C( 639.90), SIMDE_FLOAT64_C( 881.52), SIMDE_FLOAT64_C( 639.90), SIMDE_FLOAT64_C( 881.52),
        SIMDE_FLOAT64_C( 242.55), SIMDE_FLOAT64_C( 550.14), SIMDE_FLOAT64_C( 639.90), SIMDE_FLOAT64_C( -176.03) } }
  };
  /* Number of entries in the table above. */
  const size_t n_vec = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_vec ; ++i) {
    simde__m512d passthru = simde_mm512_loadu_pd(test_vec[i].src);
    simde__m128d input = simde_mm_loadu_pd(test_vec[i].a);
    simde__m512d r = simde_mm512_mask_broadcast_f64x2(passthru, test_vec[i].k, input);
    simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1);
  }

  return 0;
}
/* Table-driven check of simde_mm512_maskz_broadcast_f64x2.
 *
 * Every row carries an 8-bit mask `k`, a two-element input `a`, and the
 * eight-element result `r` the call is expected to produce.  Throughout the
 * table, r[n] equals a[n % 2] when bit n of `k` is set and 0.00 when that
 * bit is clear.
 *
 * Returns 0 after all rows have been compared. */
static int
test_simde_mm512_maskz_broadcast_f64x2 (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    const simde__mmask8 k;
    const simde_float64 a[2];
    const simde_float64 r[8];
  } test_vec[] = {
    { UINT8_C( 32),
      { SIMDE_FLOAT64_C( 95.43), SIMDE_FLOAT64_C( -111.80) },
      { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00),
        SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -111.80), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } },
    { UINT8_C(212),
      { SIMDE_FLOAT64_C( 159.26), SIMDE_FLOAT64_C( 721.63) },
      { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 159.26), SIMDE_FLOAT64_C( 0.00),
        SIMDE_FLOAT64_C( 159.26), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 159.26), SIMDE_FLOAT64_C( 721.63) } },
    { UINT8_C(232),
      { SIMDE_FLOAT64_C( -41.02), SIMDE_FLOAT64_C( 592.81) },
      { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 592.81),
        SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 592.81), SIMDE_FLOAT64_C( -41.02), SIMDE_FLOAT64_C( 592.81) } },
    { UINT8_C(112),
      { SIMDE_FLOAT64_C( 80.26), SIMDE_FLOAT64_C( 969.51) },
      { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00),
        SIMDE_FLOAT64_C( 80.26), SIMDE_FLOAT64_C( 969.51), SIMDE_FLOAT64_C( 80.26), SIMDE_FLOAT64_C( 0.00) } },
    { UINT8_C(215),
      { SIMDE_FLOAT64_C( 905.16), SIMDE_FLOAT64_C( -968.55) },
      { SIMDE_FLOAT64_C( 905.16), SIMDE_FLOAT64_C( -968.55), SIMDE_FLOAT64_C( 905.16), SIMDE_FLOAT64_C( 0.00),
        SIMDE_FLOAT64_C( 905.16), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 905.16), SIMDE_FLOAT64_C( -968.55) } },
    { UINT8_C(135),
      { SIMDE_FLOAT64_C( 140.43), SIMDE_FLOAT64_C( 267.82) },
      { SIMDE_FLOAT64_C( 140.43), SIMDE_FLOAT64_C( 267.82), SIMDE_FLOAT64_C( 140.43), SIMDE_FLOAT64_C( 0.00),
        SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 267.82) } },
    { UINT8_C(192),
      { SIMDE_FLOAT64_C( -853.88), SIMDE_FLOAT64_C( 811.68) },
      { SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00),
        SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( -853.88), SIMDE_FLOAT64_C( 811.68) } },
    { UINT8_C( 17),
      { SIMDE_FLOAT64_C( -661.24), SIMDE_FLOAT64_C( 561.84) },
      { SIMDE_FLOAT64_C( -661.24), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00),
        SIMDE_FLOAT64_C( -661.24), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00), SIMDE_FLOAT64_C( 0.00) } }
  };
  const size_t n_vec = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_vec ; i++) {
    const simde__mmask8 mask = test_vec[i].k;
    simde__m128d input = simde_mm_loadu_pd(test_vec[i].a);
    simde__m512d r = simde_mm512_maskz_broadcast_f64x2(mask, input);

    /* The argument expressions are left exactly as they were: the assertion
     * helper is defined outside this part of the file and may echo them in
     * its failure output.  The trailing 1 is presumably a precision setting
     * -- confirm against the helper's definition. */
    simde_test_x86_assert_equal_f64x8(r, simde_mm512_loadu_pd(test_vec[i].r), 1);
  }

  return 0;
}
/* Table-driven check of simde_mm256_broadcast_f32x4.
 *
 * Every row carries a four-element input `a` and the eight-element result
 * `r` the call is expected to produce.  Throughout the table, `r` is simply
 * the four values of `a` written out twice in a row.
 *
 * Returns 0 after all rows have been compared. */
static int
test_simde_mm256_broadcast_f32x4 (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    const simde_float32 a[4];
    const simde_float32 r[8];
  } test_vec[] = {
    { { SIMDE_FLOAT32_C( -558.39), SIMDE_FLOAT32_C( -943.50), SIMDE_FLOAT32_C( 652.52), SIMDE_FLOAT32_C( 945.52) },
      { SIMDE_FLOAT32_C( -558.39), SIMDE_FLOAT32_C( -943.50), SIMDE_FLOAT32_C( 652.52), SIMDE_FLOAT32_C( 945.52),
        SIMDE_FLOAT32_C( -558.39), SIMDE_FLOAT32_C( -943.50), SIMDE_FLOAT32_C( 652.52), SIMDE_FLOAT32_C( 945.52) } },
    { { SIMDE_FLOAT32_C( -577.06), SIMDE_FLOAT32_C( -623.59), SIMDE_FLOAT32_C( -742.48), SIMDE_FLOAT32_C( -807.52) },
      { SIMDE_FLOAT32_C( -577.06), SIMDE_FLOAT32_C( -623.59), SIMDE_FLOAT32_C( -742.48), SIMDE_FLOAT32_C( -807.52),
        SIMDE_FLOAT32_C( -577.06), SIMDE_FLOAT32_C( -623.59), SIMDE_FLOAT32_C( -742.48), SIMDE_FLOAT32_C( -807.52) } },
    { { SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( -222.35), SIMDE_FLOAT32_C( 965.41), SIMDE_FLOAT32_C( -320.94) },
      { SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( -222.35), SIMDE_FLOAT32_C( 965.41), SIMDE_FLOAT32_C( -320.94),
        SIMDE_FLOAT32_C( 0.46), SIMDE_FLOAT32_C( -222.35), SIMDE_FLOAT32_C( 965.41), SIMDE_FLOAT32_C( -320.94) } },
    { { SIMDE_FLOAT32_C( 34.85), SIMDE_FLOAT32_C( -238.64), SIMDE_FLOAT32_C( -834.61), SIMDE_FLOAT32_C( 763.48) },
      { SIMDE_FLOAT32_C( 34.85), SIMDE_FLOAT32_C( -238.64), SIMDE_FLOAT32_C( -834.61), SIMDE_FLOAT32_C( 763.48),
        SIMDE_FLOAT32_C( 34.85), SIMDE_FLOAT32_C( -238.64), SIMDE_FLOAT32_C( -834.61), SIMDE_FLOAT32_C( 763.48) } },
    { { SIMDE_FLOAT32_C( -215.99), SIMDE_FLOAT32_C( -214.29), SIMDE_FLOAT32_C( 432.66), SIMDE_FLOAT32_C( -222.94) },
      { SIMDE_FLOAT32_C( -215.99), SIMDE_FLOAT32_C( -214.29), SIMDE_FLOAT32_C( 432.66), SIMDE_FLOAT32_C( -222.94),
        SIMDE_FLOAT32_C( -215.99), SIMDE_FLOAT32_C( -214.29), SIMDE_FLOAT32_C( 432.66), SIMDE_FLOAT32_C( -222.94) } },
    { { SIMDE_FLOAT32_C( -994.85), SIMDE_FLOAT32_C( -413.17), SIMDE_FLOAT32_C( -100.86), SIMDE_FLOAT32_C( 836.37) },
      { SIMDE_FLOAT32_C( -994.85), SIMDE_FLOAT32_C( -413.17), SIMDE_FLOAT32_C( -100.86), SIMDE_FLOAT32_C( 836.37),
        SIMDE_FLOAT32_C( -994.85), SIMDE_FLOAT32_C( -413.17), SIMDE_FLOAT32_C( -100.86), SIMDE_FLOAT32_C( 836.37) } },
    { { SIMDE_FLOAT32_C( 809.63), SIMDE_FLOAT32_C( -520.84), SIMDE_FLOAT32_C( 265.00), SIMDE_FLOAT32_C( -111.67) },
      { SIMDE_FLOAT32_C( 809.63), SIMDE_FLOAT32_C( -520.84), SIMDE_FLOAT32_C( 265.00), SIMDE_FLOAT32_C( -111.67),
        SIMDE_FLOAT32_C( 809.63), SIMDE_FLOAT32_C( -520.84), SIMDE_FLOAT32_C( 265.00), SIMDE_FLOAT32_C( -111.67) } },
    { { SIMDE_FLOAT32_C( -855.41), SIMDE_FLOAT32_C( -875.73), SIMDE_FLOAT32_C( -447.77), SIMDE_FLOAT32_C( 263.25) },
      { SIMDE_FLOAT32_C( -855.41), SIMDE_FLOAT32_C( -875.73), SIMDE_FLOAT32_C( -447.77), SIMDE_FLOAT32_C( 263.25),
        SIMDE_FLOAT32_C( -855.41), SIMDE_FLOAT32_C( -875.73), SIMDE_FLOAT32_C( -447.77), SIMDE_FLOAT32_C( 263.25) } }
  };
  const size_t n_vec = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_vec ; i++) {
    simde__m128 input = simde_mm_loadu_ps(test_vec[i].a);
    simde__m256 r = simde_mm256_broadcast_f32x4(input);

    /* The argument expressions are left exactly as they were: the assertion
     * helper is defined outside this part of the file and may echo them in
     * its failure output.  The trailing 1 is presumably a precision setting
     * -- confirm against the helper's definition. */
    simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1);
  }

  return 0;
}
/* Table-driven check of simde_mm256_mask_broadcast_f32x4.
 *
 * Every row carries an eight-element fallback vector `src`, an 8-bit mask
 * `k`, a four-element input `a`, and the eight-element result `r` the call
 * is expected to produce.  Throughout the table, r[n] equals a[n % 4] when
 * bit n of `k` is set and src[n] when that bit is clear.
 *
 * Returns 0 after all rows have been compared. */
static int
test_simde_mm256_mask_broadcast_f32x4 (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    const simde_float32 src[8];
    const simde__mmask8 k;
    const simde_float32 a[4];
    const simde_float32 r[8];
  } test_vec[] = {
    { { SIMDE_FLOAT32_C( 113.43), SIMDE_FLOAT32_C( 410.75), SIMDE_FLOAT32_C( -451.88), SIMDE_FLOAT32_C( -725.60),
        SIMDE_FLOAT32_C( 921.94), SIMDE_FLOAT32_C( -987.53), SIMDE_FLOAT32_C( 590.45), SIMDE_FLOAT32_C( -298.17) },
      UINT8_C( 50),
      { SIMDE_FLOAT32_C( 44.31), SIMDE_FLOAT32_C( 797.52), SIMDE_FLOAT32_C( -107.60), SIMDE_FLOAT32_C( -484.17) },
      { SIMDE_FLOAT32_C( 113.43), SIMDE_FLOAT32_C( 797.52), SIMDE_FLOAT32_C( -451.88), SIMDE_FLOAT32_C( -725.60),
        SIMDE_FLOAT32_C( 44.31), SIMDE_FLOAT32_C( 797.52), SIMDE_FLOAT32_C( 590.45), SIMDE_FLOAT32_C( -298.17) } },
    { { SIMDE_FLOAT32_C( 556.86), SIMDE_FLOAT32_C( -797.02), SIMDE_FLOAT32_C( 402.24), SIMDE_FLOAT32_C( 441.25),
        SIMDE_FLOAT32_C( 142.97), SIMDE_FLOAT32_C( 883.64), SIMDE_FLOAT32_C( -635.48), SIMDE_FLOAT32_C( -488.89) },
      UINT8_C(165),
      { SIMDE_FLOAT32_C( -333.03), SIMDE_FLOAT32_C( 703.87), SIMDE_FLOAT32_C( -69.82), SIMDE_FLOAT32_C( 527.07) },
      { SIMDE_FLOAT32_C( -333.03), SIMDE_FLOAT32_C( -797.02), SIMDE_FLOAT32_C( -69.82), SIMDE_FLOAT32_C( 441.25),
        SIMDE_FLOAT32_C( 142.97), SIMDE_FLOAT32_C( 703.87), SIMDE_FLOAT32_C( -635.48), SIMDE_FLOAT32_C( 527.07) } },
    { { SIMDE_FLOAT32_C( 425.48), SIMDE_FLOAT32_C( 960.83), SIMDE_FLOAT32_C( 698.87), SIMDE_FLOAT32_C( -175.48),
        SIMDE_FLOAT32_C( 789.83), SIMDE_FLOAT32_C( 633.19), SIMDE_FLOAT32_C( 85.22), SIMDE_FLOAT32_C( 351.45) },
      UINT8_C(206),
      { SIMDE_FLOAT32_C( 362.09), SIMDE_FLOAT32_C( -387.94), SIMDE_FLOAT32_C( -58.09), SIMDE_FLOAT32_C( -381.37) },
      { SIMDE_FLOAT32_C( 425.48), SIMDE_FLOAT32_C( -387.94), SIMDE_FLOAT32_C( -58.09), SIMDE_FLOAT32_C( -381.37),
        SIMDE_FLOAT32_C( 789.83), SIMDE_FLOAT32_C( 633.19), SIMDE_FLOAT32_C( -58.09), SIMDE_FLOAT32_C( -381.37) } },
    { { SIMDE_FLOAT32_C( 385.81), SIMDE_FLOAT32_C( 368.14), SIMDE_FLOAT32_C( -607.80), SIMDE_FLOAT32_C( 623.02),
        SIMDE_FLOAT32_C( -955.44), SIMDE_FLOAT32_C( -138.05), SIMDE_FLOAT32_C( -245.78), SIMDE_FLOAT32_C( -750.22) },
      UINT8_C(110),
      { SIMDE_FLOAT32_C( 548.54), SIMDE_FLOAT32_C( -618.32), SIMDE_FLOAT32_C( -113.43), SIMDE_FLOAT32_C( -437.94) },
      { SIMDE_FLOAT32_C( 385.81), SIMDE_FLOAT32_C( -618.32), SIMDE_FLOAT32_C( -113.43), SIMDE_FLOAT32_C( -437.94),
        SIMDE_FLOAT32_C( -955.44), SIMDE_FLOAT32_C( -618.32), SIMDE_FLOAT32_C( -113.43), SIMDE_FLOAT32_C( -750.22) } },
    { { SIMDE_FLOAT32_C( -510.40), SIMDE_FLOAT32_C( -247.29), SIMDE_FLOAT32_C( -272.50), SIMDE_FLOAT32_C( 154.15),
        SIMDE_FLOAT32_C( 745.34), SIMDE_FLOAT32_C( 865.17), SIMDE_FLOAT32_C( 893.80), SIMDE_FLOAT32_C( 79.97) },
      UINT8_C(108),
      { SIMDE_FLOAT32_C( -178.61), SIMDE_FLOAT32_C( 31.69), SIMDE_FLOAT32_C( 669.52), SIMDE_FLOAT32_C( 693.51) },
      { SIMDE_FLOAT32_C( -510.40), SIMDE_FLOAT32_C( -247.29), SIMDE_FLOAT32_C( 669.52), SIMDE_FLOAT32_C( 693.51),
        SIMDE_FLOAT32_C( 745.34), SIMDE_FLOAT32_C( 31.69), SIMDE_FLOAT32_C( 669.52), SIMDE_FLOAT32_C( 79.97) } },
    { { SIMDE_FLOAT32_C( -127.96), SIMDE_FLOAT32_C( -619.72), SIMDE_FLOAT32_C( 284.07), SIMDE_FLOAT32_C( 372.86),
        SIMDE_FLOAT32_C( 649.51), SIMDE_FLOAT32_C( 278.96), SIMDE_FLOAT32_C( 407.00), SIMDE_FLOAT32_C( 484.63) },
      UINT8_C( 35),
      { SIMDE_FLOAT32_C( -266.56), SIMDE_FLOAT32_C( -110.85), SIMDE_FLOAT32_C( -976.05), SIMDE_FLOAT32_C( -446.86) },
      { SIMDE_FLOAT32_C( -266.56), SIMDE_FLOAT32_C( -110.85), SIMDE_FLOAT32_C( 284.07), SIMDE_FLOAT32_C( 372.86),
        SIMDE_FLOAT32_C( 649.51), SIMDE_FLOAT32_C( -110.85), SIMDE_FLOAT32_C( 407.00), SIMDE_FLOAT32_C( 484.63) } },
    { { SIMDE_FLOAT32_C( -413.34), SIMDE_FLOAT32_C( 993.71), SIMDE_FLOAT32_C( -725.95), SIMDE_FLOAT32_C( 912.24),
        SIMDE_FLOAT32_C( 38.79), SIMDE_FLOAT32_C( -113.15), SIMDE_FLOAT32_C( 355.83), SIMDE_FLOAT32_C( 489.44) },
      UINT8_C(174),
      { SIMDE_FLOAT32_C( 271.71), SIMDE_FLOAT32_C( 611.34), SIMDE_FLOAT32_C( 750.31), SIMDE_FLOAT32_C( 445.31) },
      { SIMDE_FLOAT32_C( -413.34), SIMDE_FLOAT32_C( 611.34), SIMDE_FLOAT32_C( 750.31), SIMDE_FLOAT32_C( 445.31),
        SIMDE_FLOAT32_C( 38.79), SIMDE_FLOAT32_C( 611.34), SIMDE_FLOAT32_C( 355.83), SIMDE_FLOAT32_C( 445.31) } },
    { { SIMDE_FLOAT32_C( 394.72), SIMDE_FLOAT32_C( -2.71), SIMDE_FLOAT32_C( 433.21), SIMDE_FLOAT32_C( 979.88),
        SIMDE_FLOAT32_C( 870.25), SIMDE_FLOAT32_C( 239.46), SIMDE_FLOAT32_C( 664.36), SIMDE_FLOAT32_C( -21.11) },
      UINT8_C(236),
      { SIMDE_FLOAT32_C( 20.21), SIMDE_FLOAT32_C( -364.92), SIMDE_FLOAT32_C( 870.25), SIMDE_FLOAT32_C( 218.91) },
      { SIMDE_FLOAT32_C( 394.72), SIMDE_FLOAT32_C( -2.71), SIMDE_FLOAT32_C( 870.25), SIMDE_FLOAT32_C( 218.91),
        SIMDE_FLOAT32_C( 870.25), SIMDE_FLOAT32_C( -364.92), SIMDE_FLOAT32_C( 870.25), SIMDE_FLOAT32_C( 218.91) } }
  };
  const size_t n_vec = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_vec ; i++) {
    const simde__mmask8 mask = test_vec[i].k;
    simde__m256 fallback = simde_mm256_loadu_ps(test_vec[i].src);
    simde__m128 input = simde_mm_loadu_ps(test_vec[i].a);
    simde__m256 r = simde_mm256_mask_broadcast_f32x4(fallback, mask, input);

    /* The argument expressions are left exactly as they were: the assertion
     * helper is defined outside this part of the file and may echo them in
     * its failure output.  The trailing 1 is presumably a precision setting
     * -- confirm against the helper's definition. */
    simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1);
  }

  return 0;
}
/* Table-driven check of simde_mm256_maskz_broadcast_f32x4.
 *
 * Every row carries an 8-bit mask `k`, a four-element input `a`, and the
 * eight-element result `r` the call is expected to produce.  Throughout the
 * table, r[n] equals a[n % 4] when bit n of `k` is set and 0.00 when that
 * bit is clear.
 *
 * Returns 0 after all rows have been compared. */
static int
test_simde_mm256_maskz_broadcast_f32x4 (SIMDE_MUNIT_TEST_ARGS) {
  static const struct {
    const simde__mmask8 k;
    const simde_float32 a[4];
    const simde_float32 r[8];
  } test_vec[] = {
    { UINT8_C(233),
      { SIMDE_FLOAT32_C( 749.31), SIMDE_FLOAT32_C( -425.85), SIMDE_FLOAT32_C( 752.50), SIMDE_FLOAT32_C( -794.87) },
      { SIMDE_FLOAT32_C( 749.31), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -794.87),
        SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -425.85), SIMDE_FLOAT32_C( 752.50), SIMDE_FLOAT32_C( -794.87) } },
    { UINT8_C(237),
      { SIMDE_FLOAT32_C( 236.00), SIMDE_FLOAT32_C( 493.54), SIMDE_FLOAT32_C( -992.91), SIMDE_FLOAT32_C( 213.78) },
      { SIMDE_FLOAT32_C( 236.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -992.91), SIMDE_FLOAT32_C( 213.78),
        SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 493.54), SIMDE_FLOAT32_C( -992.91), SIMDE_FLOAT32_C( 213.78) } },
    { UINT8_C(229),
      { SIMDE_FLOAT32_C( 572.59), SIMDE_FLOAT32_C( -505.20), SIMDE_FLOAT32_C( -888.69), SIMDE_FLOAT32_C( -168.99) },
      { SIMDE_FLOAT32_C( 572.59), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -888.69), SIMDE_FLOAT32_C( 0.00),
        SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -505.20), SIMDE_FLOAT32_C( -888.69), SIMDE_FLOAT32_C( -168.99) } },
    { UINT8_C(115),
      { SIMDE_FLOAT32_C( 961.78), SIMDE_FLOAT32_C( 587.15), SIMDE_FLOAT32_C( 162.08), SIMDE_FLOAT32_C( 131.99) },
      { SIMDE_FLOAT32_C( 961.78), SIMDE_FLOAT32_C( 587.15), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00),
        SIMDE_FLOAT32_C( 961.78), SIMDE_FLOAT32_C( 587.15), SIMDE_FLOAT32_C( 162.08), SIMDE_FLOAT32_C( 0.00) } },
    { UINT8_C(107),
      { SIMDE_FLOAT32_C( 722.82), SIMDE_FLOAT32_C( 519.77), SIMDE_FLOAT32_C( -160.36), SIMDE_FLOAT32_C( 908.34) },
      { SIMDE_FLOAT32_C( 722.82), SIMDE_FLOAT32_C( 519.77), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 908.34),
        SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 519.77), SIMDE_FLOAT32_C( -160.36), SIMDE_FLOAT32_C( 0.00) } },
    { UINT8_C(148),
      { SIMDE_FLOAT32_C( 251.18), SIMDE_FLOAT32_C( -347.86), SIMDE_FLOAT32_C( -514.92), SIMDE_FLOAT32_C( -206.57) },
      { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -514.92), SIMDE_FLOAT32_C( 0.00),
        SIMDE_FLOAT32_C( 251.18), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -206.57) } },
    { UINT8_C(156),
      { SIMDE_FLOAT32_C( 874.47), SIMDE_FLOAT32_C( -711.75), SIMDE_FLOAT32_C( -458.03), SIMDE_FLOAT32_C( -188.74) },
      { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -458.03), SIMDE_FLOAT32_C( -188.74),
        SIMDE_FLOAT32_C( 874.47), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -188.74) } },
    { UINT8_C( 78),
      { SIMDE_FLOAT32_C( -804.36), SIMDE_FLOAT32_C( -844.65), SIMDE_FLOAT32_C( -82.05), SIMDE_FLOAT32_C( -986.67) },
      { SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -844.65), SIMDE_FLOAT32_C( -82.05), SIMDE_FLOAT32_C( -986.67),
        SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( 0.00), SIMDE_FLOAT32_C( -82.05), SIMDE_FLOAT32_C( 0.00) } }
  };
  const size_t n_vec = sizeof(test_vec) / sizeof(test_vec[0]);

  for (size_t i = 0 ; i < n_vec ; i++) {
    const simde__mmask8 mask = test_vec[i].k;
    simde__m128 input = simde_mm_loadu_ps(test_vec[i].a);
    simde__m256 r = simde_mm256_maskz_broadcast_f32x4(mask, input);

    /* The argument expressions are left exactly as they were: the assertion
     * helper is defined outside this part of the file and may echo them in
     * its failure output.  The trailing 1 is presumably a precision setting
     * -- confirm against the helper's definition. */
    simde_test_x86_assert_equal_f32x8(r, simde_mm256_loadu_ps(test_vec[i].r), 1);
  }

  return 0;
}
static int
test_simde_mm256_broadcast_f64x2 (SIMDE_MUNIT_TEST_ARGS) {
static const struct {
const simde_float64 a[2];
const simde_float64 r[4];
} test_vec[] = {
{ { SIMDE_FLOAT64_C( -818.06), SIMDE_FLOAT64_C( 862.04) },
{ SIMDE_FLOAT64_C( -818.06), SIMDE_FLOAT64_C( 862.04), SIMDE_FLOAT64_C( -818.06), SIMDE_FLOAT64_C( 862.04) } },
{ { SIMDE_FLOAT64_C( 251.28), SIMDE_FLOAT64_C( -807.49) },
{ SIMDE_FLOAT64_C( 251.28), SIMDE_FLOAT64_C( -807.49), SIMDE_FLOAT64_C( 251.28), SIMDE_FLOAT64_C( -807.49) } },
{ { SIMDE_FLOAT64_C( 489.47), SIMDE_FLOAT64_C( 521.73) },
{ SIMDE_FLOAT64_C( 489.47), SIMDE_FLOAT64_C( 521.73), SIMDE_FLOAT64_C( 489.47), SIMDE_FLOAT64_C( 521.73) } },
{ { SIMDE_FLOAT64_C( 697.15), SIMDE_FLOAT64_C( -943.39) },
{ SIMDE_FLOAT64_C( 697.15), SIMDE_FLOAT64_C( -943.39), SIMDE_FLOAT64_C( 697.15), SIMDE_FLOAT64_C( -943.39) } },
{ { SIMDE_FLOAT64_C( 397.38), SIMDE_FLOAT64_C( 769.24) },
{ SIMDE_FLOAT64_C( 397.38), SIMDE_FLOAT64_C( 769.24), SIMDE_FLOAT64_C( 397.38), SIMDE_FLOAT64_C( 769.24) } },
{ { SIMDE_FLOAT64_C( 607.10), SIMDE_FLOAT64_C( -411.28) },
{ SIMDE_FLOAT64_C( 607.10), SIMDE_FLOAT64_C( -411.28), SIMDE_FLOAT64_C( 607.10), SIMDE_FLOAT64_C( -411.28) } },
{ { SIMDE_FLOAT64_C( -417.96), SIMDE_FLOAT64_C( -732.77) },
{ SIMDE_FLOAT64_C( -417.96), SIMDE_FLOAT64_C( -732.77), SIMDE_FLOAT64_C( -417.96), SIMDE_FLOAT64_C( -732.77) } },