/
rbp2-001-add-raspberrypi5-support.patch
12318 lines (11547 loc) · 523 KB
/
rbp2-001-add-raspberrypi5-support.patch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
diff --git a/include/drm-uapi/v3d_drm.h b/include/drm-uapi/v3d_drm.h
index 3dfc0af8756..1a7d7a689de 100644
--- a/include/drm-uapi/v3d_drm.h
+++ b/include/drm-uapi/v3d_drm.h
@@ -319,6 +319,11 @@ struct drm_v3d_submit_tfu {
/* Pointer to an array of ioctl extensions*/
__u64 extensions;
+
+ struct {
+ __u32 ioc;
+ __u32 pad;
+ } v71;
};
/* Submits a compute shader for dispatch. This job will block on any
diff --git a/src/broadcom/cle/meson.build b/src/broadcom/cle/meson.build
index 31a0d5bfa94..8ac32b313e4 100644
--- a/src/broadcom/cle/meson.build
+++ b/src/broadcom/cle/meson.build
@@ -23,7 +23,8 @@ v3d_versions = [
[21, 21],
[33, 33],
[41, 33],
- [42, 33]
+ [42, 33],
+ [71, 33]
]
v3d_xml_files = []
diff --git a/src/broadcom/cle/v3d_packet_v33.xml b/src/broadcom/cle/v3d_packet_v33.xml
index a0242b5f1c2..624353ca2bf 100644
--- a/src/broadcom/cle/v3d_packet_v33.xml
+++ b/src/broadcom/cle/v3d_packet_v33.xml
@@ -1,4 +1,4 @@
-<vcxml gen="3.3" min_ver="33" max_ver="42">
+<vcxml gen="3.3" min_ver="33" max_ver="71">
<enum name="Compare Function" prefix="V3D_COMPARE_FUNC">
<value name="NEVER" value="0"/>
@@ -167,13 +167,36 @@
<value name="depth_16" value="2"/>
</enum>
- <enum name="Render Target Clamp" prefix="V3D_RENDER_TARGET_CLAMP" min_ver="41">
+ <enum name="Render Target Clamp" prefix="V3D_RENDER_TARGET_CLAMP" min_ver="41" max_ver="42">
<value name="none" value="0"/> <!-- no clamping -->
<value name="norm" value="1"/> <!-- [0,1] for f16 -->
<value name="pos" value="2"/> <!-- [0, for f16 -->
<value name="int" value="3" min_ver="42"/> <!-- clamp to integer RT's range -->
</enum>
+ <enum name="Render Target Type Clamp" prefix="V3D_RENDER_TARGET_TYPE_CLAMP" min_ver="71">
+ <value name="8i" value="0"/> <!-- no clamping -->
+ <value name="16i" value="1"/> <!-- no clamping -->
+ <value name="32i" value="2"/> <!-- no clamping -->
+ <value name="8ui" value="4"/> <!-- no clamping -->
+ <value name="16ui" value="5"/> <!-- no clamping -->
+ <value name="32ui" value="6"/> <!-- no clamping -->
+ <value name="8" value="8"/> <!-- no clamping -->
+ <value name="16f" value="9"/> <!-- no clamping -->
+ <value name="32f" value="10"/> <!-- no clamping -->
+ <value name="8i_clamped" value="16"/> <!-- clamp to integer RT's range -->
+ <value name="16i_clamped" value="17"/> <!-- clamp to integer RT's range -->
+ <value name="32i_clamped" value="18"/> <!-- clamp to integer RT's range -->
+ <value name="8ui_clamped" value="20"/> <!-- clamp to integer RT's range -->
+ <value name="16ui_clamped" value="21"/> <!-- clamp to integer RT's range -->
+ <value name="32ui_clamped" value="22"/> <!-- clamp to integer RT's range -->
+ <value name="16f_clamp_norm" value="24"/> <!-- [0,1] for f16 -->
+ <value name="16f_clamp_pos" value="25"/> <!-- [0, for f16 -->
+ <value name="16f_clamp_pq" value="26"/> <!-- PQ lin range, colour to [0, 125], alpha to [0, 1] for f16 -->
+ <value name="16f_clamp_hlg" value="27"/> <!-- HLG lin range, colour to [0, 12], alpha to [0, 1] for f16 -->
+ <value name="invalid" value="32"/>
+ </enum>
+
<!---
CL cache flush commands are not fully documented and subject to a
number of hardware issues that make them unreliable. Specifically:
@@ -263,13 +286,27 @@
<value name="r8ui" value="36"/>
<value name="srgbx8" value="37" max_ver="33"/>
<value name="rgbx8" value="38" max_ver="33"/>
- <value name="bstc" value="39" min_ver="41"/>
+ <value name="bstc8" value="39" min_ver="41"/>
<value name="d32f" value="40" min_ver="41"/>
<value name="d24" value="41" min_ver="41"/>
<value name="d16" value="42" min_ver="41"/>
<value name="d24s8" value="43" min_ver="41"/>
<value name="s8" value="44" min_ver="41"/>
<value name="rgba5551" value="45" min_ver="41"/>
+ <value name="bstc8_srgb" value="46" min_ver="71"/>
+ <value name="bstc10" value="47" min_ver="71"/>
+ <value name="bstc10_srgb" value="48" min_ver="71"/>
+ <value name="bstc10_pq" value="49" min_ver="71"/>
+ <value name="rgba10x6" value="50" min_ver="71"/>
+ <value name="bstc10_hlg" value="55" min_ver="71"/>
+ <value name="rgba10x6_hlg" value="56" min_ver="71"/>
+ <value name="rgb10_a2_hlg" value="57" min_ver="71"/>
+ <value name="bstc10_pq_bt1886" value="58" min_ver="71"/>
+ <value name="rgba10x6_pq_bt1886" value="59" min_ver="71"/>
+ <value name="rgb10_a2_pq_bt1886" value="60" min_ver="71"/>
+ <value name="bstc10_hlg_bt1886" value="61" min_ver="71"/>
+ <value name="rgba10x6_hlg_bt1886" value="62" min_ver="71"/>
+ <value name="rgb10_a2_hlg_bt1886" value="63" min_ver="71"/>
</enum>
<enum name="Z/S Output Image Format" prefix="V3D_OUTPUT_IMAGE_FORMAT_ZS" max_ver="33">
@@ -314,6 +351,12 @@
<value name="perp end caps" value="1"/>
</enum>
+ <enum name="Z Clip Mode" prefix="V3D_Z_CLIP_MODE">
+ <value name="NONE" value="0"/>
+ <value name="MIN_ONE_TO_ONE" value="1"/>
+ <value name="ZERO_TO_ONE" value="2"/>
+ </enum>
+
<packet code="0" name="Halt"/>
<packet code="1" name="NOP"/>
<packet code="4" name="Flush"/>
@@ -381,11 +424,13 @@
<field name="Last Tile of Frame" size="1" start="0" type="bool"/>
</packet>
- <packet code="25" shortname="clear" name="Clear Tile Buffers" cl="R" min_ver="41">
+ <packet code="25" shortname="clear" name="Clear Tile Buffers" cl="R" min_ver="41" max_ver="42">
<field name="Clear Z/Stencil Buffer" size="1" start="1" type="bool"/>
<field name="Clear all Render Targets" size="1" start="0" type="bool"/>
</packet>
+ <packet code="25" shortname="clear_rt" name="Clear Render Targets" cl="R" min_ver="71"/>
+
<packet code="26" shortname="load" name="Reload Tile Color Buffer" cl="R" max_ver="33">
<field name="Disable Color Buffer load" size="8" start="8" type="uint"/>
<field name="Enable Z load" size="1" start="7" type="bool"/>
@@ -443,6 +488,10 @@
<value name="Render target 1" value="1"/>
<value name="Render target 2" value="2"/>
<value name="Render target 3" value="3"/>
+ <value name="Render target 4" value="4" min_ver="71"/>
+ <value name="Render target 5" value="5" min_ver="71"/>
+ <value name="Render target 6" value="6" min_ver="71"/>
+ <value name="Render target 7" value="7" min_ver="71"/>
<value name="None" value="8"/>
<value name="Z" value="9"/>
<value name="Stencil" value="10"/>
@@ -789,7 +838,7 @@
<field name="Alpha blend mode" size="4" start="0" type="Blend Mode"/>
</packet>
- <packet code="84" name="Blend Cfg" min_ver="41">
+ <packet code="84" name="Blend Cfg" min_ver="41" max_ver="42">
<field name="Render Target Mask" size="4" start="24" type="uint"/>
<field name="Color blend dst factor" size="4" start="20" type="Blend Factor"/>
<field name="Color blend src factor" size="4" start="16" type="Blend Factor"/>
@@ -799,6 +848,16 @@
<field name="Alpha blend mode" size="4" start="0" type="Blend Mode"/>
</packet>
+ <packet code="84" name="Blend Cfg" min_ver="71">
+ <field name="Render Target Mask" size="8" start="24" type="uint"/>
+ <field name="Color blend dst factor" size="4" start="20" type="Blend Factor"/>
+ <field name="Color blend src factor" size="4" start="16" type="Blend Factor"/>
+ <field name="Color blend mode" size="4" start="12" type="Blend Mode"/>
+ <field name="Alpha blend dst factor" size="4" start="8" type="Blend Factor"/>
+ <field name="Alpha blend src factor" size="4" start="4" type="Blend Factor"/>
+ <field name="Alpha blend mode" size="4" start="0" type="Blend Mode"/>
+ </packet>
+
<packet code="86" shortname="blend_ccolor" name="Blend Constant Color">
<field name="Alpha (F16)" size="16" start="48" type="uint"/>
<field name="Blue (F16)" size="16" start="32" type="uint"/>
@@ -828,7 +887,12 @@
<field name="address" size="32" start="0" type="address"/>
</packet>
- <packet code="96" name="Cfg Bits">
+ <packet code="93" name="Depth Bounds Test Limits" min_ver="71">
+ <field name="Lower Test Limit" size="32" start="0" type="float"/>
+ <field name="Upper Test Limit" size="32" start="32" type="float"/>
+ </packet>
+
+ <packet code="96" name="Cfg Bits" max_ver="42">
<field name="Direct3D Provoking Vertex" size="1" start="21" type="bool"/>
<field name="Direct3D 'Point-fill' mode" size="1" start="20" type="bool"/>
<field name="Blend enable" size="1" start="19" type="bool"/>
@@ -846,6 +910,25 @@
<field name="Enable Forward Facing Primitive" size="1" start="0" type="bool"/>
</packet>
+ <packet code="96" name="Cfg Bits" min_ver="71">
+ <field name="Z Clipping mode" size="2" start="22" type="Z Clip Mode"/>
+ <field name="Direct3D Provoking Vertex" size="1" start="21" type="bool"/>
+ <field name="Direct3D 'Point-fill' mode" size="1" start="20" type="bool"/>
+ <field name="Blend enable" size="1" start="19" type="bool"/>
+ <field name="Stencil enable" size="1" start="18" type="bool"/>
+ <field name="Z updates enable" size="1" start="15" type="bool"/>
+ <field name="Depth-Test Function" size="3" start="12" type="Compare Function"/>
+ <field name="Direct3D Wireframe triangles mode" size="1" start="11" type="bool"/>
+ <field name="Z Clamp Mode" size="1" start="10" type="bool"/>
+ <field name="Rasterizer Oversample Mode" size="2" start="6" type="uint"/>
+ <field name="Depth Bounds Test Enable" size="1" start="5" type="bool"/>
+ <field name="Line Rasterization" size="1" start="4" type="uint"/>
+ <field name="Enable Depth Offset" size="1" start="3" type="bool"/>
+ <field name="Clockwise Primitives" size="1" start="2" type="bool"/>
+ <field name="Enable Reverse Facing Primitive" size="1" start="1" type="bool"/>
+ <field name="Enable Forward Facing Primitive" size="1" start="0" type="bool"/>
+ </packet>
+
<packet code="97" shortname="zero_all_flatshade_flags" name="Zero All Flat Shade Flags"/>
<packet code="98" shortname="flatshade_flags" name="Flat Shade Flags">
@@ -907,16 +990,26 @@
<field name="Minimum Zw" size="32" start="0" type="float"/>
</packet>
- <packet shortname="clipper_xy" name="Clipper XY Scaling" code="110" cl="B">
+ <packet shortname="clipper_xy" name="Clipper XY Scaling" code="110" cl="B" max_ver="42">
<field name="Viewport Half-Height in 1/256th of pixel" size="32" start="32" type="float"/>
<field name="Viewport Half-Width in 1/256th of pixel" size="32" start="0" type="float"/>
</packet>
+ <packet shortname="clipper_xy" name="Clipper XY Scaling" code="110" cl="B" min_ver="71">
+ <field name="Viewport Half-Height in 1/64th of pixel" size="32" start="32" type="float"/>
+ <field name="Viewport Half-Width in 1/64th of pixel" size="32" start="0" type="float"/>
+ </packet>
+
<packet shortname="clipper_z" name="Clipper Z Scale and Offset" code="111" cl="B">
<field name="Viewport Z Offset (Zc to Zs)" size="32" start="32" type="float"/>
<field name="Viewport Z Scale (Zc to Zs)" size="32" start="0" type="float"/>
</packet>
+ <packet shortname="clipper_z_no_guardband" name="Clipper Z Scale and Offset no guardband" code="112" cl="B" min_ver="71">
+ <field name="Viewport Z Offset (Zc to Zs)" size="32" start="32" type="float"/>
+ <field name="Viewport Z Scale (Zc to Zs)" size="32" start="0" type="float"/>
+ </packet>
+
<packet name="Number of Layers" code="119" min_ver="41">
<field name="Number of Layers" size="8" start="0" type="uint" minus_one="true"/>
</packet>
@@ -947,7 +1040,7 @@
<field name="sub-id" size="1" start="0" type="uint" default="0"/>
</packet>
- <packet code="120" name="Tile Binning Mode Cfg" min_ver="41">
+ <packet code="120" name="Tile Binning Mode Cfg" min_ver="41" max_ver="42">
<field name="Height (in pixels)" size="16" start="48" type="uint" minus_one="true"/>
<field name="Width (in pixels)" size="16" start="32" type="uint" minus_one="true"/>
@@ -971,6 +1064,35 @@
</field>
</packet>
+ <packet code="120" name="Tile Binning Mode Cfg" min_ver="71">
+ <field name="Height (in pixels)" size="16" start="48" type="uint" minus_one="true"/>
+ <field name="Width (in pixels)" size="16" start="32" type="uint" minus_one="true"/>
+
+ <field name="Log2 Tile Height" size="3" start="11" type="uint">
+ <value name="tile height 8 pixels" value="0"/>
+ <value name="tile height 16 pixels" value="1"/>
+ <value name="tile height 32 pixels" value="2"/>
+ <value name="tile height 64 pixels" value="3"/>
+ </field>
+ <field name="Log2 Tile Width" size="3" start="8" type="uint">
+ <value name="tile width 8 pixels" value="0"/>
+ <value name="tile width 16 pixels" value="1"/>
+ <value name="tile width 32 pixels" value="2"/>
+ <value name="tile width 64 pixels" value="3"/>
+ </field>
+
+ <field name="tile allocation block size" size="2" start="4" type="uint">
+ <value name="tile allocation block size 64b" value="0"/>
+ <value name="tile allocation block size 128b" value="1"/>
+ <value name="tile allocation block size 256b" value="2"/>
+ </field>
+ <field name="tile allocation initial block size" size="2" start="2" type="uint">
+ <value name="tile allocation initial block size 64b" value="0"/>
+ <value name="tile allocation initial block size 128b" value="1"/>
+ <value name="tile allocation initial block size 256b" value="2"/>
+ </field>
+ </packet>
+
<packet code="120" name="Tile Binning Mode Cfg (Part2)" cl="B" max_ver="33">
<field name="Tile Allocation Memory Address" size="32" start="32" type="address"/>
<field name="Tile Allocation Memory Size" size="32" start="0" type="uint"/>
@@ -1002,7 +1124,7 @@
<field name="sub-id" size="4" start="0" type="uint" default="0"/>
</packet>
- <packet code="121" name="Tile Rendering Mode Cfg (Common)" cl="R" min_ver="41">
+ <packet code="121" name="Tile Rendering Mode Cfg (Common)" cl="R" min_ver="41" max_ver="42">
<field name="Pad" size="12" start="52" type="uint"/>
<field name="Early Depth/Stencil Clear" size="1" start="51" type="bool"/>
@@ -1018,7 +1140,11 @@
<field name="Double-buffer in non-ms mode" size="1" start="43" type="bool"/>
<field name="Multisample Mode (4x)" size="1" start="42" type="bool"/>
- <field name="Maximum BPP of all render targets" size="2" start="40" type="Internal BPP"/>
+ <field name="Maximum BPP of all render targets" size="2" start="40" type="Internal BPP">
+ <value name="Render target maximum 32bpp" value="0"/>
+ <value name="Render target maximum 64bpp" value="1"/>
+ <value name="Render target maximum 128bpp" value="2"/>
+ </field>
<field name="Image Height (pixels)" size="16" start="24" type="uint"/>
<field name="Image Width (pixels)" size="16" start="8" type="uint"/>
@@ -1027,6 +1153,43 @@
<field name="sub-id" size="4" start="0" type="uint" default="0"/>
</packet>
+ <packet code="121" name="Tile Rendering Mode Cfg (Common)" cl="R" min_ver="71">
+ <field name="Pad" size="6" start="58" type="uint"/>
+
+ <field name="Log2 Tile Height" size="3" start="55" type="uint">
+ <value name="tile height 8 pixels" value="0"/>
+ <value name="tile height 16 pixels" value="1"/>
+ <value name="tile height 32 pixels" value="2"/>
+ <value name="tile height 64 pixels" value="3"/>
+ </field>
+ <field name="Log2 Tile Width" size="3" start="52" type="uint">
+ <value name="tile width 8 pixels" value="0"/>
+ <value name="tile width 16 pixels" value="1"/>
+ <value name="tile width 32 pixels" value="2"/>
+ <value name="tile width 64 pixels" value="3"/>
+ </field>
+
+ <field name="Early Depth/Stencil Clear" size="1" start="51" type="bool"/>
+ <field name="Internal Depth Type" size="4" start="47" type="Internal Depth Type"/>
+
+ <field name="Early-Z disable" size="1" start="46" type="bool"/>
+
+ <field name="Early-Z Test and Update Direction" size="1" start="45" type="uint">
+ <value name="Early-Z direction LT/LE" value="0"/>
+ <value name="Early-Z direction GT/GE" value="1"/>
+ </field>
+
+ <field name="Depth-buffer disable" size="1" start="44" type="bool"/>
+ <field name="Double-buffer in non-ms mode" size="1" start="43" type="bool"/>
+ <field name="Multisample Mode (4x)" size="1" start="42" type="bool"/>
+
+ <field name="Image Height (pixels)" size="16" start="24" type="uint"/>
+ <field name="Image Width (pixels)" size="16" start="8" type="uint"/>
+ <field name="Number of Render Targets" size="4" start="4" type="uint" minus_one="true"/>
+
+ <field name="sub-id" size="3" start="0" type="uint" default="0"/>
+ </packet>
+
<packet code="121" name="Tile Rendering Mode Cfg (Color)" cl="R" max_ver="33">
<field name="Address" size="32" start="32" type="address"/>
@@ -1048,7 +1211,8 @@
<field name="sub-id" size="4" start="0" type="uint" default="2"/>
</packet>
- <packet code="121" name="Tile Rendering Mode Cfg (Color)" cl="R" min_ver="41">
+ <!-- On 4.1 the real name would be "Tile Rendering Mode Cfg (Render Target Configs)" -->
+ <packet code="121" name="Tile Rendering Mode Cfg (Color)" cl="R" min_ver="41" max_ver="42">
<field name="Pad" size="28" start="36" type="uint"/>
@@ -1099,7 +1263,7 @@
<field name="sub-id" size="4" start="0" type="uint" default="3"/>
</packet>
- <packet code="121" name="Tile Rendering Mode Cfg (ZS Clear Values)" cl="R" min_ver="41">
+ <packet code="121" name="Tile Rendering Mode Cfg (ZS Clear Values)" cl="R" min_ver="41" max_ver="42">
<field name="unused" size="16" start="48" type="uint"/>
<field name="Z Clear Value" size="32" start="16" type="float"/>
@@ -1108,6 +1272,15 @@
<field name="sub-id" size="4" start="0" type="uint" default="2"/>
</packet>
+ <packet code="121" name="Tile Rendering Mode Cfg (ZS Clear Values)" cl="R" min_ver="71">
+ <field name="unused" size="16" start="48" type="uint"/>
+
+ <field name="Z Clear Value" size="32" start="16" type="float"/>
+
+ <field name="Stencil Clear Value" size="8" start="8" type="uint"/>
+ <field name="sub-id" size="4" start="0" type="uint" default="1"/>
+ </packet>
+
<packet code="121" name="Tile Rendering Mode Cfg (Clear Colors Part1)" cl="R" max_ver="33">
<!-- Express this as a 56-bit field? -->
<field name="Clear Color next 24 bits" size="24" start="40" type="uint"/>
@@ -1117,7 +1290,7 @@
<field name="sub-id" size="4" start="0" type="uint" default="4"/>
</packet>
- <packet code="121" name="Tile Rendering Mode Cfg (Clear Colors Part1)" cl="R" min_ver="41">
+ <packet code="121" name="Tile Rendering Mode Cfg (Clear Colors Part1)" cl="R" min_ver="41" max_ver="42">
<!-- Express this as a 56-bit field? -->
<field name="Clear Color next 24 bits" size="24" start="40" type="uint"/>
<field name="Clear Color low 32 bits" size="32" start="8" type="uint"/>
@@ -1126,6 +1299,19 @@
<field name="sub-id" size="4" start="0" type="uint" default="3"/>
</packet>
+ <packet code="121" name="Tile Rendering Mode Cfg (Render Target Part1)" cl="R" min_ver="71">
+
+ <field name="Clear Color low bits" size="32" start="32" type="uint"/>
+ <field name="Internal Type and Clamping" size="5" start="27" type="Render Target Type Clamp"/>
+ <field name="Internal BPP" size="2" start="25" type="Internal BPP"/>
+
+ <field name="Stride" size="7" start="18" type="uint" minus_one="true"/>
+ <!-- In multiples of 512 bits -->
+ <field name="Base Address" size="11" start="7" type="uint"/>
+ <field name="Render Target number" size="3" start="3" type="uint"/>
+ <field name="sub-id" size="3" start="0" type="uint" default="2"/>
+ </packet>
+
<packet code="121" name="Tile Rendering Mode Cfg (Clear Colors Part2)" cl="R" max_ver="33">
<!-- Express this as a 56-bit field? -->
<field name="Clear Color mid-high 24 bits" size="24" start="40" type="uint"/>
@@ -1135,7 +1321,7 @@
<field name="sub-id" size="4" start="0" type="uint" default="5"/>
</packet>
- <packet code="121" name="Tile Rendering Mode Cfg (Clear Colors Part2)" cl="R" min_ver="41">
+ <packet code="121" name="Tile Rendering Mode Cfg (Clear Colors Part2)" cl="R" min_ver="41" max_ver="42">
<!-- Express this as a 56-bit field? -->
<field name="Clear Color mid-high 24 bits" size="24" start="40" type="uint"/>
<field name="Clear Color mid-low 32 bits" size="32" start="8" type="uint"/>
@@ -1144,6 +1330,13 @@
<field name="sub-id" size="4" start="0" type="uint" default="4"/>
</packet>
+ <packet code="121" name="Tile Rendering Mode Cfg (Render Target Part2)" cl="R" min_ver="71">
+ <field name="Clear Color mid bits" size="40" start="24" type="uint"/>
+
+ <field name="Render Target number" size="3" start="3" type="uint"/>
+ <field name="sub-id" size="3" start="0" type="uint" default="3"/>
+ </packet>
+
<packet code="121" name="Tile Rendering Mode Cfg (Clear Colors Part3)" cl="R" max_ver="33">
<field name="pad" size="11" start="53" type="uint"/>
<field name="UIF padded height in UIF blocks" size="13" start="40" type="uint"/>
@@ -1155,7 +1348,7 @@
<field name="sub-id" size="4" start="0" type="uint" default="6"/>
</packet>
- <packet code="121" name="Tile Rendering Mode Cfg (Clear Colors Part3)" cl="R" min_ver="41">
+ <packet code="121" name="Tile Rendering Mode Cfg (Clear Colors Part3)" cl="R" min_ver="41" max_ver="42">
<field name="pad" size="11" start="53" type="uint"/>
<field name="UIF padded height in UIF blocks" size="13" start="40" type="uint"/>
<!-- image height is for Y flipping -->
@@ -1166,6 +1359,13 @@
<field name="sub-id" size="4" start="0" type="uint" default="5"/>
</packet>
+ <packet code="121" name="Tile Rendering Mode Cfg (Render Target Part3)" cl="R" min_ver="71">
+ <field name="Clear Color top bits" size="56" start="8" type="uint"/>
+
+ <field name="Render Target number" size="3" start="3" type="uint"/>
+ <field name="sub-id" size="3" start="0" type="uint" default="4"/>
+ </packet>
+
<packet code="124" shortname="tile_coords" name="Tile Coordinates">
<field name="tile row number" size="12" start="12" type="uint"/>
<field name="tile column number" size="12" start="0" type="uint"/>
@@ -1240,7 +1440,7 @@
<field name="Coordinate Shader Uniforms Address" size="32" start="32b" type="address"/>
</struct>
- <struct name="GL Shader State Record" min_ver="41">
+ <struct name="GL Shader State Record" min_ver="41" max_ver="42">
<field name="Point size in shaded vertex data" size="1" start="0" type="bool"/>
<field name="Enable clipping" size="1" start="1" type="bool"/>
@@ -1299,6 +1499,63 @@
<field name="Coordinate Shader Uniforms Address" size="32" start="32b" type="address"/>
</struct>
+ <struct name="GL Shader State Record" min_ver="71">
+ <field name="Point size in shaded vertex data" size="1" start="0" type="bool"/>
+ <field name="Enable clipping" size="1" start="1" type="bool"/>
+
+ <field name="Vertex ID read by coordinate shader" size="1" start="2" type="bool"/>
+ <field name="Instance ID read by coordinate shader" size="1" start="3" type="bool"/>
+ <field name="Base Instance ID read by coordinate shader" size="1" start="4" type="bool"/>
+ <field name="Vertex ID read by vertex shader" size="1" start="5" type="bool"/>
+ <field name="Instance ID read by vertex shader" size="1" start="6" type="bool"/>
+ <field name="Base Instance ID read by vertex shader" size="1" start="7" type="bool"/>
+
+ <field name="Fragment shader does Z writes" size="1" start="8" type="bool"/>
+ <field name="Turn off early-z test" size="1" start="9" type="bool"/>
+
+ <field name="Fragment shader uses real pixel centre W in addition to centroid W2" size="1" start="12" type="bool"/>
+ <field name="Enable Sample Rate Shading" size="1" start="13" type="bool"/>
+ <field name="Any shader reads hardware-written Primitive ID" size="1" start="14" type="bool"/>
+ <field name="Insert Primitive ID as first varying to fragment shader" size="1" start="15" type="bool"/>
+ <field name="Turn off scoreboard" size="1" start="16" type="bool"/>
+ <field name="Do scoreboard wait on first thread switch" size="1" start="17" type="bool"/>
+ <field name="Disable implicit point/line varyings" size="1" start="18" type="bool"/>
+ <field name="No prim pack" size="1" start="19" type="bool"/>
+ <field name="Never defer FEP depth writes" size="1" start="20" type="bool"/>
+
+ <field name="Number of varyings in Fragment Shader" size="8" start="3b" type="uint"/>
+
+ <field name="Coordinate Shader output VPM segment size" size="4" start="4b" type="uint"/>
+ <field name="Min Coord Shader output segments required in play in addition to VCM cache size" size="4" start="36" type="uint"/>
+
+ <field name="Coordinate Shader input VPM segment size" size="4" start="5b" type="uint"/>
+ <field name="Min Coord Shader input segments required in play" size="4" start="44" type="uint" minus_one="true"/>
+
+ <field name="Vertex Shader output VPM segment size" size="4" start="6b" type="uint"/>
+ <field name="Min Vertex Shader output segments required in play in addition to VCM cache size" size="4" start="52" type="uint"/>
+
+ <field name="Vertex Shader input VPM segment size" size="4" start="7b" type="uint"/>
+ <field name="Min Vertex Shader input segments required in play" size="4" start="60" type="uint" minus_one="true"/>
+
+ <field name="Fragment Shader Code Address" size="29" start="67" type="address"/>
+ <field name="Fragment Shader 4-way threadable" size="1" start="64" type="bool"/>
+ <field name="Fragment Shader start in final thread section" size="1" start="65" type="bool"/>
+ <field name="Fragment Shader Propagate NaNs" size="1" start="66" type="bool"/>
+ <field name="Fragment Shader Uniforms Address" size="32" start="12b" type="address"/>
+
+ <field name="Vertex Shader Code Address" size="29" start="131" type="address"/>
+ <field name="Vertex Shader 4-way threadable" size="1" start="128" type="bool"/>
+ <field name="Vertex Shader start in final thread section" size="1" start="129" type="bool"/>
+ <field name="Vertex Shader Propagate NaNs" size="1" start="130" type="bool"/>
+ <field name="Vertex Shader Uniforms Address" size="32" start="20b" type="address"/>
+
+ <field name="Coordinate Shader Code Address" size="29" start="195" type="address"/>
+ <field name="Coordinate Shader 4-way threadable" size="1" start="192" type="bool"/>
+ <field name="Coordinate Shader start in final thread section" size="1" start="193" type="bool"/>
+ <field name="Coordinate Shader Propagate NaNs" size="1" start="194" type="bool"/>
+ <field name="Coordinate Shader Uniforms Address" size="32" start="28b" type="address"/>
+ </struct>
+
<struct name="Geometry Shader State Record" min_ver="41">
<field name="Geometry Bin Mode Shader Code Address" size="29" start="3" type="address"/>
<field name="Geometry Bin Mode Shader 4-way threadable" size="1" start="0" type="bool"/>
@@ -1543,7 +1800,7 @@
<field name="Offset Format 8" size="1" start="0" type="bool"/>
</struct>
- <struct name="TMU Config Parameter 2" min_ver="42">
+ <struct name="TMU Config Parameter 2" min_ver="42" max_ver="42">
<field name="Pad" size="7" start="25" type="uint"/>
<field name="LOD Query" size="1" start="24" type="bool"/>
<field name="Op" size="4" start="20" type="TMU Op"/>
@@ -1558,6 +1815,23 @@
<field name="Offset Format 8" size="1" start="0" type="bool"/>
</struct>
+ <struct name="TMU Config Parameter 2" min_ver="71">
+ <field name="Pad" size="5" start="27" type="uint"/>
+ <field name="Write conversion" size="1" start="26" type="bool"/>
+ <field name="DIM query" size="1" start="25" type="bool"/>
+ <field name="LOD Query" size="1" start="24" type="bool"/>
+ <field name="Op" size="4" start="20" type="TMU Op"/>
+ <field name="Offset R" size="4" start="16" type="int"/>
+ <field name="Offset T" size="4" start="12" type="int"/>
+ <field name="Offset S" size="4" start="8" type="int"/>
+ <field name="Gather Mode" size="1" start="7" type="bool"/>
+ <field name="Gather Component" size="2" start="5" type="uint"/>
+ <field name="Coefficient Mode" size="1" start="4" type="bool"/>
+ <field name="Sample Number" size="2" start="2" type="uint"/>
+ <field name="Disable AutoLOD" size="1" start="1" type="bool"/>
+ <field name="Offset Format 8" size="1" start="0" type="bool"/>
+ </struct>
+
<struct name="Texture Shader State" max_ver="33">
<field name="UIF XOR disable" size="1" start="255" type="bool"/>
<field name="Level 0 is strictly UIF" size="1" start="254" type="bool"/>
@@ -1611,7 +1885,7 @@
<field name="Filter" size="4" start="0" type="TMU Filter"/>
</struct>
- <struct name="Texture Shader State" min_ver="41">
+ <struct name="Texture Shader State" min_ver="41" max_ver="42">
<field name="Pad" size="56" start="136" type="uint"/>
<field name="UIF XOR disable" size="1" start="135" type="bool"/>
<field name="Level 0 is strictly UIF" size="1" start="134" type="bool"/>
@@ -1652,6 +1926,82 @@
<field name="Flip texture X Axis" size="1" start="0" type="bool"/>
</struct>
+ <struct name="Texture Shader State" min_ver="71">
+ <field name="Pad" size="2" start="190" type="uint"/>
+ <!-- When we use an address type, there is an implicit requirement
+ that the address is a 32-bit value that is encoded starting at a 32-bit
+ aligned bit offset into the packet. If the address field has less than
+ 32 bits, it is assumed that the address is aligned. For example, a
+ 26-bit address field is expected to be 64-byte aligned (6 lsb bits
+ are 0) and that this will be encoded into a packet starting at bit
+ offset 6 into a 32-bit dword (since bits 0..5 of the address are
+ implicitly 0 and don't need to be explicitly encoded).
+
+ Unfortunately, the CB address below doesn't match this requirement:
+ it starts at bit 138, which is 10 bits into a 32-bit dword, but it
+ represents a 64-byte aligned address (6 lsb bits are 0), so we cannot
+ encode it as an address type. To fix this we encode these addresses
+ as uint types which has two implications:
+ 1. the driver is responsible for manually adding the buffer objects
+ for these addresses to the job BO list.
+ 2. the driver needs to pass an actual 26-bit address value by manually
+ shifting the 6 lsb bits (that are implicitly 0).
+ -->
+ <field name="texture_base pointer_Cr" size="26" start="164" type="uint"/>
+ <field name="texture base pointer Cb" size="26" start="138" type="uint"/>
+ <field name="Chroma offset y" size="1" start="137" type="uint"/>
+ <field name="Chroma offset x" size="1" start="136" type="uint"/>
+
+ <field name="UIF XOR disable" size="1" start="135" type="bool"/>
+ <field name="Level 0 is strictly UIF" size="1" start="134" type="bool"/>
+ <field name="Level 0 XOR enable" size="1" start="132" type="bool"/>
+ <field name="Level 0 UB_PAD" size="4" start="128" type="uint"/>
+
+ <field name="Base Level" size="4" start="124" type="uint"/>
+ <field name="Max Level" size="4" start="120" type="uint"/>
+
+ <field name="Swizzle A" size="3" start="117" type="uint">
+ <value name="Swizzle Zero" value="0"/>
+ <value name="Swizzle One" value="1"/>
+ <value name="Swizzle Red" value="2"/>
+ <value name="Swizzle Green" value="3"/>
+ <value name="Swizzle Blue" value="4"/>
+ <value name="Swizzle Alpha" value="5"/>
+ </field>
+
+ <field name="Swizzle B" size="3" start="114" type="uint"/>
+ <field name="Swizzle G" size="3" start="111" type="uint"/>
+ <field name="Swizzle R" size="3" start="108" type="uint"/>
+ <field name="Extended" size="1" start="107" type="bool"/>
+
+ <field name="Texture type" size="7" start="100" type="uint"/>
+ <field name="Image Depth" size="14" start="86" type="uint"/>
+ <field name="Image Height" size="14" start="72" type="uint"/>
+ <field name="Image Width" size="14" start="58" type="uint"/>
+
+ <!-- V3D 7.1.2 doesn't have the RB swap bit and has Array Stride starting
+ at bit 32. However, 7.1.5 included the RB swap bit at bit 32 and has
+ Array Stride starting at 33, which is backwards incompatible.
+ We use the definition from 7.1.5.
+ -->
+ <field name="Array Stride (64-byte aligned)" size="24" start="33" type="uint"/>
+ <field name="R/B swap" size="1" start="32" type="bool"/>
+
+ <field name="Texture base pointer" size="32" start="0" type="address"/>
+
+ <field name="Reverse" size="1" start="5" type="bool"/>
+ <field name="Transfer func" size="3" start="2" type="uint">
+ <value name="Transfer Func None" value="0"/>
+ <value name="Transfer Func sRGB" value="1"/>
+ <value name="Transfer Func PQ" value="2"/>
+ <value name="Transfer Func HLG" value="3"/>
+ <value name="Transfer Func PQ BT1886" value="4"/>
+ <value name="Transfer Func HLG BT1886" value="5"/>
+ </field>
+ <field name="Flip texture Y Axis" size="1" start="1" type="bool"/>
+ <field name="Flip texture X Axis" size="1" start="0" type="bool"/>
+ </struct>
+
<struct name="Sampler State" min_ver="41">
<field name="Border color word 3" size="32" start="160" type="uint"/>
<field name="Border color word 2" size="32" start="128" type="uint"/>
diff --git a/src/broadcom/cle/v3dx_pack.h b/src/broadcom/cle/v3dx_pack.h
index 5762e5aaa70..e5a1eb26698 100644
--- a/src/broadcom/cle/v3dx_pack.h
+++ b/src/broadcom/cle/v3dx_pack.h
@@ -37,6 +37,8 @@
# include "cle/v3d_packet_v41_pack.h"
#elif (V3D_VERSION == 42)
# include "cle/v3d_packet_v42_pack.h"
+#elif (V3D_VERSION == 71)
+# include "cle/v3d_packet_v71_pack.h"
#else
# error "Need to add a pack header include for this v3d version"
#endif
diff --git a/src/broadcom/clif/clif_private.h b/src/broadcom/clif/clif_private.h
index 6ace62b0310..cda407a00bf 100644
--- a/src/broadcom/clif/clif_private.h
+++ b/src/broadcom/clif/clif_private.h
@@ -101,6 +101,8 @@ bool v3d41_clif_dump_packet(struct clif_dump *clif, uint32_t offset,
const uint8_t *cl, uint32_t *size, bool reloc_mode);
bool v3d42_clif_dump_packet(struct clif_dump *clif, uint32_t offset,
const uint8_t *cl, uint32_t *size, bool reloc_mode);
+bool v3d71_clif_dump_packet(struct clif_dump *clif, uint32_t offset,
+ const uint8_t *cl, uint32_t *size, bool reloc_mode);
static inline void
out(struct clif_dump *clif, const char *fmt, ...)
diff --git a/src/broadcom/common/v3d_device_info.c b/src/broadcom/common/v3d_device_info.c
index 272190eb2e5..7bc2b662cfc 100644
--- a/src/broadcom/common/v3d_device_info.c
+++ b/src/broadcom/common/v3d_device_info.c
@@ -36,6 +36,9 @@ v3d_get_device_info(int fd, struct v3d_device_info* devinfo, v3d_ioctl_fun drm_i
struct drm_v3d_get_param ident1 = {
.param = DRM_V3D_PARAM_V3D_CORE0_IDENT1,
};
+ struct drm_v3d_get_param hub_ident3 = {
+ .param = DRM_V3D_PARAM_V3D_HUB_IDENT3,
+ };
int ret;
ret = drm_ioctl(fd, DRM_IOCTL_V3D_GET_PARAM, &ident0);
@@ -62,10 +65,13 @@ v3d_get_device_info(int fd, struct v3d_device_info* devinfo, v3d_ioctl_fun drm_i
int qups = (ident1.value >> 8) & 0xf;
devinfo->qpu_count = nslc * qups;
+ devinfo->has_accumulators = devinfo->ver < 71;
+
switch (devinfo->ver) {
case 33:
case 41:
case 42:
+ case 71:
break;
default:
fprintf(stderr,
@@ -75,5 +81,14 @@ v3d_get_device_info(int fd, struct v3d_device_info* devinfo, v3d_ioctl_fun drm_i
return false;
}
- return true;
+ ret = drm_ioctl(fd, DRM_IOCTL_V3D_GET_PARAM, &hub_ident3);
+ if (ret != 0) {
+ fprintf(stderr, "Couldn't get V3D core HUB IDENT3: %s\n",
+ strerror(errno));
+ return false;
+ }
+
+ devinfo->rev = (hub_ident3.value >> 8) & 0xff;
+
+ return true;
}
diff --git a/src/broadcom/common/v3d_device_info.h b/src/broadcom/common/v3d_device_info.h
index 97abd9b8d9f..8dfc7858727 100644
--- a/src/broadcom/common/v3d_device_info.h
+++ b/src/broadcom/common/v3d_device_info.h
@@ -34,11 +34,17 @@ struct v3d_device_info {
/** Simple V3D version: major * 10 + minor */
uint8_t ver;
+ /** V3D revision number */
+ uint8_t rev;
+
/** Size of the VPM, in bytes. */
int vpm_size;
/* NSLC * QUPS from the core's IDENT registers. */
int qpu_count;
+
+ /* If the hw has accumulator registers */
+ bool has_accumulators;
};
typedef int (*v3d_ioctl_fun)(int fd, unsigned long request, void *arg);
diff --git a/src/broadcom/common/v3d_limits.h b/src/broadcom/common/v3d_limits.h
index 46f38bd7484..354c8784914 100644
--- a/src/broadcom/common/v3d_limits.h
+++ b/src/broadcom/common/v3d_limits.h
@@ -42,7 +42,8 @@
#define V3D_MAX_SAMPLES 4
-#define V3D_MAX_DRAW_BUFFERS 4
+#define V3D_MAX_DRAW_BUFFERS 8
+#define V3D_MAX_RENDER_TARGETS(ver) ((ver) < 71 ? 4 : 8)
#define V3D_MAX_POINT_SIZE 512.0f
#define V3D_MAX_LINE_WIDTH 32
diff --git a/src/broadcom/common/v3d_macros.h b/src/broadcom/common/v3d_macros.h
index fe89398208a..b4291fb5350 100644
--- a/src/broadcom/common/v3d_macros.h
+++ b/src/broadcom/common/v3d_macros.h
@@ -41,6 +41,9 @@
#elif (V3D_VERSION == 42)
# define V3DX(x) V3D42_##x
# define v3dX(x) v3d42_##x
+#elif (V3D_VERSION == 71)
+# define V3DX(x) V3D71_##x
+# define v3dX(x) v3d71_##x
#else
# error "Need to add prefixing macros for this v3d version"
#endif
diff --git a/src/broadcom/common/v3d_performance_counters.h b/src/broadcom/common/v3d_performance_counters.h
index 08d750c2cbe..a8f0cff8784 100644
--- a/src/broadcom/common/v3d_performance_counters.h
+++ b/src/broadcom/common/v3d_performance_counters.h
@@ -28,6 +28,110 @@
#define V3D_PERFCNT_NAME 1
#define V3D_PERFCNT_DESCRIPTION 2
+#ifndef V3D_VERSION
+# error "The V3D_VERSION macro must be defined"
+#endif
+
+#if (V3D_VERSION >= 71)
+
+static const char *v3d_performance_counters[][3] = {
+ {"CORE", "cycle-count", "[CORE] Cycle counter"},
+ {"CORE", "core-active", "[CORE] Bin/Render/Compute active cycles"},
+ {"CLE", "CLE-bin-thread-active-cycles", "[CLE] Bin thread active cycles"},
+ {"CLE", "CLE-render-thread-active-cycles", "[CLE] Render thread active cycles"},
+ {"CORE", "compute-active-cycles", "[CORE] Compute active cycles"},
+ {"FEP", "FEP-valid-primitives-no-rendered-pixels", "[FEP] Valid primitives that result in no rendered pixels, for all rendered tiles"},
+ {"FEP", "FEP-valid-primitives-rendered-pixels", "[FEP] Valid primitives for all rendered tiles (primitives may be counted in more than one tile)"},
+ {"FEP", "FEP-clipped-quads", "[FEP] Early-Z/Near/Far clipped quads"},
+ {"FEP", "FEP-valid-quads", "[FEP] Valid quads"},
+ {"TLB", "TLB-quads-not-passing-stencil-test", "[TLB] Quads with no pixels passing the stencil test"},
+ {"TLB", "TLB-quads-not-passing-z-and-stencil-test", "[TLB] Quads with no pixels passing the Z and stencil tests"},
+ {"TLB", "TLB-quads-passing-z-and-stencil-test", "[TLB] Quads with any pixels passing the Z and stencil tests"},
+ {"TLB", "TLB-quads-written-to-color-buffer", "[TLB] Quads with valid pixels written to colour buffer"},
+ {"TLB", "TLB-partial-quads-written-to-color-buffer", "[TLB] Partial quads written to the colour buffer"},
+ {"PTB", "PTB-primitives-need-clipping", "[PTB] Primitives that need clipping"},
+ {"PTB", "PTB-primitives-discarded-outside-viewport", "[PTB] Primitives discarded by being outside the viewport"},
+ {"PTB", "PTB-primitives-binned", "[PTB] Total primitives binned"},
+ {"PTB", "PTB-primitives-discarded-reversed", "[PTB] Primitives that are discarded because they are reversed"},
+ {"QPU", "QPU-total-instr-cache-hit", "[QPU] Total instruction cache hits for all slices"},
+ {"QPU", "QPU-total-instr-cache-miss", "[QPU] Total instruction cache misses for all slices"},
+ {"QPU", "QPU-total-uniform-cache-hit", "[QPU] Total uniforms cache hits for all slices"},
+ {"QPU", "QPU-total-uniform-cache-miss", "[QPU] Total uniforms cache misses for all slices"},
+ {"TMU", "TMU-active-cycles", "[TMU] Active cycles"},
+ {"TMU", "TMU-stalled-cycles", "[TMU] Stalled cycles"},
+ {"TMU", "TMU-total-text-quads-access", "[TMU] Total texture cache accesses"},
+ {"TMU", "TMU-cache-x4-active-cycles", "[TMU] Cache active cycles for x4 access"},
+ {"TMU", "TMU-cache-x4-stalled-cycles", "[TMU] Cache stalled cycles for x4 access"},
+ {"TMU", "TMU-total-text-quads-x4-access", "[TMU] Total texture cache x4 access"},
+ {"L2T", "L2T-total-cache-hit", "[L2T] Total Level 2 cache hits"},
+ {"L2T", "L2T-total-cache-miss", "[L2T] Total Level 2 cache misses"},
+ {"L2T", "L2T-local", "[L2T] Local mode access"},
+ {"L2T", "L2T-writeback", "[L2T] Writeback"},
+ {"L2T", "L2T-zero", "[L2T] Zero"},
+ {"L2T", "L2T-merge", "[L2T] Merge"},
+ {"L2T", "L2T-fill", "[L2T] Fill"},
+ {"L2T", "L2T-stalls-no-wid", "[L2T] Stalls because no WID available"},
+ {"L2T", "L2T-stalls-no-rid", "[L2T] Stalls because no RID available"},
+ {"L2T", "L2T-stalls-queue-full", "[L2T] Stalls because internal queue full"},
+ {"L2T", "L2T-stalls-wrightback", "[L2T] Stalls because writeback in flight"},
+ {"L2T", "L2T-stalls-mem", "[L2T] Stalls because AXI blocks read"},
+ {"L2T", "L2T-stalls-fill", "[L2T] Stalls because fill pending for victim cache-line"},
+ {"L2T", "L2T-hitq", "[L2T] Sent request via hit queue"},
+ {"L2T", "L2T-hitq-full", "[L2T] Sent request via main queue because hit queue is full"},
+ {"L2T", "L2T-stalls-read-data", "[L2T] Stalls because waiting for data from SDRAM"},
+ {"L2T", "L2T-TMU-read-hits", "[L2T] TMU read hits"},
+ {"L2T", "L2T-TMU-read-miss", "[L2T] TMU read misses"},
+ {"L2T", "L2T-VCD-read-hits", "[L2T] VCD read hits"},
+ {"L2T", "L2T-VCD-read-miss", "[L2T] VCD read misses"},
+ {"L2T", "L2T-SLC-read-hits", "[L2T] SLC read hits (all slices)"},
+ {"L2T", "L2T-SLC-read-miss", "[L2T] SLC read misses (all slices)"},
+ {"AXI", "AXI-writes-seen-watch-0", "[AXI] Writes seen by watch 0"},
+ {"AXI", "AXI-reads-seen-watch-0", "[AXI] Reads seen by watch 0"},
+ {"AXI", "AXI-writes-stalled-seen-watch-0", "[AXI] Write stalls seen by watch 0"},
+ {"AXI", "AXI-reads-stalled-seen-watch-0", "[AXI] Read stalls seen by watch 0"},
+ {"AXI", "AXI-write-bytes-seen-watch-0", "[AXI] Total bytes written seen by watch 0"},
+ {"AXI", "AXI-read-bytes-seen-watch-0", "[AXI] Total bytes read seen by watch 0"},
+ {"AXI", "AXI-writes-seen-watch-1", "[AXI] Writes seen by watch 1"},
+ {"AXI", "AXI-reads-seen-watch-1", "[AXI] Reads seen by watch 1"},
+ {"AXI", "AXI-writes-stalled-seen-watch-1", "[AXI] Write stalls seen by watch 1"},
+ {"AXI", "AXI-reads-stalled-seen-watch-1", "[AXI] Read stalls seen by watch 1"},
+ {"AXI", "AXI-write-bytes-seen-watch-1", "[AXI] Total bytes written seen by watch 1"},
+ {"AXI", "AXI-read-bytes-seen-watch-1", "[AXI] Total bytes read seen by watch 1"},
+ {"CORE", "core-memory-writes", "[CORE] Total memory writes"},
+ {"L2T", "L2T-memory-writes", "[L2T] Total memory writes"},
+ {"PTB", "PTB-memory-writes", "[PTB] Total memory writes"},
+ {"TLB", "TLB-memory-writes", "[TLB] Total memory writes"},
+ {"CORE", "core-memory-reads", "[CORE] Total memory reads"},
+ {"L2T", "L2T-memory-reads", "[L2T] Total memory reads"},
+ {"PTB", "PTB-memory-reads", "[PTB] Total memory reads"},
+ {"PSE", "PSE-memory-reads", "[PSE] Total memory reads"},
+ {"TLB", "TLB-memory-reads", "[TLB] Total memory reads"},
+ {"PTB", "PTB-memory-words-writes", "[PTB] Total memory words written"},
+ {"TLB", "TLB-memory-words-writes", "[TLB] Total memory words written"},
+ {"PSE", "PSE-memory-words-reads", "[PSE] Total memory words read"},
+ {"TLB", "TLB-memory-words-reads", "[TLB] Total memory words read"},
+ {"AXI", "AXI-read-trans", "[AXI] Read transaction count"},
+ {"AXI", "AXI-write-trans", "[AXI] Write transaction count"},
+ {"AXI", "AXI-read-wait-cycles", "[AXI] Read total wait cycles"},
+ {"AXI", "AXI-write-wait-cycles", "[AXI] Write total wait cycles"},
+ {"AXI", "AXI-max-outstanding-reads", "[AXI] Maximium outstanding read transactions"},
+ {"AXI", "AXI-max-outstanding-writes", "[AXI] Maximum outstanding write transactions"},
+ {"QPU", "QPU-wait-bubble", "[QPU] Pipeline bubble in qcycles due all threads waiting"},
+ {"QPU", "QPU-ic-miss-bubble", "[QPU] Pipeline bubble in qcycles due instruction-cache miss"},
+ {"QPU", "QPU-active", "[QPU] Executed shader instruction"},
+ {"QPU", "QPU-total-active-clk-cycles-fragment-shading", "[QPU] Total active clock cycles for all QPUs doing fragment shading (counts only when QPU is not stalled)"},
+ {"QPU", "QPU-stalls", "[QPU] Stalled qcycles executing shader instruction"},
+ {"QPU", "QPU-total-clk-cycles-waiting-fragment-shading", "[QPU] Total stalled clock cycles for all QPUs doing fragment shading"},
+ {"QPU", "QPU-stalls-TMU", "[QPU] Stalled qcycles waiting for TMU"},
+ {"QPU", "QPU-stalls-TLB", "[QPU] Stalled qcycles waiting for TLB"},
+ {"QPU", "QPU-stalls-VPM", "[QPU] Stalled qcycles waiting for VPM"},
+ {"QPU", "QPU-stalls-uniforms", "[QPU] Stalled qcycles waiting for uniforms"},
+ {"QPU", "QPU-stalls-SFU", "[QPU] Stalled qcycles waiting for SFU"},
+ {"QPU", "QPU-stalls-other", "[QPU] Stalled qcycles waiting for any other reason (vary/W/Z)"},
+};
+
+#elif (V3D_VERSION >= 41)
+
static const char *v3d_performance_counters[][3] = {
{"FEP", "FEP-valid-primitives-no-rendered-pixels", "[FEP] Valid primitives that result in no rendered pixels, for all rendered tiles"},
{"FEP", "FEP-valid-primitives-rendered-pixels", "[FEP] Valid primitives for all rendered tiles (primitives may be counted in more than one tile)"},
@@ -118,4 +222,8 @@ static const char *v3d_performance_counters[][3] = {
{"CORE", "compute-active-cycles", "[CORE] Compute active cycles"},
};
+#else
+static const char *v3d_performance_counters[][3] = { };
+#endif
+
#endif
diff --git a/src/broadcom/common/v3d_tfu.h b/src/broadcom/common/v3d_tfu.h
index 80da224ca2d..572d0074794 100644
--- a/src/broadcom/common/v3d_tfu.h
+++ b/src/broadcom/common/v3d_tfu.h
@@ -48,4 +48,27 @@
#define V3D33_TFU_ICFG_FORMAT_UIF_NO_XOR 14
#define V3D33_TFU_ICFG_FORMAT_UIF_XOR 15
+/* Disable level 0 write, just write following mipmaps */
+#define V3D71_TFU_IOC_DIMTW (1 << 0)
+#define V3D71_TFU_IOC_FORMAT_SHIFT 12
+#define V3D71_TFU_IOC_FORMAT_LINEARTILE 3
+#define V3D71_TFU_IOA_FORMAT_UBLINEAR_1_COLUMN 4
+#define V3D71_TFU_IOA_FORMAT_UBLINEAR_2_COLUMN 5
+#define V3D71_TFU_IOA_FORMAT_UIF_NO_XOR 6
+#define V3D71_TFU_IOA_FORMAT_UIF_XOR 7
+
+#define V3D71_TFU_IOC_STRIDE_SHIFT 16
+#define V3D71_TFU_IOC_NUMMM_SHIFT 4
+
+#define V3D71_TFU_ICFG_OTYPE_SHIFT 16
+#define V3D71_TFU_ICFG_IFORMAT_SHIFT 23
+#define V3D71_TFU_ICFG_FORMAT_RASTER 0
+#define V3D71_TFU_ICFG_FORMAT_SAND_128 1
+#define V3D71_TFU_ICFG_FORMAT_SAND_256 2
+#define V3D71_TFU_ICFG_FORMAT_LINEARTILE 11
+#define V3D71_TFU_ICFG_FORMAT_UBLINEAR_1_COLUMN 12
+#define V3D71_TFU_ICFG_FORMAT_UBLINEAR_2_COLUMN 13
+#define V3D71_TFU_ICFG_FORMAT_UIF_NO_XOR 14
+#define V3D71_TFU_ICFG_FORMAT_UIF_XOR 15
+
#endif
diff --git a/src/broadcom/common/v3d_util.c b/src/broadcom/common/v3d_util.c
index 57872a923d3..8a50d279985 100644
--- a/src/broadcom/common/v3d_util.c
+++ b/src/broadcom/common/v3d_util.c
@@ -87,10 +87,37 @@ v3d_csd_choose_workgroups_per_supergroup(struct v3d_device_info *devinfo,
return best_wgs_per_sg;
}
+#define V3D71_TLB_COLOR_SIZE (16 * 1024)
+#define V3D71_TLB_DETPH_SIZE (16 * 1024)
+#define V3D71_TLB_AUX_DETPH_SIZE (8 * 1024)
+
+static bool
+tile_size_valid(uint32_t pixel_count, uint32_t color_bpp, uint32_t depth_bpp)
+{
+ /* First, check whether depth fits in the auxiliary depth buffer: if it
+ * does, the main depth TLB memory can be used for color as well.
+ */
+ if (pixel_count * depth_bpp <= V3D71_TLB_AUX_DETPH_SIZE &&
+ pixel_count * color_bpp <= V3D71_TLB_COLOR_SIZE + V3D71_TLB_DETPH_SIZE) {
+ return true;
+ }
+
+ /* Otherwise the tile must fit in the main TLB buffers */
+ return pixel_count * depth_bpp <= V3D71_TLB_DETPH_SIZE &&
+ pixel_count * color_bpp <= V3D71_TLB_COLOR_SIZE;
+}
+
void
-v3d_choose_tile_size(uint32_t color_attachment_count, uint32_t max_color_bpp,
- bool msaa, bool double_buffer,
- uint32_t *width, uint32_t *height)
+v3d_choose_tile_size(const struct v3d_device_info *devinfo,
+ uint32_t color_attachment_count,
+ /* V3D 4.x max internal bpp of all RTs */
+ uint32_t max_internal_bpp,
+ /* V3D 7.x accumulated bpp for all RTs (in bytes) */
+ uint32_t total_color_bpp,
+ bool msaa,
+ bool double_buffer,
+ uint32_t *width,
+ uint32_t *height)
{
static const uint8_t tile_sizes[] = {
64, 64,
@@ -103,19 +130,65 @@ v3d_choose_tile_size(uint32_t color_attachment_count, uint32_t max_color_bpp,
};
uint32_t idx = 0;
- if (color_attachment_count > 2)
- idx += 2;
- else if (color_attachment_count > 1)
- idx += 1;
+ if (devinfo->ver >= 71) {
+ /* In V3D 7.x, we use the actual bpp used by color attachments to compute
+ * the tile size instead of the maximum bpp. This may allow us to choose a
+ * larger tile size than we would in 4.x in scenarios with multiple RTs
+ * with different bpps.
+ *
+ * Also, the TLB has an auxiliary buffer of 8KB that will be automatically
+ * used for depth instead of the main 16KB depth TLB buffer when the depth
+ * tile fits in the auxiliary buffer, allowing the hardware to allocate
+ * the 16KB from the main depth TLB to the color TLB. If we can do that,
+ * then we are effectively doubling the memory we have for color and we
+ * can also select a larger tile size. This is necessary to support