-
Notifications
You must be signed in to change notification settings - Fork 168
/
helper.h
1323 lines (1215 loc) · 46 KB
/
helper.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* Copyright © 2009 CNRS
* Copyright © 2009-2023 Inria. All rights reserved.
* Copyright © 2009-2012 Université Bordeaux
* Copyright © 2009-2010 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory.
*/
/** \file
* \brief High-level hwloc traversal helpers.
*/
#ifndef HWLOC_HELPER_H
#define HWLOC_HELPER_H
#ifndef HWLOC_H
#error Please include the main hwloc.h instead
#endif
#include <stdlib.h>
#include <errno.h>
#ifdef __cplusplus
extern "C" {
#endif
/** \defgroup hwlocality_helper_types Kinds of object Type
* @{
*
* Each object type is
* either Normal (i.e. hwloc_obj_type_is_normal() returns 1),
* or Memory (i.e. hwloc_obj_type_is_memory() returns 1)
* or I/O (i.e. hwloc_obj_type_is_io() returns 1)
* or Misc (i.e. equal to ::HWLOC_OBJ_MISC).
* It cannot be of more than one of these kinds.
*
* See also Object Kind in \ref termsanddefs.
*/
/** \brief Check whether an object type is Normal.
*
* Normal objects are objects of the main CPU hierarchy
* (Machine, Package, Core, PU, CPU caches, etc.),
* but they are not NUMA nodes, I/O devices or Misc objects.
*
* They are attached to parent as Normal children,
* not as Memory, I/O or Misc children.
*
* \return 1 if an object of type \p type is a Normal object, 0 otherwise.
*/
HWLOC_DECLSPEC int
hwloc_obj_type_is_normal(hwloc_obj_type_t type);
/** \brief Check whether an object type is I/O.
*
* I/O objects are objects attached to their parents
* in the I/O children list.
 * This currently includes Bridges, PCI and OS devices.
*
* \return 1 if an object of type \p type is a I/O object, 0 otherwise.
*/
HWLOC_DECLSPEC int
hwloc_obj_type_is_io(hwloc_obj_type_t type);
/** \brief Check whether an object type is Memory.
*
* Memory objects are objects attached to their parents
* in the Memory children list.
 * This currently includes NUMA nodes and Memory-side caches.
*
* \return 1 if an object of type \p type is a Memory object, 0 otherwise.
*/
HWLOC_DECLSPEC int
hwloc_obj_type_is_memory(hwloc_obj_type_t type);
/** \brief Check whether an object type is a CPU Cache (Data, Unified or Instruction).
*
* Memory-side caches are not CPU caches.
*
* \return 1 if an object of type \p type is a Cache, 0 otherwise.
*/
HWLOC_DECLSPEC int
hwloc_obj_type_is_cache(hwloc_obj_type_t type);
/** \brief Check whether an object type is a CPU Data or Unified Cache.
*
* Memory-side caches are not CPU caches.
*
* \return 1 if an object of type \p type is a CPU Data or Unified Cache, 0 otherwise.
*/
HWLOC_DECLSPEC int
hwloc_obj_type_is_dcache(hwloc_obj_type_t type);
/** \brief Check whether an object type is a CPU Instruction Cache.
*
* Memory-side caches are not CPU caches.
*
* \return 1 if an object of type \p type is a CPU Instruction Cache, 0 otherwise.
*/
HWLOC_DECLSPEC int
hwloc_obj_type_is_icache(hwloc_obj_type_t type);
/** @} */
/** \defgroup hwlocality_helper_find_inside Finding Objects inside a CPU set
* @{
*/
/** \brief Get the first largest object included in the given cpuset \p set.
*
* \return the first object that is included in \p set and whose parent is not.
* \return \c NULL if no such object exists.
*
* This is convenient for iterating over all largest objects within a CPU set
* by doing a loop getting the first largest object and clearing its CPU set
* from the remaining CPU set.
*/
static __hwloc_inline hwloc_obj_t
hwloc_get_first_largest_obj_inside_cpuset(hwloc_topology_t topology, hwloc_const_cpuset_t set)
{
  /* Walk down from the root, descending into the first child that
   * intersects the set, until the current object is fully included. */
  hwloc_obj_t current = hwloc_get_root_obj(topology);
  if (!hwloc_bitmap_intersects(current->cpuset, set))
    return NULL; /* the set does not overlap the topology at all */
  while (!hwloc_bitmap_isincluded(current->cpuset, set)) {
    /* current intersects without being included: inspect its children */
    hwloc_obj_t candidate;
    for (candidate = current->first_child; candidate; candidate = candidate->next_sibling)
      if (hwloc_bitmap_intersects(candidate->cpuset, set))
        break;
    if (!candidate)
      return current; /* no child intersects: the parent is the answer */
    current = candidate;
  }
  /* current is entirely included in the set */
  return current;
}
/** \brief Get the set of largest objects covering exactly a given cpuset \p set
*
* \return the number of objects returned in \p objs.
* \return -1 if no set of objects may cover that cpuset.
*/
HWLOC_DECLSPEC int hwloc_get_largest_objs_inside_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set,
hwloc_obj_t * __hwloc_restrict objs, int max);
/** \brief Return the next object at depth \p depth included in CPU set \p set.
*
 * The next invocation should pass the previous return value in \p prev
* so as to obtain the next object in \p set.
*
* \return the first object at depth \p depth included in \p set if \p prev is \c NULL.
* \return the next object at depth \p depth included in \p set if \p prev is not \c NULL.
* \return \c NULL if there is no next object.
*
* \note Objects with empty CPU sets are ignored
* (otherwise they would be considered included in any given set).
*
* \note This function cannot work if objects at the given depth do
* not have CPU sets (I/O or Misc objects).
*/
static __hwloc_inline hwloc_obj_t
hwloc_get_next_obj_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set,
                                           int depth, hwloc_obj_t prev)
{
  hwloc_obj_t next = hwloc_get_next_obj_by_depth(topology, depth, prev);
  /* Skip objects with an empty cpuset (they would be considered included
   * in any set) and objects not fully included in the given set.
   * The loop condition already copes with next == NULL, so the original's
   * separate "if (!next) return NULL;" early-return was redundant. */
  while (next && (hwloc_bitmap_iszero(next->cpuset) || !hwloc_bitmap_isincluded(next->cpuset, set)))
    next = next->next_cousin;
  return next;
}
/** \brief Return the next object of type \p type included in CPU set \p set.
*
 * The next invocation should pass the previous return value in \p prev
* so as to obtain the next object in \p set.
*
* \return the first object of type \p type included in \p set if \p prev is \c NULL.
* \return the next object of type \p type included in \p set if \p prev is not \c NULL.
* \return \c NULL if there is no next object.
* \return \c NULL if there is no depth for the given type.
* \return \c NULL if there are multiple depths for the given type,
* the caller should fallback to hwloc_get_next_obj_inside_cpuset_by_depth().
*
* \note Objects with empty CPU sets are ignored
* (otherwise they would be considered included in any given set).
*
* \note This function cannot work if objects of the given type do
* not have CPU sets (I/O or Misc objects).
*/
static __hwloc_inline hwloc_obj_t
hwloc_get_next_obj_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set,
                                          hwloc_obj_type_t type, hwloc_obj_t prev)
{
  /* Map the type to its (unique) depth, then defer to the by-depth variant. */
  int depth = hwloc_get_type_depth(topology, type);
  switch (depth) {
  case HWLOC_TYPE_DEPTH_UNKNOWN:
  case HWLOC_TYPE_DEPTH_MULTIPLE:
    return NULL; /* no unique level for this type */
  default:
    return hwloc_get_next_obj_inside_cpuset_by_depth(topology, set, depth, prev);
  }
}
/** \brief Return the (logically) \p idx -th object at depth \p depth included in CPU set \p set.
*
* \return the object if any, \c NULL otherwise.
*
* \note Objects with empty CPU sets are ignored
* (otherwise they would be considered included in any given set).
*
* \note This function cannot work if objects at the given depth do
* not have CPU sets (I/O or Misc objects).
*/
static __hwloc_inline hwloc_obj_t
hwloc_get_obj_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set,
                                      int depth, unsigned idx) __hwloc_attribute_pure;
static __hwloc_inline hwloc_obj_t
hwloc_get_obj_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set,
                                      int depth, unsigned idx)
{
  hwloc_obj_t obj = hwloc_get_obj_by_depth (topology, depth, 0);
  unsigned count = 0;
  /* Walk the whole level, counting objects included in the set, and stop
   * at the idx-th one.  The original also had a redundant
   * "if (!obj) return NULL;" before the loop; the loop handles that case. */
  while (obj) {
    /* objects with an empty cpuset are ignored: they would be
     * considered included in any given set */
    if (!hwloc_bitmap_iszero(obj->cpuset) && hwloc_bitmap_isincluded(obj->cpuset, set)) {
      if (count == idx)
        return obj;
      count++;
    }
    obj = obj->next_cousin;
  }
  return NULL;
}
/** \brief Return the \p idx -th object of type \p type included in CPU set \p set.
*
* \return the object if any.
* \return \c NULL if there is no such object.
* \return \c NULL if there is no depth for given type.
* \return \c NULL if there are multiple depths for given type,
* the caller should fallback to hwloc_get_obj_inside_cpuset_by_depth().
*
* \note Objects with empty CPU sets are ignored
* (otherwise they would be considered included in any given set).
*
* \note This function cannot work if objects of the given type do
* not have CPU sets (I/O or Misc objects).
*/
static __hwloc_inline hwloc_obj_t
hwloc_get_obj_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set,
                                     hwloc_obj_type_t type, unsigned idx) __hwloc_attribute_pure;
static __hwloc_inline hwloc_obj_t
hwloc_get_obj_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set,
                                     hwloc_obj_type_t type, unsigned idx)
{
  /* Resolve the type to its unique depth; bail out when the type has
   * no level at all, or spans several levels. */
  int depth = hwloc_get_type_depth(topology, type);
  if (depth == HWLOC_TYPE_DEPTH_UNKNOWN)
    return NULL;
  if (depth == HWLOC_TYPE_DEPTH_MULTIPLE)
    return NULL;
  return hwloc_get_obj_inside_cpuset_by_depth(topology, set, depth, idx);
}
/** \brief Return the number of objects at depth \p depth included in CPU set \p set.
*
* \return the number of objects.
* \return 0 if the depth is invalid.
*
* \note Objects with empty CPU sets are ignored
* (otherwise they would be considered included in any given set).
*
* \note This function cannot work if objects at the given depth do
* not have CPU sets (I/O or Misc objects).
*/
static __hwloc_inline unsigned
hwloc_get_nbobjs_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set,
                                         int depth) __hwloc_attribute_pure;
static __hwloc_inline unsigned
hwloc_get_nbobjs_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set,
                                         int depth)
{
  hwloc_obj_t obj = hwloc_get_obj_by_depth (topology, depth, 0);
  unsigned count = 0;
  /* Count same-level objects whose (non-empty) cpuset is included in set.
   * The original early-return "if (!obj) return 0;" was redundant: the
   * loop below already yields 0 when the level is empty or invalid. */
  while (obj) {
    if (!hwloc_bitmap_iszero(obj->cpuset) && hwloc_bitmap_isincluded(obj->cpuset, set))
      count++;
    obj = obj->next_cousin;
  }
  return count;
}
/** \brief Return the number of objects of type \p type included in CPU set \p set.
*
* \return the number of objects.
* \return 0 if there are no objects of that type in the topology.
* \return -1 if there are multiple levels of objects of that type,
* the caller should fallback to hwloc_get_nbobjs_inside_cpuset_by_depth().
*
* \note Objects with empty CPU sets are ignored
* (otherwise they would be considered included in any given set).
*
* \note This function cannot work if objects of the given type do
* not have CPU sets (I/O objects).
*/
static __hwloc_inline int
hwloc_get_nbobjs_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set,
                                        hwloc_obj_type_t type) __hwloc_attribute_pure;
static __hwloc_inline int
hwloc_get_nbobjs_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set,
                                        hwloc_obj_type_t type)
{
  int depth = hwloc_get_type_depth(topology, type);
  if (depth == HWLOC_TYPE_DEPTH_MULTIPLE)
    return -1; /* FIXME: aggregate nbobjs from different levels? */
  if (depth == HWLOC_TYPE_DEPTH_UNKNOWN)
    return 0; /* no level of that type in this topology */
  return (int) hwloc_get_nbobjs_inside_cpuset_by_depth(topology, set, depth);
}
/** \brief Return the logical index among the objects included in CPU set \p set.
*
* Consult all objects in the same level as \p obj and inside CPU set \p set
* in the logical order, and return the index of \p obj within them.
* If \p set covers the entire topology, this is the logical index of \p obj.
* Otherwise, this is similar to a logical index within the part of the topology
* defined by CPU set \p set.
*
* \return the logical index among the objects included in the set if any.
* \return -1 if the object is not included in the set.
*
* \note Objects with empty CPU sets are ignored
* (otherwise they would be considered included in any given set).
*
* \note This function cannot work if obj does not have CPU sets (I/O objects).
*/
static __hwloc_inline int
hwloc_get_obj_index_inside_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set,
                                   hwloc_obj_t obj) __hwloc_attribute_pure;
static __hwloc_inline int
hwloc_get_obj_index_inside_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set,
                                   hwloc_obj_t obj)
{
  hwloc_obj_t cousin;
  int idx = 0;
  if (!hwloc_bitmap_isincluded(obj->cpuset, set))
    return -1; /* the object itself is not inside the set */
  /* Count how many earlier same-level objects (with a non-empty cpuset)
   * are also inside the set; that count is obj's logical index in the set. */
  for (cousin = obj->prev_cousin; cousin; cousin = cousin->prev_cousin)
    if (!hwloc_bitmap_iszero(cousin->cpuset) && hwloc_bitmap_isincluded(cousin->cpuset, set))
      idx++;
  return idx;
}
/** @} */
/** \defgroup hwlocality_helper_find_covering Finding Objects covering at least CPU set
* @{
*/
/** \brief Get the child covering at least CPU set \p set.
*
* \return the child that covers the set entirely.
* \return \c NULL if no child matches or if \p set is empty.
*
* \note This function cannot work if parent does not have a CPU set (I/O or Misc objects).
*/
static __hwloc_inline hwloc_obj_t
hwloc_get_child_covering_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set,
                                 hwloc_obj_t parent) __hwloc_attribute_pure;
static __hwloc_inline hwloc_obj_t
hwloc_get_child_covering_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set,
                                 hwloc_obj_t parent)
{
  hwloc_obj_t candidate;
  /* An empty set would be "covered" by everything; treat it as no match. */
  if (hwloc_bitmap_iszero(set))
    return NULL;
  /* Look for a normal child whose cpuset contains the whole set. */
  for (candidate = parent->first_child; candidate; candidate = candidate->next_sibling)
    if (candidate->cpuset && hwloc_bitmap_isincluded(set, candidate->cpuset))
      return candidate;
  return NULL;
}
/** \brief Get the lowest object covering at least CPU set \p set
*
* \return the lowest object covering the set entirely.
* \return \c NULL if no object matches or if \p set is empty.
*/
static __hwloc_inline hwloc_obj_t
hwloc_get_obj_covering_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set) __hwloc_attribute_pure;
static __hwloc_inline hwloc_obj_t
hwloc_get_obj_covering_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set)
{
  hwloc_obj_t current = hwloc_get_root_obj(topology);
  hwloc_obj_t child;
  /* An empty set, or a set not covered by the whole machine, has no
   * covering object. */
  if (hwloc_bitmap_iszero(set) || !hwloc_bitmap_isincluded(set, current->cpuset))
    return NULL;
  /* Descend as long as some child still covers the whole set; the last
   * object for which no child covers the set is the lowest cover. */
  while ((child = hwloc_get_child_covering_cpuset(topology, set, current)) != NULL)
    current = child;
  return current;
}
/** \brief Iterate through same-depth objects covering at least CPU set \p set
*
 * The next invocation should pass the previous return value in \p prev so as
* to obtain the next object covering at least another part of \p set.
*
* \return the first object at depth \p depth covering at least part of CPU set \p set
* if object \p prev is \c NULL.
* \return the next one if \p prev is not \c NULL.
* \return \c NULL if there is no next object.
*
* \note This function cannot work if objects at the given depth do
* not have CPU sets (I/O or Misc objects).
*/
static __hwloc_inline hwloc_obj_t
hwloc_get_next_obj_covering_cpuset_by_depth(hwloc_topology_t topology, hwloc_const_cpuset_t set,
                                            int depth, hwloc_obj_t prev)
{
  hwloc_obj_t next = hwloc_get_next_obj_by_depth(topology, depth, prev);
  /* Skip cousins that do not intersect the set.  The while condition
   * already copes with next == NULL, so the original's separate
   * "if (!next) return NULL;" early-return was redundant. */
  while (next && !hwloc_bitmap_intersects(set, next->cpuset))
    next = next->next_cousin;
  return next;
}
/** \brief Iterate through same-type objects covering at least CPU set \p set
*
 * The next invocation should pass the previous return value in \p prev so as to obtain
* the next object of type \p type covering at least another part of \p set.
*
* \return the first object of type \p type covering at least part of CPU set \p set
* if object \p prev is \c NULL.
* \return the next one if \p prev is not \c NULL.
* \return \c NULL if there is no next object.
* \return \c NULL if there is no depth for the given type.
* \return \c NULL if there are multiple depths for the given type,
* the caller should fallback to hwloc_get_next_obj_covering_cpuset_by_depth().
*
* \note This function cannot work if objects of the given type do
* not have CPU sets (I/O or Misc objects).
*/
static __hwloc_inline hwloc_obj_t
hwloc_get_next_obj_covering_cpuset_by_type(hwloc_topology_t topology, hwloc_const_cpuset_t set,
                                           hwloc_obj_type_t type, hwloc_obj_t prev)
{
  /* The type must map to exactly one depth for this helper to work. */
  int depth = hwloc_get_type_depth(topology, type);
  return (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE)
    ? NULL
    : hwloc_get_next_obj_covering_cpuset_by_depth(topology, set, depth, prev);
}
/** @} */
/** \defgroup hwlocality_helper_ancestors Looking at Ancestor and Child Objects
* @{
*
* Be sure to see the figure in \ref termsanddefs that shows a
* complete topology tree, including depths, child/sibling/cousin
* relationships, and an example of an asymmetric topology where one
* package has fewer caches than its peers.
*/
/** \brief Returns the ancestor object of \p obj at depth \p depth.
*
* \return the ancestor if any.
* \return \c NULL if no such ancestor exists.
*
* \note \p depth should not be the depth of PU or NUMA objects
* since they are ancestors of no objects (except Misc or I/O).
* This function rather expects an intermediate level depth,
* such as the depth of Packages, Cores, or Caches.
*/
static __hwloc_inline hwloc_obj_t
hwloc_get_ancestor_obj_by_depth (hwloc_topology_t topology __hwloc_attribute_unused, int depth, hwloc_obj_t obj) __hwloc_attribute_pure;
static __hwloc_inline hwloc_obj_t
hwloc_get_ancestor_obj_by_depth (hwloc_topology_t topology __hwloc_attribute_unused, int depth, hwloc_obj_t obj)
{
  hwloc_obj_t node = obj;
  /* An object cannot have an ancestor deeper than itself. */
  if (node->depth < depth)
    return NULL;
  /* Climb parents until the requested depth is reached (or the root's
   * NULL parent is passed). */
  while (node && node->depth > depth)
    node = node->parent;
  return node;
}
/** \brief Returns the ancestor object of \p obj with type \p type.
*
* \return the ancestor if any.
* \return \c NULL if no such ancestor exists.
*
* \note if multiple matching ancestors exist (e.g. multiple levels of ::HWLOC_OBJ_GROUP)
* the lowest one is returned.
*
* \note \p type should not be ::HWLOC_OBJ_PU or ::HWLOC_OBJ_NUMANODE
* since these objects are ancestors of no objects (except Misc or I/O).
* This function rather expects an intermediate object type,
* such as ::HWLOC_OBJ_PACKAGE, ::HWLOC_OBJ_CORE, etc.
*/
static __hwloc_inline hwloc_obj_t
hwloc_get_ancestor_obj_by_type (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_type_t type, hwloc_obj_t obj) __hwloc_attribute_pure;
static __hwloc_inline hwloc_obj_t
hwloc_get_ancestor_obj_by_type (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_type_t type, hwloc_obj_t obj)
{
  hwloc_obj_t node;
  /* Climb from the direct parent; the first match is the lowest
   * matching ancestor.  NULL if no ancestor has the requested type. */
  for (node = obj->parent; node; node = node->parent)
    if (node->type == type)
      return node;
  return NULL;
}
/** \brief Returns the common parent object to objects \p obj1 and \p obj2.
*
* \return the common ancestor.
*
* \note This function cannot return \c NULL.
*/
static __hwloc_inline hwloc_obj_t
hwloc_get_common_ancestor_obj (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj1, hwloc_obj_t obj2) __hwloc_attribute_pure;
static __hwloc_inline hwloc_obj_t
hwloc_get_common_ancestor_obj (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj1, hwloc_obj_t obj2)
{
  /* the loop isn't so easy since intermediate ancestors may have
   * different depth, causing us to alternate between using obj1->parent
   * and obj2->parent. Also, even if at some point we find ancestors
   * of the same depth, their ancestors may have different depth again.
   */
  while (obj1 != obj2) {
    /* bring the deeper of the two objects up to the other's depth */
    while (obj1->depth > obj2->depth)
      obj1 = obj1->parent;
    while (obj2->depth > obj1->depth)
      obj2 = obj2->parent;
    /* same depth but still distinct objects: climb one level on both sides */
    if (obj1 != obj2 && obj1->depth == obj2->depth) {
      obj1 = obj1->parent;
      obj2 = obj2->parent;
    }
  }
  /* obj1 == obj2 here: this is the common ancestor */
  return obj1;
}
/** \brief Returns true if \p obj is inside the subtree beginning with ancestor object \p subtree_root.
*
 * \return 1 if the object is in the subtree, 0 otherwise.
*
* \note This function cannot work if \p obj and \p subtree_root objects do
* not have CPU sets (I/O or Misc objects).
*/
static __hwloc_inline int
hwloc_obj_is_in_subtree (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj, hwloc_obj_t subtree_root) __hwloc_attribute_pure;
static __hwloc_inline int
hwloc_obj_is_in_subtree (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj, hwloc_obj_t subtree_root)
{
  /* Both objects need a cpuset for the inclusion test to make sense
   * (I/O and Misc objects have none). */
  if (!obj->cpuset || !subtree_root->cpuset)
    return 0;
  return hwloc_bitmap_isincluded(obj->cpuset, subtree_root->cpuset);
}
/** \brief Return the next child.
*
* Return the next child among the normal children list,
* then among the memory children list, then among the I/O
* children list, then among the Misc children list.
*
* \return the first child if \p prev is \c NULL.
* \return the next child if \p prev is not \c NULL.
* \return \c NULL when there is no next child.
*/
static __hwloc_inline hwloc_obj_t
hwloc_get_next_child (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t parent, hwloc_obj_t prev)
{
  hwloc_obj_t obj;
  /* state records which children list is being walked:
   * 0 = normal, 1 = memory, 2 = I/O, 3 = Misc. */
  int state = 0;
  if (prev) {
    /* resume inside the list that contains prev, deduced from its type */
    if (prev->type == HWLOC_OBJ_MISC)
      state = 3;
    else if (hwloc_obj_type_is_io(prev->type))
      state = 2;
    else if (hwloc_obj_type_is_memory(prev->type))
      state = 1;
    obj = prev->next_sibling;
  } else {
    /* no previous child: start at the head of the normal children list */
    obj = parent->first_child;
  }
  /* when the current list is exhausted, fall through to the next one
   * in order: normal -> memory -> I/O -> Misc */
  if (!obj && state == 0) {
    obj = parent->memory_first_child;
    state = 1;
  }
  if (!obj && state == 1) {
    obj = parent->io_first_child;
    state = 2;
  }
  if (!obj && state == 2) {
    obj = parent->misc_first_child;
    state = 3;
  }
  return obj;
}
/** @} */
/** \defgroup hwlocality_helper_find_cache Looking at Cache Objects
* @{
*/
/** \brief Find the depth of cache objects matching cache level and type.
*
* Return the depth of the topology level that contains cache objects
* whose attributes match \p cachelevel and \p cachetype.
* This function is identical to calling hwloc_get_type_depth() with the
* corresponding type such as ::HWLOC_OBJ_L1ICACHE, except that it may
* also return a Unified cache when looking for an instruction cache.
*
* \return the depth of the unique matching unified cache level is returned
* if \p cachetype is ::HWLOC_OBJ_CACHE_UNIFIED.
*
* \return the depth of either a matching cache level or a unified cache level
* if \p cachetype is ::HWLOC_OBJ_CACHE_DATA or ::HWLOC_OBJ_CACHE_INSTRUCTION.
*
* \return the depth of the matching level
* if \p cachetype is \c -1 but only one level matches.
*
* \return ::HWLOC_TYPE_DEPTH_MULTIPLE
* if \p cachetype is \c -1 but multiple levels match.
*
* \return ::HWLOC_TYPE_DEPTH_UNKNOWN if no cache level matches.
*/
static __hwloc_inline int
hwloc_get_cache_type_depth (hwloc_topology_t topology,
                            unsigned cachelevel, hwloc_obj_cache_type_t cachetype)
{
  int depth;
  int found = HWLOC_TYPE_DEPTH_UNKNOWN;
  /* scan every level from the top until hwloc_get_obj_by_depth()
   * returns nothing, i.e. past the deepest level */
  for (depth=0; ; depth++) {
    hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, 0);
    if (!obj)
      break;
    /* only data/unified CPU cache levels with the requested cache depth
     * (L1, L2, ...) are candidates */
    if (!hwloc_obj_type_is_dcache(obj->type) || obj->attr->cache.depth != cachelevel)
      /* doesn't match, try next depth */
      continue;
    if (cachetype == (hwloc_obj_cache_type_t) -1) {
      /* caller accepts any cache type: report MULTIPLE if more than one
       * level matches, otherwise remember the single match */
      if (found != HWLOC_TYPE_DEPTH_UNKNOWN) {
        /* second match, return MULTIPLE */
        return HWLOC_TYPE_DEPTH_MULTIPLE;
      }
      /* first match, mark it as found */
      found = depth;
      continue;
    }
    if (obj->attr->cache.type == cachetype || obj->attr->cache.type == HWLOC_OBJ_CACHE_UNIFIED)
      /* exact match (either unified is alone, or we match instruction or data), return immediately */
      return depth;
  }
  /* went to the bottom, return what we found */
  return found;
}
/** \brief Get the first data (or unified) cache covering a cpuset \p set
*
* \return a covering cache, or \c NULL if no cache matches.
*/
static __hwloc_inline hwloc_obj_t
hwloc_get_cache_covering_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set) __hwloc_attribute_pure;
static __hwloc_inline hwloc_obj_t
hwloc_get_cache_covering_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set)
{
  hwloc_obj_t node;
  /* Start at the lowest object covering the set, then climb towards the
   * root until a data/unified cache is found. */
  for (node = hwloc_get_obj_covering_cpuset(topology, set); node; node = node->parent)
    if (hwloc_obj_type_is_dcache(node->type))
      return node;
  return NULL;
}
/** \brief Get the first data (or unified) cache shared between an object and somebody else.
*
* \return a shared cache.
* \return \c NULL if no cache matches or if an invalid object is given (e.g. I/O object).
*/
static __hwloc_inline hwloc_obj_t
hwloc_get_shared_cache_covering_obj (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj) __hwloc_attribute_pure;
static __hwloc_inline hwloc_obj_t
hwloc_get_shared_cache_covering_obj (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj)
{
  hwloc_obj_t node;
  /* Objects without a cpuset (I/O, Misc) cannot be covered by a cache. */
  if (!obj->cpuset)
    return NULL;
  /* Walk up the ancestors; the first data/unified cache whose cpuset
   * differs from obj's covers somebody else too, hence is shared. */
  for (node = obj->parent; node; node = node->parent)
    if (hwloc_obj_type_is_dcache(node->type)
        && !hwloc_bitmap_isequal(node->cpuset, obj->cpuset))
      return node;
  return NULL;
}
/** @} */
/** \defgroup hwlocality_helper_find_misc Finding objects, miscellaneous helpers
* @{
*
* Be sure to see the figure in \ref termsanddefs that shows a
* complete topology tree, including depths, child/sibling/cousin
* relationships, and an example of an asymmetric topology where one
* package has fewer caches than its peers.
*/
/** \brief Remove simultaneous multithreading PUs from a CPU set.
*
* For each core in \p topology, if \p cpuset contains some PUs of that core,
* modify \p cpuset to only keep a single PU for that core.
*
* \p which specifies which PU will be kept.
* PU are considered in physical index order.
* If 0, for each core, the function keeps the first PU that was originally set in \p cpuset.
*
 * If \p which is larger than the number of PUs in a core that were originally set in \p cpuset,
* no PU is kept for that core.
*
* \return 0.
*
* \note PUs that are not below a Core object are ignored
* (for instance if the topology does not contain any Core object).
* None of them is removed from \p cpuset.
*/
HWLOC_DECLSPEC int hwloc_bitmap_singlify_per_core(hwloc_topology_t topology, hwloc_bitmap_t cpuset, unsigned which);
/** \brief Returns the object of type ::HWLOC_OBJ_PU with \p os_index.
*
* This function is useful for converting a CPU set into the PU
* objects it contains.
* When retrieving the current binding (e.g. with hwloc_get_cpubind()),
* one may iterate over the bits of the resulting CPU set with
* hwloc_bitmap_foreach_begin(), and find the corresponding PUs
* with this function.
*
* \return the PU object, or \c NULL if none matches.
*/
static __hwloc_inline hwloc_obj_t
hwloc_get_pu_obj_by_os_index(hwloc_topology_t topology, unsigned os_index) __hwloc_attribute_pure;
static __hwloc_inline hwloc_obj_t
hwloc_get_pu_obj_by_os_index(hwloc_topology_t topology, unsigned os_index)
{
  hwloc_obj_t pu;
  /* Linear scan of the PU level looking for a matching OS index. */
  for (pu = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_PU, NULL);
       pu;
       pu = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_PU, pu))
    if (pu->os_index == os_index)
      return pu;
  return NULL;
}
/** \brief Returns the object of type ::HWLOC_OBJ_NUMANODE with \p os_index.
*
* This function is useful for converting a nodeset into the NUMA node
* objects it contains.
* When retrieving the current binding (e.g. with hwloc_get_membind() with HWLOC_MEMBIND_BYNODESET),
* one may iterate over the bits of the resulting nodeset with
* hwloc_bitmap_foreach_begin(), and find the corresponding NUMA nodes
* with this function.
*
* \return the NUMA node object, or \c NULL if none matches.
*/
static __hwloc_inline hwloc_obj_t
hwloc_get_numanode_obj_by_os_index(hwloc_topology_t topology, unsigned os_index) __hwloc_attribute_pure;
static __hwloc_inline hwloc_obj_t
hwloc_get_numanode_obj_by_os_index(hwloc_topology_t topology, unsigned os_index)
{
  hwloc_obj_t node = NULL;
  /* Iterate over the NUMA node level until the OS index matches. */
  for (;;) {
    node = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, node);
    if (!node)
      return NULL; /* exhausted the level without a match */
    if (node->os_index == os_index)
      return node;
  }
}
/** \brief Do a depth-first traversal of the topology to find and sort
*
* all objects that are at the same depth than \p src.
* Report in \p objs up to \p max physically closest ones to \p src.
*
* \return the number of objects returned in \p objs.
*
* \return 0 if \p src is an I/O object.
*
* \note This function requires the \p src object to have a CPU set.
*/
/* TODO: rather provide an iterator? Provide a way to know how much should be allocated? By returning the total number of objects instead? */
HWLOC_DECLSPEC unsigned hwloc_get_closest_objs (hwloc_topology_t topology, hwloc_obj_t src, hwloc_obj_t * __hwloc_restrict objs, unsigned max);
/** \brief Find an object below another object, both specified by types and indexes.
*
* Start from the top system object and find object of type \p type1
* and logical index \p idx1. Then look below this object and find another
* object of type \p type2 and logical index \p idx2. Indexes are specified
 * within the parent, not within the entire system.
*
* For instance, if type1 is PACKAGE, idx1 is 2, type2 is CORE and idx2
* is 3, return the fourth core object below the third package.
*
* \return a matching object if any, \c NULL otherwise.
*
* \note This function requires these objects to have a CPU set.
*/
static __hwloc_inline hwloc_obj_t
hwloc_get_obj_below_by_type (hwloc_topology_t topology,
			     hwloc_obj_type_t type1, unsigned idx1,
			     hwloc_obj_type_t type2, unsigned idx2) __hwloc_attribute_pure;
static __hwloc_inline hwloc_obj_t
hwloc_get_obj_below_by_type (hwloc_topology_t topology,
			     hwloc_obj_type_t type1, unsigned idx1,
			     hwloc_obj_type_t type2, unsigned idx2)
{
  /* First locate the idx1-th object of type1 in the whole topology,
   * then search for the idx2-th object of type2 within its cpuset. */
  hwloc_obj_t parent = hwloc_get_obj_by_type (topology, type1, idx1);
  if (parent == NULL)
    return NULL;
  return hwloc_get_obj_inside_cpuset_by_type(topology, parent->cpuset, type2, idx2);
}
/** \brief Find an object below a chain of objects specified by types and indexes.
*
* This is a generalized version of hwloc_get_obj_below_by_type().
*
* Arrays \p typev and \p idxv must contain \p nr types and indexes.
*
* Start from the top system object and walk the arrays \p typev and \p idxv.
* For each type and logical index couple in the arrays, look under the previously found
* object to find the index-th object of the given type.
 * Indexes are specified within the parent, not within the entire system.
*
* For instance, if nr is 3, typev contains NODE, PACKAGE and CORE,
* and idxv contains 0, 1 and 2, return the third core object below
* the second package below the first NUMA node.
*
* \return a matching object if any, \c NULL otherwise.
*
* \note This function requires all these objects and the root object
* to have a CPU set.
*/
static __hwloc_inline hwloc_obj_t
hwloc_get_obj_below_array_by_type (hwloc_topology_t topology, int nr, hwloc_obj_type_t *typev, unsigned *idxv) __hwloc_attribute_pure;
static __hwloc_inline hwloc_obj_t
hwloc_get_obj_below_array_by_type (hwloc_topology_t topology, int nr, hwloc_obj_type_t *typev, unsigned *idxv)
{
  /* Walk down from the root: at each level, find the idxv[level]-th object
   * of type typev[level] inside the cpuset of the object found so far.
   * Stop early (returning NULL) as soon as a lookup fails. */
  hwloc_obj_t current = hwloc_get_root_obj(topology);
  int level;
  for (level = 0; level < nr && current != NULL; level++)
    current = hwloc_get_obj_inside_cpuset_by_type(topology, current->cpuset, typev[level], idxv[level]);
  return current;
}
/** \brief Return an object of a different type with same locality.
*
* If the source object \p src is a normal or memory type,
* this function returns an object of type \p type with same
* CPU and node sets, either below or above in the hierarchy.
*
* If the source object \p src is a PCI or an OS device within a PCI
* device, the function may either return that PCI device, or another
* OS device in the same PCI parent.
* This may for instance be useful for converting between OS devices
 * such as "nvml0" or "rsmi1" used in distance structures into
 * the PCI device, or the CUDA or OpenCL OS device that corresponds
* to the same physical card.
*
 * If not \c NULL, parameter \p subtype only selects objects whose
* subtype attribute exists and is \p subtype (case-insensitively),
* for instance "OpenCL" or "CUDA".
*
* If not \c NULL, parameter \p nameprefix only selects objects whose
* name attribute exists and starts with \p nameprefix (case-insensitively),
* for instance "rsmi" for matching "rsmi0".
*
* If multiple objects match, the first one is returned.
*
* This function will not walk the hierarchy across bridges since
* the PCI locality may become different.
* This function cannot also convert between normal/memory objects
* and I/O or Misc objects.
*
* \p flags must be \c 0 for now.
*
* \return An object with identical locality,
* matching \p subtype and \p nameprefix if any.
*
* \return \c NULL if no matching object could be found,
* or if the source object and target type are incompatible,
* for instance if converting between CPU and I/O objects.
*/
HWLOC_DECLSPEC hwloc_obj_t
hwloc_get_obj_with_same_locality(hwloc_topology_t topology, hwloc_obj_t src,
hwloc_obj_type_t type, const char *subtype, const char *nameprefix,
unsigned long flags);
/** @} */
/** \defgroup hwlocality_helper_distribute Distributing items over a topology
* @{
*/
/** \brief Flags to be given to hwloc_distrib().
*/
enum hwloc_distrib_flags_e {
  /** \brief Distribute in reverse order, starting from the last objects.
   * \hideinitializer
   */
  HWLOC_DISTRIB_FLAG_REVERSE = (1UL<<0)
};
/** \brief Distribute \p n items over the topology under \p roots
*
* Array \p set will be filled with \p n cpusets recursively distributed
* linearly over the topology under objects \p roots, down to depth \p until
* (which can be INT_MAX to distribute down to the finest level).
*
* \p n_roots is usually 1 and \p roots only contains the topology root object
* so as to distribute over the entire topology.
*
* This is typically useful when an application wants to distribute \p n
* threads over a machine, giving each of them as much private cache as
* possible and keeping them locally in number order.
*
* The caller may typically want to also call hwloc_bitmap_singlify()
* before binding a thread so that it does not move at all.
*
 * \p flags should be 0 or an OR'ed set of ::hwloc_distrib_flags_e.
*
* \return 0 on success, -1 on error.
*
* \note This function requires the \p roots objects to have a CPU set.
*/
static __hwloc_inline int
hwloc_distrib(hwloc_topology_t topology,
hwloc_obj_t *roots, unsigned n_roots,
hwloc_cpuset_t *set,
unsigned n,
int until, unsigned long flags)
{
unsigned i;
unsigned tot_weight;
unsigned given, givenweight;
hwloc_cpuset_t *cpusetp = set;
if (flags & ~HWLOC_DISTRIB_FLAG_REVERSE) {
errno = EINVAL;
return -1;
}
tot_weight = 0;
for (i = 0; i < n_roots; i++)
tot_weight += (unsigned) hwloc_bitmap_weight(roots[i]->cpuset);
for (i = 0, given = 0, givenweight = 0; i < n_roots; i++) {
unsigned chunk, weight;
hwloc_obj_t root = roots[flags & HWLOC_DISTRIB_FLAG_REVERSE ? n_roots-1-i : i];
hwloc_cpuset_t cpuset = root->cpuset;
while (!hwloc_obj_type_is_normal(root->type))
/* If memory/io/misc, walk up to normal parent */
root = root->parent;
weight = (unsigned) hwloc_bitmap_weight(cpuset);
if (!weight)
continue;
/* Give to root a chunk proportional to its weight.
* If previous chunks got rounded-up, we may get a bit less. */
chunk = (( (givenweight+weight) * n + tot_weight-1) / tot_weight)
- (( givenweight * n + tot_weight-1) / tot_weight);
if (!root->arity || chunk <= 1 || root->depth >= until) {
/* We can't split any more, put everything there. */
if (chunk) {
/* Fill cpusets with ours */
unsigned j;
for (j=0; j < chunk; j++)
cpusetp[j] = hwloc_bitmap_dup(cpuset);
} else {
/* We got no chunk, just merge our cpuset to a previous one
* (the first chunk cannot be empty)
* so that this root doesn't get ignored.
*/
assert(given);
hwloc_bitmap_or(cpusetp[-1], cpusetp[-1], cpuset);
}