@@ -102,6 +102,12 @@ typedef struct ucs_topo_global_ctx {
102102} ucs_topo_global_ctx_t ;
103103
104104
105+ struct ucs_global_state {
106+ unsigned num_devices ;
107+ ucs_topo_sys_device_info_t devices [];
108+ };
109+
110+
105111const ucs_sys_dev_distance_t ucs_topo_default_distance = {
106112 .latency = 0 ,
107113 .bandwidth = INFINITY
@@ -1000,6 +1006,75 @@ void ucs_topo_print_info(FILE *stream)
10001006 }
10011007}
10021008
1009+ static void ucs_topo_release_devices ()
1010+ {
1011+ ucs_topo_sys_device_info_t * device ;
1012+
1013+ while (ucs_topo_global_ctx .num_devices -- > 0 ) {
1014+ device = & ucs_topo_global_ctx .devices [ucs_topo_global_ctx .num_devices ];
1015+ ucs_free (device -> name );
1016+ }
1017+ }
1018+
1019+ ucs_global_state_t * ucs_topo_extract_state (void )
1020+ {
1021+ ucs_global_state_t * state ;
1022+ size_t devices_size ;
1023+
1024+ devices_size = sizeof (ucs_topo_sys_device_info_t ) *
1025+ ucs_topo_global_ctx .num_devices ;
1026+
1027+ state = ucs_malloc (sizeof (* state ) + devices_size , "ucs_global_state_t" );
1028+ if (state == NULL ) {
1029+ return NULL ;
1030+ }
1031+
1032+ ucs_spin_lock (& ucs_topo_global_ctx .lock );
1033+
1034+ memcpy (state -> devices , ucs_topo_global_ctx .devices , devices_size );
1035+ state -> num_devices = ucs_topo_global_ctx .num_devices ;
1036+
1037+ ucs_topo_global_ctx .num_devices = 0 ;
1038+ kh_clear (bus_to_sys_dev , & ucs_topo_global_ctx .bus_to_sys_dev_hash );
1039+
1040+ ucs_spin_unlock (& ucs_topo_global_ctx .lock );
1041+
1042+ return state ;
1043+ }
1044+
1045+ void ucs_topo_restore_state (ucs_global_state_t * state )
1046+ {
1047+ const ucs_topo_sys_device_info_t * device ;
1048+ ucs_sys_device_t sys_dev ;
1049+ int kh_put_status ;
1050+ khiter_t hash_it ;
1051+
1052+ ucs_spin_lock (& ucs_topo_global_ctx .lock );
1053+
1054+ ucs_topo_release_devices ();
1055+
1056+ memcpy (ucs_topo_global_ctx .devices , state -> devices ,
1057+ sizeof (ucs_topo_sys_device_info_t ) * state -> num_devices );
1058+ ucs_topo_global_ctx .num_devices = state -> num_devices ;
1059+
1060+ /* Create the hash table */
1061+ kh_clear (bus_to_sys_dev , & ucs_topo_global_ctx .bus_to_sys_dev_hash );
1062+ for (sys_dev = 0 ; sys_dev < ucs_topo_global_ctx .num_devices ; ++ sys_dev ) {
1063+ device = & ucs_topo_global_ctx .devices [sys_dev ];
1064+ hash_it = kh_put (bus_to_sys_dev ,
1065+ & ucs_topo_global_ctx .bus_to_sys_dev_hash ,
1066+ ucs_topo_get_bus_id_bit_repr (& device -> bus_id ),
1067+ & kh_put_status );
1068+ ucs_assert ((kh_put_status == UCS_KH_PUT_BUCKET_EMPTY ) ||
1069+ (kh_put_status == UCS_KH_PUT_BUCKET_CLEAR ));
1070+ kh_val (& ucs_topo_global_ctx .bus_to_sys_dev_hash , hash_it ) = sys_dev ;
1071+ }
1072+
1073+ ucs_spin_unlock (& ucs_topo_global_ctx .lock );
1074+
1075+ ucs_free (state );
1076+ }
1077+
10031078static ucs_sys_topo_provider_t ucs_sys_topo_provider_sysfs = {
10041079 .name = "sysfs" ,
10051080 .ops = {
@@ -1021,16 +1096,10 @@ void ucs_topo_init()
10211096
10221097void ucs_topo_cleanup ()
10231098{
1024- ucs_topo_sys_device_info_t * device ;
1025-
10261099 ucs_list_del (& ucs_sys_topo_provider_sysfs .list );
10271100 ucs_list_del (& ucs_sys_topo_provider_default .list );
10281101
1029- while (ucs_topo_global_ctx .num_devices -- > 0 ) {
1030- device = & ucs_topo_global_ctx .devices [ucs_topo_global_ctx .num_devices ];
1031- ucs_free (device -> name );
1032- }
1033-
1102+ ucs_topo_release_devices ();
10341103 kh_destroy_inplace (bus_to_sys_dev ,
10351104 & ucs_topo_global_ctx .bus_to_sys_dev_hash );
10361105 ucs_spinlock_destroy (& ucs_topo_global_ctx .lock );
0 commit comments