@@ -602,9 +602,10 @@ uct_gdaki_query_tl_devices(uct_md_h tl_md,
602602 uct_tl_device_resource_t * * tl_devices_p ,
603603 unsigned * num_tl_devices_p )
604604{
605- static int uar_supported = -1 ;
606- uct_ib_mlx5_md_t * md = ucs_derived_of (tl_md , uct_ib_mlx5_md_t );
607- unsigned num_tl_devices = 0 ;
605+ static int uar_supported = -1 ;
606+ static int peermem_loaded = -1 ;
607+ uct_ib_mlx5_md_t * md = ucs_derived_of (tl_md , uct_ib_mlx5_md_t );
608+ unsigned num_tl_devices = 0 ;
608609 uct_tl_device_resource_t * tl_devices ;
609610 ucs_status_t status ;
610611 CUdevice device ;
@@ -649,6 +650,25 @@ uct_gdaki_query_tl_devices(uct_md_h tl_md,
649650 goto err ;
650651 }
651652
653+ /*
654+ * Save the result of peermem driver check in a global flag to avoid
655+ * printing diag message for each GPU and MD.
656+ */
657+ if (peermem_loaded == -1 ) {
658+ peermem_loaded = !!(md -> super .reg_mem_types &
659+ UCS_BIT (UCS_MEMORY_TYPE_CUDA ));
660+ if (peermem_loaded == 0 ) {
661+ ucs_diag ("GDAKI not supported, please load "
662+ "Nvidia peermem driver by running "
663+ "\"modprobe nvidia_peermem\"" );
664+ }
665+ }
666+
667+ if (peermem_loaded == 0 ) {
668+ status = UCS_ERR_NO_DEVICE ;
669+ goto err ;
670+ }
671+
652672 uct_cuda_base_get_sys_dev (device , & dev );
653673 status = ucs_topo_get_distance (dev , md -> super .dev .sys_dev , & dist );
654674 if (status != UCS_OK ) {
0 commit comments