Skip to content

Commit

Permalink
coll/HAN: Add support for XHC on the intra-comm
Browse files: browse the repository at this point in the history
Signed-off-by: George Katevenis <gkatev@ics.forth.gr>
  • Loading branch information
gkatev committed May 1, 2024
1 parent 7b59f8e commit 7b9e74c
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 9 deletions.
6 changes: 4 additions & 2 deletions ompi/mca/coll/han/coll_han.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
* Copyright (c) 2020-2022 Bull S.A.S. All rights reserved.
* Copyright (c) Amazon.com, Inc. or its affiliates.
* All rights reserved.
* Copyright (c) 2023 Computer Architecture and VLSI Systems (CARV)
* Laboratory, ICS Forth. All rights reserved.
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
* $COPYRIGHT$
*
Expand Down Expand Up @@ -47,11 +49,11 @@

/*
* Today:
* . only 2 modules available for intranode (low) level
* . 3 modules available for intranode (low) level
* . only 2 modules available for internode (up) level
*/

#define COLL_HAN_LOW_MODULES 2
#define COLL_HAN_LOW_MODULES 3
#define COLL_HAN_UP_MODULES 2

struct mca_coll_han_bcast_args_s {
Expand Down
17 changes: 10 additions & 7 deletions ompi/mca/coll/han/coll_han_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
* reserved.
* Copyright (c) 2022 IBM Corporation. All rights reserved
* Copyright (c) 2020-2022 Bull S.A.S. All rights reserved.
* Copyright (c) 2023 Computer Architecture and VLSI Systems (CARV)
* Laboratory, ICS Forth. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -43,7 +45,8 @@ ompi_coll_han_components ompi_coll_han_available_components[COMPONENTS_COUNT] =
{ TUNED, "tuned" },
{ SM, "sm" }, /* this should not be used, the collective component is gone */
{ ADAPT, "adapt" },
{ HAN, "han" }
{ HAN, "han" },
{ XHC, "xhc" }
};

/*
Expand Down Expand Up @@ -287,7 +290,7 @@ static int han_register(void)

cs->han_bcast_low_module = 0;
(void) mca_coll_han_query_module_from_mca(c, "bcast_low_module",
"low level module for bcast, currently only 0 for tuned",
"low level module for bcast, 0 tuned, 2 xhc",
OPAL_INFO_LVL_9,
&cs->han_bcast_low_module,
&cs->han_op_module_name.bcast.han_op_low_module_name);
Expand All @@ -307,7 +310,7 @@ static int han_register(void)

cs->han_reduce_low_module = 0;
(void) mca_coll_han_query_module_from_mca(c, "reduce_low_module",
"low level module for allreduce, currently only 0 tuned",
"low level module for allreduce, 0 tuned, 2 xhc",
OPAL_INFO_LVL_9, &cs->han_reduce_low_module,
&cs->han_op_module_name.reduce.han_op_low_module_name);

Expand All @@ -326,7 +329,7 @@ static int han_register(void)

cs->han_allreduce_low_module = 0;
(void) mca_coll_han_query_module_from_mca(c, "allreduce_low_module",
"low level module for allreduce, currently only 0 tuned",
"low level module for allreduce, 0 tuned, 2 xhc",
OPAL_INFO_LVL_9, &cs->han_allreduce_low_module,
&cs->han_op_module_name.allreduce.han_op_low_module_name);

Expand All @@ -338,7 +341,7 @@ static int han_register(void)

cs->han_allgather_low_module = 0;
(void) mca_coll_han_query_module_from_mca(c, "allgather_low_module",
"low level module for allgather, currently only 0 tuned",
"low level module for allgather, 0 tuned, 2 xhc",
OPAL_INFO_LVL_9, &cs->han_allgather_low_module,
&cs->han_op_module_name.allgather.han_op_low_module_name);

Expand All @@ -350,7 +353,7 @@ static int han_register(void)

cs->han_gather_low_module = 0;
(void) mca_coll_han_query_module_from_mca(c, "gather_low_module",
"low level module for gather, currently only 0 tuned",
"low level module for gather, 0 tuned, 2 xhc",
OPAL_INFO_LVL_9, &cs->han_gather_low_module,
&cs->han_op_module_name.gather.han_op_low_module_name);

Expand All @@ -374,7 +377,7 @@ static int han_register(void)

cs->han_scatter_low_module = 0;
(void) mca_coll_han_query_module_from_mca(c, "scatter_low_module",
"low level module for scatter, currently only 0 tuned",
"low level module for scatter, 0 tuned, 2 xhc",
OPAL_INFO_LVL_9, &cs->han_scatter_low_module,
&cs->han_op_module_name.scatter.han_op_low_module_name);

Expand Down
3 changes: 3 additions & 0 deletions ompi/mca/coll/han/coll_han_dynamic.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
* reserved.
* Copyright (c) 2020 Bull S.A.S. All rights reserved.
* Copyright (c) 2022 IBM Corporation. All rights reserved
* Copyright (c) 2023 Computer Architecture and VLSI Systems (CARV)
* Laboratory, ICS Forth. All rights reserved.
*
* $COPYRIGHT$
*
Expand Down Expand Up @@ -105,6 +107,7 @@ typedef enum COMPONENTS {
SM,
ADAPT,
HAN,
XHC,
COMPONENTS_COUNT
} COMPONENT_T;

Expand Down
6 changes: 6 additions & 0 deletions ompi/mca/coll/han/coll_han_subcomms.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2020 Bull S.A.S. All rights reserved.
* Copyright (c) 2023 Computer Architecture and VLSI Systems (CARV)
* Laboratory, ICS Forth. All rights reserved.
*
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
* $COPYRIGHT$
Expand Down Expand Up @@ -314,6 +316,10 @@ int mca_coll_han_comm_create(struct ompi_communicator_t *comm,
&comm_info, &(low_comms[1]));
assert(OMPI_COMM_IS_DISJOINT_SET(low_comms[1]) && !OMPI_COMM_IS_DISJOINT(low_comms[1]));

opal_info_set(&comm_info, "ompi_comm_coll_preference", "xhc,^han");
ompi_comm_split_type(comm, MPI_COMM_TYPE_SHARED, 0,
&comm_info, &(low_comms[2]));

/*
* Upgrade libnbc module priority to set up up_comms[0] with libnbc module
* This sub-communicator contains one process per node: processes with the
Expand Down

0 comments on commit 7b9e74c

Please sign in to comment.