From b466e301114c6ba5d290b9477e54ee62b2b1136a Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Fri, 10 Oct 2014 11:29:06 +0900 Subject: [PATCH 01/14] Revert "coll/basic: fix segmentation fault in neighborhood collectives if the degree" This reverts commit 9c788ff9400960823637dc0eebb3a8640414fa05. --- ompi/mca/coll/basic/coll_basic.h | 21 +------------------ ompi/mca/coll/basic/coll_basic_module.c | 13 +++++------- .../basic/coll_basic_neighbor_allgather.c | 20 +----------------- .../basic/coll_basic_neighbor_allgatherv.c | 20 +----------------- .../coll/basic/coll_basic_neighbor_alltoall.c | 20 +----------------- .../basic/coll_basic_neighbor_alltoallv.c | 20 +----------------- .../basic/coll_basic_neighbor_alltoallw.c | 19 +---------------- 7 files changed, 11 insertions(+), 122 deletions(-) diff --git a/ompi/mca/coll/basic/coll_basic.h b/ompi/mca/coll/basic/coll_basic.h index 425dd3066d2..ca0b6d558f6 100644 --- a/ompi/mca/coll/basic/coll_basic.h +++ b/ompi/mca/coll/basic/coll_basic.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ * @@ -354,25 +354,6 @@ struct mca_coll_basic_module_t { int mccb_num_reqs; }; typedef struct mca_coll_basic_module_t mca_coll_basic_module_t; - -static inline int mca_coll_basic_check_for_requests (mca_coll_basic_module_t *basic_module, int max_reqs) -{ - if (basic_module->mccb_num_reqs < max_reqs) { - void *tmp; - - basic_module->mccb_num_reqs = max_reqs; - - tmp = realloc (basic_module->mccb_reqs, sizeof(ompi_request_t *) * basic_module->mccb_num_reqs); - if (NULL == tmp) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - basic_module->mccb_reqs = tmp; - } - - return OMPI_SUCCESS; -} - OBJ_CLASS_DECLARATION(mca_coll_basic_module_t); END_C_DECLS diff --git a/ompi/mca/coll/basic/coll_basic_module.c b/ompi/mca/coll/basic/coll_basic_module.c index cb9b0d769bf..8bb929a1f1a 100644 --- a/ompi/mca/coll/basic/coll_basic_module.c +++ b/ompi/mca/coll/basic/coll_basic_module.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -55,7 +55,7 @@ mca_coll_base_module_t * mca_coll_basic_comm_query(struct ompi_communicator_t *comm, int *priority) { - int size, ret; + int size; mca_coll_basic_module_t *basic_module; basic_module = OBJ_NEW(mca_coll_basic_module_t); @@ -70,12 +70,9 @@ mca_coll_basic_comm_query(struct ompi_communicator_t *comm, } else { size = ompi_comm_size(comm); } - - ret = mca_coll_basic_check_for_requests (basic_module, size * 2); - if (OMPI_SUCCESS != ret) { - OBJ_RELEASE(basic_module); - return NULL; - } + basic_module->mccb_num_reqs = size * 2; + basic_module->mccb_reqs = (ompi_request_t**) + malloc(sizeof(ompi_request_t *) * basic_module->mccb_num_reqs); /* Choose whether to use [intra|inter], and [linear|log]-based * algorithms. 
*/ diff --git a/ompi/mca/coll/basic/coll_basic_neighbor_allgather.c b/ompi/mca/coll/basic/coll_basic_neighbor_allgather.c index 31aba0d4bfd..8d8242dc9b0 100644 --- a/ompi/mca/coll/basic/coll_basic_neighbor_allgather.c +++ b/ompi/mca/coll/basic/coll_basic_neighbor_allgather.c @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -49,12 +49,6 @@ mca_coll_basic_neighbor_allgather_cart(const void *sbuf, int scount, ptrdiff_t lb, extent; int rc = MPI_SUCCESS, dim, nreqs; - /* ensure we have enough storage for requests */ - rc = mca_coll_basic_check_for_requests (basic_module, cart->ndims * 4); - if (OMPI_SUCCESS != rc) { - return rc; - } - ompi_datatype_get_extent(rdtype, &lb, &extent); /* The ordering is defined as -1 then +1 in each dimension in @@ -132,12 +126,6 @@ mca_coll_basic_neighbor_allgather_graph(const void *sbuf, int scount, mca_topo_base_graph_neighbors_count (comm, rank, &degree); - /* ensure we have enough storage for requests */ - rc = mca_coll_basic_check_for_requests (basic_module, degree * 2); - if (OMPI_SUCCESS != rc) { - return rc; - } - edges = graph->edges; if (rank > 0) { edges += graph->index[rank - 1]; @@ -185,12 +173,6 @@ mca_coll_basic_neighbor_allgather_dist_graph(const void *sbuf, int scount, indegree = dist_graph->indegree; outdegree = dist_graph->outdegree; - /* ensure we have enough storage for requests */ - rc = mca_coll_basic_check_for_requests (basic_module, indegree + outdegree); - if (OMPI_SUCCESS != rc) { - return rc; - } - inedges = dist_graph->in; outedges = dist_graph->out; diff --git a/ompi/mca/coll/basic/coll_basic_neighbor_allgatherv.c
b/ompi/mca/coll/basic/coll_basic_neighbor_allgatherv.c index 89e4d584afc..cdcf91de95b 100644 --- a/ompi/mca/coll/basic/coll_basic_neighbor_allgatherv.c +++ b/ompi/mca/coll/basic/coll_basic_neighbor_allgatherv.c @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -48,12 +48,6 @@ mca_coll_basic_neighbor_allgatherv_cart(const void *sbuf, int scount, struct omp ptrdiff_t lb, extent; int rc = MPI_SUCCESS, dim, i, nreqs; - /* ensure we have enough storage for requests */ - rc = mca_coll_basic_check_for_requests (basic_module, cart->ndims * 4); - if (OMPI_SUCCESS != rc) { - return rc; - } - ompi_datatype_get_extent(rdtype, &lb, &extent); reqs = basic_module->mccb_reqs; @@ -119,12 +113,6 @@ mca_coll_basic_neighbor_allgatherv_graph(const void *sbuf, int scount, struct om mca_topo_base_graph_neighbors_count (comm, rank, &degree); - /* ensure we have enough storage for requests */ - rc = mca_coll_basic_check_for_requests (basic_module, degree * 2); - if (OMPI_SUCCESS != rc) { - return rc; - } - edges = graph->edges; if (rank > 0) { edges += graph->index[rank - 1]; @@ -170,12 +158,6 @@ mca_coll_basic_neighbor_allgatherv_dist_graph(const void *sbuf, int scount, stru indegree = dist_graph->indegree; outdegree = dist_graph->outdegree; - /* ensure we have enough storage for requests */ - rc = mca_coll_basic_check_for_requests (basic_module, indegree + outdegree); - if (OMPI_SUCCESS != rc) { - return rc; - } - inedges = dist_graph->in; outedges = dist_graph->out; diff --git a/ompi/mca/coll/basic/coll_basic_neighbor_alltoall.c b/ompi/mca/coll/basic/coll_basic_neighbor_alltoall.c index 4517fd8f4e7..289f60acbc8
100644 --- a/ompi/mca/coll/basic/coll_basic_neighbor_alltoall.c +++ b/ompi/mca/coll/basic/coll_basic_neighbor_alltoall.c @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -47,12 +47,6 @@ mca_coll_basic_neighbor_alltoall_cart(const void *sbuf, int scount, struct ompi_ ptrdiff_t lb, rdextent, sdextent; int rc = MPI_SUCCESS, dim, nreqs; - /* ensure we have enough storage for requests */ - rc = mca_coll_basic_check_for_requests (basic_module, cart->ndims * 4); - if (OMPI_SUCCESS != rc) { - return rc; - } - ompi_datatype_get_extent(rdtype, &lb, &rdextent); ompi_datatype_get_extent(sdtype, &lb, &sdextent); @@ -149,12 +143,6 @@ mca_coll_basic_neighbor_alltoall_graph(const void *sbuf, int scount, struct ompi mca_topo_base_graph_neighbors_count (comm, rank, &degree); - /* ensure we have enough storage for requests */ - rc = mca_coll_basic_check_for_requests (basic_module, degree * 2); - if (OMPI_SUCCESS != rc) { - return rc; - } - edges = graph->edges; if (rank > 0) { edges += graph->index[rank - 1]; @@ -205,12 +193,6 @@ mca_coll_basic_neighbor_alltoall_dist_graph(const void *sbuf, int scount,struct indegree = dist_graph->indegree; outdegree = dist_graph->outdegree; - /* ensure we have enough storage for requests */ - rc = mca_coll_basic_check_for_requests (basic_module, indegree + outdegree); - if (OMPI_SUCCESS != rc) { - return rc; - } - inedges = dist_graph->in; outedges = dist_graph->out; diff --git a/ompi/mca/coll/basic/coll_basic_neighbor_alltoallv.c b/ompi/mca/coll/basic/coll_basic_neighbor_alltoallv.c index 3c909968cce..9ace9006624 100644 --- a/ompi/mca/coll/basic/coll_basic_neighbor_alltoallv.c
+++ b/ompi/mca/coll/basic/coll_basic_neighbor_alltoallv.c @@ -10,7 +10,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -48,12 +48,6 @@ mca_coll_basic_neighbor_alltoallv_cart(const void *sbuf, const int scounts[], co ptrdiff_t lb, rdextent, sdextent; ompi_request_t **reqs; - /* ensure we have enough storage for requests */ - rc = mca_coll_basic_check_for_requests (basic_module, cart->ndims * 4); - if (OMPI_SUCCESS != rc) { - return rc; - } - ompi_datatype_get_extent(rdtype, &lb, &rdextent); ompi_datatype_get_extent(sdtype, &lb, &sdextent); @@ -136,12 +130,6 @@ mca_coll_basic_neighbor_alltoallv_graph(const void *sbuf, const int scounts[], c mca_topo_base_graph_neighbors_count (comm, rank, &degree); - /* ensure we have enough storage for requests */ - rc = mca_coll_basic_check_for_requests (basic_module, 2 * degree); - if (OMPI_SUCCESS != rc) { - return rc; - } - edges = graph->edges; if (rank > 0) { edges += graph->index[rank - 1]; @@ -195,12 +183,6 @@ mca_coll_basic_neighbor_alltoallv_dist_graph(const void *sbuf, const int scounts indegree = dist_graph->indegree; outdegree = dist_graph->outdegree; - /* ensure we have enough storage for requests */ - rc = mca_coll_basic_check_for_requests (basic_module, indegree + outdegree); - if (OMPI_SUCCESS != rc) { - return rc; - } - inedges = dist_graph->in; outedges = dist_graph->out; diff --git a/ompi/mca/coll/basic/coll_basic_neighbor_alltoallw.c b/ompi/mca/coll/basic/coll_basic_neighbor_alltoallw.c index dcf35922503..28ecf04cbbb 100644 --- a/ompi/mca/coll/basic/coll_basic_neighbor_alltoallw.c +++ b/ompi/mca/coll/basic/coll_basic_neighbor_alltoallw.c @@ -10,7 +10,7
@@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -47,11 +47,6 @@ mca_coll_basic_neighbor_alltoallw_cart(const void *sbuf, const int scounts[], co int rc = MPI_SUCCESS, dim, i, nreqs; ompi_request_t **reqs; - /* ensure we have enough storage for requests */ - rc = mca_coll_basic_check_for_requests (basic_module, cart->ndims * 4); - if (OMPI_SUCCESS != rc) { - return rc; - } /* post receives first */ for (dim = 0, i = 0, nreqs = 0, reqs = basic_module->mccb_reqs ; dim < cart->ndims ; ++dim, i += 2) { @@ -131,12 +126,6 @@ mca_coll_basic_neighbor_alltoallw_graph(const void *sbuf, const int scounts[], c mca_topo_base_graph_neighbors_count (comm, rank, &degree); - /* ensure we have enough storage for requests */ - rc = mca_coll_basic_check_for_requests (basic_module, 2 * degree); - if (OMPI_SUCCESS != rc) { - return rc; - } - edges = graph->edges; if (rank > 0) { edges += graph->index[rank - 1]; @@ -186,12 +175,6 @@ mca_coll_basic_neighbor_alltoallw_dist_graph(const void *sbuf, const int scounts indegree = dist_graph->indegree; outdegree = dist_graph->outdegree; - /* ensure we have enough storage for requests */ - rc = mca_coll_basic_check_for_requests (basic_module, indegree + outdegree); - if (OMPI_SUCCESS != rc) { - return rc; - } - inedges = dist_graph->in; outedges = dist_graph->out; From 7c2aefe6c9b7aa7636fa4833b4a25354cd72d66f Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Fri, 10 Oct 2014 11:56:04 +0900 Subject: [PATCH 02/14] coll/basic: fix segmentation fault in neighborhood collectives if the degree of the topology is higher than the communicator size It is possible to have a topology degree
higher than the size of the communicator. For example, a periodic cartesian communicator on MPI_COMM_SELF. This will leave the neighborhood collectives with a request buffer that is too small. This commit introduces a semantic change: from now on, c_topo must be set before invoking coll_select --- ompi/mca/coll/basic/coll_basic_module.c | 35 ++++++++- ompi/mca/topo/base/topo_base_cart_create.c | 12 ++-- .../topo/base/topo_base_dist_graph_create.c | 71 +++++++++++++++---- ompi/mca/topo/base/topo_base_graph_create.c | 13 ++-- 4 files changed, 107 insertions(+), 24 deletions(-) diff --git a/ompi/mca/coll/basic/coll_basic_module.c b/ompi/mca/coll/basic/coll_basic_module.c index 8bb929a1f1a..574a0e4f985 100644 --- a/ompi/mca/coll/basic/coll_basic_module.c +++ b/ompi/mca/coll/basic/coll_basic_module.c @@ -13,6 +13,8 @@ * Copyright (c) 2012 Sandia National Laboratories. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2014 Research Organization for Information Science + * and Technology (RIST). All rights reserved.
* $COPYRIGHT$ * * Additional copyrights may follow @@ -28,6 +30,8 @@ #include "mpi.h" #include "ompi/mca/coll/coll.h" #include "ompi/mca/coll/base/base.h" +#include "ompi/mca/topo/topo.h" +#include "ompi/mca/topo/base/base.h" #include "coll_basic.h" @@ -70,7 +74,36 @@ mca_coll_basic_comm_query(struct ompi_communicator_t *comm, } else { size = ompi_comm_size(comm); } - basic_module->mccb_num_reqs = size * 2; + size *= 2; + if (OMPI_COMM_IS_CART(comm)) { + int cart_size; + mca_topo_base_comm_cart_2_2_0_t *cart; + assert (NULL != comm->c_topo); + cart = comm->c_topo->mtc.cart; + cart_size = cart->ndims * 4; + if (cart_size > size) { + size = cart_size; + } + } else if (OMPI_COMM_IS_GRAPH(comm)) { + int rank, degree; + assert (NULL != comm->c_topo); + rank = ompi_comm_rank (comm); + mca_topo_base_graph_neighbors_count (comm, rank, &degree); + degree *= 2; + if (degree > size) { + size = degree; + } + } else if (OMPI_COMM_IS_DIST_GRAPH(comm)) { + int dist_graph_size; + mca_topo_base_comm_dist_graph_2_2_0_t *dist_graph; + assert (NULL != comm->c_topo); + dist_graph = comm->c_topo->mtc.dist_graph; + dist_graph_size = dist_graph->indegree + dist_graph->outdegree; + if (dist_graph_size > size) { + size = dist_graph_size; + } + } + basic_module->mccb_num_reqs = size; basic_module->mccb_reqs = (ompi_request_t**) malloc(sizeof(ompi_request_t *) * basic_module->mccb_num_reqs); diff --git a/ompi/mca/topo/base/topo_base_cart_create.c b/ompi/mca/topo/base/topo_base_cart_create.c index f0d8d927938..6a678da7cac 100644 --- a/ompi/mca/topo/base/topo_base_cart_create.c +++ b/ompi/mca/topo/base/topo_base_cart_create.c @@ -162,20 +162,24 @@ int mca_topo_base_cart_create(mca_topo_base_module_t *topo, return MPI_ERR_INTERN; } + assert(NULL == new_comm->c_topo); + assert(!(new_comm->c_flags & OMPI_COMM_CART)); + new_comm->c_topo = topo; + new_comm->c_topo->mtc.cart = cart; + new_comm->c_topo->reorder = reorder; + new_comm->c_flags |= OMPI_COMM_CART; ret = ompi_comm_enable(old_comm, new_comm,
new_rank, num_procs, topo_procs); if (OMPI_SUCCESS != ret) { /* something wrong happened during setting the communicator */ + new_comm->c_topo = NULL; + new_comm->c_flags &= ~OMPI_COMM_CART; free(topo_procs); OBJ_RELEASE(cart); ompi_comm_free (&new_comm); return ret; } - new_comm->c_topo = topo; - new_comm->c_topo->mtc.cart = cart; - new_comm->c_topo->reorder = reorder; - new_comm->c_flags |= OMPI_COMM_CART; *comm_topo = new_comm; if( MPI_UNDEFINED == new_rank ) { diff --git a/ompi/mca/topo/base/topo_base_dist_graph_create.c b/ompi/mca/topo/base/topo_base_dist_graph_create.c index f8e6dab9327..641b62ab830 100644 --- a/ompi/mca/topo/base/topo_base_dist_graph_create.c +++ b/ompi/mca/topo/base/topo_base_dist_graph_create.c @@ -288,28 +288,71 @@ int mca_topo_base_dist_graph_create(mca_topo_base_module_t* module, { int err; - if( OMPI_SUCCESS != (err = ompi_comm_create(comm_old, - comm_old->c_local_group, - newcomm)) ) { - OBJ_RELEASE(module); - return err; + ompi_proc_t **topo_procs = NULL; + int num_procs, ret, rank, i; + ompi_communicator_t *new_comm; + mca_topo_base_comm_dist_graph_2_2_0_t* topo; + num_procs = ompi_comm_size(comm_old); + rank = ompi_comm_rank(comm_old); + topo_procs = (ompi_proc_t**)malloc(num_procs * sizeof(ompi_proc_t *)); + if(OMPI_GROUP_IS_DENSE(comm_old->c_local_group)) { + memcpy(topo_procs, + comm_old->c_local_group->grp_proc_pointers, + num_procs * sizeof(ompi_proc_t *)); + } else { + for(i = 0 ; i < num_procs; i++) { + topo_procs[i] = ompi_group_peer_lookup(comm_old->c_local_group,i); + } + } + new_comm = ompi_comm_allocate(num_procs, 0); + if (NULL == new_comm) { + free(topo_procs); + return OMPI_ERR_OUT_OF_RESOURCE; } - - assert(NULL == (*newcomm)->c_topo); - (*newcomm)->c_topo = module; - (*newcomm)->c_topo->reorder = reorder; - (*newcomm)->c_flags |= OMPI_COMM_DIST_GRAPH; - err = mca_topo_base_dist_graph_distribute(module, - *newcomm, + comm_old, n, nodes, degrees, targets, weights, - &((*newcomm)->c_topo->mtc.dist_graph)); + &topo); 
if( OMPI_SUCCESS != err ) { + free(topo_procs); ompi_comm_free(newcomm); + return err; } - return err; + + assert(NULL == new_comm->c_topo); + new_comm->c_topo = module; + new_comm->c_topo->reorder = reorder; + new_comm->c_flags |= OMPI_COMM_DIST_GRAPH; + new_comm->c_topo->mtc.dist_graph = topo; + + ret = ompi_comm_enable(comm_old, new_comm, + rank, num_procs, topo_procs); + if (OMPI_SUCCESS != ret) { + if ( NULL != topo->in ) { + free(topo->in); + } + if ( NULL != topo->out ) { + free(topo->out); + } + if ( NULL != topo->inw ) { + free(topo->inw); + } + if ( NULL != topo->outw ) { + free(topo->outw); + } + free(topo); + free(topo_procs); + new_comm->c_topo = NULL; + new_comm->c_flags &= ~OMPI_COMM_DIST_GRAPH; + new_comm->c_topo->mtc.dist_graph = NULL; + ompi_comm_free (&new_comm); + return ret; + } + *newcomm = new_comm; + + return OMPI_SUCCESS; } static void mca_topo_base_comm_dist_graph_2_2_0_construct(mca_topo_base_comm_dist_graph_2_2_0_t * dist_graph) { diff --git a/ompi/mca/topo/base/topo_base_graph_create.c b/ompi/mca/topo/base/topo_base_graph_create.c index d83a9f4738d..990ac7228fb 100644 --- a/ompi/mca/topo/base/topo_base_graph_create.c +++ b/ompi/mca/topo/base/topo_base_graph_create.c @@ -10,7 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. - * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -123,19 +123,22 @@ int mca_topo_base_graph_create(mca_topo_base_module_t *topo, return OMPI_ERR_OUT_OF_RESOURCE; } + new_comm->c_topo = topo; + new_comm->c_topo->mtc.graph = graph; + new_comm->c_flags |= OMPI_COMM_GRAPH; + new_comm->c_topo->reorder = reorder; + ret = ompi_comm_enable(old_comm, new_comm, new_rank, num_procs, topo_procs); if (OMPI_SUCCESS != ret) { + new_comm->c_topo = NULL; + new_comm->c_flags &= ~OMPI_COMM_GRAPH; free(topo_procs); OBJ_RELEASE(graph); ompi_comm_free (&new_comm); return ret; } - new_comm->c_topo = topo; - new_comm->c_topo->mtc.graph = graph; - new_comm->c_flags |= OMPI_COMM_GRAPH; - new_comm->c_topo->reorder = reorder; *comm_topo = new_comm; if( MPI_UNDEFINED == new_rank ) { From 4925ddf8e3b57e2bef8e3186ec51e475c5f8e244 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Fri, 10 Oct 2014 16:07:20 +0900 Subject: [PATCH 03/14] * comment on communicator creation in mca_topo_base_dist_graph_create(...) * use accesors to retrieve topo info --- ompi/mca/coll/basic/coll_basic_module.c | 16 +++++----- .../topo/base/topo_base_dist_graph_create.c | 29 +++++++++++-------- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/ompi/mca/coll/basic/coll_basic_module.c b/ompi/mca/coll/basic/coll_basic_module.c index 574a0e4f985..80fb7b584e9 100644 --- a/ompi/mca/coll/basic/coll_basic_module.c +++ b/ompi/mca/coll/basic/coll_basic_module.c @@ -76,11 +76,10 @@ mca_coll_basic_comm_query(struct ompi_communicator_t *comm, } size *= 2; if (OMPI_COMM_IS_CART(comm)) { - int cart_size; - mca_topo_base_comm_cart_2_2_0_t *cart; + int cart_size, ndims; assert (NULL != comm->c_topo); - cart = comm->c_topo->mtc.cart; - cart_size = cart->ndims * 4; + comm->c_topo->topo.cart.cartdim_get(comm, &ndims); + cart_size = ndims * 4; if (cart_size > size) { size = cart_size; } @@ -88,17 +87,16 @@ mca_coll_basic_comm_query(struct ompi_communicator_t *comm, int rank, degree; assert (NULL != comm->c_topo); rank = ompi_comm_rank (comm); - 
mca_topo_base_graph_neighbors_count (comm, rank, &degree); + comm->c_topo->topo.graph.graph_neighbors_count (comm, rank, &degree); degree *= 2; if (degree > size) { size = degree; } } else if (OMPI_COMM_IS_DIST_GRAPH(comm)) { - int dist_graph_size; - mca_topo_base_comm_dist_graph_2_2_0_t *dist_graph; + int dist_graph_size, inneighbors, outneighbors, weighted; assert (NULL != comm->c_topo); - dist_graph = comm->c_topo->mtc.dist_graph; - dist_graph_size = dist_graph->indegree + dist_graph->outdegree; + comm->c_topo->topo.dist_graph.dist_graph_neighbors_count(comm, &inneighbors, &outneighbors, &weighted); + dist_graph_size = inneighbors + outneighbors; if (dist_graph_size > size) { size = dist_graph_size; } diff --git a/ompi/mca/topo/base/topo_base_dist_graph_create.c b/ompi/mca/topo/base/topo_base_dist_graph_create.c index 641b62ab830..37e2b4adf8d 100644 --- a/ompi/mca/topo/base/topo_base_dist_graph_create.c +++ b/ompi/mca/topo/base/topo_base_dist_graph_create.c @@ -287,23 +287,16 @@ int mca_topo_base_dist_graph_create(mca_topo_base_module_t* module, ompi_communicator_t **newcomm) { int err; - ompi_proc_t **topo_procs = NULL; int num_procs, ret, rank, i; ompi_communicator_t *new_comm; mca_topo_base_comm_dist_graph_2_2_0_t* topo; - num_procs = ompi_comm_size(comm_old); - rank = ompi_comm_rank(comm_old); + topo_procs = (ompi_proc_t**)malloc(num_procs * sizeof(ompi_proc_t *)); - if(OMPI_GROUP_IS_DENSE(comm_old->c_local_group)) { - memcpy(topo_procs, - comm_old->c_local_group->grp_proc_pointers, - num_procs * sizeof(ompi_proc_t *)); - } else { - for(i = 0 ; i < num_procs; i++) { - topo_procs[i] = ompi_group_peer_lookup(comm_old->c_local_group,i); - } + if (NULL == topo_procs) { + return OMPI_ERR_OUT_OF_RESOURCE; } + num_procs = ompi_comm_size(comm_old); new_comm = ompi_comm_allocate(num_procs, 0); if (NULL == new_comm) { free(topo_procs); @@ -317,10 +310,22 @@ int mca_topo_base_dist_graph_create(mca_topo_base_module_t* module, &topo); if( OMPI_SUCCESS != err ) {
free(topo_procs); - ompi_comm_free(newcomm); + ompi_comm_free(&new_comm); return err; } + /* we cannot simply call ompi_comm_create because c_topo + must be set before invoking ompi_comm_enable */ + rank = ompi_comm_rank(comm_old); + if(OMPI_GROUP_IS_DENSE(comm_old->c_local_group)) { + memcpy(topo_procs, + comm_old->c_local_group->grp_proc_pointers, + num_procs * sizeof(ompi_proc_t *)); + } else { + for(i = 0 ; i < num_procs; i++) { + topo_procs[i] = ompi_group_peer_lookup(comm_old->c_local_group,i); + } + } assert(NULL == new_comm->c_topo); new_comm->c_topo = module; new_comm->c_topo->reorder = reorder; From 95d1ff78d5ef2514fb04dc39d6fb1e025d95f311 Mon Sep 17 00:00:00 2001 From: rolfv Date: Fri, 7 Nov 2014 11:00:45 -0800 Subject: [PATCH 04/14] Make sure initialization happens --- opal/mca/btl/sm/btl_sm_component.c | 8 ++++---- opal/mca/btl/smcuda/btl_smcuda_component.c | 4 ++++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/opal/mca/btl/sm/btl_sm_component.c b/opal/mca/btl/sm/btl_sm_component.c index 4417d44f650..8dc8f2a28b5 100644 --- a/opal/mca/btl/sm/btl_sm_component.c +++ b/opal/mca/btl/sm/btl_sm_component.c @@ -734,6 +734,10 @@ mca_btl_sm_component_init(int *num_btls, mca_btl_sm_component.sm_mpool = NULL; mca_btl_sm_component.sm_mpool_base = NULL; +#if OPAL_CUDA_SUPPORT + mca_common_cuda_stage_one_init(); +#endif /* OPAL_CUDA_SUPPORT */ + /* if no session directory was created, then we cannot be used */ if (NULL == opal_process_info.job_session_dir) { /* SKG - this isn't true anymore. 
Some backing facilities don't require a @@ -927,10 +931,6 @@ mca_btl_sm_component_init(int *num_btls, } #endif /* OPAL_BTL_SM_HAVE_CMA */ -#if OPAL_CUDA_SUPPORT - mca_common_cuda_stage_one_init(); -#endif /* OPAL_CUDA_SUPPORT */ - return btls; no_knem: diff --git a/opal/mca/btl/smcuda/btl_smcuda_component.c b/opal/mca/btl/smcuda/btl_smcuda_component.c index 3a6281a6748..32914e96ea8 100644 --- a/opal/mca/btl/smcuda/btl_smcuda_component.c +++ b/opal/mca/btl/smcuda/btl_smcuda_component.c @@ -845,6 +845,10 @@ mca_btl_smcuda_component_init(int *num_btls, mca_btl_smcuda_component.sm_mpool = NULL; mca_btl_smcuda_component.sm_mpool_base = NULL; +#if OPAL_CUDA_SUPPORT + mca_common_cuda_stage_one_init(); +#endif /* OPAL_CUDA_SUPPORT */ + /* if no session directory was created, then we cannot be used */ if (NULL == opal_process_info.job_session_dir) { /* SKG - this isn't true anymore. Some backing facilities don't require a From 0c630b981fca649a63e4fde8dc7615b0e9516d0e Mon Sep 17 00:00:00 2001 From: rolfv Date: Fri, 7 Nov 2014 11:08:41 -0800 Subject: [PATCH 05/14] Missed a removal from previous commit --- opal/mca/btl/smcuda/btl_smcuda_component.c | 1 - 1 file changed, 1 deletion(-) diff --git a/opal/mca/btl/smcuda/btl_smcuda_component.c b/opal/mca/btl/smcuda/btl_smcuda_component.c index 32914e96ea8..6290128b065 100644 --- a/opal/mca/btl/smcuda/btl_smcuda_component.c +++ b/opal/mca/btl/smcuda/btl_smcuda_component.c @@ -940,7 +940,6 @@ mca_btl_smcuda_component_init(int *num_btls, /* Register a smcuda control function to help setup IPC support */ mca_btl_base_active_message_trigger[MCA_BTL_TAG_SMCUDA].cbfunc = btl_smcuda_control; mca_btl_base_active_message_trigger[MCA_BTL_TAG_SMCUDA].cbdata = NULL; - mca_common_cuda_stage_one_init(); #endif /* OPAL_CUDA_SUPPORT */ return btls; From ef82741af7dab080d37aa6414f5cf90db62fdab8 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 7 Nov 2014 13:12:19 -0800 Subject: [PATCH 06/14] Doh - if we can't output an entire block, then we 
need to adjust the number of bytes remaining to be output or else we will output duplicate bytes when next we are able to write. --- orte/mca/iof/base/iof_base_output.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/orte/mca/iof/base/iof_base_output.c b/orte/mca/iof/base/iof_base_output.c index 60ab0c04561..8d1b9eddcaf 100644 --- a/orte/mca/iof/base/iof_base_output.c +++ b/orte/mca/iof/base/iof_base_output.c @@ -316,6 +316,8 @@ void orte_iof_base_write_handler(int fd, short event, void *cbdata) } else if (num_written < output->numbytes) { /* incomplete write - adjust data to avoid duplicate output */ memmove(output->data, &output->data[num_written], output->numbytes - num_written); + /* adjust the number of bytes remaining to be written */ + output->numbytes -= num_written; /* push this item back on the front of the list */ opal_list_prepend(&wev->outputs, item); /* if the list is getting too large, abort */ From 67b195793911f5a67ca6285f5d895ad23dd98b98 Mon Sep 17 00:00:00 2001 From: Annapurna Dasari Date: Sun, 15 Mar 2015 14:30:23 -0700 Subject: [PATCH 07/14] Add rml_base_channel_handlers.c --- orte/mca/rml/base/Makefile.am | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/orte/mca/rml/base/Makefile.am b/orte/mca/rml/base/Makefile.am index dfd8df0a504..be8a16b0afe 100644 --- a/orte/mca/rml/base/Makefile.am +++ b/orte/mca/rml/base/Makefile.am @@ -5,16 +5,17 @@ # Copyright (c) 2004-2005 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights -# reserved. +# reserved. +# Copyright (c) 2015 Intel Corporation. All rights reserved. 
# $COPYRIGHT$ -# +# # Additional copyrights may follow -# +# # $HEADER$ # @@ -26,4 +27,5 @@ libmca_rml_la_SOURCES += \ base/rml_base_frame.c \ base/rml_base_receive.c \ base/rml_base_contact.c \ - base/rml_base_msg_handlers.c + base/rml_base_msg_handlers.c \ + base/rml_base_channel_handlers.c From d575d74694dcc3e26f392e9ee0e320e78ca2834d Mon Sep 17 00:00:00 2001 From: Annapurna Dasari Date: Sun, 15 Mar 2015 14:35:24 -0700 Subject: [PATCH 08/14] porting changes from QoS branch --- orte/mca/rml/base/base.h | 143 ++++++++++++++++++++++++++++++++++----- 1 file changed, 127 insertions(+), 16 deletions(-) diff --git a/orte/mca/rml/base/base.h b/orte/mca/rml/base/base.h index 4f98f1c59fb..d434d01521b 100644 --- a/orte/mca/rml/base/base.h +++ b/orte/mca/rml/base/base.h @@ -6,7 +6,7 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,9 +14,9 @@ * reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -26,7 +26,7 @@ * RML Framework maintenence interface * * Interface for starting / stopping / controlling the RML framework, - * as well as support for modifying RML datatypes. + * as well as support for modifying RML datatypes. * * @note The only RML datatype exposed to the user is the RML tag. 
* This will always be an integral value, so the only datatype support @@ -43,11 +43,13 @@ #include "opal/dss/dss_types.h" #include "opal/mca/mca.h" #include "opal/util/timings.h" +#include "opal/class/opal_pointer_array.h" #include "orte/runtime/orte_globals.h" #include "orte/mca/rml/rml.h" + BEGIN_C_DECLS OPAL_TIMING_DECLARE_EXT(ORTE_DECLSPEC, tm_rml) @@ -84,6 +86,7 @@ ORTE_DECLSPEC void orte_rml_base_comm_stop(void); typedef struct { opal_list_t posted_recvs; opal_list_t unmatched_msgs; + opal_pointer_array_t open_channels; #if OPAL_ENABLE_TIMING bool timing; #endif @@ -109,12 +112,37 @@ ORTE_DECLSPEC extern opal_list_t orte_rml_base_components; * Component structure pointer for the currently selected RML * component. Useable between calls to orte_rml_base_select() and * orte_rml_base_close(). - * + * * @note This pointer should not be used outside the RML base. It is * available outside the RML base only for the F/T component. */ ORTE_DECLSPEC extern orte_rml_component_t *orte_rml_component; +typedef enum { + orte_rml_channel_opening = 0, + orte_rml_channel_open = 1, + orte_rml_channel_closing = 2, + orte_rml_channel_closed = 3, +}orte_rml_channel_state_t; + +/** + * RML channel structure. + * The RML only needs basic channel information as the rest of the book keeping information + * is stored in the QoS module specific channel object. + * It contains a pointer to the QoS module that handles requests on the channel. + * It contains a pointer to a struct that contains the QoS specific channel data. + */ +typedef struct { + orte_rml_channel_num_t channel_num; // the channel number reference (exposed to the user). 
+ orte_process_name_t peer; // the other end point (peer) of the channel + orte_rml_channel_num_t peer_channel; // peer channel number + void * qos; // pointer to QoS component specific module + void * qos_channel_ptr; // pointer to QoS component specific channel struct + orte_rml_channel_state_t state; // channel state + bool receive; // set to true if this is a receive (peer opened) channel. (Default is send channel) +} orte_rml_channel_t; +OBJ_CLASS_DECLARATION(orte_rml_channel_t); + /* structure to send RML messages - used internally */ typedef struct { @@ -128,6 +156,8 @@ typedef struct { union { orte_rml_callback_fn_t iov; orte_rml_buffer_callback_fn_t buffer; + orte_rml_send_channel_callback_fn_t iov_chan; + orte_rml_send_buffer_channel_callback_fn_t buf_chan; } cbfunc; void *cbdata; @@ -136,6 +166,13 @@ typedef struct { int count; /* pointer to the user's buffer */ opal_buffer_t *buffer; + /*** TODO : need to move channel specific data to a channel struct */ + /* pointer to the channel object */ + orte_rml_channel_t *channel; + /* destination channel number */ + orte_rml_channel_num_t dst_channel; + /* msg seq number */ + uint32_t seq_num; /* pointer to raw data for cross-transport * transfers */ @@ -143,11 +180,32 @@ typedef struct { } orte_rml_send_t; OBJ_CLASS_DECLARATION(orte_rml_send_t); +/* structure to send RML channel open messages - used internally */ +typedef struct { + opal_list_item_t super; + /* peer process */ + orte_process_name_t dst; + /* msg send status */ + int status; + /* channel object */ + orte_rml_channel_t *channel; + /* attributes of the channel */ + opal_list_t *qos_attributes; + /* user's callback function */ + orte_rml_channel_callback_fn_t cbfunc; + /* user's cbdata */ + void *cbdata; +} orte_rml_open_channel_t; +OBJ_CLASS_DECLARATION(orte_rml_open_channel_t); + /* define an object for transferring send requests to the event lib */ typedef struct { opal_object_t super; opal_event_t ev; - orte_rml_send_t post; + union { + 
orte_rml_send_t send; + orte_rml_open_channel_t channel; + }post; } orte_rml_send_request_t; OBJ_CLASS_DECLARATION(orte_rml_send_request_t); @@ -157,6 +215,8 @@ typedef struct { opal_event_t ev; orte_process_name_t sender; // sender orte_rml_tag_t tag; // targeted tag + orte_rml_channel_num_t channel_num; // channel number + uint32_t seq_num; //sequence number struct iovec iov; // the recvd data } orte_rml_recv_t; OBJ_CLASS_DECLARATION(orte_rml_recv_t); @@ -184,7 +244,7 @@ typedef struct { } orte_rml_recv_request_t; OBJ_CLASS_DECLARATION(orte_rml_recv_request_t); -#define ORTE_RML_POST_MESSAGE(p, t, b, l) \ +#define ORTE_RML_POST_MESSAGE(p, t, c, s, b, l) \ do { \ orte_rml_recv_t *msg; \ opal_output_verbose(5, orte_rml_base_framework.framework_output, \ @@ -195,6 +255,8 @@ OBJ_CLASS_DECLARATION(orte_rml_recv_request_t); msg->sender.jobid = (p)->jobid; \ msg->sender.vpid = (p)->vpid; \ msg->tag = (t); \ + msg->channel_num = (c); \ + msg->seq_num = (s); \ msg->iov.iov_base = (IOVBASE_TYPE*)(b); \ msg->iov.iov_len = (l); \ /* setup the event */ \ @@ -222,34 +284,83 @@ OBJ_CLASS_DECLARATION(orte_rml_recv_request_t); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ ORTE_NAME_PRINT(&((m)->dst)), \ __FILE__, __LINE__); \ - if (NULL != (m)->iov) { \ - if (NULL != (m)->cbfunc.iov) { \ - (m)->cbfunc.iov((m)->status, \ + if( NULL == (m)->channel) { \ + if (NULL != (m)->iov) { \ + if (NULL != (m)->cbfunc.iov) { \ + (m)->cbfunc.iov((m)->status, \ &((m)->dst), \ (m)->iov, (m)->count, \ (m)->tag, (m)->cbdata); \ - } \ - } else { \ - /* non-blocking buffer send */ \ - (m)->cbfunc.buffer((m)->status, &((m)->origin), \ + } \ + } else { \ + /* non-blocking buffer send */ \ + (m)->cbfunc.buffer((m)->status, &((m)->origin), \ (m)->buffer, \ (m)->tag, (m)->cbdata); \ + } \ + } else { \ + if (NULL != (m)->iov) { \ + if (NULL != (m)->cbfunc.iov_chan) { \ + (m)->cbfunc.iov_chan((m)->status, \ + (m)->channel->channel_num, \ + (m)->iov, (m)->count, \ + (m)->tag, (m)->cbdata); \ + } \ + } else { \ + /* 
non-blocking buffer send */ \ + (m)->cbfunc.buf_chan((m)->status, \ + (m)->channel->channel_num, \ + (m)->buffer, \ + (m)->tag, (m)->cbdata); \ + } \ } \ OBJ_RELEASE(m); \ }while(0); + +#define ORTE_RML_OPEN_CHANNEL_COMPLETE(m) \ + do { \ + opal_output_verbose(5, orte_rml_base_framework.framework_output, \ + "%s-%s open channel message complete at %s:%d", \ + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ + ORTE_NAME_PRINT(&((m)->dst)), \ + __FILE__, __LINE__); \ + /* call the callback function */ \ + (m)->cbfunc((m)->status, (m)->channel->channel_num, \ + &((m)->dst), \ + NULL, (m)->cbdata) ; \ + }while(0); + + /* * This is the base priority for a RML wrapper component - * If there exists more than one wrapper, then the one with + * If there exists more than one wrapper, then the one with * the lowest priority wins. */ #define RML_SELECT_WRAPPER_PRIORITY -128 +#define ORTE_RML_INVALID_CHANNEL_NUM 1599 +ORTE_DECLSPEC orte_rml_channel_t * orte_rml_base_get_channel (orte_rml_channel_num_t chan_num); + + /* common implementations */ ORTE_DECLSPEC void orte_rml_base_post_recv(int sd, short args, void *cbdata); ORTE_DECLSPEC void orte_rml_base_process_msg(int fd, short flags, void *cbdata); ORTE_DECLSPEC void orte_rml_base_process_error(int fd, short flags, void *cbdata); - +ORTE_DECLSPEC void orte_rml_base_open_channel(int fd, short flags, void *cbdata); +ORTE_DECLSPEC void orte_rml_open_channel_send_callback ( int status, orte_process_name_t* sender, + opal_buffer_t* buffer, orte_rml_tag_t tag, + void* cbdata); +ORTE_DECLSPEC void orte_rml_open_channel_resp_callback (int status, orte_process_name_t* peer, + struct opal_buffer_t* buffer, orte_rml_tag_t tag, + void* cbdata); +ORTE_DECLSPEC void orte_rml_open_channel_reply_send_callback ( int status, orte_process_name_t* sender, + opal_buffer_t* buffer, orte_rml_tag_t tag, + void* cbdata); +ORTE_DECLSPEC void orte_rml_base_prep_send_channel (orte_rml_channel_t *channel, + orte_rml_send_t *send); +ORTE_DECLSPEC void 
orte_rml_base_process_recv_channel (orte_rml_channel_t *channel, + orte_rml_recv_t *recv); END_C_DECLS #endif /* MCA_RML_BASE_H */ From 7eb3f359bd87592848d50f63801662d0beb2c295 Mon Sep 17 00:00:00 2001 From: Annapurna Dasari Date: Sun, 15 Mar 2015 14:37:29 -0700 Subject: [PATCH 09/14] porting changes from QoS branch --- orte/mca/rml/base/rml_base_frame.c | 60 +++++- orte/mca/rml/base/rml_base_msg_handlers.c | 44 +++- orte/mca/rml/oob/rml_oob.h | 33 ++- orte/mca/rml/oob/rml_oob_send.c | 205 +++++++++++++------ orte/mca/rml/rml.h | 232 +++++++++++++++++++++- orte/mca/rml/rml_types.h | 27 ++- 6 files changed, 507 insertions(+), 94 deletions(-) diff --git a/orte/mca/rml/base/rml_base_frame.c b/orte/mca/rml/base/rml_base_frame.c index c973af81983..1eef10f55b1 100644 --- a/orte/mca/rml/base/rml_base_frame.c +++ b/orte/mca/rml/base/rml_base_frame.c @@ -3,13 +3,13 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Intel Corporation. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -48,7 +48,7 @@ static int orte_rml_base_register(mca_base_register_flag_t flags) { int var_id; - /* + /* * Which RML Wrapper component to use, if any * - NULL or "" = No wrapper * - ow. 
select that specific wrapper component @@ -80,10 +80,12 @@ static int orte_rml_base_close(void) while (NULL != (item = opal_list_remove_first(&orte_rml_base.posted_recvs))) { OBJ_RELEASE(item); + } OBJ_DESTRUCT(&orte_rml_base.posted_recvs); OPAL_TIMING_REPORT(orte_rml_base.timing, &tm_rml); + OBJ_DESTRUCT(&orte_rml_base.open_channels); return mca_base_framework_components_close(&orte_rml_base_framework, NULL); } @@ -93,6 +95,11 @@ static int orte_rml_base_open(mca_base_open_flag_t flags) /* Initialize globals */ OBJ_CONSTRUCT(&orte_rml_base.posted_recvs, opal_list_t); OBJ_CONSTRUCT(&orte_rml_base.unmatched_msgs, opal_list_t); + OBJ_CONSTRUCT(&orte_rml_base.open_channels, opal_pointer_array_t); + if (OPAL_SUCCESS != opal_pointer_array_init(&orte_rml_base.open_channels, 0, + INT_MAX, 1)) { + return ORTE_ERR_OUT_OF_RESOURCE; + } OPAL_TIMING_INIT(&tm_rml); /* Open up all available components */ return mca_base_framework_components_open(&orte_rml_base_framework, flags); @@ -124,13 +131,13 @@ int orte_rml_base_select(void) orte_rml_component_t* component; component = (orte_rml_component_t *) cli->cli_component; - opal_output_verbose(10, orte_rml_base_framework.framework_output, + opal_output_verbose(10, orte_rml_base_framework.framework_output, "orte_rml_base_select: initializing %s component %s", component->rml_version.mca_type_name, component->rml_version.mca_component_name); if (NULL == component->rml_init) { - opal_output_verbose(10, orte_rml_base_framework.framework_output, + opal_output_verbose(10, orte_rml_base_framework.framework_output, "orte_rml_base_select: no init function; ignoring component"); } else { int priority = 0; @@ -148,7 +155,7 @@ int orte_rml_base_select(void) if(NULL != orte_rml_base_wrapper && /* If this is a wrapper component then save it for later */ RML_SELECT_WRAPPER_PRIORITY >= priority) { - if( 0 == strncmp(component->rml_version.mca_component_name, + if( 0 == strncmp(component->rml_version.mca_component_name, orte_rml_base_wrapper, 
strlen(orte_rml_base_wrapper) ) ) { wrapper_component = component; @@ -166,7 +173,7 @@ int orte_rml_base_select(void) } } - /* + /* * Unload all components that were not selected */ OPAL_LIST_FOREACH_SAFE(item, next, &orte_rml_base_framework.framework_components, opal_list_item_t) { @@ -192,7 +199,7 @@ int orte_rml_base_select(void) orte_rml_component = selected_component; } - /* If a wrapper component was requested then + /* If a wrapper component was requested then * Make sure it can switch out the selected module */ if( NULL != wrapper_component) { @@ -205,7 +212,10 @@ int orte_rml_base_select(void) } return ORTE_ERROR; } - + /* Post a persistent recieve for open channel request */ + orte_rml.recv_buffer_nb (ORTE_NAME_WILDCARD, ORTE_RML_TAG_OPEN_CHANNEL_REQ, + ORTE_RML_PERSISTENT, orte_rml_open_channel_recv_callback, + NULL); return ORTE_SUCCESS; } @@ -235,6 +245,7 @@ void orte_rml_recv_callback(int status, orte_process_name_t* sender, blob->active = false; } + /*** RML CLASS INSTANCES ***/ static void send_cons(orte_rml_send_t *ptr) { @@ -242,14 +253,39 @@ static void send_cons(orte_rml_send_t *ptr) ptr->iov = NULL; ptr->buffer = NULL; ptr->data = NULL; + ptr->channel = NULL; + ptr->dst_channel = ORTE_RML_INVALID_CHANNEL_NUM; + ptr->seq_num = 0xFFFFFFFF; } OBJ_CLASS_INSTANCE(orte_rml_send_t, opal_list_item_t, send_cons, NULL); +static void channel_cons(orte_rml_channel_t *ptr) +{ + ptr->channel_num = ORTE_RML_INVALID_CHANNEL_NUM; + ptr->qos = NULL; + ptr->qos_channel_ptr = NULL; + ptr->receive = false; +} + +OBJ_CLASS_INSTANCE(orte_rml_channel_t, + opal_object_t, + channel_cons, NULL); + +static void open_channel_cons(orte_rml_open_channel_t *ptr) +{ + ptr->cbdata = NULL; + ptr->qos_attributes = NULL; +} +OBJ_CLASS_INSTANCE(orte_rml_open_channel_t, + opal_list_item_t, + open_channel_cons, NULL); + static void send_req_cons(orte_rml_send_request_t *ptr) { - OBJ_CONSTRUCT(&ptr->post, orte_rml_send_t); + OBJ_CONSTRUCT(&ptr->post.send, orte_rml_send_t); + 
OBJ_CONSTRUCT(&ptr->post.channel, orte_rml_open_channel_t); } OBJ_CLASS_INSTANCE(orte_rml_send_request_t, opal_object_t, @@ -259,6 +295,7 @@ static void recv_cons(orte_rml_recv_t *ptr) { ptr->iov.iov_base = NULL; ptr->iov.iov_len = 0; + ptr->channel_num = ORTE_RML_INVALID_CHANNEL_NUM; } static void recv_des(orte_rml_recv_t *ptr) { @@ -305,3 +342,4 @@ OBJ_CLASS_INSTANCE(orte_rml_recv_request_t, opal_object_t, prq_cons, prq_des); + diff --git a/orte/mca/rml/base/rml_base_msg_handlers.c b/orte/mca/rml/base/rml_base_msg_handlers.c index 9e01ae38fd0..6481f5ba909 100644 --- a/orte/mca/rml/base/rml_base_msg_handlers.c +++ b/orte/mca/rml/base/rml_base_msg_handlers.c @@ -11,7 +11,8 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -47,6 +48,8 @@ #include "orte/mca/rml/rml.h" #include "orte/mca/rml/base/base.h" #include "orte/mca/rml/base/rml_contact.h" +#include "orte/mca/qos/base/base.h" + static void msg_match_recv(orte_rml_posted_recv_t *rcv, bool get_all); @@ -159,11 +162,11 @@ void orte_rml_base_process_msg(int fd, short flags, void *cbdata) opal_buffer_t buf; OPAL_OUTPUT_VERBOSE((5, orte_rml_base_framework.framework_output, - "%s message received %d bytes from %s for tag %d", + "%s message received from %s for tag %d on channel=%d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (int)msg->iov.iov_len, ORTE_NAME_PRINT(&msg->sender), - msg->tag)); + msg->tag, + msg->channel_num)); OPAL_TIMING_EVENT((&tm_rml,"from %s %d bytes", ORTE_NAME_PRINT(&msg->sender), msg->iov.iov_len)); @@ -175,6 +178,15 @@ void orte_rml_base_process_msg(int fd, short flags, void *cbdata) */ if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &msg->sender, &post->peer) && msg->tag == post->tag) { + if ((ORTE_RML_INVALID_CHANNEL_NUM != 
msg->channel_num) && + (NULL != orte_rml_base_get_channel(msg->channel_num) )) { + OPAL_OUTPUT_VERBOSE((5, orte_rml_base_framework.framework_output, + "%s calling recv msg on channel=%d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + msg->channel_num)); + // call channel for recv post processing + orte_rml_base_process_recv_channel (orte_rml_base_get_channel(msg->channel_num), msg); + } /* deliver the data to this location */ if (post->buffer_data) { /* deliver it in a buffer */ @@ -186,7 +198,13 @@ void orte_rml_base_process_msg(int fd, short flags, void *cbdata) /* the user must have unloaded the buffer if they wanted * to retain ownership of it, so release whatever remains */ - OBJ_DESTRUCT(&buf); + /* OPAL_OUTPUT_VERBOSE((5, orte_rml_base_framework.framework_output, + "%s message received bytes from %s for tag %d on channel=%d called callback", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&msg->sender), + msg->tag, + msg->channel_num));*/ + OBJ_DESTRUCT(&buf); } else { /* deliver as an iovec */ post->cbfunc.iov(ORTE_SUCCESS, &msg->sender, &msg->iov, 1, msg->tag, post->cbdata); @@ -197,10 +215,19 @@ void orte_rml_base_process_msg(int fd, short flags, void *cbdata) } /* release the message */ OBJ_RELEASE(msg); + OPAL_OUTPUT_VERBOSE((5, orte_rml_base_framework.framework_output, + "%s message tag %d on released", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + post->tag)); /* if the recv is non-persistent, remove it */ if (!post->persistent) { opal_list_remove_item(&orte_rml_base.posted_recvs, &post->super); + /*OPAL_OUTPUT_VERBOSE((5, orte_rml_base_framework.framework_output, + "%s non persistent recv %p remove success releasing now", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + post));*/ OBJ_RELEASE(post); + } return; } @@ -209,5 +236,12 @@ void orte_rml_base_process_msg(int fd, short flags, void *cbdata) /* we get here if no matching recv was found - we then hold * the message until such a recv is issued */ + OPAL_OUTPUT_VERBOSE((5, 
orte_rml_base_framework.framework_output, + "%s message received bytes from %s for tag %d on channel=%d Not Matched adding to unmatched msgs", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&msg->sender), + msg->tag, + msg->channel_num)); opal_list_append(&orte_rml_base.unmatched_msgs, &msg->super); } + diff --git a/orte/mca/rml/oob/rml_oob.h b/orte/mca/rml/oob/rml_oob.h index 7a8fdf1f09b..2ec3b5b1861 100644 --- a/orte/mca/rml/oob/rml_oob.h +++ b/orte/mca/rml/oob/rml_oob.h @@ -5,7 +5,7 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -14,10 +14,11 @@ * All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2014 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -75,10 +76,32 @@ void orte_rml_oob_recv_buffer_nb(orte_process_name_t* peer, orte_rml_buffer_callback_fn_t cbfunc, void* cbdata); -void orte_rml_oob_recv_cancel(orte_process_name_t* peer, +void orte_rml_oob_recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag); -int orte_rml_oob_ping(const char* uri, +int orte_rml_oob_open_channel(orte_process_name_t * peer, + opal_list_t * qos_attributes, + orte_rml_channel_callback_fn_t cbfunc, + void *cbdata); + +int orte_rml_oob_send_channel_nb (orte_rml_channel_num_t channel, + struct iovec* msg, + int count, + orte_rml_tag_t tag, + orte_rml_send_channel_callback_fn_t cbfunc, + void* cbdata); + +int orte_rml_oob_send_buffer_channel_nb (orte_rml_channel_num_t channel, + opal_buffer_t *buffer, + orte_rml_tag_t tag, + orte_rml_send_buffer_channel_callback_fn_t cbfunc, + void* cbdata); + +int orte_rml_oob_close_channel (orte_rml_channel_num_t channel, + orte_rml_channel_callback_fn_t cbfunc, + void* cbdata); + +int orte_rml_oob_ping(const char* uri, const struct timeval* tv); char* orte_rml_oob_get_uri(void); diff --git a/orte/mca/rml/oob/rml_oob_send.c b/orte/mca/rml/oob/rml_oob_send.c index 3331856b708..48652d81ab7 100644 --- a/orte/mca/rml/oob/rml_oob_send.c +++ b/orte/mca/rml/oob/rml_oob_send.c @@ -6,17 +6,17 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -34,7 +34,7 @@ #include "orte/mca/rml/base/base.h" #include "orte/mca/rml/rml_types.h" #include "rml_oob.h" - +#include "orte/mca/qos/base/base.h" typedef struct { opal_object_t object; opal_event_t ev; @@ -74,13 +74,13 @@ static void send_self_exe(int fd, short args, void* data) if (NULL != xfer->iov) { if (NULL != xfer->cbfunc.iov) { /* non-blocking iovec send */ - xfer->cbfunc.iov(ORTE_SUCCESS, ORTE_PROC_MY_NAME, xfer->iov, xfer->count, + xfer->cbfunc.iov(ORTE_SUCCESS, ORTE_PROC_MY_NAME, xfer->iov, xfer->count, xfer->tag, xfer->cbdata); } } else if (NULL != xfer->buffer) { if (NULL != xfer->cbfunc.buffer) { /* non-blocking buffer send */ - xfer->cbfunc.buffer(ORTE_SUCCESS, ORTE_PROC_MY_NAME, xfer->buffer, + xfer->cbfunc.buffer(ORTE_SUCCESS, ORTE_PROC_MY_NAME, xfer->buffer, xfer->tag, xfer->cbdata); } } else { @@ -95,8 +95,8 @@ static void send_self_exe(int fd, short args, void* data) static void send_msg(int fd, short args, void *cbdata) { orte_rml_send_request_t *req = (orte_rml_send_request_t*)cbdata; - orte_process_name_t *peer = &(req->post.dst); - orte_rml_tag_t tag = req->post.tag; + orte_process_name_t *peer = &(req->post.send.dst); + orte_rml_tag_t tag = req->post.send.tag; orte_rml_recv_t *rcv; orte_rml_send_t *snd; int bytes; @@ -135,16 +135,16 @@ static void send_msg(int fd, short args, void *cbdata) /* setup the send callback */ xfer = OBJ_NEW(orte_self_send_xfer_t); - if (NULL != req->post.iov) { - xfer->iov = req->post.iov; - xfer->count = req->post.count; - xfer->cbfunc.iov = req->post.cbfunc.iov; + if (NULL != req->post.send.iov) { + xfer->iov = req->post.send.iov; + xfer->count = req->post.send.count; + xfer->cbfunc.iov = req->post.send.cbfunc.iov; } else { - xfer->buffer = req->post.buffer; - xfer->cbfunc.buffer = req->post.cbfunc.buffer; + xfer->buffer = req->post.send.buffer; + xfer->cbfunc.buffer = req->post.send.cbfunc.buffer; } xfer->tag = tag; - xfer->cbdata 
= req->post.cbdata; + xfer->cbdata = req->post.send.cbdata; /* setup the event for the send callback */ opal_event_set(orte_event_base, &xfer->ev, -1, OPAL_EV_WRITE, send_self_exe, xfer); opal_event_set_priority(&xfer->ev, ORTE_MSG_PRI); @@ -154,11 +154,11 @@ static void send_msg(int fd, short args, void *cbdata) rcv = OBJ_NEW(orte_rml_recv_t); rcv->sender = *peer; rcv->tag = tag; - if (NULL != req->post.iov) { + if (NULL != req->post.send.iov) { /* get the total number of bytes in the iovec array */ bytes = 0; - for (i = 0 ; i < req->post.count ; ++i) { - bytes += req->post.iov[i].iov_len; + for (i = 0 ; i < req->post.send.count ; ++i) { + bytes += req->post.send.iov[i].iov_len; } /* get the required memory allocation */ if (0 < bytes) { @@ -166,15 +166,15 @@ static void send_msg(int fd, short args, void *cbdata) rcv->iov.iov_len = bytes; /* transfer the bytes */ ptr = (char*)rcv->iov.iov_base; - for (i = 0 ; i < req->post.count ; ++i) { - memcpy(ptr, req->post.iov[i].iov_base, req->post.iov[i].iov_len); - ptr += req->post.iov[i].iov_len; + for (i = 0 ; i < req->post.send.count ; ++i) { + memcpy(ptr, req->post.send.iov[i].iov_base, req->post.send.iov[i].iov_len); + ptr += req->post.send.iov[i].iov_len; } } - } else if (0 < req->post.buffer->bytes_used) { - rcv->iov.iov_base = (IOVBASE_TYPE*)malloc(req->post.buffer->bytes_used); - memcpy(rcv->iov.iov_base, req->post.buffer->base_ptr, req->post.buffer->bytes_used); - rcv->iov.iov_len = req->post.buffer->bytes_used; + } else if (0 < req->post.send.buffer->bytes_used) { + rcv->iov.iov_base = (IOVBASE_TYPE*)malloc(req->post.send.buffer->bytes_used); + memcpy(rcv->iov.iov_base, req->post.send.buffer->base_ptr, req->post.send.buffer->bytes_used); + rcv->iov.iov_len = req->post.send.buffer->bytes_used; } /* post the message for receipt - since the send callback was posted * first and has the same priority, it will execute first @@ -188,16 +188,24 @@ static void send_msg(int fd, short args, void *cbdata) snd->dst = *peer; 
snd->origin = *ORTE_PROC_MY_NAME; snd->tag = tag; - if (NULL != req->post.iov) { - snd->iov = req->post.iov; - snd->count = req->post.count; - snd->cbfunc.iov = req->post.cbfunc.iov; + if (NULL != req->post.send.iov) { + snd->iov = req->post.send.iov; + snd->count = req->post.send.count; + snd->cbfunc.iov = req->post.send.cbfunc.iov; } else { - snd->buffer = req->post.buffer; - snd->cbfunc.buffer = req->post.cbfunc.buffer; + snd->buffer = req->post.send.buffer; + snd->cbfunc.buffer = req->post.send.cbfunc.buffer; + } + snd->cbdata = req->post.send.cbdata; + snd->channel = req->post.send.channel; + /* call send prep to prep the Qos channel for send */ + if (NULL != snd->channel) + { + OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, + "%s send_msg sending on channel %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), snd->channel->channel_num)); + orte_rml_base_prep_send_channel (snd->channel, snd); } - snd->cbdata = req->post.cbdata; - /* activate the OOB send state */ ORTE_OOB_SEND(snd); @@ -205,6 +213,7 @@ static void send_msg(int fd, short args, void *cbdata) } + int orte_rml_oob_send_nb(orte_process_name_t* peer, struct iovec* iov, int count, @@ -224,24 +233,16 @@ int orte_rml_oob_send_nb(orte_process_name_t* peer, ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); return ORTE_ERR_BAD_PARAM; } - - if( NULL == peer || - OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_NAME_INVALID, peer) ) { - /* cannot send to an invalid peer */ - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - /* get ourselves into an event to protect against * race conditions and threads */ req = OBJ_NEW(orte_rml_send_request_t); - req->post.dst = *peer; - req->post.iov = iov; - req->post.count = count; - req->post.tag = tag; - req->post.cbfunc.iov = cbfunc; - req->post.cbdata = cbdata; + req->post.send.dst = *peer; + req->post.send.iov = iov; + req->post.send.count = count; + req->post.send.tag = tag; + req->post.send.cbfunc.iov = cbfunc; + 
req->post.send.cbdata = cbdata; /* setup the event for the send callback */ opal_event_set(orte_event_base, &req->ev, -1, OPAL_EV_WRITE, send_msg, req); opal_event_set_priority(&req->ev, ORTE_MSG_PRI); @@ -270,26 +271,112 @@ int orte_rml_oob_send_buffer_nb(orte_process_name_t* peer, return ORTE_ERR_BAD_PARAM; } - if (NULL == peer || - OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_NAME_INVALID, peer) ) { - /* cannot send to an invalid peer */ + /* get ourselves into an event to protect against + * race conditions and threads + */ + req = OBJ_NEW(orte_rml_send_request_t); + req->post.send.dst = *peer; + req->post.send.buffer = buffer; + req->post.send.tag = tag; + req->post.send.cbfunc.buffer = cbfunc; + req->post.send.cbdata = cbdata; + /* setup the event for the send callback */ + opal_event_set(orte_event_base, &req->ev, -1, OPAL_EV_WRITE, send_msg, req); + opal_event_set_priority(&req->ev, ORTE_MSG_PRI); + opal_event_active(&req->ev, OPAL_EV_WRITE, 1); + + return ORTE_SUCCESS; +} + +int orte_rml_oob_open_channel(orte_process_name_t * peer, + opal_list_t *qos_attributes, + orte_rml_channel_callback_fn_t cbfunc, + void *cbdata) +{ + orte_rml_send_request_t *req; + OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, + "%s rml_open_channel to peer %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(peer))); + /*if (!(orte_qos_base_have_qos_component_for_channel(qos_attributes))) + return ORTE_ERROR_QOS_UNAVAILABLE;*/ + /* process the request in an event to be safe */ + req = OBJ_NEW(orte_rml_send_request_t); + req->post.channel.dst = *peer; + req->post.channel.qos_attributes = qos_attributes; + req->post.channel.cbfunc = cbfunc; + req->post.channel.cbdata = cbdata; + /* setup the event for the open callback */ + opal_event_set(orte_event_base, &req->ev, -1, OPAL_EV_WRITE, orte_rml_base_open_channel, req); + opal_event_set_priority(&req->ev, ORTE_MSG_PRI); + opal_event_active(&req->ev, OPAL_EV_WRITE, 1); + 
OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, + "%s rml_open_channel to peer %s - set event done", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(peer))); + return ORTE_SUCCESS; +} + +int orte_rml_oob_send_channel_nb (orte_rml_channel_num_t channel, + struct iovec* msg, + int count, + orte_rml_tag_t tag, + orte_rml_send_channel_callback_fn_t cbfunc, + void* cbdata) +{ + // TO DO + return ORTE_SUCCESS; +} + +int orte_rml_oob_send_buffer_channel_nb (orte_rml_channel_num_t channel_num, + opal_buffer_t *buffer, + orte_rml_tag_t tag, + orte_rml_send_buffer_channel_callback_fn_t cbfunc, + void* cbdata) +{ + orte_rml_send_request_t *req; + orte_rml_channel_t *channel; + OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, + "%s rml_send_buffer to channel %d at tag %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + channel_num, tag)); + + if (ORTE_RML_TAG_INVALID == tag) { + /* cannot send to an invalid tag */ + ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); + return ORTE_ERR_BAD_PARAM; + } + channel = (orte_rml_channel_t*) orte_rml_base_get_channel (channel_num); + if (NULL == channel) { + /* cannot send to a non existing or closed channel */ ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); return ORTE_ERR_BAD_PARAM; } - /* get ourselves into an event to protect against - * race conditions and threads - */ + * race conditions and threads + */ req = OBJ_NEW(orte_rml_send_request_t); - req->post.dst = *peer; - req->post.buffer = buffer; - req->post.tag = tag; - req->post.cbfunc.buffer = cbfunc; - req->post.cbdata = cbdata; + req->post.send.dst = channel->peer; + req->post.send.buffer = buffer; + req->post.send.tag = tag; + req->post.send.cbfunc.buf_chan = cbfunc; + req->post.send.cbdata = cbdata; + req->post.send.channel = channel; /* setup the event for the send callback */ opal_event_set(orte_event_base, &req->ev, -1, OPAL_EV_WRITE, send_msg, req); opal_event_set_priority(&req->ev, ORTE_MSG_PRI); opal_event_active(&req->ev, OPAL_EV_WRITE, 1); + return 
ORTE_SUCCESS; +} +int orte_rml_oob_close_channel (orte_rml_channel_num_t channel_num, + orte_rml_channel_callback_fn_t cbfunc, + void* cbdata) +{ + orte_rml_channel_t *channel; + channel = orte_rml_base_get_channel (channel_num); + if (NULL != channel) { + // TO DO + } return ORTE_SUCCESS; } diff --git a/orte/mca/rml/rml.h b/orte/mca/rml/rml.h index 7b5f4b0d23f..bec0886466d 100644 --- a/orte/mca/rml/rml.h +++ b/orte/mca/rml/rml.h @@ -10,7 +10,9 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. + * Copyright (c) 2014 Intel, Inc. All rights reserved. + * * $COPYRIGHT$ * * Additional copyrights may follow @@ -18,7 +20,7 @@ * $HEADER$ */ -/** +/** * @file * * Runtime Messaging Layer (RML) Communication Interface @@ -73,11 +75,16 @@ ORTE_DECLSPEC void orte_rml_recv_callback(int status, orte_process_name_t* sende opal_buffer_t *buffer, orte_rml_tag_t tag, void *cbdata); +ORTE_DECLSPEC void orte_rml_open_channel_recv_callback(int status, + orte_process_name_t* sender, + opal_buffer_t *buffer, + orte_rml_tag_t tag, void *cbdata); + /* ******************************************************************** */ /** - * RML component initialization + * RML component initialization * * Create an instance (module) of the given RML component. 
Upon * returning, the module data structure should be fully populated and @@ -175,7 +182,6 @@ typedef void (*orte_rml_buffer_callback_fn_t)(int status, orte_rml_tag_t tag, void* cbdata); - /** * Function prototype for exception callback * @@ -283,7 +289,7 @@ typedef void (*orte_rml_module_set_contact_info_fn_t)(const char *contact_info); * @param[in] contact_info The contact info string for the remote process * @param[in] tv Timeout after which the ping should be failed * - * @retval ORTE_SUCESS The process is available and will allow connections + * @retval ORTE_SUCESS The process is available and will allow connections * from the local process * @retval ORTE_ERROR An unspecified error occurred during the update */ @@ -387,7 +393,7 @@ typedef void (*orte_rml_module_recv_buffer_nb_fn_t)(orte_process_name_t* peer, * * Attempt to cancel a posted non-blocking receive. * - * @param[in] peer Peer process or ORTE_NAME_WILDCARD, exactly as passed + * @param[in] peer Peer process or ORTE_NAME_WILDCARD, exactly as passed * to the non-blocking receive call * @param[in] tag Posted receive tag */ @@ -428,6 +434,205 @@ typedef int (*orte_rml_module_ft_event_fn_t)(int state); */ typedef void (*orte_rml_module_purge_fn_t)(orte_process_name_t *peer); +/********* NEW RML QOS MESSAGING APIS *****************/ +/***** Questions *****/ +/* + 1: Should the send and recv fns take the peer param as well for validation? + 2: Should we provide a func for the user to get qos attributes of a channel? (do we allow for sets??) + 3: Should open channel - have a channel error callback function? 
+*/ +typedef void (*orte_rml_channel_callback_fn_t) (int status, + orte_rml_channel_num_t channel_num, + orte_process_name_t * peer, + opal_list_t *qos_attributes, + void * cbdata); +/** + * Function prototype for callback from non-blocking iovec send on a channel + * + * Function prototype for callback from non-blocking iovec send on a channel. + * On send, the iovec pointer will be the same pointer passed to + * send_nb and count will equal the count given to send. + * + * + * @note The parameter in/out parameters are relative to the user's callback + * function. + * + * @param[in] status Completion status + * @param[in] channel Opaque channel number on which the msg was sent (input to rml_send_channel) + * @param[in] msg Pointer to the array of iovec that was sent + * or to a single iovec that has been received + * @param[in] count Number of iovecs in the array + * @param[in] tag User defined tag for matching send/recv + * @param[in] cbdata User data passed to send_nb() + */ +typedef void (*orte_rml_send_channel_callback_fn_t)(int status, + orte_rml_channel_num_t channel, + struct iovec* msg, + int count, + orte_rml_tag_t tag, + void* cbdata); +/** + * Function prototype for callback from non-blocking buffer send on a channel + * + * Function prototype for callback from non-blocking buffer send on a + * channel. On send, the buffer will be the same pointer passed to + * send_buffer_nb. + * + * @note The parameter in/out parameters are relative to the user's callback + * function. 
+ * + * @param[in] status Completion status + * @param[in] channel channel number on which the msg was sent + * @param[in] buffer Message buffer + * @param[in] tag User defined tag for matching send + * @param[in] cbdata User data passed to send_buffer_nb() + */ +typedef void (*orte_rml_send_buffer_channel_callback_fn_t)(int status, + orte_rml_channel_num_t channel, + struct opal_buffer_t* buffer, + orte_rml_tag_t tag, + void* cbdata); + +/** + * Open a messaging channel with specified QoS to a specific peer + * + * @param[in] peer End point Peer to which the channel needs to be opened + * @param[in] qos_attributes List of Quality of Service Attributes for the channel + * @param[in] cbfunc Callback function on channel create (open) completion + * @param[in] cbdata User data to provide during completion callback + * + * @retval ORTE_SUCCESS - the channel was successfully created at the source and a request was sent to the dest. + * @retval ORTE_ERROR - unknown error + * @retval ORTE_ERR_QOS_TYPE_UNSUPPORTED - the requested QoS cannot be provided. + */ +typedef int (*orte_rml_module_open_channel_fn_t)(orte_process_name_t* peer, + opal_list_t *qos_attributes, + orte_rml_channel_callback_fn_t cbfunc, + void* cbdata); + +/** + * Send an iovec non-blocking message + * + * Send an array of iovecs to the specified peer. The call + * will return immediately, although the iovecs may not be modified + * until the completion callback is triggered. The iovecs *may* be + * passed to another call to send_nb before the completion callback is + * triggered. The callback being triggered does not give any + * indication of remote completion. + * + * @param[in] channel Channel number of the specific channel (given to user in the channel open completion callback fn.)
+ * @param[in] msg Pointer to an array of iovecs to be sent + * @param[in] count Number of iovecs in array + * @param[in] tag User defined tag for matching send/recv + * @param[in] cbfunc Callback function on message completion + * @param[in] cbdata User data to provide during completion callback + * + * @retval ORTE_SUCCESS The message was successfully started + * @retval ORTE_ERR_BAD_PARAM One of the parameters was invalid + * @retval ORTE_ERR_CHANNEL_UNKNOWN Channel specified does not exist. + * @retval ORTE_ERROR An unspecified error occurred + */ +typedef int (*orte_rml_module_send_channel_nb_fn_t)(orte_rml_channel_num_t channel, + struct iovec* msg, + int count, + orte_rml_tag_t tag, + orte_rml_send_channel_callback_fn_t cbfunc, + void* cbdata); + + +/** + * Send a buffer non-blocking message + * + * Send a buffer on a specific pre-established channel. The call + * will return immediately, although the buffer may not be modified + * until the completion callback is triggered. The buffer *may* be + * passed to another call to send_nb before the completion callback is + * triggered. The callback being triggered does not give any + * indication of remote completion. + * + * @param[in] channel Channel number of the specific channel (given to user in the channel open completion callback fn.) + * @param[in] buffer Pointer to buffer to be sent + * @param[in] tag User defined tag for matching send/recv + * @param[in] cbfunc Callback function on message completion + * @param[in] cbdata User data to provide during completion callback + * + * @retval ORTE_SUCCESS The message was successfully started + * @retval ORTE_ERR_BAD_PARAM One of the parameters was invalid + * @retval ORTE_ERR_CHANNEL_UNKNOWN Channel specified does not exist.
+ * @retval ORTE_ERROR An unspecified error occurred + */ + +typedef int (*orte_rml_module_send_buffer_channel_nb_fn_t) (orte_rml_channel_num_t channel, + struct opal_buffer_t * buffer, + orte_rml_tag_t tag, + orte_rml_send_buffer_channel_callback_fn_t cbfunc, + void* cbdata); + +/** + * Receive an iovec non-blocking message + * + * @param[in] channel specific channel established with the peer for receiving msgs. + * @param[in] tag User defined tag for matching send/recv + * @param[in] persistent Boolean flag indicating whether or not this is a one-time recv + * @param[in] cbfunc Callback function on message completion + * @param[in] cbdata User data to provide during completion callback + * + * @retval ORTE_SUCCESS - successfully posted a recv request for the channel + * @retval ORTE_CHANNEL_UNAVAILABLE - the specific channel does not exist or is not available for receiving msgs. + */ +typedef int (*orte_rml_module_recv_channel_nb_fn_t)(orte_rml_channel_num_t channel, + orte_rml_tag_t tag, + bool persistent, + orte_rml_callback_fn_t cbfunc, + void* cbdata); + + +/** + * Receive a buffer non-blocking message + * + * @param[in] channel specific channel established with the peer for receiving msgs. + * @param[in] tag User defined tag for matching send/recv + * @param[in] persistent Boolean flag indicating whether or not this is a one-time recv + * @param[in] cbfunc Callback function on message completion + * @param[in] cbdata User data to provide during completion callback + * + * @retval ORTE_SUCCESS - successfully posted a recv request for the channel + * @retval ORTE_CHANNEL_UNAVAILABLE - the specific channel does not exist or is not available for receiving msgs. + */ +typedef int (*orte_rml_module_recv_buffer_channel_nb_fn_t)(orte_rml_channel_num_t channel, + orte_rml_tag_t tag, + bool persistent, + orte_rml_buffer_callback_fn_t cbfunc, + void* cbdata); + + +/** + * Cancel a posted non-blocking receive + * + * Attempt to cancel a posted non-blocking receive.
+ * + * @param[in] channel Specific channel or ORTE_ANY_CHANNEL for wild card receive + * @param[in] tag Posted receive tag + */ +typedef void (*orte_rml_module_recv_channel_cancel_fn_t)(orte_rml_channel_num_t channel, + orte_rml_tag_t tag); + +/** + * Close a messaging channel with specified QoS to a specific peer + * + * @param[in] peer End point Peer to which the channel needs to be opened + * @param[in] channel_num The channel number returned in the channel open completion callback function. + * @param[in] cbfunc Callback function on channel close completion + * @param[in] cbdata User data to provide during completion callback + * + * @retval ORTE_SUCCESS - the channel was successfully closed at the source and a request was sent to the dest. + * @retval ORTE_ERROR - unknown error + * @retval ORTE_ERROR_UNKNOWN_CHANNEL - cannot find the specified QoS channel + */ +typedef int (*orte_rml_module_close_channel_fn_t)( orte_rml_channel_num_t channel_num, + orte_rml_channel_callback_fn_t cbfunc, + void* cbdata); + /* ******************************************************************** */ @@ -473,9 +678,22 @@ struct orte_rml_module_t { /** Fault tolerance handler */ orte_rml_module_ft_event_fn_t ft_event; - + /** Purge information */ orte_rml_module_purge_fn_t purge; + + /** Open a qos messaging channel to a peer*/ + orte_rml_module_open_channel_fn_t open_channel; + + /** send a non blocking iovec message over a channel */ + orte_rml_module_send_channel_nb_fn_t send_channel_nb; + + /** send a non blocking buffer message over a channel */ + orte_rml_module_send_buffer_channel_nb_fn_t send_buffer_channel_nb; + + /** close a qos messaging channel */ + orte_rml_module_close_channel_fn_t close_channel; + }; /** Convienence typedef */ typedef struct orte_rml_module_t orte_rml_module_t; diff --git a/orte/mca/rml/rml_types.h b/orte/mca/rml/rml_types.h index 8f084982e82..b434e6c0a1e 100644 --- a/orte/mca/rml/rml_types.h +++ b/orte/mca/rml/rml_types.h @@ -5,17 +5,18 @@ *
Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2012 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2009-2011 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2014 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ /** @file: @@ -150,17 +151,24 @@ BEGIN_C_DECLS /* notifier support */ #define ORTE_RML_TAG_NOTIFIER_HNP 52 - -/* confirm spawn by tool */ #define ORTE_RML_TAG_CONFIRM_SPAWN 53 +/*** QOS specific RML TAGS ***/ +#define ORTE_RML_TAG_OPEN_CHANNEL_REQ 54 +#define ORTE_RML_TAG_OPEN_CHANNEL_RESP 55 +#define ORTE_RML_TAG_MSG_ACK 56 +#define ORTE_RML_TAG_CLOSE_CHANNEL_REQ 57 +#define ORTE_RML_TAG_CLOSE_CHANNEL_ACCEPT 58 + + + #define ORTE_RML_TAG_MAX 100 #define ORTE_RML_TAG_NTOH(t) ntohl(t) #define ORTE_RML_TAG_HTON(t) htonl(t) -/** +/** * Message matching tag * * Message matching tag. 
Unlike MPI, there is no wildcard receive, @@ -170,6 +178,11 @@ BEGIN_C_DECLS */ typedef uint32_t orte_rml_tag_t; +/** + * Channel number + * Reference to a rml channel + */ +typedef uint32_t orte_rml_channel_num_t; /* ******************************************************************** */ From 4f93de3f83ab8c0ee5067540deb9f91d4a9822fb Mon Sep 17 00:00:00 2001 From: Annapurna Dasari Date: Sun, 15 Mar 2015 14:45:32 -0700 Subject: [PATCH 10/14] porting changes from QoS branch --- orte/mca/rml/base/Makefile.am | 1 - 1 file changed, 1 deletion(-) diff --git a/orte/mca/rml/base/Makefile.am b/orte/mca/rml/base/Makefile.am index be8a16b0afe..1461032070a 100644 --- a/orte/mca/rml/base/Makefile.am +++ b/orte/mca/rml/base/Makefile.am @@ -11,7 +11,6 @@ # All rights reserved. # Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights # reserved. -# Copyright (c) 2015 Intel Corporation. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow From 3ff2b23d0575d9d6e101d839bf3387ec478fe4c1 Mon Sep 17 00:00:00 2001 From: Annapurna Dasari Date: Sun, 15 Mar 2015 16:37:45 -0700 Subject: [PATCH 11/14] committing all QoS changes. 
Rebased all files except for oob_tcp_component.c --- opal/class/opal_hotel.h | 44 +- orte/include/orte/constants.h | 13 +- orte/mca/ess/base/ess_base_std_app.c | 65 +- orte/mca/ess/base/ess_base_std_orted.c | 117 ++-- orte/mca/ess/base/ess_base_std_tool.c | 44 +- orte/mca/ess/hnp/ess_hnp_module.c | 101 +-- orte/mca/oob/oob.h | 2 +- orte/mca/oob/tcp/oob_tcp.c | 16 +- orte/mca/oob/tcp/oob_tcp_component.c | 67 +- orte/mca/oob/tcp/oob_tcp_hdr.h | 14 +- orte/mca/oob/tcp/oob_tcp_sendrecv.c | 68 +- orte/mca/oob/tcp/oob_tcp_sendrecv.h | 21 +- orte/mca/oob/ud/oob_ud_recv.c | 4 +- orte/mca/oob/ud/oob_ud_req.c | 15 +- orte/mca/oob/ud/oob_ud_req.h | 3 + orte/mca/oob/ud/oob_ud_send.c | 8 +- orte/mca/oob/usock/oob_usock_component.c | 8 +- orte/mca/oob/usock/oob_usock_connection.c | 38 +- orte/mca/oob/usock/oob_usock_hdr.h | 12 +- orte/mca/oob/usock/oob_usock_sendrecv.c | 60 +- orte/mca/oob/usock/oob_usock_sendrecv.h | 14 +- orte/mca/qos/Makefile.am | 31 + orte/mca/qos/ack/Makefile.am | 34 + orte/mca/qos/ack/qos_ack.h | 88 +++ orte/mca/qos/ack/qos_ack_component.c | 585 ++++++++++++++++++ orte/mca/qos/base/Makefile.am | 18 + orte/mca/qos/base/base.h | 74 +++ orte/mca/qos/base/help-qos-base.txt | 12 + orte/mca/qos/base/oob_base_select.c | 137 ++++ orte/mca/qos/base/qos_base_channel_handlers.c | 163 +++++ orte/mca/qos/base/qos_base_frame.c | 118 ++++ orte/mca/qos/base/qos_base_select.c | 73 +++ orte/mca/qos/noop/Makefile.am | 34 + orte/mca/qos/noop/qos_noop.h | 35 ++ orte/mca/qos/noop/qos_noop_channel_handlers.c | 339 ++++++++++ orte/mca/qos/noop/qos_noop_component.c | 181 ++++++ orte/mca/qos/qos.h | 168 +++++ orte/mca/rml/base/rml_base_channel_handlers.c | 454 ++++++++++++++ orte/mca/rml/oob/rml_oob_component.c | 27 +- orte/test/system/oob_stress_channel.c | 221 +++++++ orte/util/attr.h | 16 +- 41 files changed, 3253 insertions(+), 289 deletions(-) create mode 100644 orte/mca/qos/Makefile.am create mode 100644 orte/mca/qos/ack/Makefile.am create mode 100644 
orte/mca/qos/ack/qos_ack.h create mode 100644 orte/mca/qos/ack/qos_ack_component.c create mode 100644 orte/mca/qos/base/Makefile.am create mode 100644 orte/mca/qos/base/base.h create mode 100644 orte/mca/qos/base/help-qos-base.txt create mode 100644 orte/mca/qos/base/oob_base_select.c create mode 100644 orte/mca/qos/base/qos_base_channel_handlers.c create mode 100644 orte/mca/qos/base/qos_base_frame.c create mode 100644 orte/mca/qos/base/qos_base_select.c create mode 100644 orte/mca/qos/noop/Makefile.am create mode 100644 orte/mca/qos/noop/qos_noop.h create mode 100644 orte/mca/qos/noop/qos_noop_channel_handlers.c create mode 100644 orte/mca/qos/noop/qos_noop_component.c create mode 100644 orte/mca/qos/qos.h create mode 100644 orte/mca/rml/base/rml_base_channel_handlers.c create mode 100644 orte/test/system/oob_stress_channel.c diff --git a/opal/class/opal_hotel.h b/opal/class/opal_hotel.h index 8216d4cfd61..5d1f58ae8b0 100644 --- a/opal/class/opal_hotel.h +++ b/opal/class/opal_hotel.h @@ -1,10 +1,11 @@ /* * Copyright (c) 2012-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights reserved + * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -23,7 +24,7 @@ * * One use case for this class is for ACK-based network retransmission * schemes (NACK-based retransmission schemes probably can use - * opal_ring_buffer). + * opal_ring_buffer). * * For ACK-based retransmission schemes, a hotel might be used * something like this: @@ -61,7 +62,7 @@ BEGIN_C_DECLS struct opal_hotel_t; /* User-supplied function to be invoked when an occupant is evicted. 
*/ -typedef void (*opal_hotel_eviction_callback_fn_t)(struct opal_hotel_t *hotel, +typedef void (*opal_hotel_eviction_callback_fn_t)(struct opal_hotel_t *hotel, int room_num, void *occupant); @@ -248,6 +249,42 @@ static inline void opal_hotel_checkout(opal_hotel_t *hotel, int room_num) assume the upper layer knows what it's doing. */ } +/** + * Check the specified occupant out of the hotel and return the occupant. + * + * @param hotel Pointer to hotel (IN) + * @param room_num Room number to checkout (IN) + * @param occupant Set to the checked-out occupant, or to NULL if the room was empty (OUT) + * If there is an occupant in the room, their timer is canceled and + * they are checked out. + * + * Use this checkout when the caller needs the occupant + */ +static inline void opal_hotel_checkout_and_return_occupant(opal_hotel_t *hotel, int room_num, void **occupant) +{ + opal_hotel_room_t *room; + + /* Bozo check */ + assert(room_num < hotel->num_rooms); + + /* If there's an occupant in the room, check them out */ + room = &(hotel->rooms[room_num]); + if (OPAL_LIKELY(NULL != room->occupant)) { + opal_output (10, "checking out occupant %p from room num %d", room->occupant, room_num); + *occupant = room->occupant; + room->occupant = NULL; + opal_event_del(&(room->eviction_timer_event)); + hotel->last_unoccupied_room++; + assert(hotel->last_unoccupied_room < hotel->num_rooms); + hotel->unoccupied_rooms[hotel->last_unoccupied_room] = room_num; + } + else { + opal_output( 0, " OOPS there is no occupant in room_num %d", room_num); + /* never leave the out-parameter indeterminate for the caller */ + *occupant = NULL; + } + +} + /** * Destroy a hotel.
* diff --git a/orte/include/orte/constants.h b/orte/include/orte/constants.h index cc4bcdbcd31..0095ff9fcfd 100644 --- a/orte/include/orte/constants.h +++ b/orte/include/orte/constants.h @@ -62,7 +62,7 @@ enum { ORTE_ERR_UNPACK_INADEQUATE_SPACE = OPAL_ERR_UNPACK_INADEQUATE_SPACE, ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER = OPAL_ERR_UNPACK_READ_PAST_END_OF_BUFFER, ORTE_ERR_TYPE_MISMATCH = OPAL_ERR_TYPE_MISMATCH, - ORTE_ERR_OPERATION_UNSUPPORTED = OPAL_ERR_OPERATION_UNSUPPORTED, + ORTE_ERR_OPERATION_UNSUPPORTED = OPAL_ERR_OPERATION_UNSUPPORTED, ORTE_ERR_UNKNOWN_DATA_TYPE = OPAL_ERR_UNKNOWN_DATA_TYPE, ORTE_ERR_BUFFER = OPAL_ERR_BUFFER, ORTE_ERR_DATA_TYPE_REDEF = OPAL_ERR_DATA_TYPE_REDEF, @@ -85,7 +85,7 @@ enum { ORTE_ERR_CONNECTION_FAILED = OPAL_ERR_CONNECTION_FAILED, ORTE_ERR_AUTHENTICATION_FAILED = OPAL_ERR_AUTHENTICATION_FAILED, ORTE_ERR_COMM_FAILURE = OPAL_ERR_COMM_FAILURE, - + /* error codes specific to ORTE - don't forget to update orte/util/error_strings.c when adding new error codes!! 
Otherwise, the error reporting system will potentially crash, @@ -133,7 +133,14 @@ enum { ORTE_ERR_SENSOR_LIMIT_EXCEEDED = (ORTE_ERR_BASE - 42), ORTE_ERR_ALLOCATION_PENDING = (ORTE_ERR_BASE - 43), ORTE_ERR_NO_PATH_TO_TARGET = (ORTE_ERR_BASE - 44), - ORTE_ERR_OP_IN_PROGRESS = (ORTE_ERR_BASE - 45) + ORTE_ERR_OP_IN_PROGRESS = (ORTE_ERR_BASE - 45), + ORTE_ERR_OPEN_CHANNEL_PEER_FAIL = (ORTE_ERR_BASE - 46), + ORTE_ERR_OPEN_CHANNEL_PEER_REJECT = (ORTE_ERR_BASE - 47), + ORTE_ERR_QOS_TYPE_UNSUPPORTED = (ORTE_ERR_BASE - 48), + ORTE_ERR_QOS_ACK_WINDOW_FULL = (ORTE_ERR_BASE - 49), + ORTE_ERR_ACK_TIMEOUT_SENDER = (ORTE_ERR_BASE - 50), + ORTE_ERR_ACK_TIMEOUT_RECEIVER = (ORTE_ERR_BASE - 51), + ORTE_ERR_LOST_MSG_IN_WINDOW = (ORTE_ERR_BASE - 52) }; #define ORTE_ERR_MAX (ORTE_ERR_BASE - 100) diff --git a/orte/mca/ess/base/ess_base_std_app.c b/orte/mca/ess/base/ess_base_std_app.c index 11c10a2db7a..4762c69f07b 100644 --- a/orte/mca/ess/base/ess_base_std_app.c +++ b/orte/mca/ess/base/ess_base_std_app.c @@ -5,20 +5,20 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010-2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -54,6 +54,7 @@ #include "orte/mca/grpcomm/base/base.h" #include "orte/mca/oob/base/base.h" #include "orte/mca/rml/rml.h" +#include "orte/mca/qos/base/base.h" #include "orte/mca/odls/odls_types.h" #include "orte/mca/plm/plm.h" #include "orte/mca/filem/base/base.h" @@ -116,7 +117,7 @@ int orte_ess_base_app_setup(bool db_restrict_local) /* get a separate orte event base */ orte_event_base = opal_start_progress_thread("orte", true); progress_thread_running = true; - + /* open and setup the state machine */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -143,7 +144,7 @@ int orte_ess_base_app_setup(bool db_restrict_local) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (NULL == orte_process_info.tmpdir_base) ? "UNDEF" : orte_process_info.tmpdir_base, orte_process_info.nodename)); - + if (ORTE_SUCCESS != (ret = orte_session_dir(true, orte_process_info.tmpdir_base, orte_process_info.nodename, NULL, @@ -152,7 +153,7 @@ int orte_ess_base_app_setup(bool db_restrict_local) error = "orte_session_dir"; goto error; } - + /* Once the session directory location has been established, set the opal_output env file location to be in the proc-specific session directory. 
*/ @@ -202,7 +203,7 @@ int orte_ess_base_app_setup(bool db_restrict_local) error = "orte_oob_base_select"; goto error; } - + /* Runtime Messaging Layer */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rml_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -214,7 +215,19 @@ int orte_ess_base_app_setup(bool db_restrict_local) error = "orte_rml_base_select"; goto error; } - + + /* Messaging QoS Layer */ + if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_qos_base_framework, 0))) { + ORTE_ERROR_LOG(ret); + error = "orte_qos_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_qos_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_qos_base_select"; + goto error; + } + /* setup the errmgr */ if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) { ORTE_ERROR_LOG(ret); @@ -233,7 +246,7 @@ int orte_ess_base_app_setup(bool db_restrict_local) error = "orte_routed_base_select"; goto error; } - + /* * Group communications */ @@ -247,7 +260,7 @@ int orte_ess_base_app_setup(bool db_restrict_local) error = "orte_grpcomm_base_select"; goto error; } - + /* non-daemon/HNP apps can only have the default proxy PLM * module open - provide a chance for it to initialize */ @@ -256,22 +269,22 @@ int orte_ess_base_app_setup(bool db_restrict_local) error = "orte_plm_init"; goto error; } - + /* enable communication via the rml */ if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) { ORTE_ERROR_LOG(ret); error = "orte_rml.enable_comm"; goto error; } - + /* setup the routed info */ if (ORTE_SUCCESS != (ret = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, NULL))) { ORTE_ERROR_LOG(ret); error = "orte_routed.init_routes"; goto error; } - - + + #if OPAL_ENABLE_FT_CR == 1 /* * Setup the SnapC @@ -302,7 +315,7 @@ int orte_ess_base_app_setup(bool db_restrict_local) #else opal_cr_set_enabled(false); #endif - + /* Initalize the CR setup * Note: Always do this, even in non-FT builds. * If we don't some user level tools may hang. 
@@ -326,7 +339,7 @@ int orte_ess_base_app_setup(bool db_restrict_local) } return ORTE_SUCCESS; - + error: if (!progress_thread_running) { /* can't send the help message, so ensure it @@ -337,7 +350,7 @@ int orte_ess_base_app_setup(bool db_restrict_local) orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); - + return ret; } @@ -375,7 +388,7 @@ int orte_ess_base_app_finalize(void) /* free the event base to cleanup memory */ opal_stop_progress_thread("orte", true); - return ORTE_SUCCESS; + return ORTE_SUCCESS; } /* @@ -405,16 +418,16 @@ void orte_ess_base_app_abort(int status, bool report) /* Exit - do NOT do a normal finalize as this will very likely * hang the process. We are aborting due to an abnormal condition - * that precludes normal cleanup + * that precludes normal cleanup * - * We do need to do the following bits to make sure we leave a + * We do need to do the following bits to make sure we leave a * clean environment. Taken from orte_finalize(): * - Assume errmgr cleans up child processes before we exit. */ - + /* CRS cleanup since it may have a named pipe and thread active */ orte_cr_finalize(); - + /* If we were asked to report this termination, do so. 
* Since singletons don't start an HNP unless necessary, and * direct-launched procs don't have daemons at all, only send @@ -430,11 +443,11 @@ void orte_ess_base_app_abort(int status, bool report) * have a chance to be sent */ nanosleep(&tp, NULL); } - - /* - Clean out the global structures + + /* - Clean out the global structures * (not really necessary, but good practice) */ orte_proc_info_finalize(); - + /* Now Exit */ _exit(status); } diff --git a/orte/mca/ess/base/ess_base_std_orted.c b/orte/mca/ess/base/ess_base_std_orted.c index 9dcdfd04eb4..2af369486d3 100644 --- a/orte/mca/ess/base/ess_base_std_orted.c +++ b/orte/mca/ess/base/ess_base_std_orted.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -13,12 +13,12 @@ * et Automatique. All rights reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -50,6 +50,7 @@ #include "orte/mca/routed/base/base.h" #include "orte/mca/routed/routed.h" #include "orte/mca/oob/base/base.h" +#include "orte/mca/qos/base/base.h" #include "orte/mca/dfs/base/base.h" #include "orte/mca/grpcomm/grpcomm.h" #include "orte/mca/grpcomm/base/base.h" @@ -127,22 +128,22 @@ int orte_ess_base_orted_setup(char **hosts) /* setup callback for SIGPIPE */ setup_sighandler(SIGPIPE, &epipe_handler, epipe_signal_callback); /* Set signal handlers to catch kill signals so we can properly clean up - * after ourselves. 
+ * after ourselves. */ setup_sighandler(SIGTERM, &term_handler, shutdown_signal); setup_sighandler(SIGINT, &int_handler, shutdown_signal); - + /** setup callbacks for signals we should ignore */ setup_sighandler(SIGUSR1, &sigusr1_handler, signal_callback); setup_sighandler(SIGUSR2, &sigusr2_handler, signal_callback); - + signals_set = true; - + #if OPAL_HAVE_HWLOC { hwloc_obj_t obj; unsigned i, j; - + /* get the local topology */ if (NULL == opal_hwloc_topology) { if (OPAL_SUCCESS != opal_hwloc_base_get_topology()) { @@ -177,14 +178,14 @@ int orte_ess_base_orted_setup(char **hosts) break; } } - + if (15 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) { opal_output(0, "%s Topology Info:", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); opal_dss.dump(0, opal_hwloc_topology, OPAL_HWLOC_TOPO); } } #endif - + /* open and setup the opal_pstat framework so we can provide * process stats if requested */ @@ -198,7 +199,7 @@ int orte_ess_base_orted_setup(char **hosts) error = "opal_pstat_base_select"; goto error; } - + /* open and setup the state machine */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -210,20 +211,20 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_state_base_select"; goto error; } - + /* open the errmgr */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_errmgr_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_errmgr_base_open"; goto error; } - + /* some environments allow remote launches - e.g., ssh - so * open and select something -only- if we are given * a specific module to use */ (void) mca_base_var_env_name("plm", ¶m); - + plm_in_use = !!(getenv(param)); free (param); @@ -240,7 +241,7 @@ int orte_ess_base_orted_setup(char **hosts) goto error; } } - + /* setup my session directory here as the OOB may need it */ if (orte_create_session_dirs) { OPAL_OUTPUT_VERBOSE((2, orte_ess_base_framework.framework_output, @@ -248,7 +249,7 @@ int 
orte_ess_base_orted_setup(char **hosts) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (NULL == orte_process_info.tmpdir_base) ? "UNDEF" : orte_process_info.tmpdir_base, orte_process_info.nodename)); - + /* take a pass thru the session directory code to fillin the * tmpdir names - don't create anything yet */ @@ -279,13 +280,13 @@ int orte_ess_base_orted_setup(char **hosts) * proc-specific session directory. */ opal_output_set_output_file_info(orte_process_info.proc_session_dir, "output-", NULL, NULL); - + /* setup stdout/stderr */ if (orte_debug_daemons_file_flag) { /* if we are debugging to a file, then send stdout/stderr to * the orted log file */ - + /* get my jobid */ if (ORTE_SUCCESS != (ret = orte_util_convert_jobid_to_string(&jobidstring, ORTE_PROC_MY_NAME->jobid))) { @@ -293,7 +294,7 @@ int orte_ess_base_orted_setup(char **hosts) error = "convert_jobid"; goto error; } - + /* define a log file name in the session directory */ snprintf(log_file, PATH_MAX, "output-orted-%s-%s.log", jobidstring, orte_process_info.nodename); @@ -302,7 +303,7 @@ int orte_ess_base_orted_setup(char **hosts) orte_process_info.top_session_dir, log_file, NULL); - + fd = open(log_path, O_RDWR|O_CREAT|O_TRUNC, 0640); if (fd < 0) { /* couldn't open the file for some reason, so @@ -340,14 +341,26 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_rml_base_select"; goto error; } - + + /* Messaging QoS Layer */ + if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_qos_base_framework, 0))) { + ORTE_ERROR_LOG(ret); + error = "orte_qos_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_qos_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_qos_base_select"; + goto error; + } + /* select the errmgr */ if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_errmgr_base_select"; goto error; } - + /* Routed system */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -359,7 
+372,7 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_routed_base_select"; goto error; } - + /* * Group communications */ @@ -373,7 +386,7 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_grpcomm_base_select"; goto error; } - + /* Open/select the odls */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_odls_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -385,7 +398,7 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_odls_base_select"; goto error; } - + /* Open/select the rtc */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rtc_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -397,14 +410,14 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_rtc_base_select"; goto error; } - + /* enable communication with the rml */ if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) { ORTE_ERROR_LOG(ret); error = "orte_rml.enable_comm"; goto error; } - + #if ORTE_ENABLE_STATIC_PORTS /* if we are using static ports, then we need to setup * the daemon info so the RML can function properly @@ -433,7 +446,7 @@ int orte_ess_base_orted_setup(char **hosts) * need to do it anyway just to initialize things */ orte_routed.update_routing_plan(); - + /* Now provide a chance for the PLM * to perform any module-specific init functions. 
This * needs to occur AFTER the communications are setup @@ -448,7 +461,7 @@ int orte_ess_base_orted_setup(char **hosts) goto error; } } - + /* setup the global job and node arrays */ orte_job_data = OBJ_NEW(opal_pointer_array_t); if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_job_data, @@ -459,7 +472,7 @@ int orte_ess_base_orted_setup(char **hosts) error = "setup job array"; goto error; } - + orte_node_pool = OBJ_NEW(opal_pointer_array_t); if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_node_pool, ORTE_GLOBAL_ARRAY_BLOCK_SIZE, @@ -479,17 +492,17 @@ int orte_ess_base_orted_setup(char **hosts) goto error; } - /* Setup the job data object for the daemons */ + /* Setup the job data object for the daemons */ /* create and store the job data object */ jdata = OBJ_NEW(orte_job_t); jdata->jobid = ORTE_PROC_MY_NAME->jobid; opal_pointer_array_set_item(orte_job_data, 0, jdata); - + /* every job requires at least one app */ app = OBJ_NEW(orte_app_context_t); opal_pointer_array_set_item(jdata->apps, 0, app); jdata->num_apps++; - + /* create and store a node object where we are */ node = OBJ_NEW(orte_node_t); node->name = strdup(orte_process_info.nodename); @@ -503,13 +516,13 @@ int orte_ess_base_orted_setup(char **hosts) proc = OBJ_NEW(orte_proc_t); proc->name.jobid = ORTE_PROC_MY_NAME->jobid; proc->name.vpid = ORTE_PROC_MY_NAME->vpid; - + proc->pid = orte_process_info.pid; proc->rml_uri = orte_rml.get_contact_info(); proc->state = ORTE_PROC_STATE_RUNNING; opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc); - - /* record that the daemon (i.e., us) is on this node + + /* record that the daemon (i.e., us) is on this node * NOTE: we do not add the proc object to the node's * proc array because we are not an application proc. 
* Instead, we record it in the daemon field of the @@ -519,7 +532,7 @@ int orte_ess_base_orted_setup(char **hosts) node->daemon = proc; ORTE_FLAG_SET(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED); node->state = ORTE_NODE_STATE_UP; - + /* now point our proc node field to the node */ OBJ_RETAIN(node); /* keep accounting straight */ proc->node = node; @@ -529,7 +542,7 @@ int orte_ess_base_orted_setup(char **hosts) jdata->state = ORTE_JOB_STATE_RUNNING; /* obviously, we have "reported" */ jdata->num_reported = 1; - + /* setup the PMIx server */ if (ORTE_SUCCESS != (ret = pmix_server_init())) { ORTE_ERROR_LOG(ret); @@ -538,14 +551,14 @@ int orte_ess_base_orted_setup(char **hosts) } /* setup the routed info - the selected routed component - * will know what to do. + * will know what to do. */ if (ORTE_SUCCESS != (ret = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, NULL))) { ORTE_ERROR_LOG(ret); error = "orte_routed.init_routes"; goto error; } - + /* setup I/O forwarding system - must come after we init routes */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_iof_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -557,7 +570,7 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_iof_base_select"; goto error; } - + /* setup the FileM */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_filem_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -569,7 +582,7 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_filem_base_select"; goto error; } - + #if OPAL_ENABLE_FT_CR == 1 /* * Setup the SnapC @@ -594,13 +607,13 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_sstore_base_select"; goto error; } - + /* For daemons, ORTE doesn't need the OPAL CR stuff */ opal_cr_set_enabled(false); #else opal_cr_set_enabled(false); #endif - + /* * Initalize the CR setup * Note: Always do this, even in non-FT builds. 
@@ -611,7 +624,7 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_cr_init"; goto error; } - + /* setup the DFS framework */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_dfs_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -637,12 +650,12 @@ int orte_ess_base_orted_setup(char **hosts) } return ORTE_SUCCESS; - + error: orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); - + return ORTE_ERR_SILENT; } @@ -656,12 +669,12 @@ int orte_ess_base_orted_finalize(void) opal_event_signal_del(&sigusr1_handler); opal_event_signal_del(&sigusr2_handler); } - + /* cleanup */ if (NULL != log_path) { unlink(log_path); } - + /* shutdown the pmix server */ pmix_server_finalize(); @@ -689,8 +702,8 @@ int orte_ess_base_orted_finalize(void) /* cleanup any lingering session directories */ orte_session_dir_cleanup(ORTE_JOBID_WILDCARD); - - return ORTE_SUCCESS; + + return ORTE_SUCCESS; } static void shutdown_signal(int fd, short flags, void *arg) diff --git a/orte/mca/ess/base/ess_base_std_tool.c b/orte/mca/ess/base/ess_base_std_tool.c index 1368b2ae3d6..069848d8abd 100644 --- a/orte/mca/ess/base/ess_base_std_tool.c +++ b/orte/mca/ess/base/ess_base_std_tool.c @@ -5,7 +5,7 @@ * Copyright (c) 2004-2009 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. @@ -15,9 +15,9 @@ * Copyright (c) 2014 Hochschule Esslingen. All rights reserved. 
* * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -43,6 +43,7 @@ #include "orte/mca/oob/base/base.h" #include "orte/mca/plm/base/base.h" #include "orte/mca/rml/base/base.h" +#include "orte/mca/qos/base/base.h" #include "orte/mca/routed/base/base.h" #include "orte/mca/errmgr/base/base.h" #include "orte/mca/iof/base/base.h" @@ -88,7 +89,7 @@ int orte_ess_base_tool_setup(void) progress_thread_running = true; orte_event_base_active = true; } - + /* open and setup the state machine */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -136,6 +137,19 @@ int orte_ess_base_tool_setup(void) error = "orte_rml_base_select"; goto error; } + + /* Messaging QoS Layer */ + if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_qos_base_framework, 0))) { + ORTE_ERROR_LOG(ret); + error = "orte_qos_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_qos_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_qos_base_select"; + goto error; + } + /* Routed system */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -147,20 +161,20 @@ int orte_ess_base_tool_setup(void) error = "orte_routed_base_select"; goto error; } - + /* since I am a tool, then all I really want to do is communicate. 
* So setup communications and be done - finding the HNP * to which I want to communicate and setting up a route for * that link is my responsibility */ - + /* enable communication via the rml */ if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) { ORTE_ERROR_LOG(ret); error = "orte_rml.enable_comm"; goto error; } - + /* we -may- need to know the name of the head * of our session directory tree, particularly the * tmp base where any other session directories on @@ -174,16 +188,16 @@ int orte_ess_base_tool_setup(void) error = "define session dir names"; goto error; } - + /* setup the routed info - the selected routed component - * will know what to do. + * will know what to do. */ if (ORTE_SUCCESS != (ret = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, NULL))) { ORTE_ERROR_LOG(ret); error = "orte_routed.init_routes"; goto error; } - + /* setup I/O forwarding system - must come after we init routes */ if (NULL != orte_process_info.my_hnp_uri) { /* only do this if we were given an HNP */ @@ -207,7 +221,7 @@ int orte_ess_base_tool_setup(void) /* we don't select the plm framework as we only want the * base proxy functions */ } - + #if OPAL_ENABLE_FT_CR == 1 /* * Setup the SnapC @@ -233,7 +247,7 @@ int orte_ess_base_tool_setup(void) error = "orte_sstore_base_select"; goto error; } - + /* Tools do not need all the OPAL CR stuff */ opal_cr_set_enabled(false); #endif @@ -251,12 +265,12 @@ int orte_ess_base_tool_setup(void) } return ORTE_SUCCESS; - + error: orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); - + return ret; } @@ -286,5 +300,5 @@ int orte_ess_base_tool_finalize(void) opal_stop_progress_thread("orte", true); progress_thread_running = false; } - return ORTE_SUCCESS; + return ORTE_SUCCESS; } diff --git a/orte/mca/ess/hnp/ess_hnp_module.c b/orte/mca/ess/hnp/ess_hnp_module.c index b81e4bec4b4..a1bdef909a0 100644 --- a/orte/mca/ess/hnp/ess_hnp_module.c +++ b/orte/mca/ess/hnp/ess_hnp_module.c @@ 
-5,19 +5,19 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. + * reserved. + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * */ @@ -55,6 +55,7 @@ #include "orte/mca/oob/base/base.h" #include "orte/mca/rml/base/base.h" +#include "orte/mca/qos/base/base.h" #include "orte/mca/rml/rml_types.h" #include "orte/mca/routed/base/base.h" #include "orte/mca/routed/routed.h" @@ -231,7 +232,7 @@ static int rte_init(void) error = "opal_pstat_base_select"; goto error; } - + /* open and setup the state machine */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -261,7 +262,7 @@ static int rte_init(void) error = "orte_plm_base_open"; goto error; } - + if (ORTE_SUCCESS != (ret = orte_plm_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_plm_base_select"; @@ -296,7 +297,7 @@ static int rte_init(void) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (NULL == orte_process_info.tmpdir_base) ? 
"UNDEF" : orte_process_info.tmpdir_base, orte_process_info.nodename)); - + /* take a pass thru the session directory code to fillin the * tmpdir names - don't create anything yet */ @@ -325,7 +326,7 @@ static int rte_init(void) } /* Setup the communication infrastructure */ - + /* * OOB Layer */ @@ -354,12 +355,24 @@ static int rte_init(void) goto error; } + /* Messaging QoS Layer */ + if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_qos_base_framework, 0))) { + ORTE_ERROR_LOG(ret); + error = "orte_qos_base_open"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_qos_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_qos_base_select"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_errmgr_base_select"; goto error; } - + /* setup the global job and node arrays */ orte_job_data = OBJ_NEW(opal_pointer_array_t); if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_job_data, @@ -370,7 +383,7 @@ static int rte_init(void) error = "setup job array"; goto error; } - + orte_node_pool = OBJ_NEW(opal_pointer_array_t); if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_node_pool, ORTE_GLOBAL_ARRAY_BLOCK_SIZE, @@ -390,7 +403,7 @@ static int rte_init(void) goto error; } - /* Setup the job data object for the daemons */ + /* Setup the job data object for the daemons */ /* create and store the job data object */ jdata = OBJ_NEW(orte_job_t); jdata->jobid = ORTE_PROC_MY_NAME->jobid; @@ -400,7 +413,7 @@ static int rte_init(void) * are running! 
*/ jdata->state = ORTE_JOB_STATE_DAEMONS_REPORTED; - + /* every job requires at least one app */ app = OBJ_NEW(orte_app_context_t); opal_pointer_array_set_item(jdata->apps, 0, app); @@ -425,7 +438,7 @@ static int rte_init(void) proc = OBJ_NEW(orte_proc_t); proc->name.jobid = ORTE_PROC_MY_NAME->jobid; proc->name.vpid = ORTE_PROC_MY_NAME->vpid; - + proc->pid = orte_process_info.pid; proc->rml_uri = orte_rml.get_contact_info(); proc->state = ORTE_PROC_STATE_RUNNING; @@ -433,7 +446,7 @@ static int rte_init(void) proc->node = node; opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc); - /* record that the daemon (i.e., us) is on this node + /* record that the daemon (i.e., us) is on this node * NOTE: we do not add the proc object to the node's * proc array because we are not an application proc. * Instead, we record it in the daemon field of the @@ -443,7 +456,7 @@ static int rte_init(void) node->daemon = proc; ORTE_FLAG_SET(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED); node->state = ORTE_NODE_STATE_UP; - + /* if we are to retain aliases, get ours */ if (orte_retain_aliases) { aliases = NULL; @@ -475,7 +488,7 @@ static int rte_init(void) error = "orte_routed_base_select"; goto error; } - + /* * Group communications @@ -512,18 +525,18 @@ static int rte_init(void) ORTE_ERROR_LOG(ret); error = "orte_ras_base_open"; goto error; - } + } if (ORTE_SUCCESS != (ret = orte_ras_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_ras_base_find_available"; goto error; } - + if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rmaps_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_rmaps_base_open"; goto error; - } + } if (ORTE_SUCCESS != (ret = orte_rmaps_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_rmaps_base_find_available"; @@ -584,7 +597,7 @@ static int rte_init(void) error = "orte_odls_base_select"; goto error; } - + /* Open/select the rtc */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rtc_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ 
-596,7 +609,7 @@ static int rte_init(void) error = "orte_rtc_base_select"; goto error; } - + /* enable communication with the rml */ if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) { ORTE_ERROR_LOG(ret); @@ -610,7 +623,7 @@ static int rte_init(void) /* we are also officially a daemon, so better update that field too */ orte_process_info.my_daemon_uri = strdup(orte_process_info.my_hnp_uri); - + /* setup the orte_show_help system to recv remote output */ orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_SHOW_HELP, ORTE_RML_PERSISTENT, orte_show_help_recv, NULL); @@ -620,17 +633,17 @@ static int rte_init(void) * proc-specific session directory. */ opal_output_set_output_file_info(orte_process_info.proc_session_dir, "output-", NULL, NULL); - + /* save my contact info in a file for others to find */ jobfam_dir = opal_dirname(orte_process_info.job_session_dir); contact_path = opal_os_path(false, jobfam_dir, "contact.txt", NULL); free(jobfam_dir); - + OPAL_OUTPUT_VERBOSE((2, orte_debug_output, "%s writing contact file %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), contact_path)); - + if (ORTE_SUCCESS != (ret = orte_write_hnp_contact_file(contact_path))) { OPAL_OUTPUT_VERBOSE((2, orte_debug_output, "%s writing contact file failed with error %s", @@ -652,14 +665,14 @@ static int rte_init(void) } /* setup the routed info - the selected routed component - * will know what to do. + * will know what to do. 
*/ if (ORTE_SUCCESS != (ret = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, NULL))) { ORTE_ERROR_LOG(ret); error = "orte_routed.init_routes"; goto error; } - + /* setup I/O forwarding system - must come after we init routes */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_iof_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -671,7 +684,7 @@ static int rte_init(void) error = "orte_iof_base_select"; goto error; } - + /* setup the FileM */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_filem_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -725,7 +738,7 @@ static int rte_init(void) error = "orte_cr_init"; goto error; } - + /* setup the dfs framework */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_dfs_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -766,7 +779,7 @@ static int rte_init(void) it should; but when it *is* doing something, we do not want it to be unnecessarily delayed because it voluntarily yielded the processor in the middle of its work. - + For example: when a message arrives at orterun, we want the OS to wake us up in a timely fashion (which most OS's seem good about doing) and then we want orterun to process @@ -788,7 +801,7 @@ static int rte_init(void) "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); } - + return ORTE_ERR_SILENT; } @@ -851,7 +864,7 @@ static int rte_finalize(void) /* ensure we scrub the session directory tree */ orte_session_dir_cleanup(ORTE_JOBID_WILDCARD); - + /* close the xml output file, if open */ if (orte_xml_output) { fprintf(orte_xml_fp, "\n"); @@ -868,24 +881,24 @@ static void rte_abort(int status, bool report) { /* do NOT do a normal finalize as this will very likely * hang the process. We are aborting due to an abnormal condition - * that precludes normal cleanup + * that precludes normal cleanup * - * We do need to do the following bits to make sure we leave a + * We do need to do the following bits to make sure we leave a * clean environment. 
Taken from orte_finalize(): * - Assume errmgr cleans up child processes before we exit. */ - + /* CRS cleanup since it may have a named pipe and thread active */ orte_cr_finalize(); - + /* ensure we scrub the session directory tree */ orte_session_dir_cleanup(ORTE_JOBID_WILDCARD); - - /* - Clean out the global structures + + /* - Clean out the global structures * (not really necessary, but good practice) */ orte_proc_info_finalize(); - + /* just exit */ exit(status); } @@ -899,13 +912,13 @@ static void clean_abort(int fd, short flags, void *arg) if (forcibly_die) { /* kill any local procs */ orte_odls.kill_local_procs(NULL); - + /* whack any lingering session directory files from our jobs */ orte_session_dir_cleanup(ORTE_JOBID_WILDCARD); - + /* cleanup our data server */ orte_data_server_finalize(); - + /* exit with a non-zero status */ exit(ORTE_ERROR_DEFAULT_EXIT_CODE); } @@ -925,7 +938,7 @@ static void clean_abort(int fd, short flags, void *arg) * so need to tell them that! */ orte_execute_quiet = true; - + if (!orte_never_launched) { /* cleanup our data server */ orte_data_server_finalize(); diff --git a/orte/mca/oob/oob.h b/orte/mca/oob/oob.h index 5c85dafe053..98a1a65b1cd 100644 --- a/orte/mca/oob/oob.h +++ b/orte/mca/oob/oob.h @@ -43,7 +43,7 @@ #include "opal/mca/mca.h" #include "orte/mca/rml/base/base.h" - +#include "orte/mca/qos/base/base.h" BEGIN_C_DECLS typedef bool (*mca_oob_base_component_avail_fn_t)(void); diff --git a/orte/mca/oob/tcp/oob_tcp.c b/orte/mca/oob/tcp/oob_tcp.c index b44f0aa7b7b..951f6f318a9 100644 --- a/orte/mca/oob/tcp/oob_tcp.c +++ b/orte/mca/oob/tcp/oob_tcp.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. 
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. @@ -245,7 +245,7 @@ static int parse_uri(const uint16_t af_family, hints.ai_family = af_family; hints.ai_socktype = SOCK_STREAM; ret = getaddrinfo(host, NULL, &hints, &res); - + if (ret) { opal_output (0, "oob_tcp_parse_uri: Could not resolve %s. [Error: %s]\n", host, gai_strerror (ret)); @@ -259,7 +259,7 @@ static int parse_uri(const uint16_t af_family, else { return ORTE_ERR_NOT_SUPPORTED; } - + return ORTE_SUCCESS; } @@ -283,7 +283,7 @@ static void process_set_peer(int fd, short args, void *cbdata) if (AF_INET != pop->af_family) { opal_output_verbose(20, orte_oob_base_framework.framework_output, - "%s NOT AF_INET", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + "%s NOT AF_INET", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); goto cleanup; } @@ -410,10 +410,10 @@ static void process_send(int fd, short args, void *cbdata) orte_process_name_t hop; opal_output_verbose(2, orte_oob_base_framework.framework_output, - "%s:[%s:%d] processing send to peer %s:%d", + "%s:[%s:%d] processing send to peer %s:%d to channel =%d seq_num = %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, - ORTE_NAME_PRINT(&op->msg->dst), op->msg->tag); + ORTE_NAME_PRINT(&op->msg->dst), op->msg->tag, op->msg->dst_channel, op->msg->seq_num); /* do we have a route to this peer (could be direct)? */ hop = orte_routed.get_route(&op->msg->dst); @@ -555,7 +555,7 @@ static void resend(struct mca_oob_tcp_msg_error_t *mp) * socket to recv. This is called for the listen sockets to accept an * incoming connection, on new sockets trying to complete the software * connection process, and for probes. Data on an established - * connection is handled elsewhere. + * connection is handled elsewhere. 
*/ static void recv_handler(int sd, short flg, void *cbdata) { @@ -592,7 +592,7 @@ static void recv_handler(int sd, short flg, void *cbdata) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), strerror(opal_socket_errno), opal_socket_errno); } } - + /* is the peer instance willing to accept this connection */ peer->sd = sd; if (mca_oob_tcp_peer_accept(peer) == false) { diff --git a/orte/mca/oob/tcp/oob_tcp_component.c b/orte/mca/oob/tcp/oob_tcp_component.c index 383a078e5d9..691b3be256d 100644 --- a/orte/mca/oob/tcp/oob_tcp_component.c +++ b/orte/mca/oob/tcp/oob_tcp_component.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2006-2014 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2009-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. * Copyright (c) 2014 NVIDIA Corporation. All rights reserved. @@ -25,7 +25,7 @@ * In windows, many of the socket functions return an EWOULDBLOCK * instead of things like EAGAIN, EINPROGRESS, etc. 
It has been * verified that this will not conflict with other error codes that - * are returned by these functions under UNIX/Linux environments + * are returned by these functions under UNIX/Linux environments */ #include "orte_config.h" @@ -154,7 +154,7 @@ static int tcp_component_open(void) #endif /* if_include and if_exclude need to be mutually exclusive */ - if (OPAL_SUCCESS != + if (OPAL_SUCCESS != mca_base_var_check_exclusive("orte", mca_oob_tcp_component.super.oob_base.mca_type_name, mca_oob_tcp_component.super.oob_base.mca_component_name, @@ -166,7 +166,7 @@ static int tcp_component_open(void) "open" failing is not printed */ return ORTE_ERR_NOT_AVAILABLE; } - + return ORTE_SUCCESS; } @@ -279,7 +279,7 @@ static int tcp_component_register(void) #if ORTE_ENABLE_STATIC_PORTS static_port_string = NULL; - (void)mca_base_component_var_register(component, "static_ipv4_ports", + (void)mca_base_component_var_register(component, "static_ipv4_ports", "Static ports for daemons and procs (IPv4)", MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9, @@ -299,7 +299,7 @@ static int tcp_component_register(void) #if OPAL_ENABLE_IPV6 static_port_string6 = NULL; - (void)mca_base_component_var_register(component, "static_ipv6_ports", + (void)mca_base_component_var_register(component, "static_ipv6_ports", "Static ports for daemons and procs (IPv6)", MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9, @@ -324,7 +324,7 @@ static int tcp_component_register(void) } #endif #endif - + dyn_port_string = NULL; (void)mca_base_component_var_register(component, "dynamic_ipv4_ports", "Range of ports to be dynamically used by daemons and procs (IPv4)", @@ -445,7 +445,7 @@ static bool component_available(void) excluding = true; } - /* look at all available interfaces */ + /* look at all available interfaces */ for (i = opal_ifbegin(); i >= 0; i = opal_ifnext(i)) { if (OPAL_SUCCESS != opal_ifindextoaddr(i, (struct sockaddr*) &my_ss, sizeof (my_ss))) { @@ -453,7 +453,7 @@ static bool 
component_available(void) i, opal_ifindextokindex(i)); continue; } - + /* ignore non-ip4/6 interfaces */ if (AF_INET != my_ss.ss_family #if OPAL_ENABLE_IPV6 @@ -462,7 +462,7 @@ static bool component_available(void) ) { continue; } - + kindex = opal_ifindextokindex(i); if (kindex <= 0) { continue; @@ -520,7 +520,7 @@ static bool component_available(void) * IP interfaces that are "up" on the same subnet (because that's a Bad Idea). Note * that we should only check for this after applying the relevant include/exclude * list MCA params. If we detect redundant ports, we can also automatically ignore - * them so that applications won't hang. + * them so that applications won't hang. */ /* add this address to our connections */ @@ -596,7 +596,7 @@ static int component_startup(void) static void component_shutdown(void) { - int i=0; + int i; opal_list_item_t *item; opal_output_verbose(2, orte_oob_base_framework.framework_output, @@ -623,9 +623,9 @@ static void component_shutdown(void) static int component_send(orte_rml_send_t *msg) { opal_output_verbose(5, orte_oob_base_framework.framework_output, - "%s oob:tcp:send_nb to peer %s:%d", + "%s oob:tcp:send_nb to peer %s:%d to channel=%d seq = %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&msg->dst), msg->tag); + ORTE_NAME_PRINT(&msg->dst), msg->tag,msg->dst_channel, msg->seq_num ); /* the module is potentially running on its own event * base, so all it can do is push our send request @@ -703,26 +703,20 @@ static int component_set_addr(orte_process_name_t *peer, found = false; for (i=0; NULL != uris[i]; i++) { - tcpuri = strdup(uris[i]); - if (NULL == tcpuri) { - opal_output_verbose(2, orte_oob_base_framework.framework_output, - "%s oob:tcp: out of memory", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - continue; - } if (0 == strncmp(uris[i], "tcp:", 4)) { af_family = AF_INET; + tcpuri = strdup(uris[i]); host = tcpuri + strlen("tcp://"); } else if (0 == strncmp(uris[i], "tcp6:", 5)) { #if OPAL_ENABLE_IPV6 af_family = 
AF_INET6; + tcpuri = strdup(uris[i]); host = tcpuri + strlen("tcp6://"); #else /* we don't support this connection type */ opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s oob:tcp: address %s not supported", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), uris[i]); - free(tcpuri); continue; #endif } else { @@ -730,7 +724,6 @@ static int component_set_addr(orte_process_name_t *peer, opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s oob:tcp: ignoring address %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), uris[i]); - free(tcpuri); continue; } @@ -739,19 +732,26 @@ static int component_set_addr(orte_process_name_t *peer, "%s oob:tcp: working peer %s address %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(peer), uris[i]); - + /* separate the ports from the network addrs */ ports = strrchr(tcpuri, ':'); *ports = '\0'; ports++; /* split the addrs */ + if (NULL == host || 0 == strlen(host)) { + opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, + "FORMAT ERROR IN ADDR: %s", + (NULL == host) ? "NULL" : "ZERO LENGTH"); + free(tcpuri); + return ORTE_ERR_BAD_PARAM; + } + /* if this is a tcp6 connection, the first one will have a '[' * at the beginning of it, and the last will have a ']' at the * end - we need to remove those extra characters */ hptr = host; -#if OPAL_ENABLE_IPV6 if (AF_INET6 == af_family) { if ('[' == host[0]) { hptr = &host[1]; @@ -760,7 +760,6 @@ static int component_set_addr(orte_process_name_t *peer, host[strlen(host)-1] = '\0'; } } -#endif addrs = opal_argv_split(hptr, ','); @@ -788,7 +787,7 @@ static int component_set_addr(orte_process_name_t *peer, } else { host = addrs[j]; } - + /* pass this proc, and its ports, to the * module for handling - this module will be responsible * for communicating with the proc via this network. 
@@ -1018,6 +1017,8 @@ void mca_oob_tcp_component_hop_unknown(int fd, short args, void *cbdata) snd->dst = mop->snd->hdr.dst; snd->origin = mop->snd->hdr.origin; snd->tag = mop->snd->hdr.tag; + snd->dst_channel = mop->snd->hdr.channel; + snd->seq_num = mop->snd->hdr.seq_num; snd->data = mop->snd->data; snd->count = mop->snd->hdr.nbytes; snd->cbfunc.iov = NULL; @@ -1095,7 +1096,7 @@ static char **split_and_resolve(char **orig_str, char *name) str = strchr(argv[i], '/'); if (NULL == str) { orte_show_help("help-oob-tcp.txt", "invalid if_inexclude", - true, name, orte_process_info.nodename, + true, name, orte_process_info.nodename, tmp, "Invalid specification (missing \"/\")"); free(argv[i]); free(tmp); @@ -1106,7 +1107,7 @@ static char **split_and_resolve(char **orig_str, char *name) /* Now convert the IPv4 address */ ((struct sockaddr*) &argv_inaddr)->sa_family = AF_INET; - ret = inet_pton(AF_INET, argv[i], + ret = inet_pton(AF_INET, argv[i], &((struct sockaddr_in*) &argv_inaddr)->sin_addr); free(argv[i]); @@ -1123,11 +1124,11 @@ static char **split_and_resolve(char **orig_str, char *name) name, opal_net_get_hostname((struct sockaddr*) &argv_inaddr), argv_prefix); - + /* Go through all interfaces and see if we can find a match */ for (if_index = opal_ifbegin(); if_index >= 0; - if_index = opal_ifnext(if_index)) { - opal_ifindextoaddr(if_index, + if_index = opal_ifnext(if_index)) { + opal_ifindextoaddr(if_index, (struct sockaddr*) &if_inaddr, sizeof(if_inaddr)); if (opal_net_samenetwork((struct sockaddr*) &argv_inaddr, @@ -1136,7 +1137,7 @@ static char **split_and_resolve(char **orig_str, char *name) break; } } - + /* If we didn't find a match, keep trying */ if (if_index < 0) { orte_show_help("help-oob-tcp.txt", "invalid if_inexclude", diff --git a/orte/mca/oob/tcp/oob_tcp_hdr.h b/orte/mca/oob/tcp/oob_tcp_hdr.h index 1bd4ec66db0..057ec2cb686 100644 --- a/orte/mca/oob/tcp/oob_tcp_hdr.h +++ b/orte/mca/oob/tcp/oob_tcp_hdr.h @@ -5,17 +5,19 @@ * Copyright (c) 2004-2006 The 
University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2014 -2015 Intel, Inc. All rights reserved. + * * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -54,6 +56,10 @@ typedef struct { mca_oob_tcp_msg_type_t type; /* the rml tag where this message is headed */ orte_rml_tag_t tag; + /* the rml channel where this message is headed */ + orte_rml_channel_num_t channel; + /* the seq number of this message */ + uint32_t seq_num; /* number of bytes in message */ uint32_t nbytes; } mca_oob_tcp_hdr_t; diff --git a/orte/mca/oob/tcp/oob_tcp_sendrecv.c b/orte/mca/oob/tcp/oob_tcp_sendrecv.c index 7f77287ad15..a5e6a7ac8ae 100644 --- a/orte/mca/oob/tcp/oob_tcp_sendrecv.c +++ b/orte/mca/oob/tcp/oob_tcp_sendrecv.c @@ -5,25 +5,25 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. 
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * In windows, many of the socket functions return an EWOULDBLOCK * instead of \ things like EAGAIN, EINPROGRESS, etc. It has been * verified that this will \ not conflict with other error codes that - * are returned by these functions \ under UNIX/Linux environments + * are returned by these functions \ under UNIX/Linux environments */ #include "orte_config.h" @@ -102,9 +102,9 @@ static int send_bytes(mca_oob_tcp_peer_t* peer) return ORTE_ERR_WOULD_BLOCK; } /* we hit an error and cannot progress this message */ - opal_output(0, "%s->%s mca_oob_tcp_msg_send_bytes: write failed: %s (%d) [sd = %d]", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&(peer->name)), + opal_output(0, "%s->%s mca_oob_tcp_msg_send_bytes: write failed: %s (%d) [sd = %d]", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&(peer->name)), strerror(opal_socket_errno), opal_socket_errno, peer->sd); @@ -196,7 +196,12 @@ void mca_oob_tcp_send_handler(int sd, short flags, void *cbdata) ORTE_NAME_PRINT(&(peer->name))); opal_event_del(&peer->send_event); msg->msg->status = rc; - ORTE_RML_SEND_COMPLETE(msg->msg); + if( NULL == msg->msg->channel) { + ORTE_RML_SEND_COMPLETE(msg->msg); + } + else { + ORTE_QOS_SEND_COMPLETE(msg->msg); + } OBJ_RELEASE(msg); peer->send_msg = NULL; goto next; @@ -223,7 +228,12 @@ void mca_oob_tcp_send_handler(int sd, short flags, void *cbdata) ORTE_NAME_PRINT(&(peer->name)), (int)ntohl(msg->hdr.nbytes), peer->sd); msg->msg->status = ORTE_SUCCESS; - ORTE_RML_SEND_COMPLETE(msg->msg); + if( NULL == msg->msg->channel) { + ORTE_RML_SEND_COMPLETE(msg->msg); + } + else { + ORTE_QOS_SEND_COMPLETE(msg->msg); + } OBJ_RELEASE(msg); peer->send_msg = NULL; } else if (NULL != msg->msg->data) { @@ -258,7 +268,12 @@ void mca_oob_tcp_send_handler(int sd, short flags, void 
*cbdata) ORTE_NAME_PRINT(&(peer->name)), (int)ntohl(msg->hdr.nbytes), peer->sd); msg->msg->status = ORTE_SUCCESS; - ORTE_RML_SEND_COMPLETE(msg->msg); + if( NULL == msg->msg->channel) { + ORTE_RML_SEND_COMPLETE(msg->msg); + } + else { + ORTE_QOS_SEND_COMPLETE(msg->msg); + } OBJ_RELEASE(msg); peer->send_msg = NULL; } @@ -275,7 +290,12 @@ void mca_oob_tcp_send_handler(int sd, short flags, void *cbdata) ORTE_NAME_PRINT(&(peer->name)), peer->sd); opal_event_del(&peer->send_event); msg->msg->status = rc; - ORTE_RML_SEND_COMPLETE(msg->msg); + if( NULL == msg->msg->channel) { + ORTE_RML_SEND_COMPLETE(msg->msg); + } + else { + ORTE_QOS_SEND_COMPLETE(msg->msg); + } OBJ_RELEASE(msg); peer->send_msg = NULL; ORTE_FORCED_TERMINATE(1); @@ -293,7 +313,7 @@ void mca_oob_tcp_send_handler(int sd, short flags, void *cbdata) peer->send_msg = (mca_oob_tcp_send_t*) opal_list_remove_first(&peer->send_queue); } - + /* if nothing else to do unregister for send event notifications */ if (NULL == peer->send_msg && peer->send_ev_active) { opal_event_del(&peer->send_event); @@ -344,7 +364,7 @@ static int read_bytes(mca_oob_tcp_peer_t* peer) * to abort this message */ opal_output_verbose(OOB_TCP_DEBUG_FAIL, orte_oob_base_framework.framework_output, - "%s-%s mca_oob_tcp_msg_recv: readv failed: %s (%d)", + "%s-%s mca_oob_tcp_msg_recv: readv failed: %s (%d)", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->name)), strerror(opal_socket_errno), @@ -359,7 +379,7 @@ static int read_bytes(mca_oob_tcp_peer_t* peer) * and let the caller know */ opal_output_verbose(OOB_TCP_DEBUG_FAIL, orte_oob_base_framework.framework_output, - "%s-%s mca_oob_tcp_msg_recv: peer closed connection", + "%s-%s mca_oob_tcp_msg_recv: peer closed connection", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->name))); /* stop all events */ @@ -554,11 +574,19 @@ void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata) peer->recv_msg->hdr.dst.vpid == ORTE_PROC_MY_NAME->vpid) { /* yes - post it to the 
RML for delivery */ opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, - "%s DELIVERING TO RML", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + "%s DELIVERING TO RML tag = %d channel = %d seq_num = %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + peer->recv_msg->hdr.tag, peer->recv_msg->hdr.channel, + peer->recv_msg->hdr.seq_num); ORTE_RML_POST_MESSAGE(&peer->recv_msg->hdr.origin, peer->recv_msg->hdr.tag, + peer->recv_msg->hdr.channel, peer->recv_msg->hdr.seq_num, peer->recv_msg->data, peer->recv_msg->hdr.nbytes); + opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, + "%s DELIVERED TO RML tag = %d channel = %d seq_num = %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + peer->recv_msg->hdr.tag, peer->recv_msg->hdr.channel, + peer->recv_msg->hdr.seq_num); OBJ_RELEASE(peer->recv_msg); } else { /* promote this to the OOB as some other transport might @@ -572,6 +600,8 @@ void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata) snd->origin = peer->recv_msg->hdr.origin; snd->tag = peer->recv_msg->hdr.tag; snd->data = peer->recv_msg->data; + snd->dst_channel = peer->recv_msg->hdr.channel; + snd->seq_num = peer->recv_msg->hdr.seq_num; snd->count = peer->recv_msg->hdr.nbytes; snd->cbfunc.iov = NULL; snd->cbdata = NULL; @@ -600,8 +630,8 @@ void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata) } } break; - default: - opal_output(0, "%s-%s mca_oob_tcp_peer_recv_handler: invalid socket state(%d)", + default: + opal_output(0, "%s-%s mca_oob_tcp_peer_recv_handler: invalid socket state(%d)", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->name)), peer->state); diff --git a/orte/mca/oob/tcp/oob_tcp_sendrecv.h b/orte/mca/oob/tcp/oob_tcp_sendrecv.h index e1d27e19031..658e4fd8f14 100644 --- a/orte/mca/oob/tcp/oob_tcp_sendrecv.h +++ b/orte/mca/oob/tcp/oob_tcp_sendrecv.h @@ -5,18 +5,18 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. 
All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -107,16 +107,19 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_recv_t); mca_oob_tcp_send_t *msg; \ int i; \ opal_output_verbose(5, orte_oob_base_framework.framework_output, \ - "%s:[%s:%d] queue send to %s", \ - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ - __FILE__, __LINE__, \ - ORTE_NAME_PRINT(&((m)->dst))); \ + "%s:[%s:%d] queue send to %s channel =%d", \ + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ + __FILE__, __LINE__, \ + ORTE_NAME_PRINT(&((m)->dst)), \ + (m)->dst_channel); \ msg = OBJ_NEW(mca_oob_tcp_send_t); \ /* setup the header */ \ msg->hdr.origin = (m)->origin; \ msg->hdr.dst = (m)->dst; \ msg->hdr.type = MCA_OOB_TCP_USER; \ msg->hdr.tag = (m)->tag; \ + msg->hdr.channel = (m)->dst_channel; \ + msg->hdr.seq_num = (m)->seq_num; \ /* point to the actual message */ \ msg->msg = (m); \ /* set the total number of bytes to be sent */ \ @@ -160,6 +163,8 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_recv_t); msg->hdr.dst = (m)->dst; \ msg->hdr.type = MCA_OOB_TCP_USER; \ msg->hdr.tag = (m)->tag; \ + msg->hdr.channel = (m)->dst_channel; \ + msg->hdr.seq_num = (m)->seq_num; \ /* point to the actual message */ \ msg->msg = (m); \ /* set the total number of bytes to be sent */ \ diff --git a/orte/mca/oob/ud/oob_ud_recv.c b/orte/mca/oob/ud/oob_ud_recv.c index 76084883110..8dc7e4d5001 100644 --- a/orte/mca/oob/ud/oob_ud_recv.c +++ 
b/orte/mca/oob/ud/oob_ud_recv.c @@ -4,6 +4,7 @@ * reserved. * 2014 Mellanox Technologies, Inc. * All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -80,7 +81,8 @@ int mca_oob_ud_get_recv_req (const orte_process_name_t name, const int tag, req->req_origin = name; req->req_tag = tag; - + req->req_channel = ORTE_RML_INVALID_CHANNEL_NUM; + req->req_seq_num = 0; /* this receive was not expected */ req->type = MCA_OOB_UD_REQ_RECV; diff --git a/orte/mca/oob/ud/oob_ud_req.c b/orte/mca/oob/ud/oob_ud_req.c index 9c510240735..b484ca6523e 100644 --- a/orte/mca/oob/ud/oob_ud_req.c +++ b/orte/mca/oob/ud/oob_ud_req.c @@ -4,6 +4,7 @@ * reserved. * 2014 Mellanox Technologies, Inc. * All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -286,7 +287,10 @@ void mca_oob_ud_req_complete (mca_oob_ud_req_t *req, int rc) case MCA_OOB_UD_REQ_SEND: if (req->req_data_type != MCA_OOB_UD_REQ_TR) { req->rml_msg->status = rc; - ORTE_RML_SEND_COMPLETE(req->rml_msg); + if( NULL == req->rml_msg->channel) + ORTE_RML_SEND_COMPLETE(req->rml_msg); + else + ORTE_QOS_SEND_COMPLETE(req->rml_msg); } break; case MCA_OOB_UD_REQ_RECV: @@ -302,11 +306,11 @@ void mca_oob_ud_req_complete (mca_oob_ud_req_t *req, int rc) memcpy (&data[datalen], req->req_data.iov.uiov[i].iov_base, req->req_data.iov.uiov[i].iov_len); datalen += req->req_data.iov.uiov[i].iov_len; } - ORTE_RML_POST_MESSAGE(&req->req_origin, req->req_tag, data, datalen); + ORTE_RML_POST_MESSAGE(&req->req_origin, req->req_tag, req->req_channel, req->req_seq_num, data, datalen); free(data); } else { - ORTE_RML_POST_MESSAGE(&req->req_origin, req->req_tag, - req->req_data.buf.p, req->req_data.buf.size); + ORTE_RML_POST_MESSAGE(&req->req_origin, req->req_tag, req->req_channel, req->req_seq_num, + req->req_data.buf.p, req->req_data.buf.size); } } else { opal_output_verbose(1, 
orte_oob_base_framework.framework_output, @@ -318,7 +322,8 @@ void mca_oob_ud_req_complete (mca_oob_ud_req_t *req, int rc) snd->dst = req->req_target; snd->origin = req->req_origin; snd->tag = req->req_tag; - + snd->dst_channel = req->req_channel; + snd->seq_num = req->req_seq_num; if (MCA_OOB_UD_REQ_IOV == req->req_data_type) { char *data = (char *)calloc(req->req_data.iov.count, sizeof(struct iovec)); int datalen = 0; diff --git a/orte/mca/oob/ud/oob_ud_req.h b/orte/mca/oob/ud/oob_ud_req.h index b718ed758ee..8fb8bd26afb 100644 --- a/orte/mca/oob/ud/oob_ud_req.h +++ b/orte/mca/oob/ud/oob_ud_req.h @@ -4,6 +4,7 @@ * reserved. * 2014 Mellanox Technologies, Inc. * All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -154,6 +155,8 @@ struct mca_oob_ud_req_t { }req_data; int req_tag; + int req_channel; + int req_seq_num; int req_rc; void *req_cbdata; diff --git a/orte/mca/oob/ud/oob_ud_send.c b/orte/mca/oob/ud/oob_ud_send.c index f99554f85d1..7238cf1d0e0 100644 --- a/orte/mca/oob/ud/oob_ud_send.c +++ b/orte/mca/oob/ud/oob_ud_send.c @@ -4,6 +4,7 @@ * reserved. * 2014 Mellanox Technologies, Inc. * All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved.
* $COPYRIGHT$ * * Additional copyrights may follow @@ -107,7 +108,10 @@ static int mca_oob_ud_send_self (orte_rml_send_t *msg) req->rml_msg->status = ORTE_SUCCESS; - ORTE_RML_SEND_COMPLETE(req->rml_msg); + if( NULL == req->rml_msg->channel) + ORTE_RML_SEND_COMPLETE(req->rml_msg); + else + ORTE_QOS_SEND_COMPLETE(req->rml_msg); return size; } @@ -165,6 +169,8 @@ int mca_oob_ud_process_send_nb(int fd, short args, void *cbdata) send_req->req_target = op->msg->dst; send_req->req_origin = op->msg->origin; send_req->req_tag = op->msg->tag; + send_req->req_channel = op->msg->dst_channel; + send_req->req_seq_num = op->msg->seq_num; if (op->msg->data != NULL) { size = op->msg->count; diff --git a/orte/mca/oob/usock/oob_usock_component.c b/orte/mca/oob/usock/oob_usock_component.c index 9e7b9cb79bb..222e6eff7fc 100644 --- a/orte/mca/oob/usock/oob_usock_component.c +++ b/orte/mca/oob/usock/oob_usock_component.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. @@ -23,7 +23,7 @@ * In windows, many of the socket functions return an EWOULDBLOCK * instead of things like EAGAIN, EINPROGRESS, etc.
It has been * verified that this will not conflict with other error codes that - * are returned by these functions under UNIX/Linux environments + * are returned by these functions under UNIX/Linux environments */ #include "orte_config.h" @@ -253,9 +253,9 @@ static int component_send(orte_rml_send_t *msg) orte_proc_t *proc; opal_output_verbose(5, orte_oob_base_framework.framework_output, - "%s oob:usock:send_nb to peer %s:%d", + "%s oob:usock:send_nb to peer %s:%d to channel=%d seq_num =%d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&msg->dst), msg->tag); + ORTE_NAME_PRINT(&msg->dst), msg->tag, msg->dst_channel, msg->seq_num); if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) { /* daemons can only reach local procs */ diff --git a/orte/mca/oob/usock/oob_usock_connection.c b/orte/mca/oob/usock/oob_usock_connection.c index eeb5a2b472f..682ac968148 100644 --- a/orte/mca/oob/usock/oob_usock_connection.c +++ b/orte/mca/oob/usock/oob_usock_connection.c @@ -5,21 +5,21 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -94,7 +94,7 @@ static int usock_peer_create_socket(mca_oob_usock_peer_t* peer) "%s oob:usock:peer creating socket to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->name)))); - + peer->sd = socket(PF_UNIX, SOCK_STREAM, 0); if (peer->sd < 0) { @@ -120,7 +120,7 @@ static int usock_peer_create_socket(mca_oob_usock_peer_t* peer) /* setup the socket as non-blocking */ if (peer->sd >= 0) { if ((flags = fcntl(peer->sd, F_GETFL, 0)) < 0) { - opal_output(0, "%s-%s usock_peer_connect: fcntl(F_GETFL) failed: %s (%d)\n", + opal_output(0, "%s-%s usock_peer_connect: fcntl(F_GETFL) failed: %s (%d)\n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->name)), strerror(opal_socket_errno), @@ -128,7 +128,7 @@ static int usock_peer_create_socket(mca_oob_usock_peer_t* peer) } else { flags |= O_NONBLOCK; if(fcntl(peer->sd, F_SETFL, flags) < 0) - opal_output(0, "%s-%s usock_peer_connect: fcntl(F_SETFL) failed: %s (%d)\n", + opal_output(0, "%s-%s usock_peer_connect: fcntl(F_SETFL) failed: %s (%d)\n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->name)), strerror(opal_socket_errno), @@ -248,7 +248,7 @@ void mca_oob_usock_peer_try_connect(int fd, short args, void *cbdata) "Connection across to proc %s succeeded", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&peer->name)); - + /* setup our recv to catch the return ack call */ if (!peer->recv_ev_active) { opal_event_add(&peer->recv_event, 0); @@ -259,7 +259,7 @@ void mca_oob_usock_peer_try_connect(int fd, short args, void *cbdata) if (ORTE_SUCCESS == (rc = usock_peer_send_connect_ack(peer))) { peer->state = MCA_OOB_USOCK_CONNECT_ACK; } else { - opal_output(0, + opal_output(0, "%s orte_usock_peer_try_connect: " "usock_peer_send_connect_ack to proc %s failed: %s (%d)", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -289,6 +289,8 @@ static int usock_peer_send_connect_ack(mca_oob_usock_peer_t* peer) hdr.dst = peer->name; 
hdr.type = MCA_OOB_USOCK_IDENT; hdr.tag = 0; + hdr.channel = 0xffffffff; + hdr.seq_num = 0; /* get our security credential*/ if (OPAL_SUCCESS != (rc = opal_sec.get_my_credential(opal_dstore_internal, @@ -339,7 +341,7 @@ static void usock_peer_event_init(mca_oob_usock_peer_t* peer) opal_event_del(&peer->recv_event); peer->recv_ev_active = false; } - + opal_event_set(mca_oob_usock_module.ev_base, &peer->send_event, peer->sd, @@ -371,7 +373,7 @@ void mca_oob_usock_peer_complete_connect(mca_oob_usock_peer_t *peer) /* check connect completion status */ if (getsockopt(peer->sd, SOL_SOCKET, SO_ERROR, (char *)&so_error, &so_length) < 0) { - opal_output(0, "%s usock_peer_complete_connect: getsockopt() to %s failed: %s (%d)\n", + opal_output(0, "%s usock_peer_complete_connect: getsockopt() to %s failed: %s (%d)\n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->name)), strerror(opal_socket_errno), @@ -421,7 +423,7 @@ void mca_oob_usock_peer_complete_connect(mca_oob_usock_peer_t *peer) "setting read event on connection to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->name))); - + if (!peer->recv_ev_active) { opal_event_add(&peer->recv_event, 0); peer->recv_ev_active = true; @@ -800,8 +802,8 @@ static bool usock_peer_recv_blocking(mca_oob_usock_peer_t* peer, /* socket is non-blocking so handle errors */ if (retval < 0) { - if (opal_socket_errno != EINTR && - opal_socket_errno != EAGAIN && + if (opal_socket_errno != EINTR && + opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK) { if (peer->state == MCA_OOB_USOCK_CONNECT_ACK) { /* If we overflow the listen backlog, it's @@ -825,7 +827,7 @@ static bool usock_peer_recv_blocking(mca_oob_usock_peer_t* peer, (NULL == peer) ? 
"UNKNOWN" : ORTE_NAME_PRINT(&(peer->name))); return false; } else { - opal_output(0, + opal_output(0, "%s usock_peer_recv_blocking: " "recv() failed for %s: %s (%d)\n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -866,11 +868,11 @@ void mca_oob_usock_peer_dump(mca_oob_usock_peer_t* peer, const char* msg) strerror(opal_socket_errno), opal_socket_errno); } - + #if defined(USOCK_NODELAY) optlen = sizeof(nodelay); if (getsockopt(peer->sd, IPPROTO_USOCK, USOCK_NODELAY, (char *)&nodelay, &optlen) < 0) { - opal_output(0, "usock_peer_dump: USOCK_NODELAY option: %s (%d)\n", + opal_output(0, "usock_peer_dump: USOCK_NODELAY option: %s (%d)\n", strerror(opal_socket_errno), opal_socket_errno); } diff --git a/orte/mca/oob/usock/oob_usock_hdr.h b/orte/mca/oob/usock/oob_usock_hdr.h index 3ee83967733..010d69289ef 100644 --- a/orte/mca/oob/usock/oob_usock_hdr.h +++ b/orte/mca/oob/usock/oob_usock_hdr.h @@ -5,18 +5,18 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -48,6 +48,10 @@ typedef struct { mca_oob_usock_msg_type_t type; /* the rml tag where this message is headed */ orte_rml_tag_t tag; + /* the rml channel to which this message is headed */ + orte_rml_channel_num_t channel; + /* msg seq number on the src channel */ + uint32_t seq_num; /* number of bytes in message */ uint32_t nbytes; } mca_oob_usock_hdr_t; diff --git a/orte/mca/oob/usock/oob_usock_sendrecv.c b/orte/mca/oob/usock/oob_usock_sendrecv.c index 11817eeeff7..2ae8a561af8 100644 --- a/orte/mca/oob/usock/oob_usock_sendrecv.c +++ b/orte/mca/oob/usock/oob_usock_sendrecv.c @@ -5,25 +5,25 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ * * In windows, many of the socket functions return an EWOULDBLOCK * instead of \ things like EAGAIN, EINPROGRESS, etc. 
It has been * verified that this will \ not conflict with other error codes that - * are returned by these functions \ under UNIX/Linux environments + * are returned by these functions \ under UNIX/Linux environments */ #include "orte_config.h" @@ -97,9 +97,9 @@ static int send_bytes(mca_oob_usock_peer_t* peer) return ORTE_ERR_WOULD_BLOCK; } /* we hit an error and cannot progress this message */ - opal_output(0, "%s->%s mca_oob_usock_msg_send_bytes: write failed: %s (%d) [sd = %d]", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&(peer->name)), + opal_output(0, "%s->%s mca_oob_usock_msg_send_bytes: write failed: %s (%d) [sd = %d]", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&(peer->name)), strerror(opal_socket_errno), opal_socket_errno, peer->sd); @@ -187,7 +187,12 @@ void mca_oob_usock_send_handler(int sd, short flags, void *cbdata) opal_event_del(&peer->send_event); peer->send_ev_active = false; msg->msg->status = rc; - ORTE_RML_SEND_COMPLETE(msg->msg); + if( NULL == msg->msg->channel) { + ORTE_RML_SEND_COMPLETE(msg->msg); + } + else { + ORTE_QOS_SEND_COMPLETE(msg->msg); + } OBJ_RELEASE(msg); peer->send_msg = NULL; goto next; @@ -205,7 +210,12 @@ void mca_oob_usock_send_handler(int sd, short flags, void *cbdata) ORTE_NAME_PRINT(&(peer->name)), msg->hdr.nbytes, peer->sd); msg->msg->status = ORTE_SUCCESS; - ORTE_RML_SEND_COMPLETE(msg->msg); + if( NULL == msg->msg->channel) { + ORTE_RML_SEND_COMPLETE(msg->msg); + } + else { + ORTE_QOS_SEND_COMPLETE(msg->msg); + } OBJ_RELEASE(msg); peer->send_msg = NULL; } else if (NULL != msg->msg->data) { @@ -236,7 +246,12 @@ void mca_oob_usock_send_handler(int sd, short flags, void *cbdata) ORTE_NAME_PRINT(&(peer->name)), msg->hdr.nbytes, peer->sd); msg->msg->status = ORTE_SUCCESS; - ORTE_RML_SEND_COMPLETE(msg->msg); + if( NULL == msg->msg->channel) { + ORTE_RML_SEND_COMPLETE(msg->msg); + } + else { + ORTE_QOS_SEND_COMPLETE(msg->msg); + } OBJ_RELEASE(msg); peer->send_msg = NULL; } @@ -254,7 +269,12 @@ void 
mca_oob_usock_send_handler(int sd, short flags, void *cbdata) opal_event_del(&peer->send_event); peer->send_ev_active = false; msg->msg->status = rc; - ORTE_RML_SEND_COMPLETE(msg->msg); + if( NULL == msg->msg->channel) { + ORTE_RML_SEND_COMPLETE(msg->msg); + } + else { + ORTE_QOS_SEND_COMPLETE(msg->msg); + } OBJ_RELEASE(msg); peer->send_msg = NULL; ORTE_FORCED_TERMINATE(1); @@ -272,7 +292,7 @@ void mca_oob_usock_send_handler(int sd, short flags, void *cbdata) peer->send_msg = (mca_oob_usock_send_t*) opal_list_remove_first(&peer->send_queue); } - + /* if nothing else to do unregister for send event notifications */ if (NULL == peer->send_msg && peer->send_ev_active) { opal_event_del(&peer->send_event); @@ -320,7 +340,7 @@ static int read_bytes(mca_oob_usock_peer_t* peer) * to abort this message */ opal_output_verbose(OOB_USOCK_DEBUG_FAIL, orte_oob_base_framework.framework_output, - "%s-%s mca_oob_usock_msg_recv: readv failed: %s (%d)", + "%s-%s mca_oob_usock_msg_recv: readv failed: %s (%d)", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->name)), strerror(opal_socket_errno), @@ -335,7 +355,7 @@ static int read_bytes(mca_oob_usock_peer_t* peer) * and let the caller know */ opal_output_verbose(OOB_USOCK_DEBUG_FAIL, orte_oob_base_framework.framework_output, - "%s-%s mca_oob_usock_msg_recv: peer closed connection", + "%s-%s mca_oob_usock_msg_recv: peer closed connection", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->name))); /* stop all events */ @@ -506,10 +526,8 @@ void mca_oob_usock_recv_handler(int sd, short flags, void *cbdata) if (peer->recv_msg->hdr.dst.jobid == ORTE_PROC_MY_NAME->jobid && peer->recv_msg->hdr.dst.vpid == ORTE_PROC_MY_NAME->vpid) { /* yes - post it to the RML for delivery */ - opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output, - "%s DELIVERING TO RML", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); ORTE_RML_POST_MESSAGE(&peer->recv_msg->hdr.origin, peer->recv_msg->hdr.tag, + 
peer->recv_msg->hdr.channel, peer->recv_msg->hdr.seq_num, peer->recv_msg->data, peer->recv_msg->hdr.nbytes); OBJ_RELEASE(peer->recv_msg); @@ -525,6 +543,8 @@ void mca_oob_usock_recv_handler(int sd, short flags, void *cbdata) snd->origin = peer->recv_msg->hdr.origin; snd->tag = peer->recv_msg->hdr.tag; snd->data = peer->recv_msg->data; + snd->dst_channel = peer->recv_msg->hdr.channel; + snd->seq_num = peer->recv_msg->hdr.seq_num; snd->count = peer->recv_msg->hdr.nbytes; snd->cbfunc.iov = NULL; snd->cbdata = NULL; @@ -553,8 +573,8 @@ void mca_oob_usock_recv_handler(int sd, short flags, void *cbdata) } } break; - default: - opal_output(0, "%s-%s mca_oob_usock_peer_recv_handler: invalid socket state(%d)", + default: + opal_output(0, "%s-%s mca_oob_usock_peer_recv_handler: invalid socket state(%d)", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->name)), peer->state); diff --git a/orte/mca/oob/usock/oob_usock_sendrecv.h b/orte/mca/oob/usock/oob_usock_sendrecv.h index c704c4f89f9..8614b5530e7 100644 --- a/orte/mca/oob/usock/oob_usock_sendrecv.h +++ b/orte/mca/oob/usock/oob_usock_sendrecv.h @@ -5,18 +5,18 @@ * Copyright (c) 2004-2006 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -123,6 +123,8 @@ OBJ_CLASS_DECLARATION(mca_oob_usock_recv_t); msg->hdr.dst = (m)->dst; \ msg->hdr.type = MCA_OOB_USOCK_USER; \ msg->hdr.tag = (m)->tag; \ + msg->hdr.channel = (m)->dst_channel; \ + msg->hdr.seq_num = (m)->seq_num; \ /* point to the actual message */ \ msg->msg = (m); \ /* set the total number of bytes to be sent */ \ @@ -164,6 +166,8 @@ OBJ_CLASS_DECLARATION(mca_oob_usock_recv_t); msg->hdr.dst = (m)->dst; \ msg->hdr.type = MCA_OOB_USOCK_USER; \ msg->hdr.tag = (m)->tag; \ + msg->hdr.channel = (m)->dst_channel; \ + msg->hdr.seq_num = (m)->seq_num; \ /* point to the actual message */ \ msg->msg = (m); \ /* set the total number of bytes to be sent */ \ diff --git a/orte/mca/qos/Makefile.am b/orte/mca/qos/Makefile.am new file mode 100644 index 00000000000..b1e55afb922 --- /dev/null +++ b/orte/mca/qos/Makefile.am @@ -0,0 +1,31 @@ +# +# Copyright (c) 2014 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# main library setup +noinst_LTLIBRARIES = libmca_qos.la +libmca_qos_la_SOURCES = + +# pkgdata setup +dist_ortedata_DATA = + +# local files +headers = qos.h +libmca_qos_la_SOURCES += $(headers) + +# Conditionally install the header files +if WANT_INSTALL_HEADERS +ortedir = $(orteincludedir)/$(subdir) +nobase_orte_HEADERS = $(headers) +endif + +include base/Makefile.am + + +distclean-local: + rm -f base/static-components.h diff --git a/orte/mca/qos/ack/Makefile.am b/orte/mca/qos/ack/Makefile.am new file mode 100644 index 00000000000..6cc61b5598e --- /dev/null +++ b/orte/mca/qos/ack/Makefile.am @@ -0,0 +1,34 @@ +# +# Copyright (c) 2015 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sources = \ + qos_ack.h \ + qos_ack_component.c + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). 
+ +if MCA_BUILD_orte_qos_ack_DSO +component_noinst = +component_install = mca_qos_ack.la +else +component_noinst = libmca_qos_ack.la +component_install = +endif + +mcacomponentdir = $(ortelibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_qos_ack_la_SOURCES = $(sources) +mca_qos_ack_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_qos_ack_la_SOURCES = $(sources) +libmca_qos_ack_la_LDFLAGS = -module -avoid-version + diff --git a/orte/mca/qos/ack/qos_ack.h b/orte/mca/qos/ack/qos_ack.h new file mode 100644 index 00000000000..bca06b2f208 --- /dev/null +++ b/orte/mca/qos/ack/qos_ack.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2014 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/** + * @file + * + * QoS Ack Component interface + * + * + * + */ + +#ifndef MCA_QOS_ACK_H +#define MCA_QOS_ACK_H + +#include "orte_config.h" +#include "orte/mca/qos/qos.h" +#include "orte/mca/qos/base/base.h" +#include "opal/class/opal_hotel.h" + +BEGIN_C_DECLS + +#define QOS_ACK_SEQ_NUM_UNINITIALIZED 0 +#define QOS_ACK_MAX_WINDOW 100 +#define QOS_ACK_MAX_OUTSTANDING_MSGS (QOS_ACK_MAX_WINDOW *2) +/* window timeout in secs - 100 seconds ok? 
+ TO DO: make this a QOS attribute that can be specified by the user */ +#define QOS_ACK_WINDOW_TIMEOUT_IN_SECS 1 +#define ACK_WINDOW_COMPLETE 0 +#define ACK_TIMEOUT 1 +#define ACK_OUT_OF_ORDER 2 + +typedef enum { + orte_qos_ack_channel_state_inactive = 0, + orte_qos_ack_channel_state_filling_window = 1, + orte_qos_ack_channel_state_window_completed = 2, + orte_qos_ack_channel_state_awaiting_ack = 3, + orte_qos_ack_channel_state_received_ack = 4, +}orte_qos_ack_channel_state_t ; + +/* Ack Qos channel data structure */ +typedef struct orte_qos_ack_channel { + uint32_t channel_num; + // we retain the attributes so we can compare channels - we can get rid of this and compare incoming attributes + // with attributes of interest to this channel type + opal_list_t attributes; + /* size of the message window */ + uint32_t window; + /* window timeout in secs.*/ + uint32_t timeout_secs; + /* retry msg window on ack fail */ + bool retry; + /* seq number of the first msg in the active window */ + uint32_t window_first_seq_num; + /* sequence number of last outgoing msg */ + uint32_t out_msg_seq_num; + /* sequence number of last incoming msg */ + uint32_t in_msg_seq_num; + /* sequence number of the last message acked */ + uint32_t ack_msg_seq_num; + /* ACK outstanding msgs hotel */ + opal_hotel_t outstanding_msgs; + /* array for mapping msg seq num to room num for outgoing msgs in hotels */ + int seq_num_to_room_num[QOS_ACK_MAX_OUTSTANDING_MSGS]; + /* channel state */ + orte_qos_ack_channel_state_t state; + /* window timer event */ + opal_event_t msg_ack_timer_event; +}orte_qos_ack_channel_t; + +OBJ_CLASS_DECLARATION(orte_qos_ack_channel_t); + + +extern orte_qos_module_t orte_qos_ack_module; + +ORTE_DECLSPEC void orte_qos_ack_msg_ack_timeout_callback (struct opal_hotel_t *hotel, + int room_num, void *occupant); +ORTE_DECLSPEC void orte_qos_ack_msg_window_timeout_callback (int fd, short flags, void *cbdata); + +END_C_DECLS + +#endif /* MCA_QOS_ACK_H */ diff --git 
a/orte/mca/qos/ack/qos_ack_component.c b/orte/mca/qos/ack/qos_ack_component.c new file mode 100644 index 00000000000..440f2975162 --- /dev/null +++ b/orte/mca/qos/ack/qos_ack_component.c @@ -0,0 +1,585 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" +#include "orte/constants.h" + + +#include "opal/mca/mca.h" +#include "opal/util/output.h" +#include "opal/mca/base/base.h" + + +#include "orte/mca/qos/base/base.h" +#include "orte/mca/qos/qos.h" +#include "qos_ack.h" + +/* ack module functions */ +static int qos_ack_start (void); +static void qos_ack_shutdown (void); +static void* ack_create (opal_list_t *qos_attributes, uint32_t channel_num); +static int ack_open (void *qos_channel, + opal_buffer_t * buf); +static int ack_send ( void *qos_channel, orte_rml_send_t *msg); +static int ack_recv (void *channel, orte_rml_recv_t *msg); +static void ack_close (void * channel); +static int ack_init_recv (void *channel, opal_list_t *attributes); +static int ack_cmp (void *channel, opal_list_t *attributes); +static void ack_send_callback (orte_rml_send_t *msg); + +/* utility functions */ +int send_ack (orte_qos_ack_channel_t * channel, orte_rml_channel_num_t channel_num, + uint32_t *ack_seq_nums_array, uint32_t num_msgs_acked, uint32_t ack_type); + +void orte_qos_ack_channel_process_ack (int status, orte_process_name_t* sender, + opal_buffer_t *buffer, orte_rml_tag_t tag, void *cbdata); + +void orte_qos_ack_msg_send_callback ( int status, + orte_process_name_t *peer, + struct opal_buffer_t* buffer, + orte_rml_tag_t tag, + void* cbdata); +/** + * ack module definition + */ +orte_qos_module_t orte_qos_ack_module = { + ack_create, + ack_open, + ack_send, + ack_recv, + ack_close, + ack_init_recv, + ack_cmp, + ack_send_callback +}; + +/** + * component definition + */ +mca_qos_base_component_t 
mca_qos_ack_component = { + /* First, the mca_base_component_t struct containing meta + information about the component itself */ + + { + MCA_QOS_BASE_VERSION_2_0_0, + + "ack", /* MCA component name */ + ORTE_MAJOR_VERSION, /* MCA component major version */ + ORTE_MINOR_VERSION, /* MCA component minor version */ + ORTE_RELEASE_VERSION, /* MCA component release version */ + NULL, + NULL, + }, + qos_ack_start, + qos_ack_shutdown, + orte_qos_ack, + { + ack_create, + ack_open, + ack_send, + ack_recv, + ack_close, + ack_init_recv, + ack_cmp, + ack_send_callback + } +}; + +static int qos_ack_start(void) { + orte_rml.recv_buffer_nb (ORTE_NAME_WILDCARD, ORTE_RML_TAG_MSG_ACK, + ORTE_RML_PERSISTENT, orte_qos_ack_channel_process_ack, + NULL); + /* post a persistent recieve for ACK TAG */ + return ORTE_SUCCESS; +} + +static void qos_ack_shutdown (void) { +} + +static void* ack_create (opal_list_t *qos_attributes, uint32_t channel_num) { + orte_qos_ack_channel_t * ack_chan; + int32_t rc; + uint32_t *type, type_val, *attribute, attribute_val; + type_val = orte_qos_ack; + ack_chan = OBJ_NEW (orte_qos_ack_channel_t); + ack_chan->channel_num = channel_num; + type = &type_val; + attribute = &attribute_val; + /* validate and store ack specific channel attributes */ + /* set channel type */ + if (ORTE_SUCCESS == (rc = orte_set_attribute( &ack_chan->attributes, ORTE_QOS_TYPE, ORTE_ATTR_GLOBAL, (void*)type, OPAL_UINT8))) { + if( orte_get_attribute (qos_attributes, ORTE_QOS_WINDOW_SIZE, (void**)&attribute, OPAL_UINT32)) { + if ( QOS_ACK_MAX_WINDOW < (*attribute)) { + ORTE_ERROR_LOG(OPAL_ERR_VALUE_OUT_OF_BOUNDS); + OBJ_RELEASE(ack_chan); + } + else { + ack_chan->window = *attribute; + if (ORTE_SUCCESS != (rc = orte_set_attribute(&ack_chan->attributes, ORTE_QOS_WINDOW_SIZE, + ORTE_ATTR_GLOBAL, (void*)attribute, OPAL_UINT32))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(ack_chan); + } else { + if( orte_get_attribute (qos_attributes, ORTE_QOS_ACK_NACK_TIMEOUT, (void**)&attribute, OPAL_UINT32)) { + 
ack_chan->timeout_secs = *attribute; + if (ORTE_SUCCESS != (rc = orte_set_attribute(&ack_chan->attributes, ORTE_QOS_ACK_NACK_TIMEOUT, + ORTE_ATTR_GLOBAL, (void*)attribute, OPAL_UINT32))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(ack_chan); + } else { + if( orte_get_attribute (qos_attributes, ORTE_QOS_MSG_RETRY, NULL, OPAL_BOOL)) { + OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, + "%s ack_create created channel = %p window = %d timeout =%d retry = %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ack_chan, + ack_chan->window, + ack_chan->timeout_secs, + ack_chan->retry)); + ack_chan->retry = true; + if (ORTE_SUCCESS != (rc = orte_set_attribute(&ack_chan->attributes, ORTE_QOS_MSG_RETRY, + ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(ack_chan); + } + } else { + ack_chan->retry = false; + OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, + "%s ack_create created channel = %p window = %d timeout =%d retry = %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ack_chan, + ack_chan->window, + ack_chan->timeout_secs, + ack_chan->retry)); + } + } + }else { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(ack_chan); + } + } + } + }else + OBJ_RELEASE(ack_chan); + }else { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(ack_chan); + } + return ack_chan; +} + +static int ack_open (void *qos_channel, opal_buffer_t * buf) { + int32_t rc = ORTE_SUCCESS; + uint32_t eviction_timeout; + orte_qos_ack_channel_t *ack_chan; + ack_chan = (orte_qos_ack_channel_t*) (qos_channel); + /* TO DO - need to adjust eviction timeout according to window size + lets keep max time out for the first pass */ + eviction_timeout = (ack_chan->timeout_secs + QOS_ACK_WINDOW_TIMEOUT_IN_SECS) * 100000; + /* init outstanding msg hotel */ + opal_hotel_init (&ack_chan->outstanding_msgs, QOS_ACK_MAX_OUTSTANDING_MSGS, + eviction_timeout, 0, + orte_qos_ack_msg_ack_timeout_callback); + OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, + "%s ack_open channel = %p init hotel 
timeout =%d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ack_chan, eviction_timeout)); + /* set the message window timer event, but don't activate it */ + /*opal_event_set(opal_event_base, + &ack_chan->msg_window_timer_event, + -1, 0, orte_qos_ack_msg_window_timeout_callback, + ack_chan); + opal_event_set_priority(&ack_chan->msg_window_timer_event, ORTE_MSG_PRI);*/ + /* the Qos module puts the non local attributes to be sent to the peer in a list at the time of create. + pack those attributes into the buffer.*/ + if (ORTE_SUCCESS != (rc = orte_qos_base_pack_attributes(buf, &ack_chan->attributes))) + ORTE_ERROR_LOG(rc); + return rc; +} + +static int ack_send ( void *qos_channel, orte_rml_send_t *msg) { + int32_t rc = ORTE_SUCCESS; + struct timeval window_timeout; + int32_t room_num; + orte_qos_ack_channel_t *ack_chan = (orte_qos_ack_channel_t*) (qos_channel); + if (ack_chan->out_msg_seq_num == ack_chan->window_first_seq_num -1 ) { + /* begining msg window */ + ack_chan->out_msg_seq_num = ack_chan->window_first_seq_num; + OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, + "%s ack_send msg = %p to peer = %s\n begining window at seq_num = %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + msg, ORTE_NAME_PRINT(&msg->dst), ack_chan->out_msg_seq_num)); + ack_chan->state = orte_qos_ack_channel_state_filling_window; + } + else + ack_chan->out_msg_seq_num++; + if(ack_chan->out_msg_seq_num - ack_chan->window_first_seq_num == ack_chan->window - 1) { + /* we are at the end of the window. 
*/ + /* update state */ + ack_chan->state = orte_qos_ack_channel_state_window_completed; + /* set begin window for next sequence */ + ack_chan->window_first_seq_num = ack_chan->out_msg_seq_num + 1; + } + msg->seq_num = ack_chan->out_msg_seq_num; + /* check msg into hotel */ + if( OPAL_SUCCESS == (opal_hotel_checkin(&ack_chan->outstanding_msgs, msg, &room_num ))) { + /* store room number */ + ack_chan->seq_num_to_room_num[(msg->seq_num % QOS_ACK_MAX_OUTSTANDING_MSGS)] = room_num; + } else { + OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, + "%s ack_send msg = %p to peer = %s returned with error %d\n", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + msg, ORTE_NAME_PRINT(&msg->dst), + ORTE_ERR_QOS_ACK_WINDOW_FULL)); + return ORTE_ERR_QOS_ACK_WINDOW_FULL; + } + OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, + "%s ack_send msg = %p to peer = %s\n", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + msg, ORTE_NAME_PRINT(&msg->dst))); + return ORTE_SUCCESS; +} + +static int ack_recv (void *qos_channel, orte_rml_recv_t *msg) { + orte_qos_ack_channel_t *ack_chan; + ack_chan = (orte_qos_ack_channel_t*) (qos_channel); + bool ack = false; + uint32_t num_msgs_to_ack = 0; + uint32_t *ack_seq_num_array; + uint32_t ack_type, i; + int32_t rc; + struct timeval ack_timeout; + /* check for out of order msg */ + if( ack_chan->in_msg_seq_num + 1 != msg->seq_num) + { + /* we got an out of order msg or we may have ended the window */ + ack = true; + ack_type = ACK_OUT_OF_ORDER; + /* stop window ack timer */ + opal_event_evtimer_del (&ack_chan->msg_ack_timer_event); + } + else { + /* check if we are at the end of the window */ + if (ack_chan->window == msg->seq_num - ack_chan->ack_msg_seq_num) { + ack = true; + ack_type = ACK_WINDOW_COMPLETE; + /* stop window ack timer */ + opal_event_evtimer_del (&ack_chan->msg_ack_timer_event); + } else { + if(msg->seq_num - ack_chan->ack_msg_seq_num == 1) { + /* begining window -start window ack timer */ + ack_timeout.tv_sec = 
ack_chan->timeout_secs; + ack_timeout.tv_usec = 0; + opal_event_evtimer_add (&ack_chan->msg_ack_timer_event, &ack_timeout); + } + ack_chan->in_msg_seq_num = msg->seq_num; + } + } + + if ((ack) && (msg->tag >= ORTE_RML_TAG_MAX)) { + num_msgs_to_ack = ack_chan->in_msg_seq_num - ack_chan->ack_msg_seq_num + 1; + ack_chan->in_msg_seq_num = msg->seq_num; + OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, + "%s ack_recv sending ack for %d msgs from %s\n", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + num_msgs_to_ack, + ORTE_NAME_PRINT(&msg->sender))); + if ( NULL != (ack_seq_num_array = malloc (sizeof(uint32_t) * num_msgs_to_ack))) { + + for (i = 1; i <= num_msgs_to_ack ; i++) { + ack_seq_num_array[i-1] = ack_chan->ack_msg_seq_num + i; + OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, + "%s ack_recv acking msg %d to peer = %s\n", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ack_seq_num_array[i-1], + ORTE_NAME_PRINT(&msg->sender))); + } + ack_seq_num_array[num_msgs_to_ack - 1] = msg->seq_num; + OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, + "%s ack_recv acking last msg %d to peer = %s\n", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ack_seq_num_array[num_msgs_to_ack - 1], + ORTE_NAME_PRINT(&msg->sender))); + + } + else { + OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, + "%s ack_recv cannot allocate ack array to send ack to peer = %s\n", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&msg->sender))); + return ORTE_ERROR; + } + /* now construct and send ack message */ + rc = send_ack(ack_chan, msg->channel_num, + ack_seq_num_array, num_msgs_to_ack, ack_type); + if(ORTE_SUCCESS == rc) { + /* update last acked msg */ + ack_chan->ack_msg_seq_num = msg->seq_num; + } else { + //TO DO + } + + } + OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, + "%s ack_recv msg = %p seq_num = %d from peer = %s\n", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + msg, msg->seq_num, + ORTE_NAME_PRINT(&msg->sender))); + + 
return ORTE_SUCCESS; +} + +static void ack_close (void * channel) { + +} + +static int ack_init_recv (void *channel, opal_list_t *attributes) { + int32_t rc = ORTE_SUCCESS; + orte_qos_ack_channel_t *ack_chan; + ack_chan = (orte_qos_ack_channel_t*) (channel); + opal_event_evtimer_set (orte_event_base, &ack_chan->msg_ack_timer_event, + orte_qos_ack_msg_window_timeout_callback, (void *) ack_chan); + return rc; +} + +static int ack_cmp (void *channel, opal_list_t *attributes) { + return false; + +} + +static void ack_send_callback (orte_rml_send_t *msg) +{ + orte_qos_ack_channel_t *ack_chan; + /* complete the request back to the user only upon receiving the ack + nothing to do here, just make sure that the request is in the hotel */ + OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, + "%s ack_send_callback for msg = %p seq num =%d\n", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + msg, msg->seq_num)); + ack_chan = (orte_qos_ack_channel_t *) msg->channel->qos_channel_ptr; + /* if msg->status != SUCCESS - then evict all messages in the window and + complete them?? */ + if(ORTE_SUCCESS == msg->status) { + // nothing to do + assert(ack_chan->seq_num_to_room_num[msg->seq_num] != -1); + } else { + // TO DO : error handling + OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, + "%s ack_send_callback for msg = %p seq num =%d SEND FAILED status = %d\n", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + msg, msg->seq_num, msg->status)); + /* evict message from hotel and send end of window to receiver?? 
*/ + + } +} + +void orte_qos_ack_msg_ack_timeout_callback (struct opal_hotel_t *hotel, + int room_num, void *occupant) +{ + orte_rml_send_t *msg; + orte_qos_ack_channel_t *ack_chan; + msg = (orte_rml_send_t *) occupant; + ack_chan = (orte_qos_ack_channel_t*) msg->channel->qos_channel_ptr; + OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, + "%s orte_qos_ack_msg_ack_timeout_callback for msg = %p seq num =%d\n", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + msg, msg->seq_num)); + /* for now complete only the msg that timed out + TO DO : handle the completion of all messages in the window */ + msg->status = ORTE_ERR_ACK_TIMEOUT_SENDER; + // set room num to -1 for the msg's seq number + ack_chan->seq_num_to_room_num[msg->seq_num] = -1; + // complete the msg + ORTE_RML_SEND_COMPLETE(msg); +} + +void orte_qos_ack_channel_process_ack (int status, orte_process_name_t* sender, + opal_buffer_t *buffer, + orte_rml_tag_t tag, void *cbdata) +{ + /* process ack received for the msg */ + uint32_t num_msgs_acked, channel_num, i; + int32_t num_values, room_num; + orte_rml_send_t *msg; + void *occupant = NULL; + orte_rml_channel_t *channel; + orte_qos_ack_channel_t *ack_chan; + uint32_t *seq_num_array; + uint32_t ack_type; + + num_values = 1; + /* unpack channel number first */ + opal_dss.unpack(buffer, (void*) &channel_num, &num_values, OPAL_UINT32); + OPAL_OUTPUT_VERBOSE((5, orte_qos_base_framework.framework_output, + "orte_qos_ack_channel_msg_ack_recv_callback recieved ack on channel = %d", + channel_num)); + channel = orte_rml_base_get_channel (channel_num); + OPAL_OUTPUT_VERBOSE((5, orte_qos_base_framework.framework_output, + "orte_qos_ack_channel_msg_ack_recv_callback recieved ack on channel = %d, channel =%p,qos_channel = %p", + channel_num, channel, channel->qos_channel_ptr)); + if ((NULL != channel) || (NULL != channel->qos_channel_ptr)) { + ack_chan = (orte_qos_ack_channel_t *) (channel->qos_channel_ptr); + seq_num_array = malloc (sizeof(uint32_t) * 
ack_chan->window); + num_values = 1; + /* unpack ack type */ + opal_dss.unpack(buffer, (void*) &ack_type, &num_values, OPAL_UINT32); + num_values = 1; + /* unpack num messages acked */ + opal_dss.unpack(buffer, (void*) &num_msgs_acked, &num_values, OPAL_UINT32); + /* unpack sequence number array */ + + + OPAL_OUTPUT_VERBOSE((5, orte_qos_base_framework.framework_output, + "orte_qos_ack_channel_msg_ack_recv_callback recieved acks for %d msgs on channel = %d", + num_msgs_acked, channel_num)); + for (i = 0; i < num_msgs_acked; i++) + { + opal_dss.unpack(buffer, (void*) &seq_num_array[i], &num_values, OPAL_UINT32); + OPAL_OUTPUT_VERBOSE((5, orte_qos_base_framework.framework_output, + "orte_qos_ack_channel_msg_ack_recv_callback recieved ack for msg with seq_num = %d, channel = %d", + seq_num_array[i], channel_num)); + room_num = ack_chan->seq_num_to_room_num[seq_num_array[i]]; + opal_hotel_checkout_and_return_occupant(&ack_chan->outstanding_msgs, room_num, &occupant); + if(occupant != NULL) { + msg = (orte_rml_send_t*) occupant; + OPAL_OUTPUT_VERBOSE((10, orte_rml_base_framework.framework_output, + "Releasing sent message with tag %d and seq_num %d after receiving Ack from dest ", + msg->tag, msg->seq_num )); + msg->status = ORTE_SUCCESS; + ORTE_RML_SEND_COMPLETE(msg); + } else { + OPAL_OUTPUT_VERBOSE((10, orte_rml_base_framework.framework_output, + "OOPS received an ACK for already completed seq_num =%d ", + seq_num_array[i] )); + } + } + free(seq_num_array); + } else { + OPAL_OUTPUT_VERBOSE((5, orte_qos_base_framework.framework_output, + "orte_qos_ack_channel_msg_ack_recv_callback recieved ack on non existent channel = %d", + channel_num)); + } +} + +int send_ack (orte_qos_ack_channel_t * channel, orte_rml_channel_num_t channel_num, + uint32_t *ack_seq_num_array, uint32_t num_msgs_acked, uint32_t ack_type) +{ + int rc = ORTE_SUCCESS; + orte_rml_channel_t *rml_channel; + opal_buffer_t *buffer; + rml_channel = orte_rml_base_get_channel (channel_num); + int i; + if (NULL 
== rml_channel) + { + OPAL_OUTPUT_VERBOSE ((0, orte_qos_base_framework.framework_output, + " function send_ack - couldn't retrieve rml_channel with channel num =%d", + channel_num)); + return ORTE_ERROR; + } + buffer = OBJ_NEW (opal_buffer_t); + /* pack channel number */ + opal_dss.pack (buffer, &rml_channel->peer_channel, 1, OPAL_UINT32); + /* pack ack type */ + opal_dss.pack (buffer, &ack_type, 1, OPAL_UINT32); + /* pack num messages */ + opal_dss.pack (buffer, &num_msgs_acked, 1, OPAL_UINT32); + /* pack seq number array */ + for (i =0; ipeer, buffer, ORTE_RML_TAG_MSG_ACK, + orte_qos_ack_msg_send_callback, rml_channel); + return rc; + +} + +void orte_qos_ack_msg_send_callback ( int status, + orte_process_name_t *peer, + struct opal_buffer_t* buffer, + orte_rml_tag_t tag, + void* cbdata) +{ + orte_rml_channel_t *channel = (orte_rml_channel_t*) cbdata; + OPAL_OUTPUT_VERBOSE ((0, orte_qos_base_framework.framework_output, + " orte_qos_ack_msg_send_callback channel num =%d status =%d", + channel->channel_num, status)); +} + +void orte_qos_ack_msg_window_timeout_callback (int fd, short flags, void *cbdata) +{ + uint32_t num_msgs_to_ack = 0; + uint32_t *ack_seq_num_array; + uint32_t ack_type, i; + int32_t rc; + orte_qos_ack_channel_t *ack_chan = (orte_qos_ack_channel_t*) cbdata; + OPAL_OUTPUT_VERBOSE ((0, orte_qos_base_framework.framework_output, + " orte_qos_ack_msg_window_timeout_callback for channel = %p last acked seq num = %d, last received seq num =%d", + ack_chan, ack_chan->ack_msg_seq_num, ack_chan->in_msg_seq_num )); + ack_type = ACK_TIMEOUT; + /* prepare to send ack */ + num_msgs_to_ack = ack_chan->in_msg_seq_num - ack_chan->ack_msg_seq_num; + OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, + "%s ack_recv sending ack for %d msgs \n", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + num_msgs_to_ack)); + if ( NULL != (ack_seq_num_array = malloc (sizeof(uint32_t) * num_msgs_to_ack))) { + + for (i = 1; i <= num_msgs_to_ack ; i++) { + 
ack_seq_num_array[i-1] = ack_chan->ack_msg_seq_num + i; + OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, + "%s ack_recv acking msg %d \n", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ack_seq_num_array[i-1])); + } + + /* now construct and send ack message */ + rc = send_ack(ack_chan, ack_chan->channel_num, + ack_seq_num_array, num_msgs_to_ack, ack_type); + if(ORTE_SUCCESS == rc) { + /* update last acked msg */ + ack_chan->ack_msg_seq_num = ack_chan->in_msg_seq_num; + } else { + //TO DO + } + } + else { + OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, + "%s cannot send ack \n", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + + } +} +/*** ACK QOS CLASS INSTANCES ***/ + +static void channel_cons (orte_qos_ack_channel_t *ptr) +{ + int i; + OBJ_CONSTRUCT (&ptr->attributes, opal_list_t); + ptr->out_msg_seq_num = 0; + ptr->window_first_seq_num = 1; + ptr->in_msg_seq_num = 0; + ptr->ack_msg_seq_num = 0; + /* init seq num to room num array to -1 */ + for (i =0; i< QOS_ACK_MAX_OUTSTANDING_MSGS; i++) + ptr->seq_num_to_room_num[i] = -1; + OBJ_CONSTRUCT (&ptr->outstanding_msgs, opal_hotel_t); + ptr->state = orte_qos_ack_channel_state_inactive; +} +static void channel_des (orte_qos_ack_channel_t *ptr) +{ + OPAL_LIST_DESTRUCT(&ptr->attributes); + OBJ_DESTRUCT (&ptr->outstanding_msgs); + opal_event_evtimer_del (&ptr->msg_ack_timer_event); + // TO DO release timer event + +} +OBJ_CLASS_INSTANCE (orte_qos_ack_channel_t, + opal_object_t, + channel_cons, channel_des); diff --git a/orte/mca/qos/base/Makefile.am b/orte/mca/qos/base/Makefile.am new file mode 100644 index 00000000000..0f86a631ad6 --- /dev/null +++ b/orte/mca/qos/base/Makefile.am @@ -0,0 +1,18 @@ +# +# Copyright (c) 2014 Intel, Inc. All rights reserved. 
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+dist_ortedata_DATA += base/help-qos-base.txt
+
+headers += \
+        base/base.h
+
+libmca_qos_la_SOURCES += \
+        base/qos_base_frame.c \
+        base/qos_base_select.c \
+        base/qos_base_channel_handlers.c
diff --git a/orte/mca/qos/base/base.h b/orte/mca/qos/base/base.h
new file mode 100644
index 00000000000..2cbf285e66c
--- /dev/null
+++ b/orte/mca/qos/base/base.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2014 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+/**
+ * @file
+ *
+ * QoS Framework maintenance interface
+ *
+ * Declares the framework object, the framework-level state
+ * (orte_qos_base) and helpers shared by the QoS components.
+ */
+
+#ifndef MCA_QOS_BASE_H
+#define MCA_QOS_BASE_H
+
+#include "orte_config.h"
+#include "orte/mca/qos/qos.h"
+#include "orte/mca/rml/base/base.h"
+#include "opal/class/opal_list.h"
+
+BEGIN_C_DECLS
+
+/*
+ * MCA Framework
+ */
+ORTE_DECLSPEC extern mca_base_framework_t orte_qos_base_framework;
+/* select a component */
+ORTE_DECLSPEC int orte_qos_base_select(void);
+
+/* a global struct containing framework-level values */
+typedef struct {
+    opal_list_t open_channels;
+    opal_pointer_array_t actives;
+#if OPAL_ENABLE_TIMING
+    bool timing;
+#endif
+} orte_qos_base_t;
+ORTE_DECLSPEC extern orte_qos_base_t orte_qos_base;
+
+#define ORTE_QOS_MAX_WINDOW_SIZE 1000
+
+/* Generic channel record: the channel number plus its attribute list.
+ * NOTE(review): declared as an OPAL class below, yet the struct has no
+ * opal_object_t first member - confirm before using OBJ_NEW on it. */
+typedef struct orte_qos_base_channel {
+    uint32_t channel_num;
+    opal_list_t attributes;
+} orte_qos_base_channel_t;
+OBJ_CLASS_DECLARATION(orte_qos_base_channel_t);
+
+/* common implementations */
+ORTE_DECLSPEC void* orte_qos_get_module ( opal_list_t *qos_attributes);
+int orte_qos_base_pack_attributes (opal_buffer_t * buffer, opal_list_t * qos_attributes);
+
+/*
+ * Complete a send on message m: hand it to the send_callback of the QoS
+ * module attached to m's channel, or straight to ORTE_RML_SEND_COMPLETE
+ * when the channel has no QoS module.
+ * NOTE(review): the expansion ends in "while(0);" so the macro cannot sit
+ * unbraced before an else; the trailing ';' is kept for existing callers.
+ */
+#define ORTE_QOS_SEND_COMPLETE(m)                                       \
+    do {                                                                \
+        orte_qos_module_t *mod;                                         \
+        opal_output_verbose(5, orte_qos_base_framework.framework_output, \
+                            "%s-%s Send message complete at %s:%d",     \
+                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),         \
+                            ORTE_NAME_PRINT(&((m)->dst)),               \
+                            __FILE__, __LINE__);                        \
+        mod = (orte_qos_module_t*) (m)->channel->qos;                   \
+        if (NULL != mod)                                                \
+            mod->send_callback(m);                                      \
+        else                                                            \
+            ORTE_RML_SEND_COMPLETE(m);                                  \
+    } while(0);
+
+END_C_DECLS
+
+#endif /* MCA_QOS_BASE_H */
diff --git a/orte/mca/qos/base/help-qos-base.txt b/orte/mca/qos/base/help-qos-base.txt
new file mode 100644
index 00000000000..cfa4b6cc2e2
--- /dev/null
+++ b/orte/mca/qos/base/help-qos-base.txt
@@ -0,0 +1,12 @@
+# -*- text -*-
+#
+# Copyright (c) 2014 Intel, Inc. All rights reserved
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+#
+[no-qos-avail]
+No Qos protocols available.
diff --git a/orte/mca/qos/base/oob_base_select.c b/orte/mca/qos/base/oob_base_select.c
new file mode 100644
index 00000000000..0cb01b05e49
--- /dev/null
+++ b/orte/mca/qos/base/oob_base_select.c
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2006 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
+ *                         reserved.
+ * Copyright (c) 2014 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "orte_config.h"
+#include "orte/constants.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include "opal/mca/mca.h"
+#include "opal/util/output.h"
+#include "opal/mca/base/base.h"
+
+#include "orte/util/show_help.h"
+
+#include "orte/runtime/orte_globals.h"
+#include "orte/mca/oob/oob.h"
+#include "orte/mca/oob/base/base.h"
+
+
+/**
+ * Function for selecting all runnable modules from those that are
+ * available.
+ * NOTE(review): OOB selector under qos/base, not listed in base/Makefile.am - stray copy? confirm.
+ * Call the init function on all available modules.
+ */
+int orte_oob_base_select(void)
+{
+    mca_base_component_list_item_t *cli, *cmp, *c2;
+    mca_oob_base_component_t *component, *c3;
+    bool added;
+    int i;
+
+    /* Query all available components and ask if their transport is available */
+    OPAL_LIST_FOREACH(cli, &orte_oob_base_framework.framework_components, mca_base_component_list_item_t) {
+        component = (mca_oob_base_component_t *) cli->cli_component;
+
+        opal_output_verbose(5, orte_oob_base_framework.framework_output,
+                            "mca:oob:select: checking available component %s",
+                            component->oob_base.mca_component_name);
+
+        /* If there's no query function, skip it */
+        if (NULL == component->available) {
+            opal_output_verbose(5, orte_oob_base_framework.framework_output,
+                                "mca:oob:select: Skipping component [%s]. It does not implement a query function",
+                                component->oob_base.mca_component_name );
+            continue;
+        }
+
+        /* Query the component */
+        opal_output_verbose(5, orte_oob_base_framework.framework_output,
+                            "mca:oob:select: Querying component [%s]",
+                            component->oob_base.mca_component_name);
+
+        /* If the component is not available, then skip it as
+         * it has no available interfaces
+         */
+        if (!component->available()) {
+            opal_output_verbose(5, orte_oob_base_framework.framework_output,
+                                "mca:oob:select: Skipping component [%s] - no available interfaces",
+                                component->oob_base.mca_component_name );
+            continue;
+        }
+
+        /* if it fails to startup, then skip it (startup itself is called without a NULL check) */
+        if (ORTE_SUCCESS != component->startup()) {
+            opal_output_verbose(5, orte_oob_base_framework.framework_output,
+                                "mca:oob:select: Skipping component [%s] - failed to startup",
+                                component->oob_base.mca_component_name );
+            continue;
+        }
+
+        /* record it, but maintain priority order */
+        added = false;
+        OPAL_LIST_FOREACH(cmp, &orte_oob_base.actives, mca_base_component_list_item_t) {
+            c3 = (mca_oob_base_component_t *) cmp->cli_component;
+            if (c3->priority > component->priority) {   /* existing entry outranks the new one: keep walking */
+                continue;
+            }
+            opal_output_verbose(5, orte_oob_base_framework.framework_output,
+                                "mca:oob:select: Inserting component");
+            c2 = OBJ_NEW(mca_base_component_list_item_t);   /* fresh list item wrapping the component */
+            c2->cli_component = (mca_base_component_t*)component;
+            opal_list_insert_pos(&orte_oob_base.actives,
+                                 &cmp->super, &c2->super);   /* presumably places c2 ahead of cmp - confirm */
+            added = true;
+            break;
+        }
+        if (!added) {
+            /* add to end: the list was empty or every entry has a higher priority */
+            opal_output_verbose(5, orte_oob_base_framework.framework_output,
+                                "mca:oob:select: Adding component to end");
+            c2 = OBJ_NEW(mca_base_component_list_item_t);
+            c2->cli_component = (mca_base_component_t*)component;
+            opal_list_append(&orte_oob_base.actives, &c2->super);
+        }
+    }
+
+    if (0 == opal_list_get_size(&orte_oob_base.actives)) {
+        /* no support available means we really cannot run */
+        opal_output_verbose(5, orte_oob_base_framework.framework_output,
+                            "mca:oob:select: Init failed to return any available transports");
+        orte_show_help("help-oob-base.txt", "no-interfaces-avail", true);
+        return ORTE_ERR_SILENT;
+    }
+
+    /* provide them an index so we can track their usability in a bitmap */
+    i=0;
+    OPAL_LIST_FOREACH(cmp, &orte_oob_base.actives, mca_base_component_list_item_t) {
+        c3 = (mca_oob_base_component_t *) cmp->cli_component;
+        c3->idx = i++;   /* idx follows the order of the actives list */
+    }
+
+    opal_output_verbose(5, orte_oob_base_framework.framework_output,
+                        "mca:oob:select: Found %d active transports",
+                        (int)opal_list_get_size(&orte_oob_base.actives));
+    return ORTE_SUCCESS;
+}
diff --git a/orte/mca/qos/base/qos_base_channel_handlers.c b/orte/mca/qos/base/qos_base_channel_handlers.c
new file mode 100644
index 00000000000..ed7b6730af6
--- /dev/null
+++ b/orte/mca/qos/base/qos_base_channel_handlers.c
@@ -0,0 +1,163 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
+/*
+ * Copyright (c) 2014 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+/** @file:
+ * qos_base_channel_handlers.c - contains base functions handlers for open, send and close channel requests.
+ */
+
+/*
+ * includes
+ */
+#include "orte_config.h"
+
+#ifdef HAVE_STRING_H
+#include <string.h>
+#endif
+
+#include "orte/constants.h"
+#include "orte/types.h"
+
+#include "opal/dss/dss.h"
+#include "opal/util/output.h"
+#include "opal/util/timings.h"
+#include "opal/class/opal_list.h"
+
+#include "orte/mca/errmgr/errmgr.h"
+#include "orte/runtime/orte_globals.h"
+#include "orte/runtime/orte_wait.h"
+#include "orte/util/name_fns.h"
+
+#include "orte/mca/qos/qos.h"
+#include "orte/mca/qos/base/base.h"
+#include "orte/mca/rml/base/base.h"
+
+/* Pack the count of ORTE_ATTR_GLOBAL attributes in qos_attributes, then each of them, into buffer; returns the first pack error or ORTE_SUCCESS. */
+int orte_qos_base_pack_attributes (opal_buffer_t * buffer, opal_list_t * qos_attributes)
+{
+    int32_t num_attributes = 0;
+    int32_t rc= ORTE_SUCCESS;
+    orte_attribute_t *kv;
+    /* the count sent first must match what is packed below: global attributes only */
+    OPAL_LIST_FOREACH(kv, qos_attributes, orte_attribute_t) {
+        if (ORTE_ATTR_GLOBAL == kv->local) num_attributes++;
+    }
+    OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
+                         "%s orte_qos_base_pack_attributes num_attributes = %d\n",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), num_attributes));
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, (void*)(&num_attributes), 1, ORTE_STD_CNTR))) {
+        ORTE_ERROR_LOG (rc);
+        return rc;
+    }
+    OPAL_LIST_FOREACH(kv, qos_attributes, orte_attribute_t) {
+        if (ORTE_ATTR_GLOBAL == kv->local) {
+            OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
+                                 "%s orte_qos_base_pack_attributes attribute key = %d value =%d\n",
+                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), kv->key, kv->data.uint8));
+            if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, (void*)&kv, 1, ORTE_ATTRIBUTE))) {
+                ORTE_ERROR_LOG(rc);
+                return rc;
+            }
+        }
+    }
+    return rc;
+}
+/* Return the module of the component stored in orte_qos_base.actives at the index given by the ORTE_QOS_TYPE attribute; NULL if the attribute is missing, out of range, or the slot is empty. */
+void* orte_qos_get_module (opal_list_t *qos_attributes)
+{
+    uint8_t * type, type_val =0;   /* the attribute is requested as OPAL_UINT8, so read it into a uint8_t */
+    mca_qos_base_component_t *qos_comp;
+    type = &type_val;
+    if(!orte_get_attribute( qos_attributes, ORTE_QOS_TYPE, (void**)&type, OPAL_UINT8))
+        return NULL;
+    OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
+                         "%s orte_qos_get_module channel type = %d\n",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                         type_val));
+    //check if type is valid (unsigned, so only the upper bound needs testing)
+    if (ORTE_QOS_MAX_COMPONENTS <= type_val)
+        return NULL;
+    // associate the qos module
+    qos_comp = (mca_qos_base_component_t *) opal_pointer_array_get_item(&orte_qos_base.actives, type_val);
+    if (NULL != qos_comp)
+    {
+        OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
+                             "%s qos_base_get_module returning qos module %p type =%d",
+                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                             &qos_comp->mod, type_val));
+        return (void*)(&qos_comp->mod);
+    } else {
+        OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
+                             "%s qos_base_get_module failed to get qos component of type =%d",
+                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                             type_val));
+    }
+    return NULL;
+}
+/* Forward to the module's create(); logs ORTE_ERR_BAD_PARAM and returns NULL when qos_mod is NULL. */
+void * orte_qos_create_channel (void *qos_mod, opal_list_t *qos_attributes, uint32_t channel_num) {
+    orte_qos_module_t *qos = (orte_qos_module_t *) (qos_mod);
+    if (NULL != qos)
+        return qos->create(qos_attributes, channel_num);
+    else
+        ORTE_ERROR_LOG (ORTE_ERR_BAD_PARAM);
+    return NULL;
+}
+/* Forward to the module's open(); logs and returns ORTE_ERR_BAD_PARAM when qos_mod is NULL. */
+int orte_qos_open_channel (void *qos_mod, void *qos_channel, opal_buffer_t * buffer) {
+    orte_qos_module_t *qos = (orte_qos_module_t *) (qos_mod);
+    if (NULL != qos)
+        return (qos->open (qos_channel, buffer));
+    else
+        ORTE_ERROR_LOG (ORTE_ERR_BAD_PARAM);
+    return ORTE_ERR_BAD_PARAM;
+}
+/* Forward to the module's close(); only logs ORTE_ERR_BAD_PARAM when qos_mod is NULL. */
+void orte_qos_close_channel (void *qos_mod, void *qos_channel) {
+    orte_qos_module_t *qos = (orte_qos_module_t *) (qos_mod);
+    if (NULL != qos)
+        qos->close (qos_channel);
+    else
+        ORTE_ERROR_LOG (ORTE_ERR_BAD_PARAM);
+}
+/* Forward to the module's init_recv() (its return value is dropped); only logs when qos_mod is NULL. */
+void orte_qos_init_recv_channel (void *qos_mod, void *qos_channel, opal_list_t * qos_attributes) {
+    orte_qos_module_t *qos = (orte_qos_module_t *) (qos_mod);
+    if (NULL != qos)
+        qos->init_recv (qos_channel, qos_attributes);
+    else
+        ORTE_ERROR_LOG (ORTE_ERR_BAD_PARAM);
+}
+/* Forward to the module's cmp(); logs ORTE_ERR_BAD_PARAM and returns -1 when qos_mod is NULL. */
+int orte_qos_cmp_channel (void *qos_mod, void *qos_channel, opal_list_t * qos_attributes) {
+    orte_qos_module_t *qos = (orte_qos_module_t *) (qos_mod);
+    if (NULL != qos)
+        return (qos->cmp (qos_channel, qos_attributes));
+    ORTE_ERROR_LOG (ORTE_ERR_BAD_PARAM);
+    return -1;
+}
+/* Forward to the module's send(); logs ORTE_ERR_BAD_PARAM and returns ORTE_ERROR when qos_mod is NULL. */
+int orte_qos_send_channel (void *qos_mod, void *qos_channel, orte_rml_send_t *msg) {
+    orte_qos_module_t *qos = (orte_qos_module_t *) (qos_mod);
+    if (NULL != qos)
+        return(qos->send (qos_channel, msg));
+    else
+        ORTE_ERROR_LOG (ORTE_ERR_BAD_PARAM);
+    return ORTE_ERROR;
+}
+/* Forward to the module's recv(); logs ORTE_ERR_BAD_PARAM and returns ORTE_ERROR when qos_mod is NULL. */
+int orte_qos_recv_channel (void *qos_mod, void *qos_channel, orte_rml_recv_t *msg) {
+    orte_qos_module_t *qos = (orte_qos_module_t *) (qos_mod);
+    if (NULL != qos)
+        return(qos->recv(qos_channel, msg));
+    else
+        ORTE_ERROR_LOG (ORTE_ERR_BAD_PARAM);
+    return ORTE_ERROR;
+}
+
+
diff --git a/orte/mca/qos/base/qos_base_frame.c b/orte/mca/qos/base/qos_base_frame.c
new file mode 100644
index 00000000000..e14ddbc2e3c
--- /dev/null
+++ b/orte/mca/qos/base/qos_base_frame.c
@@ -0,0 +1,118 @@
+/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
+/*
+ * Copyright (c) 2014 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "orte_config.h"
+#include "orte/constants.h"
+
+#include "opal/class/opal_bitmap.h"
+#include "opal/mca/mca.h"
+#include "opal/util/output.h"
+#include "opal/mca/base/base.h"
+
+#include "orte/mca/rml/base/base.h"
+#include "orte/mca/qos/base/base.h"
+#include "orte/mca/qos/qos.h"
+#if OPAL_ENABLE_FT_CR == 1
+#include "orte/mca/state/state.h"
+#endif
+
+/*
+ * The following file was created by configure. It contains extern
+ * statements and the definition of an array of pointers to each
+ * component's public mca_base_component_t struct.
+ */
+
+#include "orte/mca/qos/base/static-components.h"
+
+/*
+ * Global variables
+ */
+orte_qos_base_t orte_qos_base;
+OPAL_TIMING_DECLARE(tm_qos)
+/* Framework register hook: with OPAL_ENABLE_TIMING, registers the boolean "timing" variable (default false); always returns ORTE_SUCCESS. */
+static int orte_qos_base_register(mca_base_register_flag_t flags)
+{
+#if OPAL_ENABLE_TIMING
+    /* Detailed timing setup */
+    orte_qos_base.timing = false;
+    (void) mca_base_var_register ("orte", "qos", "base", "timing",
+                                  "Enable QOS timings",
+                                  MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
+                                  OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
+                                  &orte_qos_base.timing);
+#endif
+    return ORTE_SUCCESS;
+}
+/* Framework close hook: destructs the actives array, reports timings, then closes the framework's components. */
+static int orte_qos_base_close(void)
+{
+
+
+    /* shutdown all active transports - NOTE(review): the loop below is disabled, so no component shutdown() is called here */
+    /*while (NULL != (cli = (mca_base_component_list_item_t *) opal_list_remove_first (&orte_qos_base.actives))) {
+        component = (mca_qos_base_component_t*)cli->cli_component;
+        if (NULL != component->shutdown) {
+            component->shutdown();
+        }
+        OBJ_RELEASE(cli);
+    }*/
+    // TO DO
+
+    /* destruct our internal lists */
+    OBJ_DESTRUCT(&orte_qos_base.actives);
+    OPAL_TIMING_EVENT((&tm_qos, "Finish"));
+    OPAL_TIMING_REPORT(orte_qos_base.timing, &tm_qos);
+
+    return mca_base_framework_components_close(&orte_qos_base_framework, NULL);
+}
+
+/**
+ * Function for finding and opening either all MCA components,
+ * or the one that was specifically requested via a MCA parameter.
+ */
+static int orte_qos_base_open(mca_base_open_flag_t flags)
+{
+    /* setup globals - NOTE(review): only actives is constructed here; open_channels is not touched in this function */
+    OBJ_CONSTRUCT(&orte_qos_base.actives, opal_pointer_array_t);
+    opal_pointer_array_init(&orte_qos_base.actives, ORTE_QOS_MAX_COMPONENTS, INT_MAX, 1);   /* presumably (initial size, max size, growth step) - confirm */
+
+/*
+#if OPAL_ENABLE_FT_CR == 1
+
+    orte_state.add_job_state(ORTE_JOB_STATE_FT_CHECKPOINT, orte_qos_base_ft_event, ORTE_ERROR_PRI);
+    orte_state.add_job_state(ORTE_JOB_STATE_FT_CONTINUE, orte_qos_base_ft_event, ORTE_ERROR_PRI);
+    orte_state.add_job_state(ORTE_JOB_STATE_FT_RESTART, orte_qos_base_ft_event, ORTE_ERROR_PRI);
+#endif*/
+
+    OPAL_TIMING_INIT(&tm_qos);
+
+    /* Open up all available components; their result is this hook's return value */
+    return mca_base_framework_components_open(&orte_qos_base_framework, flags);
+}
+/* Framework object wiring the register/open/close hooks above to the static component list. */
+MCA_BASE_FRAMEWORK_DECLARE(orte, qos, "Messaging Quality of Service Subsystem",
+                           orte_qos_base_register, orte_qos_base_open, orte_qos_base_close,
+                           mca_qos_base_static_components, 0);
+
+/*** QOS CLASS INSTANCES ***/
+/* Constructor: sets up the channel's (initially empty) attribute list. */
+static void channel_cons (orte_qos_base_channel_t *ptr)
+{
+    OBJ_CONSTRUCT(&ptr->attributes, opal_list_t);
+}
+static void channel_des (orte_qos_base_channel_t *ptr)   /* destructor: tears down the attribute list */
+{
+    OPAL_LIST_DESTRUCT(&ptr->attributes);
+}
+OBJ_CLASS_INSTANCE (orte_qos_base_channel_t,
+                    opal_object_t,
+                    channel_cons, channel_des);   /* NOTE(review): parent class is opal_object_t - check the struct really starts with one */
+
+
diff --git a/orte/mca/qos/base/qos_base_select.c b/orte/mca/qos/base/qos_base_select.c
new file mode 100644
index 00000000000..26fe71bfcac
--- /dev/null
+++ b/orte/mca/qos/base/qos_base_select.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2014 Intel, Inc. All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#include "opal/mca/mca.h" +#include "opal/util/output.h" +#include "opal/mca/base/base.h" + +#include "orte/util/show_help.h" + +#include "orte/runtime/orte_globals.h" +#include "orte/mca/qos/qos.h" +#include "orte/mca/qos/base/base.h" + + +/** + * Function for selecting all runnable modules from those that are + * available. + * + * Call the init function on all available modules. + */ +int orte_qos_base_select(void) +{ + mca_base_component_list_item_t *cli; + mca_qos_base_component_t *component; + int count = 0; + + /* Query all available components and ask if their transport is available */ + OPAL_LIST_FOREACH(cli, &orte_qos_base_framework.framework_components, mca_base_component_list_item_t) { + component = (mca_qos_base_component_t *) cli->cli_component; + + opal_output_verbose(5, orte_qos_base_framework.framework_output, + "mca:qos:select: checking available component %s", + component->qos_base.mca_component_name); + if (NULL == component->start ) + opal_output_verbose(5, orte_qos_base_framework.framework_output, + "mca:qos:select: component %s start function is null, type =%d", + component->qos_base.mca_component_name, component->type); + else { + /* if it fails to startup, then skip it */ + if (ORTE_SUCCESS != component->start()) { + opal_output_verbose(5, orte_qos_base_framework.framework_output, + "mca:qos:select: Skipping component [%s] - failed to initialize", + component->qos_base.mca_component_name ); + continue; + } + } + count++; + /* store each qos componenet in the actives pointer array at the index of that component type */ + opal_pointer_array_set_item(&orte_qos_base.actives, + component->type, component); + } + + if (0 == count) { + /* no support available means we really cannot run */ + opal_output_verbose(5, orte_qos_base_framework.framework_output, + "mca:qos:select: Init failed to return any available 
QoS components"); + orte_show_help("help-qos-base.txt", "no-interfaces-avail", true); + return ORTE_ERR_SILENT; + } + opal_output_verbose(5, orte_qos_base_framework.framework_output, + "mca:qos:select: Found %d active QoS components", + count); + return ORTE_SUCCESS; +} diff --git a/orte/mca/qos/noop/Makefile.am b/orte/mca/qos/noop/Makefile.am new file mode 100644 index 00000000000..c4585de201c --- /dev/null +++ b/orte/mca/qos/noop/Makefile.am @@ -0,0 +1,34 @@ +# +# Copyright (c) 2014 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sources = \ + qos_noop.h \ + qos_noop_component.c + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). + +if MCA_BUILD_orte_qos_noop_DSO +component_noinst = +component_install = mca_qos_noop.la +else +component_noinst = libmca_qos_noop.la +component_install = +endif + +mcacomponentdir = $(ortelibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_qos_noop_la_SOURCES = $(sources) +mca_qos_noop_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_qos_noop_la_SOURCES = $(sources) +libmca_qos_noop_la_LDFLAGS = -module -avoid-version + diff --git a/orte/mca/qos/noop/qos_noop.h b/orte/mca/qos/noop/qos_noop.h new file mode 100644 index 00000000000..350d3110d7e --- /dev/null +++ b/orte/mca/qos/noop/qos_noop.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2014 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/** + * @file + * + * QoS No-op Component interface + * + * + * + */ + +#ifndef MCA_QOS_NOOP_H +#define MCA_QOS_NOOP_H + +#include "orte_config.h" +#include "orte/mca/qos/qos.h" +#include "orte/mca/qos/base/base.h" + +BEGIN_C_DECLS + + +ORTE_MODULE_DECLSPEC extern orte_qos_component_t mca_qos_noop_component; + +extern orte_qos_module_t orte_qos_noop_module; + +END_C_DECLS + +#endif /* MCA_QOS_NOOP_H */ diff --git a/orte/mca/qos/noop/qos_noop_channel_handlers.c b/orte/mca/qos/noop/qos_noop_channel_handlers.c new file mode 100644 index 00000000000..5083ab48d15 --- /dev/null +++ b/orte/mca/qos/noop/qos_noop_channel_handlers.c @@ -0,0 +1,339 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + * qos_base_channel_handlers.c - contains base functions handlers for open, send and close channel requests. 
+ */
+
+/*
+ * includes
+ */
+#include "orte_config.h"
+
+#ifdef HAVE_STRING_H
+#include <string.h>
+#endif
+
+#include "orte/constants.h"
+#include "orte/types.h"
+
+#include "opal/dss/dss.h"
+#include "opal/util/output.h"
+#include "opal/util/timings.h"
+#include "opal/class/opal_list.h"
+
+#include "orte/mca/errmgr/errmgr.h"
+#include "orte/runtime/orte_globals.h"
+#include "orte/runtime/orte_wait.h"
+#include "orte/util/name_fns.h"
+
+#include "orte/mca/qos/qos.h"
+#include "orte/mca/qos/base/base.h"
+
+
+/**
+ * Pack the attributes marked ORTE_ATTR_GLOBAL into a buffer, preceded by
+ * their count.
+ *
+ * The count written ahead of the attributes covers only the entries that are
+ * actually packed, so a reader that unpacks "count" attributes stays in step
+ * with the buffer even when the list also holds local attributes.
+ *
+ * @param buffer          destination buffer
+ * @param qos_attributes  list of orte_attribute_t to inspect
+ * @return ORTE_SUCCESS, or the first error reported by opal_dss.pack
+ */
+static int orte_qos_base_pack_attributes (opal_buffer_t * buffer,
+                                          opal_list_t * qos_attributes)
+{
+    int32_t num_attributes = 0;
+    int32_t rc = ORTE_SUCCESS;
+    orte_attribute_t *kv;
+
+    /* count only what will really be sent */
+    OPAL_LIST_FOREACH(kv, qos_attributes, orte_attribute_t) {
+        if (ORTE_ATTR_GLOBAL == kv->local) {
+            num_attributes++;
+        }
+    }
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, (void*)(&num_attributes), 1, ORTE_STD_CNTR))) {
+        ORTE_ERROR_LOG(rc);
+        return rc;
+    }
+    OPAL_LIST_FOREACH(kv, qos_attributes, orte_attribute_t) {
+        if (ORTE_ATTR_GLOBAL == kv->local) {
+            if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, (void*)&kv, 1, ORTE_ATTRIBUTE))) {
+                ORTE_ERROR_LOG(rc);
+                return rc;
+            }
+        }
+    }
+    return rc;
+}
+
+/**
+ * Unpack a count followed by that many attributes and append each one to
+ * qos_attributes.
+ *
+ * On failure the attributes appended so far are left on the list.
+ *
+ * @param buffer          source buffer
+ * @param qos_attributes  list that receives the unpacked attributes
+ * @return ORTE_SUCCESS, or the first error reported by opal_dss.unpack
+ */
+static int orte_qos_base_unpack_attributes (opal_buffer_t *buffer,
+                                            opal_list_t *qos_attributes)
+{
+    orte_attribute_t *kv;
+    int32_t count, n, k;
+    int32_t rc=ORTE_SUCCESS;
+    /* unpack the attributes */
+    n=1;
+    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &count,
+                                              &n, ORTE_STD_CNTR))) {
+        ORTE_ERROR_LOG(rc);
+        return rc;
+    }
+    for (k=0; k < count; k++) {
+        n=1;
+        if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &kv,
+                                                  &n, ORTE_ATTRIBUTE))) {
+            ORTE_ERROR_LOG(rc);
+            return rc;
+        }
+        opal_list_append(qos_attributes, &kv->super);
+    }
+    return rc;
+}
+
+/**
+ * Select the QoS component named by the ORTE_QOS_TYPE attribute, attach its
+ * module to the rml channel and ask that module to create the QoS channel.
+ *
+ * @param channel         rml channel; channel->qos is set to the chosen module
+ * @param qos_attributes  requested attributes; must contain ORTE_QOS_TYPE
+ * @return the module's create() result, or NULL when the type attribute is
+ *         missing, out of range, or has no component stored in the actives
+ *         array
+ */
+void* orte_qos_base_create_channel ( orte_rml_channel_t *channel,
+                                     opal_list_t *qos_attributes)
+{
+    /* the attribute is requested as OPAL_UINT8, so give the lookup 8-bit
+     * storage to fill in, as the rml code does with its own local.
+     * NOTE(review): assumes orte_get_attribute stores through *data; reading
+     * back through the pointer is correct either way. */
+    uint8_t type_storage = 0;
+    uint8_t *type = &type_storage;
+    int32_t type_val;
+    mca_qos_base_component_t *qos_comp;
+
+    if (!orte_get_attribute( qos_attributes, ORTE_QOS_TYPE, (void**)&type, OPAL_UINT8)) {
+        return NULL;
+    }
+    type_val = *type;
+    //check if type is valid: it indexes the actives array
+    if (0 > type_val || ORTE_QOS_MAX_COMPONENTS <= type_val) {
+        return NULL;
+    }
+    // associate the qos module
+    qos_comp = (mca_qos_base_component_t *) opal_pointer_array_get_item(&orte_qos_base.actives, type_val);
+    if (NULL == qos_comp) {
+        /* no component was stored for this type */
+        return NULL;
+    }
+    channel->qos = (void*) &qos_comp->mod;
+    // call create channel function of the module (attributes + channel number).
+    return (qos_comp->mod.create( qos_attributes, channel->channel_num));
+}
+
+/**
+ * Create a base QoS channel object of type orte_qos_noop.
+ *
+ * Stores the type and the caller's ORTE_QOS_WINDOW_SIZE in the new channel's
+ * attribute list.
+ *
+ * @param qos_attributes  requested attributes; must contain
+ *                        ORTE_QOS_WINDOW_SIZE no larger than
+ *                        ORTE_QOS_MAX_WINDOW_SIZE
+ * @return the new channel, or NULL on any failure (the partially built
+ *         channel is released first)
+ */
+void * orte_qos_base_create (opal_list_t *qos_attributes)
+{
+    orte_qos_base_channel_t * base_chan;
+    int32_t rc;
+    int32_t window_val = 0;
+    int32_t *window = &window_val;
+    /* 8-bit storage to match the OPAL_UINT8 type given to orte_set_attribute */
+    uint8_t type_val = (uint8_t) orte_qos_noop;
+
+    base_chan = OBJ_NEW (orte_qos_base_channel_t);
+    if (NULL == base_chan) {
+        return NULL;
+    }
+    // TBD _ we ignore inapplicable attributes for now - need to return error?
+    // get attributes of interest to the base and store them locally.
+    if (ORTE_SUCCESS != (rc = orte_set_attribute( &base_chan->attributes, ORTE_QOS_TYPE, ORTE_ATTR_GLOBAL, (void*)&type_val, OPAL_UINT8))) {
+        ORTE_ERROR_LOG(rc);
+        OBJ_RELEASE(base_chan);
+        return NULL;
+    }
+    // window size??
+    if (!orte_get_attribute (qos_attributes, ORTE_QOS_WINDOW_SIZE, (void**)&window, OPAL_UINT32)) {
+        OBJ_RELEASE(base_chan);
+        return NULL;
+    }
+    /* reject windows above the maximum, the same test noop_create applies */
+    if ( ORTE_QOS_MAX_WINDOW_SIZE < (*window)) {
+        ORTE_ERROR_LOG(OPAL_ERR_VALUE_OUT_OF_BOUNDS);
+        OBJ_RELEASE(base_chan);
+        return NULL;
+    }
+    if (ORTE_SUCCESS != (rc = orte_set_attribute(&base_chan->attributes, ORTE_QOS_WINDOW_SIZE,
+                                                 ORTE_ATTR_GLOBAL, (void*)window, OPAL_UINT32))) {
+        ORTE_ERROR_LOG(rc);
+        OBJ_RELEASE(base_chan);
+        return NULL;
+    }
+    return base_chan;
+}
+
+/**
+ * Pack the channel's stored attributes into the open-channel request buffer.
+ *
+ * @param qos_channel  an orte_qos_base_channel_t created earlier
+ * @param buffer       buffer that will be sent to the peer
+ * @return ORTE_SUCCESS or the packing error (also logged)
+ */
+int orte_qos_base_open_channel ( void * qos_channel,
+                                 opal_buffer_t *buffer)
+{
+    int32_t rc = ORTE_SUCCESS;
+    orte_qos_base_channel_t *base_chan;
+    base_chan = (orte_qos_base_channel_t*) (qos_channel);
+    // the Qos module puts the non local attributes to be sent to the peer in a list at the time of create.
+    // pack those attributes into the buffer.
+    if (ORTE_SUCCESS != (rc = orte_qos_base_pack_attributes(buffer, &base_chan->attributes))) {
+        ORTE_ERROR_LOG(rc);
+    }
+    return rc;
+}
+
+/**
+ * Receive-side channel initialisation; intentionally empty.
+ */
+void orte_qos_base_chan_recv_init ( void * qos_channel,
+                                    opal_list_t *qos_attributes)
+{
+    // nothing to do for no op channel.
+}
+
+/**
+ * Release a channel object previously returned by a create function.
+ *
+ * @param qos_channel  an orte_qos_base_channel_t
+ */
+void orte_qos_base_close_channel ( void * qos_channel)
+{
+    /* release through a typed pointer, as noop_close does, rather than
+     * through the void * parameter */
+    orte_qos_base_channel_t *base_chan = (orte_qos_base_channel_t*) (qos_channel);
+    OBJ_RELEASE(base_chan);
+}
+
+/**
+ * Compare an existing channel against a requested attribute list.
+ *
+ * @param qos_channel     an orte_qos_base_channel_t
+ * @param qos_attributes  requested attributes
+ * @return 0 when type and window size both match, 1 when the types match but
+ *         the window sizes differ, ORTE_ERROR when an attribute is missing or
+ *         the types differ
+ */
+int orte_qos_base_comp_channel (void *qos_channel,
+                                opal_list_t *qos_attributes)
+{
+    /* zero-initialised so that a load narrower than int32_t (the type is
+     * requested as OPAL_UINT8) cannot leave indeterminate bytes in the values
+     * compared below */
+    int32_t chan_typea = 0, chan_typeb = 0, *ptr, window_sizea = 0, window_sizeb = 0;
+    orte_qos_base_channel_t *base_chan = (orte_qos_base_channel_t*) qos_channel;
+    ptr = &chan_typea;
+    if (!orte_get_attribute(&base_chan->attributes, ORTE_QOS_TYPE, (void**)&ptr, OPAL_UINT8)) {
+        return ORTE_ERROR;
+    }
+    ptr = &chan_typeb;
+    if (!orte_get_attribute(qos_attributes, ORTE_QOS_TYPE, (void**)&ptr, OPAL_UINT8)) {
+        return ORTE_ERROR;
+    }
+    if (chan_typea != chan_typeb) {
+        return ORTE_ERROR;
+    }
+    ptr = &window_sizea;
+    if (!orte_get_attribute(&base_chan->attributes, ORTE_QOS_WINDOW_SIZE, (void**)&ptr, OPAL_UINT32)) {
+        return ORTE_ERROR;
+    }
+    ptr = &window_sizeb;
+    if (!orte_get_attribute(qos_attributes, ORTE_QOS_WINDOW_SIZE, (void**)&ptr, OPAL_UINT32)) {
+        return ORTE_ERROR;
+    }
+    return (window_sizea != window_sizeb);
+}
+/*static void orte_qos_open_channel_reply_send_callback ( int status,
+                                               orte_process_name_t* sender,
+                                               opal_buffer_t* buffer,
+                                               orte_rml_tag_t tag,
+                                               void* cbdata)
+{
+    // this is the send call back for open channel reply
+    orte_qos_channel_t *channel = (orte_qos_channel_t*) cbdata;
+    // if the message was not sent we should retry or complete the request appropriately
+    if (status!= ORTE_SUCCESS)
+    {
+        //retry request. 
+ } + // if success then release the buffer and do open channel request completion after receiving response from peer + OBJ_RELEASE(buffer); +} + +static void orte_qos_open_channel_send_callback ( int status, + orte_process_name_t* sender, + opal_buffer_t* buffer, + orte_rml_tag_t tag, + void* cbdata) +{ + // this is the send call back for open channel request + orte_qos_open_channel_t *req = (orte_qos_open_channel_t*) cbdata; + // if the message was not sent we should retry or complete the request appropriately + if (status!= ORTE_SUCCESS) + { + // retry if retriable failure. + // else call completion handler. + //remove channel from list + opal_list_remove_item(&orte_qos_base.open_channels, &req->channel->super); + OBJ_RELEASE(req->channel); + // update msg status and channel num so end point can have appropriate info + req->msg->status = status; + req->msg->channel_num = ORTE_QOS_INVALID_CHANNEL_NUM; + ORTE_RML_OPEN_CHANNEL_COMPLETE(req->msg); + OBJ_RELEASE(req); + } + // if success then release the buffer and do open channel request completion after receiving response from peer + OBJ_RELEASE(buffer); +} + +void orte_qos_base_open_channel(int sd, short args, void *cbdata) +{ + opal_buffer_t *buffer; int rc; + orte_qos_open_channel_t *open_channel; + orte_qos_open_channel_request_t *req = (orte_qos_open_channel_request_t*)cbdata; + // create channel on sender side by calling the respective qos module. 
+ req->post.channel = orte_qos_base_create_channel(req->post.msg->dst, req->post.msg->qos_attributes); + buffer = OBJ_NEW(opal_buffer_t); + //pack qos attributes list in buffer + if (ORTE_SUCCESS != orte_qos_base_pack_attributes(buffer, req->post.msg->qos_attributes)) { + //invalid attributes complete request with error + } + open_channel = OBJ_NEW(orte_qos_open_channel_t); + open_channel->msg = req->post.msg; + open_channel->channel = req->post.channel; + open_channel->msg->channel_num = open_channel->channel->channel_num; + OBJ_RELEASE(req); + // send request to peer to open channel + orte_rml.send_buffer_nb( &open_channel->msg->dst, buffer, ORTE_RML_TAG_OPEN_CHANNEL_REQ, + orte_qos_open_channel_send_callback, + open_channel); + // now post a recieve for open_channel_response tag + orte_rml.recv_buffer_nb(&open_channel->msg->dst, ORTE_RML_TAG_OPEN_CHANNEL_REPLY, + ORTE_RML_NON_PERSISTENT, orte_qos_open_channel_reply_callback, open_channel); + +} */ + + +/* +void orte_qos_open_channel_recv_callback (int status, + orte_process_name_t* peer, + struct opal_buffer_t* buffer, + orte_rml_tag_t tag, + void* cbdata) +{ + int32_t rc; + opal_list_t *qos_attributes = OBJ_NEW(opal_list_t); + orte_qos_channel_t *channel; + // un pack attributes first + if ( ORTE_SUCCESS == orte_qos_base_unpack_attributes( buffer, qos_attributes)) { + // create channel + if (NULL != (channel = orte_qos_base_create_channel ( *peer, qos_attributes)) ) { + buffer = OBJ_NEW (opal_buffer_t); + if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &channel->channel_num , 1, OPAL_INT))) { + ORTE_ERROR_LOG(rc); + return; + } + // send channel accept to sender with local channel num + orte_rml.send_buffer_nb ( peer, buffer, ORTE_RML_TAG_OPEN_CHANNEL_REPLY, + orte_qos_open_channel_reply_send_callback, + channel); + } + else { + // reply with error message + } + } + else { + //reply with error message + } +} + +void orte_qos_open_channel_reply_callback (int status, + orte_process_name_t* peer, + struct 
opal_buffer_t* buffer, + orte_rml_tag_t tag, + void* cbdata) +{ + orte_qos_open_channel_t *req = (orte_qos_open_channel_t*) cbdata; + orte_qos_channel_t * channel = req->channel; + int32_t count = 1; + int32_t rc; + // process open_channel response from a peer for a open channel request + if (ORTE_SUCCESS == status) { + // unpack buffer and get peer channel number. + + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &channel->peer_channel_num, &count, OPAL_INT))) { + ORTE_ERROR_LOG(rc); + // do error completion + channel->state = orte_qos_channel_closed; + //remove channel from list + opal_list_remove_item(&orte_qos_base.open_channels, &channel->super); + OBJ_RELEASE(channel); + // update msg status and channel num so end point can have appropriate info + req->msg->status = ORTE_ERR_OPEN_CHANNEL_PEER_RESPONSE_INV; + req->msg->channel_num = ORTE_QOS_INVALID_CHANNEL_NUM; + } + else { + channel->state = orte_qos_channel_open; + req->msg->status = ORTE_SUCCESS; + req->msg->channel_num = channel->channel_num; + } + } + else { + channel->state = orte_qos_channel_closed; + //remove channel from list + opal_list_remove_item(&orte_qos_base.open_channels, &channel->super); + OBJ_RELEASE(channel); + // update msg status and channel num so end point can have appropriate info + req->msg->status = ORTE_ERR_OPEN_CHANNEL_PEER_FAIL; + req->msg->channel_num = ORTE_QOS_INVALID_CHANNEL_NUM; + } + ORTE_RML_OPEN_CHANNEL_COMPLETE(req->msg); + OBJ_RELEASE(req); + OBJ_RELEASE(buffer); + // 1: If success record peer channel number, update channel state. + //2: If not destroy channel. + //3: complete openchannel request. +} */ + + diff --git a/orte/mca/qos/noop/qos_noop_component.c b/orte/mca/qos/noop/qos_noop_component.c new file mode 100644 index 00000000000..34c526638dd --- /dev/null +++ b/orte/mca/qos/noop/qos_noop_component.c @@ -0,0 +1,181 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2014 - 2015 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "orte_config.h"
+#include "orte/constants.h"
+
+
+#include "opal/mca/mca.h"
+#include "opal/util/output.h"
+#include "opal/mca/base/base.h"
+
+
+#include "orte/mca/qos/base/base.h"
+#include "orte/mca/qos/qos.h"
+
+static int qos_noop_start (void);
+static void qos_noop_shutdown (void);
+static void* noop_create (opal_list_t *qos_attributes, uint32_t channel_num);
+static int noop_open (void *qos_channel,
+                      opal_buffer_t * buf);
+static int noop_send ( void *qos_channel, orte_rml_send_t *msg);
+static int noop_recv (void *channel, orte_rml_recv_t *msg);
+static void noop_close (void * channel);
+static int noop_init_recv (void *channel, opal_list_t *attributes);
+static int noop_cmp (void *channel, opal_list_t *attributes);
+static void noop_send_callback (orte_rml_send_t *msg);
+
+/**
+ * noop module definition
+ *
+ * Lists the same eight entry points as the "mod" table embedded in the
+ * component below, including send_callback, so neither table is left with a
+ * NULL slot.
+ */
+orte_qos_module_t orte_qos_noop_module = {
+    noop_create,
+    noop_open,
+    noop_send,
+    noop_recv,
+    noop_close,
+    noop_init_recv,
+    noop_cmp,
+    noop_send_callback
+};
+
+/**
+ * component definition
+ */
+mca_qos_base_component_t mca_qos_noop_component = {
+    /* First, the mca_base_component_t struct containing meta
+       information about the component itself */
+
+    {
+        MCA_QOS_BASE_VERSION_2_0_0,
+
+        "noop", /* MCA component name */
+        ORTE_MAJOR_VERSION,  /* MCA component major version */
+        ORTE_MINOR_VERSION,  /* MCA component minor version */
+        ORTE_RELEASE_VERSION,  /* MCA component release version */
+        NULL,
+        NULL,
+    },
+    qos_noop_start,
+    qos_noop_shutdown,
+    orte_qos_noop,
+    {
+        noop_create,
+        noop_open,
+        noop_send,
+        noop_recv,
+        noop_close,
+        noop_init_recv,
+        noop_cmp,
+        noop_send_callback
+    }
+};
+
+/** Component start hook; the noop component has nothing to set up. */
+static int qos_noop_start(void) {
+    return ORTE_SUCCESS;
+}
+
+/** Component shutdown hook; nothing to tear down. */
+static void qos_noop_shutdown (void) {
+}
+
+/**
+ * Create a noop QoS channel.
+ *
+ * Records the channel number, then stores the type (orte_qos_noop) and the
+ * caller's ORTE_QOS_WINDOW_SIZE in the channel's attribute list.
+ *
+ * @param qos_attributes  requested attributes; must contain
+ *                        ORTE_QOS_WINDOW_SIZE no larger than
+ *                        ORTE_QOS_MAX_WINDOW_SIZE
+ * @param channel_num     number to record in the new channel
+ * @return the new channel, or NULL on any failure (the partially built
+ *         channel is released first)
+ */
+static void* noop_create (opal_list_t *qos_attributes, uint32_t channel_num) {
+    orte_qos_base_channel_t * noop_chan;
+    int32_t rc;
+    /* point the out-parameter at real storage before the lookup, the same
+     * way noop_cmp does */
+    int32_t window_val = 0;
+    int32_t *window = &window_val;
+    /* 8-bit storage to match the OPAL_UINT8 type given to orte_set_attribute */
+    uint8_t type_val = (uint8_t) orte_qos_noop;
+
+    noop_chan = OBJ_NEW (orte_qos_base_channel_t);
+    if (NULL == noop_chan) {
+        return NULL;
+    }
+    noop_chan->channel_num = channel_num;
+    // TBD _ we ignore inapplicable attributes for now - need to return error?
+    // get attributes of interest to the base and store them locally.
+    if (ORTE_SUCCESS != (rc = orte_set_attribute( &noop_chan->attributes, ORTE_QOS_TYPE, ORTE_ATTR_GLOBAL, (void*)&type_val, OPAL_UINT8))) {
+        ORTE_ERROR_LOG(rc);
+        OBJ_RELEASE(noop_chan);
+        return NULL;
+    }
+    // window size??
+    if (!orte_get_attribute (qos_attributes, ORTE_QOS_WINDOW_SIZE, (void**)&window, OPAL_UINT32)) {
+        OBJ_RELEASE(noop_chan);
+        return NULL;
+    }
+    if ( ORTE_QOS_MAX_WINDOW_SIZE < (*window)) {
+        ORTE_ERROR_LOG(OPAL_ERR_VALUE_OUT_OF_BOUNDS);
+        OBJ_RELEASE(noop_chan);
+        return NULL;
+    }
+    if (ORTE_SUCCESS != (rc = orte_set_attribute(&noop_chan->attributes, ORTE_QOS_WINDOW_SIZE,
+                                                 ORTE_ATTR_GLOBAL, (void*)window, OPAL_UINT32))) {
+        ORTE_ERROR_LOG(rc);
+        OBJ_RELEASE(noop_chan);
+        return NULL;
+    }
+    return noop_chan;
+}
+
+/**
+ * Pack the channel's stored attributes into the open-channel request buffer.
+ *
+ * @param qos_channel  channel returned by noop_create
+ * @param buf          buffer that will be sent to the peer
+ * @return ORTE_SUCCESS or the packing error (also logged)
+ */
+static int noop_open (void *qos_channel, opal_buffer_t * buf)  {
+    int32_t rc = ORTE_SUCCESS;
+    orte_qos_base_channel_t *noop_chan;
+    noop_chan = (orte_qos_base_channel_t*) (qos_channel);
+    // the Qos module puts the non local attributes to be sent to the peer in a list at the time of create.
+    // pack those attributes into the buffer.
+    if (ORTE_SUCCESS != (rc = orte_qos_base_pack_attributes(buf, &noop_chan->attributes))) {
+        ORTE_ERROR_LOG(rc);
+    }
+    return rc;
+}
+
+/** Send hook: only emits a verbose trace; the message itself is untouched. */
+static int noop_send ( void *qos_channel, orte_rml_send_t *msg) {
+    //nothing to do
+    OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
+                         "%s noop_send msg = %p to peer = %s\n",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                         (void*)msg, ORTE_NAME_PRINT(&msg->dst)));
+    return ORTE_SUCCESS;
+}
+
+/** Receive hook: only emits a verbose trace; the message itself is untouched. */
+static int noop_recv (void *qos_channel, orte_rml_recv_t *msg) {
+    OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
+                         "%s noop_recv msg = %p from peer = %s\n",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                         (void*)msg, ORTE_NAME_PRINT(&msg->sender)));
+    return ORTE_SUCCESS;
+}
+
+/** Release a channel created by noop_create. */
+static void noop_close (void * channel) {
+    orte_qos_base_channel_t *noop_chan = (orte_qos_base_channel_t*) channel;
+    OBJ_RELEASE (noop_chan);
+}
+
+/** Receive-side initialisation; nothing to do for the noop channel. */
+static int noop_init_recv (void *channel, opal_list_t *attributes) {
+    return ORTE_SUCCESS;
+}
+
+/**
+ * Compare an existing channel against a requested attribute list.
+ *
+ * @return 0 when type and window size both match, 1 when the types match but
+ *         the window sizes differ, ORTE_ERROR when an attribute is missing or
+ *         the types differ
+ */
+static int noop_cmp (void *channel, opal_list_t *attributes) {
+    /* zero-initialised so that a load narrower than int32_t (the type is
+     * requested as OPAL_UINT8) cannot leave indeterminate bytes in the values
+     * compared below */
+    int32_t chan_typea = 0, chan_typeb = 0, *ptr, window_sizea = 0, window_sizeb = 0;
+    orte_qos_base_channel_t *noop_chan = (orte_qos_base_channel_t*) channel;
+    ptr = &chan_typea;
+    if (!orte_get_attribute(&noop_chan->attributes, ORTE_QOS_TYPE, (void**)&ptr, OPAL_UINT8)) {
+        return ORTE_ERROR;
+    }
+    ptr = &chan_typeb;
+    if (!orte_get_attribute(attributes, ORTE_QOS_TYPE, (void**)&ptr, OPAL_UINT8)) {
+        return ORTE_ERROR;
+    }
+    if (chan_typea != chan_typeb) {
+        return ORTE_ERROR;
+    }
+    ptr = &window_sizea;
+    if (!orte_get_attribute(&noop_chan->attributes, ORTE_QOS_WINDOW_SIZE, (void**)&ptr, OPAL_UINT32)) {
+        return ORTE_ERROR;
+    }
+    ptr = &window_sizeb;
+    if (!orte_get_attribute(attributes, ORTE_QOS_WINDOW_SIZE, (void**)&ptr, OPAL_UINT32)) {
+        return ORTE_ERROR;
+    }
+    return (window_sizea != window_sizeb);
+}
+
+/** Send-completion hook: completes the rml send request straight away. */
+static void noop_send_callback (orte_rml_send_t *msg) {
+    // nothing to do for noop
+    ORTE_RML_SEND_COMPLETE(msg);
+}
diff --git a/orte/mca/qos/qos.h b/orte/mca/qos/qos.h
new file mode 
100644 index 00000000000..cd3c28974a6 --- /dev/null +++ b/orte/mca/qos/qos.h @@ -0,0 +1,168 @@ +/** + * copyright (c) 2014 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * This header defines Quality of Service Interface for Runtime messaging + */ + +/** + * @file + * + * Quality of Service (QoS) Communication Interface + * + * The QoS layer is responsible for providing quality of service for + * messages exchanged between two ORTE processes through the use of + * channels. + */ +#ifndef MCA_QOS_H_ +#define MCA_QOS_H_ + +#include "orte_config.h" +#include "orte/types.h" + +#ifdef HAVE_UNISTD_H +#include +#endif + +#include "opal/class/opal_list.h" +#include "opal/mca/mca.h" +#include "orte/mca/rml/base/base.h" +#include "orte/mca/qos/base/base.h" +#include "orte/mca/errmgr/errmgr.h" + +BEGIN_C_DECLS +/* ******************************************************************** */ +struct opal_buffer_t; +struct orte_process_name_t; + + +/* ******************************************************************** */ +#define ORTE_QOS_INVALID_CHANNEL_NUM 0xFFFF +#define ORTE_QOS_MAX_COMPONENTS 5 +typedef void (*orte_qos_callback_fn_t)(int status, + int channel_num, + struct orte_process_name_t* peer, + void* cbdata); + +typedef int (*mca_qos_base_component_start_fn_t)(void); +typedef void (*mca_qos_base_component_shutdown_fn_t)(void); + +#if OPAL_ENABLE_FT_CR == 1 +typedef int (*mca_qos_base_component_ft_event_fn_t)(int state); +#endif +ORTE_DECLSPEC void * orte_qos_create_channel (void *qos_mod, opal_list_t *qos_attributes, uint32_t channel_num); +ORTE_DECLSPEC int orte_qos_open_channel (void *qos_mod, void *qos_channel, opal_buffer_t * buffer); +ORTE_DECLSPEC void orte_qos_close_channel (void *qos_mod, void *qos_channel); +ORTE_DECLSPEC void orte_qos_init_recv_channel (void *qos_mod, void *qos_channel, opal_list_t *qos_attributes); +ORTE_DECLSPEC int orte_qos_cmp_channel (void *qos_mod, void *qos_channel, 
opal_list_t *qos_attributes); +ORTE_DECLSPEC int orte_qos_send_channel (void *qos_mod, void *qos_channel, orte_rml_send_t *msg); +ORTE_DECLSPEC int orte_qos_recv_channel (void *qos_mod, void *qos_channel, orte_rml_recv_t *msg); +/** + * qos module (channel) create function + * initialize type specific attributes of the channel. + */ +typedef void* (*orte_qos_base_module_create_fn_t) (opal_list_t *qos_attributes, uint32_t channel_num); + +/** + * qos module (channel) open function + * this function is called when rml_open_channel is requested + */ +typedef int (*orte_qos_base_module_open_fn_t) (void *qos_channel, + opal_buffer_t * buf); + +/** + * qos module (channel) send function + * this function is called when rml_send_channel is requested + */ +typedef int (*orte_qos_base_module_send_fn_t) ( void * qos_channel, + orte_rml_send_t *send); + +/** + * qos module (channel) recv function + * this function is called when a message is received on a channel + */ +typedef int (*orte_qos_base_module_recv_fn_t) ( void * channel, + orte_rml_recv_t *msg); +/** + * qos module (channel) close function + * this function is called when a message is received on a channel + */ + +typedef void (*orte_qos_base_module_close_fn_t) ( void * channel); +/** + * qos module (channel) init recv + * this function is used to initialize a channel for receiving msgs (called in response to open_channel req from peer) + */ +typedef int (*orte_qos_base_module_init_recv_fn_t) (void * channel, opal_list_t * attributes); + +/** + * qos module (channel) compare functions + * compares attributes of existing channel with the requested list of attributes + */ +typedef int (*orte_qos_base_module_cmp_fn_t) (void * channel, opal_list_t * attributes); + +/** + * qos module (channel) compare functions + * compares attributes of existing channel with the requested list of attributes + */ +typedef void (*orte_qos_base_module_send_callback_fn_t) (orte_rml_send_t *msg); + +/** + * + * the qos channel data 
structure + */ +typedef struct { + orte_qos_base_module_create_fn_t create; + orte_qos_base_module_open_fn_t open; + orte_qos_base_module_send_fn_t send; + orte_qos_base_module_recv_fn_t recv; + orte_qos_base_module_close_fn_t close; + orte_qos_base_module_init_recv_fn_t init_recv; + orte_qos_base_module_cmp_fn_t cmp; + orte_qos_base_module_send_callback_fn_t send_callback; +} orte_qos_module_t; + +typedef enum { + orte_qos_noop = 0, + orte_qos_ack = 1, + orte_qos_nack = 2, + orte_qos_ack_nack_hybrid = 3, + orte_qos_multipath = 4, +}orte_qos_type_t ; + +typedef struct { + mca_base_component_t qos_base; + mca_qos_base_component_start_fn_t start; + mca_qos_base_component_shutdown_fn_t shutdown; + orte_qos_type_t type; + orte_qos_module_t mod; +/* mca_qos_base_componenet_open_channel_fn_t open_channel; + mca_qos_base_component_send_channel_nb_fn_t send_channel; + mca_qos_base_component_recv_channel_nb_fn_t recv_channel; + mca_qos_base_component_close_channel_fn_t close_channel;*/ +#if OPAL_ENABLE_FT_CR == 1 + mca_qos_base_component_ft_event_fn_t ft_event; +#endif +} mca_qos_base_component_t; + +/** + * Macro for use in components that are of type oob + */ +#define MCA_QOS_BASE_VERSION_2_0_0 \ +MCA_BASE_VERSION_2_0_0, \ +"qos", 2, 0, 0 + +END_C_DECLS + +#endif + + + + + + + + diff --git a/orte/mca/rml/base/rml_base_channel_handlers.c b/orte/mca/rml/base/rml_base_channel_handlers.c new file mode 100644 index 00000000000..0c3ecf63efc --- /dev/null +++ b/orte/mca/rml/base/rml_base_channel_handlers.c @@ -0,0 +1,454 @@ +/* + * + * Copyright (c) 2015 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + * + */ + +/* + * includes + */ +#include "orte_config.h" + +#ifdef HAVE_STRING_H +#include +#endif + +#include "orte/constants.h" +#include "orte/types.h" + +#include "opal/dss/dss.h" +#include "opal/util/output.h" +#include "opal/util/timings.h" +#include "opal/class/opal_list.h" + +#include "orte/mca/errmgr/errmgr.h" +#include "orte/runtime/orte_globals.h" +#include "orte/runtime/orte_wait.h" +#include "orte/util/name_fns.h" + +#include "orte/mca/rml/rml.h" +#include "orte/mca/rml/base/base.h" +#include "orte/mca/rml/base/rml_contact.h" +#include "orte/mca/qos/base/base.h" + + +static int unpack_channel_attributes (opal_buffer_t *buffer, opal_list_t *qos_attributes); +static orte_rml_channel_t * get_channel ( orte_process_name_t * peer, opal_list_t *qos_attributes); +static int send_open_channel_reply (orte_process_name_t *peer, + orte_rml_channel_t *channel, + bool accept); + +void orte_rml_base_open_channel(int fd, short flags, void *cbdata) +{ + int32_t *type, type_val; + orte_rml_send_request_t *req = (orte_rml_send_request_t*)cbdata; + orte_process_name_t peer; + orte_rml_open_channel_t *open_chan; + orte_rml_channel_t *channel; + opal_buffer_t *buffer; + peer = req->post.channel.dst; + OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, + "%s rml_open_channel to peer %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&peer))); + OPAL_TIMING_EVENT((&tm_rml, "to %s", ORTE_NAME_PRINT(&peer))); + channel = OBJ_NEW(orte_rml_channel_t); + channel->channel_num = opal_pointer_array_add (&orte_rml_base.open_channels, channel); + channel->peer = peer; + open_chan = OBJ_NEW(orte_rml_open_channel_t); + open_chan->dst = peer; + open_chan->qos_attributes = req->post.channel.qos_attributes; + open_chan->cbfunc = req->post.channel.cbfunc; + open_chan->cbdata = req->post.channel.cbdata; + // OBJ_RELEASE(req); + OPAL_OUTPUT_VERBOSE((1, 
orte_rml_base_framework.framework_output, + "%s rml_open_channel to peer %s SUCCESS", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&peer))); + // associate open channel request and the newly created channel object + open_chan->channel = channel; + type = &type_val; + orte_get_attribute( open_chan->qos_attributes, ORTE_QOS_TYPE, (void**)&type, OPAL_UINT8); + open_chan->channel->qos = (void*) orte_qos_get_module (open_chan->qos_attributes); + + OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, + "%s rml_open_channel type = %d to peer %s ", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + *type, + ORTE_NAME_PRINT(&peer))); + // now associate qos with the channel based on user requested attributes. + if ( NULL != open_chan->channel->qos) + { + open_chan->channel->qos_channel_ptr = orte_qos_create_channel (open_chan->channel->qos, open_chan->qos_attributes, + open_chan->channel->channel_num); + // create rml send for open channel request. Call the corresponding QoS module to pack the attributes. 
+ buffer = OBJ_NEW (opal_buffer_t); + // call QoS module to pack attributes + if ( ORTE_SUCCESS == (orte_qos_open_channel(open_chan->channel->qos, open_chan->channel->qos_channel_ptr, buffer))) + { + /* pack channel number at the end */ + opal_dss.pack(buffer, (void*) &open_chan->channel->channel_num, 1, OPAL_UINT32); + OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, + "%s rml_open_channel to peer %s SUCCESS sending to peer", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&peer))); + // now post a recieve for open_channel_response tag + orte_rml.recv_buffer_nb(&peer, ORTE_RML_TAG_OPEN_CHANNEL_RESP, + ORTE_RML_NON_PERSISTENT, orte_rml_open_channel_resp_callback, open_chan); + // send request to peer to open channel + orte_rml.send_buffer_nb( &peer, buffer, ORTE_RML_TAG_OPEN_CHANNEL_REQ, + orte_rml_open_channel_send_callback, + open_chan); + + } else { + open_chan->status = ORTE_ERR_PACK_FAILURE; + ORTE_RML_OPEN_CHANNEL_COMPLETE(open_chan); + opal_pointer_array_set_item ( &orte_rml_base.open_channels, open_chan->channel->channel_num, NULL); + // call QoS module to release the QoS channel object. 
+ orte_qos_close_channel (open_chan->channel->qos, open_chan->channel->qos_channel_ptr); + OBJ_RELEASE (buffer); + OBJ_RELEASE(open_chan->channel); + OBJ_RELEASE(open_chan); + } + } + else + { + // do error completion because a component for the requested QoS does not exist + open_chan->status = ORTE_ERR_QOS_TYPE_UNSUPPORTED; + ORTE_RML_OPEN_CHANNEL_COMPLETE(open_chan); + opal_pointer_array_set_item ( &orte_rml_base.open_channels, open_chan->channel->channel_num, NULL); + OBJ_RELEASE(open_chan->channel); + OBJ_RELEASE(open_chan); + } + +} + +void orte_rml_open_channel_send_callback ( int status, + orte_process_name_t* sender, + opal_buffer_t* buffer, + orte_rml_tag_t tag, + void* cbdata) +{ + // this is the send call back for open channel request + orte_rml_open_channel_t *req = (orte_rml_open_channel_t*) cbdata; + OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, + "%s rml_open_channel_send_callback to peer %s status = %d", + ORTE_NAME_PRINT(sender), + ORTE_NAME_PRINT(&req->dst), status)); + // if the message was not sent we should retry or complete the request appropriately + if (status!= ORTE_SUCCESS) + { + req->status = status; + ORTE_RML_OPEN_CHANNEL_COMPLETE(req); + opal_pointer_array_set_item ( &orte_rml_base.open_channels, req->channel->channel_num, NULL); + // call QoS module to release the QoS channel object. 
+        orte_qos_close_channel (req->channel->qos, req->channel->qos_channel_ptr);
+        OBJ_RELEASE(req->channel);
+        OBJ_RELEASE(req);
+    }
+    else {
+        // start a timer for response from peer
+    }
+    //OBJ_RELEASE(buffer);
+}
+
+/**
+ * Receive callback for the peer's reply to an open-channel request.
+ *
+ * Unpacks the peer's accept/reject flag and, on accept, the peer's channel
+ * number.  On success the channel is marked open.  On a reject or any unpack
+ * failure the channel is removed from the open_channels array, its QoS
+ * channel is closed and the channel object is released.  In every case the
+ * open-channel request is then completed with req->status and released.
+ *
+ * @param status  delivery status handed in by the rml; only logged here
+ * @param peer    process that sent the reply
+ * @param buffer  reply payload
+ * @param tag     rml tag the reply arrived on (unused)
+ * @param cbdata  the orte_rml_open_channel_t posted with the receive
+ */
+void orte_rml_open_channel_resp_callback (int status,
+                                          orte_process_name_t* peer,
+                                          struct opal_buffer_t* buffer,
+                                          orte_rml_tag_t tag,
+                                          void* cbdata)
+{
+    orte_rml_open_channel_t *req = (orte_rml_open_channel_t*) cbdata;
+    orte_rml_channel_t * channel = req->channel;
+    /* remember the number now: the failure paths release the channel, and
+     * the closing trace must not read through it afterwards */
+    const int channel_num = (int) channel->channel_num;
+    int32_t rc;
+    bool peer_resp = false;
+    int32_t count = 1;
+
+    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
+                         "%s rml_open_channel_resp_callback to peer %s status = %d channel = %p",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                         ORTE_NAME_PRINT(peer), status,
+                         (void*)channel));
+    // unpack peer response from buffer to determine if peer has accepted the open request
+    if ((ORTE_SUCCESS == (rc = opal_dss.unpack(buffer, &peer_resp, &count, OPAL_BOOL))) && peer_resp) {
+
+        OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
+                             "%s rml_open_channel_resp_callback to peer response = %d",
+                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                             peer_resp));
+        /* response will contain the peer channel number - the peer does not have the
+           option to change the channel attributes */
+        // unpack and get peer channel number.
+        count = 1;
+        if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &channel->peer_channel, &count, OPAL_INT))) {
+            ORTE_ERROR_LOG(rc);
+            req->status = ORTE_ERR_UNPACK_FAILURE;
+            opal_pointer_array_set_item ( &orte_rml_base.open_channels, req->channel->channel_num, NULL);
+            // call QoS module to release the QoS channel object.
+            orte_qos_close_channel (req->channel->qos, req->channel->qos_channel_ptr);
+            OBJ_RELEASE(req->channel);
+            // TBD : should we send a close channel to the peer??
+        }
+        else {
+            // call qos module to update the channel state.??
+            req->status = ORTE_SUCCESS;
+            req->channel->state = orte_rml_channel_open;
+        }
+    }
+    else {
+        if (rc) {
+            /* the accept/reject flag itself could not be unpacked */
+            ORTE_ERROR_LOG(rc);
+            req->status = ORTE_ERR_UNPACK_FAILURE;
+        } else {
+            req->status = ORTE_ERR_OPEN_CHANNEL_PEER_REJECT;
+        }
+        opal_pointer_array_set_item ( &orte_rml_base.open_channels, req->channel->channel_num, NULL);
+        // call QoS module to release the QoS channel object.
+        orte_qos_close_channel (req->channel->qos, req->channel->qos_channel_ptr);
+        OBJ_RELEASE(req->channel);
+    }
+    /* "channel" is used only as an address for the trace from here on; it may
+     * already have been released above */
+    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
+                         "%s rml_open_channel_resp_callback to peer %s status = %d channel =%p num = %d",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                         ORTE_NAME_PRINT(peer), req->status,
+                         (void*)channel, channel_num));
+    ORTE_RML_OPEN_CHANNEL_COMPLETE(req);
+    OBJ_RELEASE(req);
+}
+
+/**
+ * Unpack a count followed by that many attributes, mark each one
+ * ORTE_ATTR_GLOBAL and append it to qos_attributes.
+ *
+ * On failure the attributes appended so far are left on the list.
+ *
+ * @param buffer          source buffer
+ * @param qos_attributes  list that receives the unpacked attributes
+ * @return ORTE_SUCCESS, or the first error reported by opal_dss.unpack
+ */
+static int unpack_channel_attributes (opal_buffer_t *buffer,
+                                      opal_list_t *qos_attributes)
+{
+    orte_attribute_t *kv;
+    int32_t count, n, k;
+    int32_t rc=ORTE_SUCCESS;
+    /* unpack the attributes */
+    n=1;
+    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &count,
+                                              &n, ORTE_STD_CNTR))) {
+        ORTE_ERROR_LOG(rc);
+        return rc;
+    }
+    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
+                         "%s rml_unpack_attributes num attributes = %d",
+                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                         count));
+    for (k=0; k < count; k++) {
+        n=1;
+        if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &kv,
+                                                  &n, ORTE_ATTRIBUTE))) {
+            ORTE_ERROR_LOG(rc);
+            return rc;
+        }
+        OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
+                             "rml_unpack_attributes unpacked attribute key = %d, value = %d ",
+                             kv->key,
+                             kv->data.uint8));
+        kv->local = ORTE_ATTR_GLOBAL;
+        opal_list_append(qos_attributes, &kv->super);
+    }
+    return rc;
+}
+
+void orte_rml_open_channel_recv_callback (int status,
+                                          orte_process_name_t* peer,
+                                          struct opal_buffer_t* buffer,
+                                          orte_rml_tag_t tag,
+                                          void* cbdata)
+{
+    opal_list_t qos_attributes;
+    orte_rml_channel_t *channel;
+    uint8_t *type, type_val = 10;
+ int32_t count =1; + OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, + "%s rml_open_channel_recv_callback from peer %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(peer))); + OBJ_CONSTRUCT(&qos_attributes, opal_list_t); + /* unpack attributes first */ + if ( ORTE_SUCCESS == unpack_channel_attributes( buffer, &qos_attributes)) { + type = &type_val; + orte_get_attribute( &qos_attributes, ORTE_QOS_TYPE, (void**)&type, OPAL_UINT8); + OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, + "rml_open_channel_recv_callback type =%d", + type_val)); + /* scan the list of channels to see if we already have a channel with qos_attributes */ + if (NULL == (channel = get_channel ( peer, &qos_attributes))) { + /* create a new channel for the req */ + channel = OBJ_NEW(orte_rml_channel_t); + channel->channel_num = opal_pointer_array_add (&orte_rml_base.open_channels, channel); + OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, + "rml_open_channel_recv_callback channel num =%d", + channel->channel_num)); + channel->peer = *peer; + channel->receive = true; + channel->qos = (void*) orte_qos_get_module (&qos_attributes); + /* now associate qos with the channel based on requested attributes */ + channel->qos_channel_ptr = (void*) orte_qos_create_channel(channel->qos, &qos_attributes, + channel->channel_num); + if (channel->qos_channel_ptr) { + /* call qos to init recv state */ + orte_qos_init_recv_channel ( channel->qos, channel->qos_channel_ptr, &qos_attributes); + /* send channel accept reply to sender */ + if(ORTE_SUCCESS == send_open_channel_reply (peer, channel, true)) { + /* update channel state */ + channel->state = orte_rml_channel_open; + /*store src channel number */ + opal_dss.unpack(buffer, (void*) &channel->peer_channel, &count, OPAL_UINT32); + } + else { + /* the receiver shall not attempt to resend or send a reject message + instead we let the sender's request timeout at his end. 
+ release the channel etc */ + opal_pointer_array_set_item ( &orte_rml_base.open_channels, channel->channel_num, NULL); + orte_qos_close_channel (channel->qos, channel->qos_channel_ptr); + OBJ_RELEASE(channel); + } + } else { + send_open_channel_reply (peer, NULL, false); + opal_pointer_array_set_item ( &orte_rml_base.open_channels, channel->channel_num, NULL); + //orte_qos_close_channel (channel->qos, channel->qos_channel_ptr); + OBJ_RELEASE(channel); + } + } + else { + /*this means that there exists a channel with the same attributes which was + previously created on user or sender's open channel request + send channel accept reply to sender */ + if(ORTE_SUCCESS == send_open_channel_reply (peer, channel, true)) + /* exercise caution while updating state of a bidirectional channel*/ + channel->state = orte_rml_channel_open; + else { + /* the receiver shall not attempt to resend or send a reject message + instead we let the sender's request timeout at his end. + release the channel etc */ + opal_pointer_array_set_item ( &orte_rml_base.open_channels, channel->channel_num, NULL); + orte_qos_close_channel (channel->qos, channel->qos_channel_ptr); + OBJ_RELEASE(channel); + } + } + + } + else { + //reply with error message + send_open_channel_reply (peer, NULL, false); + } +} + +static int send_open_channel_reply (orte_process_name_t *peer, + orte_rml_channel_t *channel, + bool accept) +{ + opal_buffer_t *buffer; + int32_t rc; + buffer = OBJ_NEW (opal_buffer_t); + if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &accept , 1, OPAL_BOOL))) { + ORTE_ERROR_LOG(rc); + return rc; + } + if (accept) { + if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &channel->channel_num , 1, OPAL_INT))) { + ORTE_ERROR_LOG(rc); + return rc; + } + } + /* TBD: should specify reason for reject + send channel accept to sender */ + orte_rml.send_buffer_nb ( peer, buffer, ORTE_RML_TAG_OPEN_CHANNEL_RESP, + orte_rml_open_channel_reply_send_callback, + channel); + + return rc; +} + +static 
orte_rml_channel_t * get_channel ( orte_process_name_t * peer, opal_list_t *qos_attributes) +{ + orte_rml_channel_t *channel = NULL; + int32_t i = 0; + for (i=0; i < orte_rml_base.open_channels.size; i++) { + if (NULL != (channel = (orte_rml_channel_t*) opal_pointer_array_get_item (&orte_rml_base.open_channels, i))) { + /* compare basic properties */ + if ((OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &channel->peer, peer)) && + ((orte_rml_channel_open == channel->state) || + (orte_rml_channel_opening == channel->state))) + { + /* compare channel attributes */ + if( ORTE_SUCCESS == orte_qos_cmp_channel ( channel->qos, channel->qos_channel_ptr, qos_attributes)) { + /* we have an existing channel that we can use */ + /* make it a receive channel and inform qos to init recv state */ + channel->receive = true; + orte_qos_init_recv_channel ( channel->qos, channel->qos_channel_ptr, qos_attributes); + return channel; + } + else + return NULL; + } + } + } + return NULL; +} + +void orte_rml_open_channel_reply_send_callback ( int status, + orte_process_name_t* sender, + opal_buffer_t* buffer, + orte_rml_tag_t tag, + void* cbdata) +{ + // this is the send call back for open channel reply + orte_rml_channel_t *channel = (orte_rml_channel_t*) cbdata; + // if the message was not sent we should retry or release the channel resources + if (status!= ORTE_SUCCESS) + { + ORTE_ERROR_LOG (status); + // release channel + if(NULL != channel) { + opal_pointer_array_set_item ( &orte_rml_base.open_channels, channel->channel_num, NULL); + // call QoS module to release the QoS channel object. 
+ orte_qos_close_channel (channel->qos, channel->qos_channel_ptr); + OBJ_RELEASE(channel); + } else { + // we did not accept the request so nothing to do + } + } + // if success then release the buffer and do open channel request completion after receiving response from peer + OBJ_RELEASE(buffer); +} + +orte_rml_channel_t * orte_rml_base_get_channel (orte_rml_channel_num_t chan_num) { + orte_rml_channel_t * channel; + + channel = (orte_rml_channel_t*) opal_pointer_array_get_item (&orte_rml_base.open_channels, chan_num); + /* if (NULL != channel) + OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, + "orte_rml_base_get_channel channel = %p num=%d qos_channel= %p state =%d", + channel, chan_num, channel->qos_channel_ptr, channel->state)); + else + OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, + "orte_rml_base_get_channel channel %d is null", + chan_num));*/ + if ((NULL != channel) && (orte_rml_channel_open == channel->state)) + return channel; + else + return NULL; + return channel; +} + +void orte_rml_base_prep_send_channel (orte_rml_channel_t *channel, + orte_rml_send_t *send) +{ + // add channel number and notify Qos + send->dst_channel = channel->peer_channel; + orte_qos_send_channel (channel->qos, channel->qos_channel_ptr, send); +} + +void orte_rml_base_process_recv_channel (orte_rml_channel_t *channel, + orte_rml_recv_t *recv) +{ + // call qos for recv post processing + orte_qos_recv_channel (channel->qos, channel->qos_channel_ptr, recv); +} diff --git a/orte/mca/rml/oob/rml_oob_component.c b/orte/mca/rml/oob/rml_oob_component.c index 9c007658d0c..a70c619b9e2 100644 --- a/orte/mca/rml/oob/rml_oob_component.c +++ b/orte/mca/rml/oob/rml_oob_component.c @@ -6,17 +6,18 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. + * Copyright (c) 2014 Intel, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -100,8 +101,12 @@ orte_rml_oob_module_t orte_rml_oob_module = { orte_rml_oob_del_exception, orte_rml_oob_ft_event, - - orte_rml_oob_purge + + orte_rml_oob_purge, + orte_rml_oob_open_channel, + orte_rml_oob_send_channel_nb, + orte_rml_oob_send_buffer_channel_nb, + orte_rml_oob_close_channel } }; @@ -128,11 +133,11 @@ rml_oob_init(int* priority) *priority = 1; return &orte_rml_oob_module.super; } - + *priority = 1; - + OBJ_CONSTRUCT(&orte_rml_oob_module.exceptions, opal_list_t); - + init_done = true; return &orte_rml_oob_module.super; } @@ -142,7 +147,7 @@ orte_rml_oob_init(void) { /* enable the base receive to get updates on contact info */ orte_rml_base_comm_start(); - + return ORTE_SUCCESS; } @@ -152,7 +157,7 @@ orte_rml_oob_fini(void) { opal_list_item_t *item; - while (NULL != + while (NULL != (item = opal_list_remove_first(&orte_rml_oob_module.exceptions))) { OBJ_RELEASE(item); } @@ -160,7 +165,7 @@ orte_rml_oob_fini(void) /* clear the base receive */ orte_rml_base_comm_stop(); - + return ORTE_SUCCESS; } diff --git a/orte/test/system/oob_stress_channel.c b/orte/test/system/oob_stress_channel.c new file mode 100644 index 00000000000..62361825dda --- /dev/null +++ b/orte/test/system/oob_stress_channel.c @@ -0,0 +1,221 @@ +#include "orte_config.h" + +#include +#include +#include + +#include "opal/runtime/opal_progress.h" + +#include "orte/util/proc_info.h" +#include "orte/util/name_fns.h" +#include 
"orte/runtime/orte_globals.h" +#include "orte/mca/rml/rml.h" +#include "orte/mca/errmgr/errmgr.h" + +#include "orte/runtime/runtime.h" +#include "orte/runtime/orte_wait.h" +#include "orte/mca/qos/qos.h" +#include "orte/util/attr.h" + +#define MY_TAG 12345 +#define MAX_COUNT 3 + +static volatile bool msgs_recvd; +static volatile bool channel_inactive = false; +static volatile bool msg_active = false; +static volatile orte_rml_channel_num_t channel; +static volatile int num_msgs_recvd = 0; +static void send_channel_callback(int status, + orte_rml_channel_num_t channel_num, + orte_process_name_t * peer, + opal_list_t *qos_attributes, + void * cbdata) +{ + if (ORTE_SUCCESS != status) { + opal_output(0, "open channel not successful status =%d", status); + + } else { + channel = channel_num; + opal_output(0, "Open channel successful - channel num = %d", channel_num); + + } + channel_inactive = false; +} +static void send_callback(int status, orte_process_name_t *peer, + opal_buffer_t* buffer, orte_rml_tag_t tag, + void* cbdata) + +{ + OBJ_RELEASE(buffer); + if (ORTE_SUCCESS != status) { + opal_output(0, "rml_send_nb not successful status =%d", status); + } + msg_active = false; +} + +static void recv_callback(int status, orte_process_name_t *sender, + opal_buffer_t* buffer, orte_rml_tag_t tag, + void* cbdata) + +{ + //orte_rml_recv_cb_t *blob = (orte_rml_recv_cb_t*)cbdata; + num_msgs_recvd++; + opal_output(0, "recv_callback received msg =%d", num_msgs_recvd); + if ( num_msgs_recvd == 5) { + num_msgs_recvd =0; + msgs_recvd = false; + /* transfer the sender */ + // blob->name.jobid = sender->jobid; + // blob->name.vpid = sender->vpid; + /* just copy the payload to the buf */ + //opal_dss.copy_payload(&blob->data, buffer); + /* flag as complete */ + // blob->active = false; + } + //else + // OBJ_DESTRUCT(blob); + +} + +static void channel_send_callback (int status, orte_rml_channel_num_t channel, + opal_buffer_t * buffer, orte_rml_tag_t tag, + void *cbdata) +{ + 
OBJ_RELEASE(buffer); + if (ORTE_SUCCESS != status) { + opal_output(0, "send_nb_channel not successful status =%d", status); + } + msg_active = false; +} + + +int main(int argc, char *argv[]){ + int count; + int msgsize; + int *type, type_val; + int *i, j, rc, n; + orte_process_name_t peer; + double maxpower; + opal_buffer_t *buf; + orte_rml_recv_cb_t blob; + opal_list_t *qos_attributes; + int window; + uint32_t timeout = 1; + bool retry = false; + uint8_t *msg; + /* + * Init + */ + orte_init(&argc, &argv, ORTE_PROC_NON_MPI); + + if (argc > 1) { + count = atoi(argv[1]); + if (count < 0) { + count = INT_MAX-1; + } + } else { + count = MAX_COUNT; + } + + peer.jobid = ORTE_PROC_MY_NAME->jobid; + peer.vpid = ORTE_PROC_MY_NAME->vpid + 1; + if (peer.vpid == orte_process_info.num_procs) { + peer.vpid = 0; + } + type_val = orte_qos_ack; + type = &type_val; + window = 5; + count =3; + if (ORTE_PROC_MY_NAME->vpid == 0) { + qos_attributes = OBJ_NEW (opal_list_t); + if (ORTE_SUCCESS == (rc = orte_set_attribute( qos_attributes, + ORTE_QOS_TYPE, ORTE_ATTR_GLOBAL, (void*)type, OPAL_UINT8))) { + type = &window; + if (ORTE_SUCCESS == (rc = orte_set_attribute(qos_attributes, ORTE_QOS_WINDOW_SIZE, + ORTE_ATTR_GLOBAL, (void*) type, OPAL_UINT32))) { + // orte_get_attribute( &qos_attributes, ORTE_QOS_WINDOW_SIZE, (void**)&type, OPAL_UINT32); + // opal_output(0, "%s set attribute window =%d complete \n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), *type ); + type = &timeout; + orte_set_attribute (qos_attributes, ORTE_QOS_ACK_NACK_TIMEOUT, ORTE_ATTR_GLOBAL, + (void*)type, OPAL_UINT32); + + orte_set_attribute (qos_attributes, ORTE_QOS_MSG_RETRY, ORTE_ATTR_GLOBAL, + NULL, OPAL_BOOL); + /* Uncomment following lines to print channel attributes */ + /* + opal_output(0, "%s set attribute retry =%d complete \n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), retry ); + + orte_get_attribute( qos_attributes, ORTE_QOS_TYPE, (void**)&type, OPAL_UINT8); + opal_output(0, "%s set attribute type =%d complete \n", 
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), *type ); + orte_get_attribute( qos_attributes, ORTE_QOS_WINDOW_SIZE, (void**)&type, OPAL_UINT32); + opal_output(0, "%s set attribute window =%d complete \n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), *type ); + orte_get_attribute( qos_attributes, ORTE_QOS_ACK_NACK_TIMEOUT, (void**)&type, OPAL_UINT32); + opal_output(0, "%s set attribute timeout =%d complete \n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), *type );*/ + + channel_inactive = true; + orte_rml.open_channel ( &peer, qos_attributes, send_channel_callback, NULL); + opal_output(0, "%s process sent open channel request %d waiting for completion \n", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j); + + ORTE_WAIT_FOR_COMPLETION(channel_inactive); + opal_output(0, "%s open channel complete", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) ); + } + } + } + else { + // other process waits to recv a buffer from rank 0 + orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, MY_TAG, + ORTE_RML_PERSISTENT, + recv_callback, NULL); + } + /* send a window of messages to peer on the channel */ + for (j=1; j < count+1; j++) { + if (ORTE_PROC_MY_NAME->vpid == 0) { + /* rank0 starts ring */ + msg_active = true; + for (n = 0; n< window; n++ ) { + buf = OBJ_NEW(opal_buffer_t); + maxpower = (double)(j%7); + msgsize = (int)pow(10.0, maxpower); + opal_output(0, "Ring %d message %d size %d bytes", j,n, msgsize); + msg = (uint8_t*)malloc(msgsize); + opal_dss.pack(buf, msg, msgsize, OPAL_BYTE); + free(msg); + orte_rml.send_buffer_channel_nb(channel, buf, MY_TAG, channel_send_callback, NULL); + //orte_rml.send_buffer_nb(&peer, buf,MY_TAG, send_callback, NULL) + } + //orte_rml.send_buffer_nb(&peer, buf,MY_TAG, send_callback, NULL) + /* wait for it to come around */ + OBJ_CONSTRUCT(&blob, orte_rml_recv_cb_t); + blob.active = true; + orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, MY_TAG, + ORTE_RML_NON_PERSISTENT, + orte_rml_recv_callback, &blob); + ORTE_WAIT_FOR_COMPLETION(blob.active); + OBJ_DESTRUCT(&blob); + 
ORTE_WAIT_FOR_COMPLETION(msg_active); + opal_output(0, "%s Ring %d completed", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j); + sleep(2); + } + else { + msgs_recvd = true; + ORTE_WAIT_FOR_COMPLETION(msgs_recvd); + buf = OBJ_NEW(opal_buffer_t); + /* send it along */ + msg_active = true; + maxpower = (double)(j%7); + msgsize = (int)pow(10.0, maxpower); + opal_output(0, "Ring %d message %d size %d bytes", j,n, msgsize); + msg = (uint8_t*)malloc(msgsize); + opal_dss.pack(buf, msg, msgsize, OPAL_BYTE); + free(msg); + orte_rml.send_buffer_nb(&peer, buf, MY_TAG, send_callback, NULL); + ORTE_WAIT_FOR_COMPLETION(msg_active); + sleep (2); + } + } + + orte_finalize(); + + return 0; +} diff --git a/orte/util/attr.h b/orte/util/attr.h index c90c036be50..168dd2e423b 100644 --- a/orte/util/attr.h +++ b/orte/util/attr.h @@ -1,9 +1,9 @@ /* * Copyright (c) 2014 Intel, Inc. All rights reserved * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -80,7 +80,7 @@ typedef uint16_t orte_job_flags_t; #define ORTE_JOB_FLAG_DEBUGGER_DAEMON 0x0010 // job is launching debugger daemons #define ORTE_JOB_FLAG_FORWARD_OUTPUT 0x0020 // forward output from the apps #define ORTE_JOB_FLAG_DO_NOT_MONITOR 0x0040 // do not monitor apps for termination -#define ORTE_JOB_FLAG_FORWARD_COMM 0x0080 // +#define ORTE_JOB_FLAG_FORWARD_COMM 0x0080 // #define ORTE_JOB_FLAG_RECOVERABLE 0x0100 // job is recoverable #define ORTE_JOB_FLAG_RESTART 0x0200 // #define ORTE_JOB_FLAG_PROCS_MIGRATING 0x0400 // some procs in job are migrating from one node to another @@ -168,6 +168,16 @@ typedef uint16_t orte_proc_flags_t; #define ORTE_PROC_MAX_KEY 400 +/*** MESSAGING QOS ATTRIBUTE KEYS ***/ +#define ORTE_QOS_START_KEY ORTE_PROC_MAX_KEY +#define ORTE_QOS_TYPE (ORTE_QOS_START_KEY + 1) //uint8- defining what type of qos - refer to orte_qos_type enum for values +#define ORTE_QOS_WINDOW_SIZE (ORTE_QOS_START_KEY + 2) // uint32 - number of messages in the window (stream) +#define ORTE_QOS_ACK_NACK_TIMEOUT 
(ORTE_QOS_START_KEY + 3) //uint32 - timeout value in secs for msg/window ack nack +#define ORTE_QOS_MSG_RETRY (ORTE_QOS_START_KEY + 4) // bool- resend message upon ACK fail or NACK or timeout. +#define ORTE_QOS_NUM_RETRIES (ORTE_QOS_START_KEY + 5) // uint32 - number of retries. + +#define ORTE_QOS_MAX_KEY 500 + #define ORTE_ATTR_KEY_MAX 1000 From 6c703ec18223560dad4edb50897f4617e10bc0d8 Mon Sep 17 00:00:00 2001 From: annu13 Date: Fri, 24 Apr 2015 04:53:58 -0700 Subject: [PATCH 12/14] close channel and out of order changes --- opal/class/opal_hotel.h | 14 + opal/class/opal_object.h | 18 +- orte/include/orte/constants.h | 7 +- orte/mca/qos/ack/qos_ack.h | 17 +- orte/mca/qos/ack/qos_ack_component.c | 488 +++++++++++------- orte/mca/qos/base/base.h | 1 + orte/mca/qos/base/qos_base_channel_handlers.c | 12 +- orte/mca/qos/base/qos_base_frame.c | 2 +- orte/mca/qos/noop/qos_noop_component.c | 20 +- orte/mca/qos/qos.h | 4 +- orte/mca/rml/base/base.h | 63 ++- orte/mca/rml/base/rml_base_channel_handlers.c | 207 +++++--- orte/mca/rml/base/rml_base_frame.c | 19 +- orte/mca/rml/base/rml_base_msg_handlers.c | 152 +++--- orte/mca/rml/base/rml_base_receive.c | 16 +- orte/mca/rml/ftrm/rml_ftrm_component.c | 9 +- orte/mca/rml/oob/rml_oob_component.c | 5 +- orte/mca/rml/oob/rml_oob_send.c | 63 ++- orte/mca/rml/rml.h | 5 +- orte/test/system/oob_stress_channel.c | 165 +++--- 20 files changed, 841 insertions(+), 446 deletions(-) diff --git a/opal/class/opal_hotel.h b/opal/class/opal_hotel.h index 5d1f58ae8b0..f8ecd4c0cb5 100644 --- a/opal/class/opal_hotel.h +++ b/opal/class/opal_hotel.h @@ -284,6 +284,20 @@ static inline void opal_hotel_checkout_and_return_occupant(opal_hotel_t *hotel, } +/** + * Returns true if the hotel is empty (no occupant) + * @param hotel Pointer to hotel (IN) + * @return bool true if empty false if there is a occupant(s) + * + */ +static inline bool opal_hotel_is_empty (opal_hotel_t *hotel) +{ + if (hotel->last_unoccupied_room == hotel->num_rooms - 1) + return 
true; + else + return false; +} + /** * Destroy a hotel. * diff --git a/opal/class/opal_object.h b/opal/class/opal_object.h index 79470d586eb..dd9c33cc33a 100644 --- a/opal/class/opal_object.h +++ b/opal/class/opal_object.h @@ -5,15 +5,15 @@ * Copyright (c) 2004-2007 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2014 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -46,13 +46,13 @@ * OBJ_CLASS_DECLARATION(sally_t); * @endcode * All classes must have a parent which is also class. - * + * * In an implementation (.c) file, instantiate a class descriptor for * the class like this: * @code * OBJ_CLASS_INSTANCE(sally_t, parent_t, sally_construct, sally_destruct); * @endcode - * This macro actually expands to + * This macro actually expands to * @code * opal_class_t sally_t_class = { * "sally_t", @@ -240,7 +240,7 @@ struct opal_object_t { * constructor. * * @param type Type (class) of the object - * @return Pointer to the object + * @return Pointer to the object */ static inline opal_object_t *opal_obj_new(opal_class_t * cls); #if OPAL_ENABLE_DEBUG @@ -304,12 +304,14 @@ static inline opal_object_t *opal_obj_new_debug(opal_class_t* type, const char* * to NULL. 
* * @param object Pointer to the object + * + * */ #if OPAL_ENABLE_DEBUG #define OBJ_RELEASE(object) \ do { \ - assert(NULL != ((opal_object_t *) (object))->obj_class); \ assert(OPAL_OBJ_MAGIC_ID == ((opal_object_t *) (object))->obj_magic_id); \ + assert(NULL != ((opal_object_t *) (object))->obj_class); \ if (0 == opal_obj_update((opal_object_t *) (object), -1)) { \ OBJ_SET_MAGIC_ID((object), 0); \ opal_obj_run_destructors((opal_object_t *) (object)); \ @@ -457,7 +459,7 @@ static inline void opal_obj_run_destructors(opal_object_t * object) * * @param size Size of the object * @param cls Pointer to the class descriptor of this object - * @return Pointer to the object + * @return Pointer to the object */ static inline opal_object_t *opal_obj_new(opal_class_t * cls) { diff --git a/orte/include/orte/constants.h b/orte/include/orte/constants.h index 0095ff9fcfd..d675a3499c0 100644 --- a/orte/include/orte/constants.h +++ b/orte/include/orte/constants.h @@ -11,6 +11,7 @@ * All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -140,7 +141,11 @@ enum { ORTE_ERR_QOS_ACK_WINDOW_FULL = (ORTE_ERR_BASE - 49), ORTE_ERR_ACK_TIMEOUT_SENDER = (ORTE_ERR_BASE - 50), ORTE_ERR_ACK_TIMEOUT_RECEIVER = (ORTE_ERR_BASE - 51), - ORTE_ERR_LOST_MSG_IN_WINDOW = (ORTE_ERR_BASE - 52) + ORTE_ERR_LOST_MSG_IN_WINDOW = (ORTE_ERR_BASE - 52), + ORTE_ERR_CHANNEL_BUSY = (ORTE_ERR_BASE - 53), + ORTE_ERR_DUPLICATE_MSG = (ORTE_ERR_BASE - 54), + ORTE_ERR_OUT_OF_ORDER_MSG = (ORTE_ERR_BASE - 55), + ORTE_ERR_OPEN_CHANNEL_DUPLICATE = (ORTE_ERR_BASE - 56), }; #define ORTE_ERR_MAX (ORTE_ERR_BASE - 100) diff --git a/orte/mca/qos/ack/qos_ack.h b/orte/mca/qos/ack/qos_ack.h index bca06b2f208..f9c38f5b28e 100644 --- a/orte/mca/qos/ack/qos_ack.h +++ b/orte/mca/qos/ack/qos_ack.h @@ -35,6 +35,7 @@ BEGIN_C_DECLS #define ACK_WINDOW_COMPLETE 0 #define ACK_TIMEOUT 1 #define ACK_OUT_OF_ORDER 2 +#define ACK_RECV_MISSED_MSG 3 /* received previously missed msgs*/ typedef enum { orte_qos_ack_channel_state_inactive = 0, @@ -46,6 +47,7 @@ typedef enum { /* Ack Qos channel data structure */ typedef struct orte_qos_ack_channel { + opal_list_item_t super; uint32_t channel_num; // we retain the attributes so we can compare channels - we can get rid of this and compare incoming attributes // with attributes of interest to this channel type @@ -76,13 +78,24 @@ typedef struct orte_qos_ack_channel { OBJ_CLASS_DECLARATION(orte_qos_ack_channel_t); - extern orte_qos_module_t orte_qos_ack_module; +int orte_qos_ack_channel_get_msg_room (orte_qos_ack_channel_t * ack_chan, + uint32_t seq_num) +{ + return ack_chan->seq_num_to_room_num[(seq_num % QOS_ACK_MAX_OUTSTANDING_MSGS)]; +} + +void orte_qos_ack_channel_set_msg_room (orte_qos_ack_channel_t * ack_chan, + uint32_t seq_num, int room_num) +{ + ack_chan->seq_num_to_room_num[(seq_num % QOS_ACK_MAX_OUTSTANDING_MSGS)] = room_num; +} ORTE_DECLSPEC void orte_qos_ack_msg_ack_timeout_callback (struct opal_hotel_t *hotel, int room_num, void *occupant); ORTE_DECLSPEC 
void orte_qos_ack_msg_window_timeout_callback (int fd, short flags, void *cbdata); - +ORTE_DECLSPEC void orte_qos_ack_recv_msg_timeout_callback (struct opal_hotel_t *hotel, + int room_num, void *occupant); END_C_DECLS #endif /* MCA_QOS_ACK_H */ diff --git a/orte/mca/qos/ack/qos_ack_component.c b/orte/mca/qos/ack/qos_ack_component.c index 440f2975162..c2a60cdee1c 100644 --- a/orte/mca/qos/ack/qos_ack_component.c +++ b/orte/mca/qos/ack/qos_ack_component.c @@ -16,7 +16,7 @@ #include "opal/util/output.h" #include "opal/mca/base/base.h" - +#include "orte/mca/oob/base/base.h" #include "orte/mca/qos/base/base.h" #include "orte/mca/qos/qos.h" #include "qos_ack.h" @@ -29,14 +29,16 @@ static int ack_open (void *qos_channel, opal_buffer_t * buf); static int ack_send ( void *qos_channel, orte_rml_send_t *msg); static int ack_recv (void *channel, orte_rml_recv_t *msg); -static void ack_close (void * channel); +static int ack_close (void * channel); static int ack_init_recv (void *channel, opal_list_t *attributes); static int ack_cmp (void *channel, opal_list_t *attributes); static void ack_send_callback (orte_rml_send_t *msg); /* utility functions */ -int send_ack (orte_qos_ack_channel_t * channel, orte_rml_channel_num_t channel_num, - uint32_t *ack_seq_nums_array, uint32_t num_msgs_acked, uint32_t ack_type); +static inline int send_ack (orte_qos_ack_channel_t * channel, + orte_rml_channel_num_t channel_num, + uint32_t ack_type, + uint32_t last_msg_seq_num); void orte_qos_ack_channel_process_ack (int status, orte_process_name_t* sender, opal_buffer_t *buffer, orte_rml_tag_t tag, void *cbdata); @@ -46,6 +48,9 @@ void orte_qos_ack_msg_send_callback ( int status, struct opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata); +static inline int process_out_of_order_msg ( orte_qos_ack_channel_t *channel, + orte_rml_recv_t *msg); +static int hack = 0; /** * ack module definition */ @@ -204,8 +209,6 @@ static int ack_open (void *qos_channel, opal_buffer_t * buf) { } static int 
ack_send ( void *qos_channel, orte_rml_send_t *msg) { - int32_t rc = ORTE_SUCCESS; - struct timeval window_timeout; int32_t room_num; orte_qos_ack_channel_t *ack_chan = (orte_qos_ack_channel_t*) (qos_channel); if (ack_chan->out_msg_seq_num == ack_chan->window_first_seq_num -1 ) { @@ -230,7 +233,7 @@ static int ack_send ( void *qos_channel, orte_rml_send_t *msg) { /* check msg into hotel */ if( OPAL_SUCCESS == (opal_hotel_checkin(&ack_chan->outstanding_msgs, msg, &room_num ))) { /* store room number */ - ack_chan->seq_num_to_room_num[(msg->seq_num % QOS_ACK_MAX_OUTSTANDING_MSGS)] = room_num; + orte_qos_ack_channel_set_msg_room(ack_chan, msg->seq_num, room_num); } else { OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, "%s ack_send msg = %p to peer = %s returned with error %d\n", @@ -246,103 +249,230 @@ static int ack_send ( void *qos_channel, orte_rml_send_t *msg) { return ORTE_SUCCESS; } +static inline int send_ack (orte_qos_ack_channel_t * ack_chan, + orte_rml_channel_num_t channel_num, + uint32_t ack_type, uint32_t last_msg_seq_num) +{ + int rc; + orte_rml_channel_t *rml_channel; + opal_buffer_t *buffer; + uint32_t num_msgs_to_ack = 0; + uint32_t *ack_seq_num_array; + uint32_t i; + rml_channel = orte_rml_base_get_channel (channel_num); + num_msgs_to_ack = ack_chan->in_msg_seq_num - ack_chan->ack_msg_seq_num + 1; + OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, + "%s sending ack type = %d \n", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ack_type)); + if ( NULL != (ack_seq_num_array = malloc (sizeof(uint32_t) * num_msgs_to_ack))) { + for (i = 1; i <= num_msgs_to_ack ; i++) { + ack_seq_num_array[i-1] = ack_chan->ack_msg_seq_num + i; + OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, + "%s ack_recv acking msg %d to peer = %s\n", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ack_seq_num_array[i-1], + ORTE_NAME_PRINT(&rml_channel->peer))); + } + ack_seq_num_array[num_msgs_to_ack - 1] = last_msg_seq_num; + 
OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, + "%s ack_recv acking last msg %d to peer = %s\n", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ack_seq_num_array[num_msgs_to_ack - 1], + ORTE_NAME_PRINT(&rml_channel->peer))); + } + else { + OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, + "%s ack_recv cannot allocate ack array to send ack to peer = %s\n", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&rml_channel->peer))); + rc = ORTE_ERR_TEMP_OUT_OF_RESOURCE; + return rc; + } + buffer = OBJ_NEW (opal_buffer_t); + /* pack channel number */ + opal_dss.pack (buffer, &rml_channel->peer_channel, 1, OPAL_UINT32); + /* pack ack type */ + opal_dss.pack (buffer, &ack_type, 1, OPAL_UINT32); + /* pack num messages */ + opal_dss.pack (buffer, &num_msgs_to_ack, 1, OPAL_UINT32); + /* pack seq number array */ + for (i =0; ipeer, buffer, ORTE_RML_TAG_MSG_ACK, + orte_qos_ack_msg_send_callback, rml_channel); + if(ORTE_SUCCESS == rc) { + /* update last acked msg */ + ack_chan->ack_msg_seq_num = last_msg_seq_num; + } else { + //TO DO + } + return rc; +} + +static inline int process_out_of_order_msg ( orte_qos_ack_channel_t *ack_chan, + orte_rml_recv_t *msg) +{ + int32_t rc, room_num, first_lost_msg_seq_num, num_lost_msgs, i; + orte_rml_recv_t *out_msg; + void *occupant = NULL; + OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, + "%s process_out_of_order_msg msg %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + msg->seq_num)); + /* if this msg is a duplicate - then do nothing */ + if ((orte_qos_ack_channel_get_msg_room(ack_chan, msg->seq_num)) != -1) { + OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, + "%s process_out_of_order_msg msg %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + msg->seq_num)); + rc = ORTE_ERR_DUPLICATE_MSG; + } + else { + opal_hotel_checkin(&ack_chan->outstanding_msgs, (void*)msg, &room_num); + OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, + "process_out_of_order_msg checked 
in msg %d in room %d\n", + msg->seq_num, room_num)); + orte_qos_ack_channel_set_msg_room (ack_chan, msg->seq_num, room_num); + rc = ORTE_ERR_OUT_OF_ORDER_MSG; + /* check if we need to send an ACK */ + if (ack_chan->ack_msg_seq_num <= ack_chan->in_msg_seq_num) { + OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, + "%s process_out_of_order_msg sending ack last seq_num = %d\n", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + msg->seq_num)); + /* send ACK. */ + send_ack (ack_chan, msg->channel_num, ACK_OUT_OF_ORDER, msg->seq_num); + /* stop window ack timer */ + opal_event_evtimer_del (&ack_chan->msg_ack_timer_event); + } + else { + /* if we got a lost msg - any seq num between in_msg_seq_num and ack_seq_num*/ + if (ack_chan->ack_msg_seq_num > msg->seq_num) { + /* check if we have got all lost msgs */ + first_lost_msg_seq_num = ack_chan->in_msg_seq_num + 1; + num_lost_msgs = ack_chan->ack_msg_seq_num - ack_chan->in_msg_seq_num; + OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, + "%s process_out_of_order_msg msg %d first_lost_msg =%d num_lost_msgs =%d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + msg->seq_num, first_lost_msg_seq_num, num_lost_msgs)); + for (i =0 ; i < num_lost_msgs; i++) { + if ((orte_qos_ack_channel_get_msg_room(ack_chan, first_lost_msg_seq_num +i)) == -1) + break; + } + if (i == num_lost_msgs) { + + /* we got all the lost msgs so we can complete all the msgs in the hotel now */ + /* reset ack_seq_num */ + ack_chan->ack_msg_seq_num = first_lost_msg_seq_num -1; + room_num = 0; + for ( i = 0; room_num != -1; i++) { + OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, + "%s process_out_of_order_msg got all lost msgs completing outstanding msgs %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + (first_lost_msg_seq_num + i))); + /* evict msg and complete it */ + room_num = orte_qos_ack_channel_get_msg_room (ack_chan, first_lost_msg_seq_num +i); + opal_hotel_checkout_and_return_occupant(&ack_chan->outstanding_msgs, room_num, 
&occupant); + orte_qos_ack_channel_set_msg_room(ack_chan, first_lost_msg_seq_num +i, -1); + out_msg = (orte_rml_recv_t *) occupant; + if ((NULL != out_msg) && (room_num != -1)) { + // set in seq num */ + ack_chan->in_msg_seq_num = out_msg->seq_num; + orte_rml_base_complete_recv_msg(&out_msg); + /* completing recv msg to rml */ + OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, + "process_out_of_order_msg completed recv msg %d", + (first_lost_msg_seq_num + i))); + } else { + OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, + "%s process_out_of_order_msg lost msg %d not in hotel", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + (first_lost_msg_seq_num + i))); + } + } //end for + /* send ACK */ + send_ack (ack_chan, ack_chan->channel_num, ACK_RECV_MISSED_MSG, + ack_chan->in_msg_seq_num); + } //end if (i== num_lost_msgs) + } // if (ack_chan->ack_msg_seq_num > msg->seq_num) + } //end else + } // end duplicate else + return rc; +} + static int ack_recv (void *qos_channel, orte_rml_recv_t *msg) { orte_qos_ack_channel_t *ack_chan; ack_chan = (orte_qos_ack_channel_t*) (qos_channel); - bool ack = false; - uint32_t num_msgs_to_ack = 0; - uint32_t *ack_seq_num_array; - uint32_t ack_type, i; int32_t rc; struct timeval ack_timeout; - /* check for out of order msg */ - if( ack_chan->in_msg_seq_num + 1 != msg->seq_num) + OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, + "%s ack_recv msg = %p seq_num = %d from peer = %s\n", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + msg, msg->seq_num, + ORTE_NAME_PRINT(&msg->sender))); + /** HACK - drop every third msg to stimulate lost msg */ + /* if ((msg->seq_num == 3) && (hack == 0)) { + OBJ_RELEASE(msg); + hack = 1; + return ORTE_ERROR; + }*/ + /* check if this is the next expected msg*/ + if((ack_chan->in_msg_seq_num + 1 == msg->seq_num) && (ack_chan->ack_msg_seq_num < msg->seq_num)) { - /* we got an out of order msg or we may have ended the window */ - ack = true; - ack_type = ACK_OUT_OF_ORDER; - /* 
stop window ack timer */ - opal_event_evtimer_del (&ack_chan->msg_ack_timer_event); - } - else { /* check if we are at the end of the window */ - if (ack_chan->window == msg->seq_num - ack_chan->ack_msg_seq_num) { - ack = true; - ack_type = ACK_WINDOW_COMPLETE; + if(ack_chan->window == (msg->seq_num - ack_chan->ack_msg_seq_num)) { /* stop window ack timer */ opal_event_evtimer_del (&ack_chan->msg_ack_timer_event); + rc = send_ack (ack_chan, msg->channel_num, ACK_WINDOW_COMPLETE, msg->seq_num); } else { - if(msg->seq_num - ack_chan->ack_msg_seq_num == 1) { + if(ack_chan->in_msg_seq_num == ack_chan->ack_msg_seq_num) { /* begining window -start window ack timer */ ack_timeout.tv_sec = ack_chan->timeout_secs; ack_timeout.tv_usec = 0; opal_event_evtimer_add (&ack_chan->msg_ack_timer_event, &ack_timeout); } - ack_chan->in_msg_seq_num = msg->seq_num; + rc = ORTE_SUCCESS; } - } - - if ((ack) && (msg->tag >= ORTE_RML_TAG_MAX)) { - num_msgs_to_ack = ack_chan->in_msg_seq_num - ack_chan->ack_msg_seq_num + 1; ack_chan->in_msg_seq_num = msg->seq_num; - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s ack_recv sending ack for %d msgs from %s\n", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - num_msgs_to_ack, - ORTE_NAME_PRINT(&msg->sender))); - if ( NULL != (ack_seq_num_array = malloc (sizeof(uint32_t) * num_msgs_to_ack))) { - - for (i = 1; i <= num_msgs_to_ack ; i++) { - ack_seq_num_array[i-1] = ack_chan->ack_msg_seq_num + i; - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s ack_recv acking msg %d to peer = %s\n", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ack_seq_num_array[i-1], - ORTE_NAME_PRINT(&msg->sender))); - } - ack_seq_num_array[num_msgs_to_ack - 1] = msg->seq_num; - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s ack_recv acking last msg %d to peer = %s\n", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ack_seq_num_array[num_msgs_to_ack - 1], - ORTE_NAME_PRINT(&msg->sender))); - - } - else { - 
OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s ack_recv cannot allocate ack array to send ack to peer = %s\n", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&msg->sender))); - return ORTE_ERROR; - } - /* now construct and send ack message */ - rc = send_ack(ack_chan, msg->channel_num, - ack_seq_num_array, num_msgs_to_ack, ack_type); - if(ORTE_SUCCESS == rc) { - /* update last acked msg */ - ack_chan->ack_msg_seq_num = msg->seq_num; - } else { - //TO DO - } - } - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s ack_recv msg = %p seq_num = %d from peer = %s\n", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - msg, msg->seq_num, - ORTE_NAME_PRINT(&msg->sender))); - - return ORTE_SUCCESS; + else { + rc = process_out_of_order_msg(ack_chan, msg); + } + return rc; } -static void ack_close (void * channel) { - +static int ack_close (void * channel) { + int32_t rc = ORTE_SUCCESS; + orte_qos_ack_channel_t *ack_chan; + ack_chan = (orte_qos_ack_channel_t*) (channel); + /* check if channel is busy (no outstanding msgs */ + if (opal_hotel_is_empty (&ack_chan->outstanding_msgs)) { + /* no outstanding msgs, release channel */ + OBJ_RELEASE(ack_chan); + rc = ORTE_SUCCESS; + } else + rc = ORTE_ERR_CHANNEL_BUSY; + return rc; } static int ack_init_recv (void *channel, opal_list_t *attributes) { int32_t rc = ORTE_SUCCESS; + uint32_t eviction_timeout; orte_qos_ack_channel_t *ack_chan; - ack_chan = (orte_qos_ack_channel_t*) (channel); + ack_chan = (orte_qos_ack_channel_t*) channel; + /* TO DO - need to adjust eviction timeout according to window size + lets keep max time out for the first pass */ + eviction_timeout = (ack_chan->timeout_secs + QOS_ACK_WINDOW_TIMEOUT_IN_SECS) * 100000; + /* init outstanding msg hotel */ + opal_hotel_init (&ack_chan->outstanding_msgs, QOS_ACK_MAX_OUTSTANDING_MSGS, + eviction_timeout, 0, + orte_qos_ack_recv_msg_timeout_callback); + OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, + "%s 
ack_open channel = %p init hotel timeout =%d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ack_chan, eviction_timeout)); opal_event_evtimer_set (orte_event_base, &ack_chan->msg_ack_timer_event, orte_qos_ack_msg_window_timeout_callback, (void *) ack_chan); return rc; @@ -367,7 +497,7 @@ static void ack_send_callback (orte_rml_send_t *msg) complete them?? */ if(ORTE_SUCCESS == msg->status) { // nothing to do - assert(ack_chan->seq_num_to_room_num[msg->seq_num] != -1); + assert((orte_qos_ack_channel_get_msg_room(ack_chan, msg->seq_num)) != -1); } else { // TO DO : error handling OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, @@ -394,11 +524,35 @@ void orte_qos_ack_msg_ack_timeout_callback (struct opal_hotel_t *hotel, TO DO : handle the completion of all messages in the window */ msg->status = ORTE_ERR_ACK_TIMEOUT_SENDER; // set room num to -1 for the msg's seq number - ack_chan->seq_num_to_room_num[msg->seq_num] = -1; + orte_qos_ack_channel_set_msg_room (ack_chan, msg->seq_num , -1); // complete the msg ORTE_RML_SEND_COMPLETE(msg); } +void orte_qos_ack_recv_msg_timeout_callback (struct opal_hotel_t *hotel, + int room_num, void *occupant) +{ + orte_rml_recv_t *msg; + orte_qos_ack_channel_t *ack_chan; + orte_rml_channel_t *channel; + msg = (orte_rml_recv_t *) occupant; + channel = orte_rml_base_get_channel(msg->channel_num); + ack_chan = (orte_qos_ack_channel_t*) channel->qos_channel_ptr; + OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, + "%s OOPS received msg = %p seq num =%d timed out on ACK Queue\n", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + msg, msg->seq_num)); + /* Need to determine correct action here as the sender hasn't responded yet to + a lost msg event */ + /* This is highly unlikely - lets assert to enable debug*/ + assert(0); + /* + // set room num to -1 for the msg's seq number + ack_chan->seq_num_to_room_num[msg->seq_num % QOS_ACK_MAX_OUTSTANDING_MSGS] = -1; + // complete the msg + ORTE_RML_REACTIVATE_MESSAGE(msg);*/ +} + 
void orte_qos_ack_channel_process_ack (int status, orte_process_name_t* sender, opal_buffer_t *buffer, orte_rml_tag_t tag, void *cbdata) @@ -406,23 +560,21 @@ void orte_qos_ack_channel_process_ack (int status, orte_process_name_t* sender, /* process ack received for the msg */ uint32_t num_msgs_acked, channel_num, i; int32_t num_values, room_num; - orte_rml_send_t *msg; + orte_rml_send_t *msg, *missed_msg; void *occupant = NULL; orte_rml_channel_t *channel; orte_qos_ack_channel_t *ack_chan; uint32_t *seq_num_array; uint32_t ack_type; - + uint32_t num_missed_msgs; + uint32_t missed_msg_seq_num = 0; num_values = 1; /* unpack channel number first */ opal_dss.unpack(buffer, (void*) &channel_num, &num_values, OPAL_UINT32); OPAL_OUTPUT_VERBOSE((5, orte_qos_base_framework.framework_output, - "orte_qos_ack_channel_msg_ack_recv_callback recieved ack on channel = %d", + "orte_qos_ack_channel_process_ack recieved ack on channel = %d", channel_num)); channel = orte_rml_base_get_channel (channel_num); - OPAL_OUTPUT_VERBOSE((5, orte_qos_base_framework.framework_output, - "orte_qos_ack_channel_msg_ack_recv_callback recieved ack on channel = %d, channel =%p,qos_channel = %p", - channel_num, channel, channel->qos_channel_ptr)); if ((NULL != channel) || (NULL != channel->qos_channel_ptr)) { ack_chan = (orte_qos_ack_channel_t *) (channel->qos_channel_ptr); seq_num_array = malloc (sizeof(uint32_t) * ack_chan->window); @@ -432,73 +584,78 @@ void orte_qos_ack_channel_process_ack (int status, orte_process_name_t* sender, num_values = 1; /* unpack num messages acked */ opal_dss.unpack(buffer, (void*) &num_msgs_acked, &num_values, OPAL_UINT32); - /* unpack sequence number array */ - - OPAL_OUTPUT_VERBOSE((5, orte_qos_base_framework.framework_output, - "orte_qos_ack_channel_msg_ack_recv_callback recieved acks for %d msgs on channel = %d", - num_msgs_acked, channel_num)); - for (i = 0; i < num_msgs_acked; i++) - { - opal_dss.unpack(buffer, (void*) &seq_num_array[i], &num_values, 
OPAL_UINT32); - OPAL_OUTPUT_VERBOSE((5, orte_qos_base_framework.framework_output, - "orte_qos_ack_channel_msg_ack_recv_callback recieved ack for msg with seq_num = %d, channel = %d", - seq_num_array[i], channel_num)); - room_num = ack_chan->seq_num_to_room_num[seq_num_array[i]]; - opal_hotel_checkout_and_return_occupant(&ack_chan->outstanding_msgs, room_num, &occupant); - if(occupant != NULL) { - msg = (orte_rml_send_t*) occupant; - OPAL_OUTPUT_VERBOSE((10, orte_rml_base_framework.framework_output, + "orte_qos_ack_channel_process_ack recieved ack type %d for %d msgs on channel = %d", + ack_type, num_msgs_acked, channel_num)); + if (ACK_OUT_OF_ORDER != ack_type) { + //handle normal ACK + for (i = 0; i < num_msgs_acked; i++) + { + opal_dss.unpack(buffer, (void*) &seq_num_array[i], &num_values, OPAL_UINT32); + room_num = orte_qos_ack_channel_get_msg_room (ack_chan, seq_num_array[i]); + opal_hotel_checkout_and_return_occupant(&ack_chan->outstanding_msgs, room_num, &occupant); + orte_qos_ack_channel_set_msg_room(ack_chan, seq_num_array[i], -1); + if((occupant != NULL) && (room_num != -1)) { + msg = (orte_rml_send_t*) occupant; + OPAL_OUTPUT_VERBOSE((10, orte_rml_base_framework.framework_output, "Releasing sent message with tag %d and seq_num %d after receiving Ack from dest ", msg->tag, msg->seq_num )); - msg->status = ORTE_SUCCESS; - ORTE_RML_SEND_COMPLETE(msg); - } else { - OPAL_OUTPUT_VERBOSE((10, orte_rml_base_framework.framework_output, + msg->status = ORTE_SUCCESS; + ORTE_RML_SEND_COMPLETE(msg); + } else { + OPAL_OUTPUT_VERBOSE((10, orte_rml_base_framework.framework_output, "OOPS received an ACK for already completed seq_num =%d ", seq_num_array[i] )); + } } - } + } else { + // handle out of order ACK - complete msgs received in order, retry the lost msg. 
+ for (i = 0; i < num_msgs_acked; i++) + { + opal_dss.unpack(buffer, (void*) &seq_num_array[i], &num_values, OPAL_UINT32); + room_num = orte_qos_ack_channel_get_msg_room (ack_chan, seq_num_array[i]); + opal_hotel_checkout_and_return_occupant(&ack_chan->outstanding_msgs, room_num, &occupant); + orte_qos_ack_channel_set_msg_room(ack_chan, seq_num_array[i], -1); + if ((NULL != occupant) && ((i == 0 )|| (seq_num_array[i] == seq_num_array[i-1] +1 ))) { + msg = (orte_rml_send_t*) occupant; + msg->status = ORTE_SUCCESS; + ORTE_RML_SEND_COMPLETE(msg); + } else { + if (NULL != occupant) { + num_missed_msgs = (seq_num_array[i] - seq_num_array [i-1] - 1); + assert( i == num_msgs_acked -1); + /* recheck the ith msg */ + opal_hotel_checkin(&ack_chan->outstanding_msgs, (void*)occupant, &room_num); + orte_qos_ack_channel_set_msg_room (ack_chan, seq_num_array[i], room_num); + /* resend and recheck all the missed msgs*/ + missed_msg_seq_num = seq_num_array[i-1] + 1; + for (; missed_msg_seq_num < seq_num_array[i]; missed_msg_seq_num++) { + room_num = orte_qos_ack_channel_get_msg_room (ack_chan, missed_msg_seq_num); + opal_hotel_checkout_and_return_occupant (&ack_chan->outstanding_msgs, room_num, &occupant); + assert ( NULL != occupant); + missed_msg = (orte_rml_send_t*) occupant; + missed_msg->status = ORTE_ERR_LOST_MSG_IN_WINDOW; + opal_hotel_checkin(&ack_chan->outstanding_msgs, (void*)missed_msg, &room_num); + orte_qos_ack_channel_set_msg_room (ack_chan, missed_msg_seq_num, room_num); + /* send this out on wire directly */ + ORTE_OOB_SEND (missed_msg); + } //end for + } else { + OPAL_OUTPUT_VERBOSE((10, orte_rml_base_framework.framework_output, + "OOPS received an ACK for already completed seq_num =%d ", + seq_num_array[i] )); + }//end if (NULL != occupant) + } //end else + } // end for + }//end out of order ack processing free(seq_num_array); - } else { - OPAL_OUTPUT_VERBOSE((5, orte_qos_base_framework.framework_output, + }else { + OPAL_OUTPUT_VERBOSE((5, 
orte_qos_base_framework.framework_output, "orte_qos_ack_channel_msg_ack_recv_callback recieved ack on non existent channel = %d", channel_num)); } } -int send_ack (orte_qos_ack_channel_t * channel, orte_rml_channel_num_t channel_num, - uint32_t *ack_seq_num_array, uint32_t num_msgs_acked, uint32_t ack_type) -{ - int rc = ORTE_SUCCESS; - orte_rml_channel_t *rml_channel; - opal_buffer_t *buffer; - rml_channel = orte_rml_base_get_channel (channel_num); - int i; - if (NULL == rml_channel) - { - OPAL_OUTPUT_VERBOSE ((0, orte_qos_base_framework.framework_output, - " function send_ack - couldn't retrieve rml_channel with channel num =%d", - channel_num)); - return ORTE_ERROR; - } - buffer = OBJ_NEW (opal_buffer_t); - /* pack channel number */ - opal_dss.pack (buffer, &rml_channel->peer_channel, 1, OPAL_UINT32); - /* pack ack type */ - opal_dss.pack (buffer, &ack_type, 1, OPAL_UINT32); - /* pack num messages */ - opal_dss.pack (buffer, &num_msgs_acked, 1, OPAL_UINT32); - /* pack seq number array */ - for (i =0; ipeer, buffer, ORTE_RML_TAG_MSG_ACK, - orte_qos_ack_msg_send_callback, rml_channel); - return rc; - -} void orte_qos_ack_msg_send_callback ( int status, orte_process_name_t *peer, @@ -514,48 +671,18 @@ void orte_qos_ack_msg_send_callback ( int status, void orte_qos_ack_msg_window_timeout_callback (int fd, short flags, void *cbdata) { - uint32_t num_msgs_to_ack = 0; - uint32_t *ack_seq_num_array; - uint32_t ack_type, i; int32_t rc; orte_qos_ack_channel_t *ack_chan = (orte_qos_ack_channel_t*) cbdata; OPAL_OUTPUT_VERBOSE ((0, orte_qos_base_framework.framework_output, " orte_qos_ack_msg_window_timeout_callback for channel = %p last acked seq num = %d, last received seq num =%d", ack_chan, ack_chan->ack_msg_seq_num, ack_chan->in_msg_seq_num )); - ack_type = ACK_TIMEOUT; - /* prepare to send ack */ - num_msgs_to_ack = ack_chan->in_msg_seq_num - ack_chan->ack_msg_seq_num; - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s ack_recv sending ack for %d 
msgs \n", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - num_msgs_to_ack)); - if ( NULL != (ack_seq_num_array = malloc (sizeof(uint32_t) * num_msgs_to_ack))) { + /* send ack message */ + rc = send_ack(ack_chan, ack_chan->channel_num, ACK_TIMEOUT, ack_chan->in_msg_seq_num); + +} - for (i = 1; i <= num_msgs_to_ack ; i++) { - ack_seq_num_array[i-1] = ack_chan->ack_msg_seq_num + i; - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s ack_recv acking msg %d \n", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ack_seq_num_array[i-1])); - } - /* now construct and send ack message */ - rc = send_ack(ack_chan, ack_chan->channel_num, - ack_seq_num_array, num_msgs_to_ack, ack_type); - if(ORTE_SUCCESS == rc) { - /* update last acked msg */ - ack_chan->ack_msg_seq_num = ack_chan->in_msg_seq_num; - } else { - //TO DO - } - } - else { - OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output, - "%s cannot send ack \n", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - } -} /*** ACK QOS CLASS INSTANCES ***/ static void channel_cons (orte_qos_ack_channel_t *ptr) @@ -574,12 +701,9 @@ static void channel_cons (orte_qos_ack_channel_t *ptr) } static void channel_des (orte_qos_ack_channel_t *ptr) { - OPAL_LIST_DESTRUCT(&ptr->attributes); - OBJ_DESTRUCT (&ptr->outstanding_msgs); - opal_event_evtimer_del (&ptr->msg_ack_timer_event); - // TO DO release timer event - + // OPAL_LIST_DESTRUCT(&ptr->attributes); + //OBJ_DESTRUCT (&ptr->outstanding_msgs); } OBJ_CLASS_INSTANCE (orte_qos_ack_channel_t, - opal_object_t, + opal_list_item_t, channel_cons, channel_des); diff --git a/orte/mca/qos/base/base.h b/orte/mca/qos/base/base.h index 2cbf285e66c..d0918b5338f 100644 --- a/orte/mca/qos/base/base.h +++ b/orte/mca/qos/base/base.h @@ -45,6 +45,7 @@ ORTE_DECLSPEC extern orte_qos_base_t orte_qos_base; #define ORTE_QOS_MAX_WINDOW_SIZE 1000 typedef struct orte_qos_base_channel { + opal_list_item_t super; uint32_t channel_num; opal_list_t attributes; } orte_qos_base_channel_t; diff --git 
a/orte/mca/qos/base/qos_base_channel_handlers.c b/orte/mca/qos/base/qos_base_channel_handlers.c index ed7b6730af6..bb47c1038f6 100644 --- a/orte/mca/qos/base/qos_base_channel_handlers.c +++ b/orte/mca/qos/base/qos_base_channel_handlers.c @@ -118,12 +118,13 @@ int orte_qos_open_channel (void *qos_mod, void *qos_channel, opal_buffer_t * buf return ORTE_ERR_BAD_PARAM; } -void orte_qos_close_channel (void *qos_mod, void *qos_channel) { +int orte_qos_close_channel (void *qos_mod, void *qos_channel) { orte_qos_module_t *qos = (orte_qos_module_t *) (qos_mod); - if (NULL != qos) - qos->close (qos_channel); + if ((NULL != qos) && (NULL != qos_channel)) + return (qos->close (qos_channel)); else ORTE_ERROR_LOG (ORTE_ERR_BAD_PARAM); + return (ORTE_ERR_BAD_PARAM); } void orte_qos_init_recv_channel (void *qos_mod, void *qos_channel, opal_list_t * qos_attributes) { @@ -155,9 +156,10 @@ int orte_qos_recv_channel (void *qos_mod, void *qos_channel, orte_rml_recv_t *ms orte_qos_module_t *qos = (orte_qos_module_t *) (qos_mod); if (NULL != qos) return(qos->recv(qos_channel, msg)); - else + else { ORTE_ERROR_LOG (ORTE_ERR_BAD_PARAM); - return ORTE_ERROR; + return ORTE_ERROR; + } } diff --git a/orte/mca/qos/base/qos_base_frame.c b/orte/mca/qos/base/qos_base_frame.c index e14ddbc2e3c..215505d1331 100644 --- a/orte/mca/qos/base/qos_base_frame.c +++ b/orte/mca/qos/base/qos_base_frame.c @@ -112,7 +112,7 @@ static void channel_des (orte_qos_base_channel_t *ptr) OPAL_LIST_DESTRUCT(&ptr->attributes); } OBJ_CLASS_INSTANCE (orte_qos_base_channel_t, - opal_object_t, + opal_list_item_t, channel_cons, channel_des); diff --git a/orte/mca/qos/noop/qos_noop_component.c b/orte/mca/qos/noop/qos_noop_component.c index 34c526638dd..019c09ef9b5 100644 --- a/orte/mca/qos/noop/qos_noop_component.c +++ b/orte/mca/qos/noop/qos_noop_component.c @@ -27,7 +27,7 @@ static int noop_open (void *qos_channel, opal_buffer_t * buf); static int noop_send ( void *qos_channel, orte_rml_send_t *msg); static int noop_recv 
(void *channel, orte_rml_recv_t *msg); -static void noop_close (void * channel); +static int noop_close (void * channel); static int noop_init_recv (void *channel, opal_list_t *attributes); static int noop_cmp (void *channel, opal_list_t *attributes); static void noop_send_callback (orte_rml_send_t *msg); @@ -42,7 +42,8 @@ orte_qos_module_t orte_qos_noop_module = { noop_recv, noop_close, noop_init_recv, - noop_cmp + noop_cmp, + noop_send_callback }; /** @@ -86,11 +87,12 @@ static void qos_noop_shutdown (void) { static void* noop_create (opal_list_t *qos_attributes, uint32_t channel_num) { orte_qos_base_channel_t * noop_chan; - int32_t rc, *window, *type; + int32_t rc, *window, *type, window_val; orte_qos_type_t type_val = orte_qos_noop; noop_chan = OBJ_NEW (orte_qos_base_channel_t); noop_chan->channel_num = channel_num; type = &type_val; + window = &window_val; // TBD _ we ignore inapplicable attributes for now - need to return error? // get attributes of interest to the base and store them locally. 
if (ORTE_SUCCESS == (rc = orte_set_attribute( &noop_chan->attributes, ORTE_QOS_TYPE, ORTE_ATTR_GLOBAL, (void*)type, OPAL_UINT8))) { @@ -144,9 +146,15 @@ static int noop_recv (void *qos_channel, orte_rml_recv_t *msg) { return ORTE_SUCCESS; } -static void noop_close (void * channel) { - orte_qos_base_channel_t *noop_chan = (orte_qos_base_channel_t*) channel; - OBJ_RELEASE (noop_chan); +static int noop_close (void * channel) { + orte_qos_base_channel_t *noop_chan; + if(NULL != channel) { + noop_chan = (orte_qos_base_channel_t*) channel; + OBJ_RELEASE (noop_chan); + return ORTE_SUCCESS; + } else + return ORTE_ERR_BAD_PARAM; + } static int noop_init_recv (void *channel, opal_list_t *attributes) { diff --git a/orte/mca/qos/qos.h b/orte/mca/qos/qos.h index cd3c28974a6..03bab52f152 100644 --- a/orte/mca/qos/qos.h +++ b/orte/mca/qos/qos.h @@ -55,7 +55,7 @@ typedef int (*mca_qos_base_component_ft_event_fn_t)(int state); #endif ORTE_DECLSPEC void * orte_qos_create_channel (void *qos_mod, opal_list_t *qos_attributes, uint32_t channel_num); ORTE_DECLSPEC int orte_qos_open_channel (void *qos_mod, void *qos_channel, opal_buffer_t * buffer); -ORTE_DECLSPEC void orte_qos_close_channel (void *qos_mod, void *qos_channel); +ORTE_DECLSPEC int orte_qos_close_channel (void *qos_mod, void *qos_channel); ORTE_DECLSPEC void orte_qos_init_recv_channel (void *qos_mod, void *qos_channel, opal_list_t *qos_attributes); ORTE_DECLSPEC int orte_qos_cmp_channel (void *qos_mod, void *qos_channel, opal_list_t *qos_attributes); ORTE_DECLSPEC int orte_qos_send_channel (void *qos_mod, void *qos_channel, orte_rml_send_t *msg); @@ -91,7 +91,7 @@ typedef int (*orte_qos_base_module_recv_fn_t) ( void * channel, * this function is called when a message is received on a channel */ -typedef void (*orte_qos_base_module_close_fn_t) ( void * channel); +typedef int (*orte_qos_base_module_close_fn_t) ( void * channel); /** * qos module (channel) init recv * this function is used to initialize a channel for receiving 
msgs (called in response to open_channel req from peer) diff --git a/orte/mca/rml/base/base.h b/orte/mca/rml/base/base.h index d434d01521b..cf71837d6b0 100644 --- a/orte/mca/rml/base/base.h +++ b/orte/mca/rml/base/base.h @@ -133,13 +133,14 @@ typedef enum { * It contains a pointer to a struct that contains the QoS specific channel data. */ typedef struct { + opal_list_item_t super; orte_rml_channel_num_t channel_num; // the channel number reference (exposed to the user). orte_process_name_t peer; // the other end point (peer) of the channel orte_rml_channel_num_t peer_channel; // peer channel number void * qos; // pointer to QoS component specific module void * qos_channel_ptr; // pointer to QoS component specific channel struct orte_rml_channel_state_t state; // channel state - bool receive; // set to true if this is a receive (peer opened) channel. (Default is send channel) + bool recv; // set to true if this is a receive (peer opened) channel. (Default is send channel) } orte_rml_channel_t; OBJ_CLASS_DECLARATION(orte_rml_channel_t); @@ -198,13 +199,28 @@ typedef struct { } orte_rml_open_channel_t; OBJ_CLASS_DECLARATION(orte_rml_open_channel_t); +/* structure to send RML channel close messages - used internally */ +typedef struct { + opal_list_item_t super; + /* msg send status */ + int status; + /* channel object */ + orte_rml_channel_t *channel; + /* user's callback function */ + orte_rml_channel_callback_fn_t cbfunc; + /* user's cbdata */ + void *cbdata; +} orte_rml_close_channel_t; +OBJ_CLASS_DECLARATION(orte_rml_close_channel_t); + /* define an object for transferring send requests to the event lib */ typedef struct { opal_object_t super; opal_event_t ev; union { orte_rml_send_t send; - orte_rml_open_channel_t channel; + orte_rml_open_channel_t open_channel; + orte_rml_close_channel_t close_channel; }post; } orte_rml_send_request_t; OBJ_CLASS_DECLARATION(orte_rml_send_request_t); @@ -277,6 +293,21 @@ OBJ_CLASS_DECLARATION(orte_rml_recv_request_t); 
opal_event_active(&(m)->ev, OPAL_EV_WRITE, 1); \ } while(0); +/* + reactivates rcv msg on the unposted rcvd list when a match occurs + need a different path as the QoS recv processing was already done + for this process +*/ +#define ORTE_RML_REACTIVATE_MESSAGE(m) \ + do { \ + /* setup the event */ \ + opal_event_set(orte_event_base, &(m)->ev, -1, \ + OPAL_EV_WRITE, \ + orte_rml_base_reprocess_msg, (m)); \ + opal_event_set_priority(&(m)->ev, ORTE_MSG_PRI); \ + opal_event_active(&(m)->ev, OPAL_EV_WRITE, 1); \ +} while(0); + #define ORTE_RML_SEND_COMPLETE(m) \ do { \ opal_output_verbose(5, orte_rml_base_framework.framework_output, \ @@ -318,7 +349,7 @@ OBJ_CLASS_DECLARATION(orte_rml_recv_request_t); }while(0); -#define ORTE_RML_OPEN_CHANNEL_COMPLETE(m) \ +#define ORTE_RML_OPEN_CHANNEL_COMPLETE(m) \ do { \ opal_output_verbose(5, orte_rml_base_framework.framework_output, \ "%s-%s open channel message complete at %s:%d", \ @@ -331,7 +362,17 @@ OBJ_CLASS_DECLARATION(orte_rml_recv_request_t); NULL, (m)->cbdata) ; \ }while(0); - +#define ORTE_RML_CLOSE_CHANNEL_COMPLETE(m) \ + do { \ + opal_output_verbose(5, orte_rml_base_framework.framework_output, \ + "%s-%d close channel message complete at %s:%d", \ + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \ + (m)->channel->channel_num, \ + __FILE__, __LINE__); \ + /* call the callback function */ \ + (m)->cbfunc((m)->status, (m)->channel->channel_num, \ + NULL, NULL, (m)->cbdata) ; \ +}while(0); /* * This is the base priority for a RML wrapper component * If there exists more than one wrapper, then the one with @@ -348,19 +389,25 @@ ORTE_DECLSPEC void orte_rml_base_post_recv(int sd, short args, void *cbdata); ORTE_DECLSPEC void orte_rml_base_process_msg(int fd, short flags, void *cbdata); ORTE_DECLSPEC void orte_rml_base_process_error(int fd, short flags, void *cbdata); ORTE_DECLSPEC void orte_rml_base_open_channel(int fd, short flags, void *cbdata); -ORTE_DECLSPEC void orte_rml_open_channel_send_callback ( int status, orte_process_name_t* 
sender, +ORTE_DECLSPEC void orte_rml_base_close_channel(int fd, short flags, void *cbdata); +ORTE_DECLSPEC void orte_rml_base_open_channel_send_callback ( int status, orte_process_name_t* sender, opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata); -ORTE_DECLSPEC void orte_rml_open_channel_resp_callback (int status, orte_process_name_t* peer, +ORTE_DECLSPEC void orte_rml_base_open_channel_resp_callback (int status, orte_process_name_t* peer, struct opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata); -ORTE_DECLSPEC void orte_rml_open_channel_reply_send_callback ( int status, orte_process_name_t* sender, +ORTE_DECLSPEC void orte_rml_base_open_channel_reply_send_callback ( int status, orte_process_name_t* sender, opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata); ORTE_DECLSPEC void orte_rml_base_prep_send_channel (orte_rml_channel_t *channel, orte_rml_send_t *send); -ORTE_DECLSPEC void orte_rml_base_process_recv_channel (orte_rml_channel_t *channel, +ORTE_DECLSPEC int orte_rml_base_process_recv_channel (orte_rml_channel_t *channel, orte_rml_recv_t *recv); +ORTE_DECLSPEC void orte_rml_base_close_channel_send_callback ( int status, orte_process_name_t* sender, + opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata); +ORTE_DECLSPEC void orte_rml_base_send_close_channel ( orte_rml_close_channel_t *close_chan); +ORTE_DECLSPEC void orte_rml_base_reprocess_msg(int fd, short flags, void *cbdata); +ORTE_DECLSPEC void orte_rml_base_complete_recv_msg (orte_rml_recv_t **recv_msg); END_C_DECLS #endif /* MCA_RML_BASE_H */ diff --git a/orte/mca/rml/base/rml_base_channel_handlers.c b/orte/mca/rml/base/rml_base_channel_handlers.c index 0c3ecf63efc..bb784e0485e 100644 --- a/orte/mca/rml/base/rml_base_channel_handlers.c +++ b/orte/mca/rml/base/rml_base_channel_handlers.c @@ -40,10 +40,76 @@ static int unpack_channel_attributes (opal_buffer_t *buffer, opal_list_t *qos_attributes); -static orte_rml_channel_t * get_channel ( orte_process_name_t * peer, opal_list_t 
*qos_attributes); +static orte_rml_channel_t * get_channel ( orte_process_name_t * peer, + opal_list_t *qos_attributes, + bool recv); static int send_open_channel_reply (orte_process_name_t *peer, orte_rml_channel_t *channel, bool accept); +void orte_rml_base_close_channel(int fd, short flags, void *cbdata) +{ + orte_rml_send_request_t *req = (orte_rml_send_request_t*)cbdata; + orte_rml_close_channel_t *close_chan; + OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, + "%s rml_close_channel to peer %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&req->post.close_channel.channel->peer))); + OPAL_TIMING_EVENT((&tm_rml, "to %s", ORTE_NAME_PRINT(&peer))); + close_chan = OBJ_NEW(orte_rml_close_channel_t); + close_chan->channel = req->post.close_channel.channel; + close_chan->cbfunc = req->post.close_channel.cbfunc; + close_chan->cbdata = req->post.close_channel.cbdata; + OBJ_RELEASE(req); + /* check with qos if the channel ready to be closed */ + if (ORTE_SUCCESS == orte_qos_close_channel (close_chan->channel->qos, + close_chan->channel->qos_channel_ptr)) { + orte_rml_base_send_close_channel( close_chan); + } + /* complete close request with error channel busy */ + else { + close_chan->status = ORTE_ERR_CHANNEL_BUSY; + ORTE_RML_CLOSE_CHANNEL_COMPLETE(close_chan); + OBJ_RELEASE(close_chan); + } +} + +void orte_rml_base_send_close_channel ( orte_rml_close_channel_t *close_chan) +{ + opal_buffer_t *buffer; + // send msg to peer to close channel. 
+ buffer = OBJ_NEW (opal_buffer_t); + /* pack the channel number*/ + opal_dss.pack(buffer, &close_chan->channel->peer_channel, 1, OPAL_UINT32); + orte_rml.send_buffer_nb( &close_chan->channel->peer, buffer, ORTE_RML_TAG_CLOSE_CHANNEL_REQ, + orte_rml_base_close_channel_send_callback, + close_chan); +} + +void orte_rml_base_close_channel_send_callback ( int status, + orte_process_name_t* sender, + opal_buffer_t* buffer, + orte_rml_tag_t tag, + void* cbdata) +{ + // this is the send call back for open channel request + orte_rml_close_channel_t *req = (orte_rml_close_channel_t*) cbdata; + orte_process_name_t peer = req->channel->peer; + OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, + "%s rml_close_channel_send_callback to peer %s status = %d", + ORTE_NAME_PRINT(sender), + ORTE_NAME_PRINT(&peer), status)); + req->status = status; + // if the message could not be sent log error + if (ORTE_SUCCESS != req->status) + ORTE_ERROR_LOG (req->status); + //complete the req. + ORTE_RML_CLOSE_CHANNEL_COMPLETE(req); + opal_pointer_array_set_item ( &orte_rml_base.open_channels, req->channel->channel_num, NULL); + // release the channel object and the req. 
+ OBJ_RELEASE(req->channel); + OBJ_RELEASE(req); + OBJ_RELEASE(buffer); +} void orte_rml_base_open_channel(int fd, short flags, void *cbdata) { @@ -53,31 +119,35 @@ void orte_rml_base_open_channel(int fd, short flags, void *cbdata) orte_rml_open_channel_t *open_chan; orte_rml_channel_t *channel; opal_buffer_t *buffer; - peer = req->post.channel.dst; + peer = req->post.open_channel.dst; OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, "%s rml_open_channel to peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&peer))); OPAL_TIMING_EVENT((&tm_rml, "to %s", ORTE_NAME_PRINT(&peer))); + /* return error if a channel already exists */ + if ( NULL != (channel = get_channel (&peer, req->post.open_channel.qos_attributes, false))) + { + req->post.open_channel.status = ORTE_ERR_OPEN_CHANNEL_DUPLICATE; + req->post.open_channel.channel = channel; + ORTE_RML_OPEN_CHANNEL_COMPLETE(&req->post.open_channel); + OBJ_RELEASE(req); + return; + } channel = OBJ_NEW(orte_rml_channel_t); channel->channel_num = opal_pointer_array_add (&orte_rml_base.open_channels, channel); channel->peer = peer; open_chan = OBJ_NEW(orte_rml_open_channel_t); open_chan->dst = peer; - open_chan->qos_attributes = req->post.channel.qos_attributes; - open_chan->cbfunc = req->post.channel.cbfunc; - open_chan->cbdata = req->post.channel.cbdata; - // OBJ_RELEASE(req); - OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, - "%s rml_open_channel to peer %s SUCCESS", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&peer))); + open_chan->qos_attributes = req->post.open_channel.qos_attributes; + open_chan->cbfunc = req->post.open_channel.cbfunc; + open_chan->cbdata = req->post.open_channel.cbdata; + OBJ_RELEASE(req); // associate open channel request and the newly created channel object open_chan->channel = channel; type = &type_val; orte_get_attribute( open_chan->qos_attributes, ORTE_QOS_TYPE, (void**)&type, OPAL_UINT8); open_chan->channel->qos = (void*) 
orte_qos_get_module (open_chan->qos_attributes); - OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, "%s rml_open_channel type = %d to peer %s ", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -86,8 +156,9 @@ void orte_rml_base_open_channel(int fd, short flags, void *cbdata) // now associate qos with the channel based on user requested attributes. if ( NULL != open_chan->channel->qos) { - open_chan->channel->qos_channel_ptr = orte_qos_create_channel (open_chan->channel->qos, open_chan->qos_attributes, - open_chan->channel->channel_num); + open_chan->channel->qos_channel_ptr = orte_qos_create_channel (open_chan->channel->qos, + open_chan->qos_attributes, + open_chan->channel->channel_num); // create rml send for open channel request. Call the corresponding QoS module to pack the attributes. buffer = OBJ_NEW (opal_buffer_t); // call QoS module to pack attributes @@ -99,12 +170,12 @@ void orte_rml_base_open_channel(int fd, short flags, void *cbdata) "%s rml_open_channel to peer %s SUCCESS sending to peer", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&peer))); - // now post a recieve for open_channel_response tag + // post a recieve for open_channel_response tag orte_rml.recv_buffer_nb(&peer, ORTE_RML_TAG_OPEN_CHANNEL_RESP, - ORTE_RML_NON_PERSISTENT, orte_rml_open_channel_resp_callback, open_chan); + ORTE_RML_NON_PERSISTENT, orte_rml_base_open_channel_resp_callback, open_chan); // send request to peer to open channel orte_rml.send_buffer_nb( &peer, buffer, ORTE_RML_TAG_OPEN_CHANNEL_REQ, - orte_rml_open_channel_send_callback, + orte_rml_base_open_channel_send_callback, open_chan); } else { @@ -130,7 +201,7 @@ void orte_rml_base_open_channel(int fd, short flags, void *cbdata) } -void orte_rml_open_channel_send_callback ( int status, +void orte_rml_base_open_channel_send_callback ( int status, orte_process_name_t* sender, opal_buffer_t* buffer, orte_rml_tag_t tag, @@ -156,10 +227,10 @@ void orte_rml_open_channel_send_callback ( int status, else { // start 
a timer for response from peer } - //OBJ_RELEASE(buffer); + OBJ_RELEASE(buffer); } -void orte_rml_open_channel_resp_callback (int status, +void orte_rml_base_open_channel_resp_callback (int status, orte_process_name_t* peer, struct opal_buffer_t* buffer, orte_rml_tag_t tag, @@ -183,8 +254,8 @@ void orte_rml_open_channel_resp_callback (int status, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), peer_resp)); /* response will contain the peer channel number - the peer does not have the - option to change the channel attributes */ - // unpack and get peer channel number. + option to change the channel attributes + unpack and get peer channel number.*/ if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &channel->peer_channel, &count, OPAL_INT))) { ORTE_ERROR_LOG(rc); req->status = ORTE_ERR_UNPACK_FAILURE; @@ -278,7 +349,7 @@ void orte_rml_open_channel_recv_callback (int status, "rml_open_channel_recv_callback type =%d", type_val)); /* scan the list of channels to see if we already have a channel with qos_attributes */ - if (NULL == (channel = get_channel ( peer, &qos_attributes))) { + if (NULL == (channel = get_channel ( peer, &qos_attributes, true))) { /* create a new channel for the req */ channel = OBJ_NEW(orte_rml_channel_t); channel->channel_num = opal_pointer_array_add (&orte_rml_base.open_channels, channel); @@ -286,7 +357,7 @@ void orte_rml_open_channel_recv_callback (int status, "rml_open_channel_recv_callback channel num =%d", channel->channel_num)); channel->peer = *peer; - channel->receive = true; + channel->recv = true; channel->qos = (void*) orte_qos_get_module (&qos_attributes); /* now associate qos with the channel based on requested attributes */ channel->qos_channel_ptr = (void*) orte_qos_create_channel(channel->qos, &qos_attributes, @@ -317,22 +388,12 @@ void orte_rml_open_channel_recv_callback (int status, } } else { - /*this means that there exists a channel with the same attributes which was - previously created on user or sender's open channel request - send 
channel accept reply to sender */ - if(ORTE_SUCCESS == send_open_channel_reply (peer, channel, true)) - /* exercise caution while updating state of a bidirectional channel*/ - channel->state = orte_rml_channel_open; - else { - /* the receiver shall not attempt to resend or send a reject message - instead we let the sender's request timeout at his end. - release the channel etc */ - opal_pointer_array_set_item ( &orte_rml_base.open_channels, channel->channel_num, NULL); - orte_qos_close_channel (channel->qos, channel->qos_channel_ptr); - OBJ_RELEASE(channel); - } + /* there exists a channel with the same attributes reject the request */ + OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, + "rml_open_channel_recv_callback OOPS CHANNEL EXISTS ALREADY channel num =%d", + channel->channel_num)); + send_open_channel_reply (peer, channel, false); } - } else { //reply with error message @@ -358,42 +419,40 @@ static int send_open_channel_reply (orte_process_name_t *peer, } } /* TBD: should specify reason for reject - send channel accept to sender */ + send open channel response to sender */ orte_rml.send_buffer_nb ( peer, buffer, ORTE_RML_TAG_OPEN_CHANNEL_RESP, - orte_rml_open_channel_reply_send_callback, + orte_rml_base_open_channel_reply_send_callback, channel); return rc; } -static orte_rml_channel_t * get_channel ( orte_process_name_t * peer, opal_list_t *qos_attributes) +static orte_rml_channel_t * get_channel ( orte_process_name_t * peer, + opal_list_t *qos_attributes, + bool recv) { orte_rml_channel_t *channel = NULL; int32_t i = 0; + /* search available channels and return channel that matches the attributes */ for (i=0; i < orte_rml_base.open_channels.size; i++) { if (NULL != (channel = (orte_rml_channel_t*) opal_pointer_array_get_item (&orte_rml_base.open_channels, i))) { /* compare basic properties */ if ((OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &channel->peer, peer)) && ((orte_rml_channel_open == channel->state) || - 
(orte_rml_channel_opening == channel->state))) + (orte_rml_channel_opening == channel->state)) && + (channel->recv == recv)) { /* compare channel attributes */ - if( ORTE_SUCCESS == orte_qos_cmp_channel ( channel->qos, channel->qos_channel_ptr, qos_attributes)) { - /* we have an existing channel that we can use */ - /* make it a receive channel and inform qos to init recv state */ - channel->receive = true; - orte_qos_init_recv_channel ( channel->qos, channel->qos_channel_ptr, qos_attributes); + if( ORTE_SUCCESS == orte_qos_cmp_channel ( channel->qos, channel->qos_channel_ptr, qos_attributes)) return channel; - } - else - return NULL; + } } } return NULL; } -void orte_rml_open_channel_reply_send_callback ( int status, +void orte_rml_base_open_channel_reply_send_callback ( int status, orte_process_name_t* sender, opal_buffer_t* buffer, orte_rml_tag_t tag, @@ -423,14 +482,6 @@ orte_rml_channel_t * orte_rml_base_get_channel (orte_rml_channel_num_t chan_num) orte_rml_channel_t * channel; channel = (orte_rml_channel_t*) opal_pointer_array_get_item (&orte_rml_base.open_channels, chan_num); - /* if (NULL != channel) - OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, - "orte_rml_base_get_channel channel = %p num=%d qos_channel= %p state =%d", - channel, chan_num, channel->qos_channel_ptr, channel->state)); - else - OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, - "orte_rml_base_get_channel channel %d is null", - chan_num));*/ if ((NULL != channel) && (orte_rml_channel_open == channel->state)) return channel; else @@ -446,9 +497,43 @@ void orte_rml_base_prep_send_channel (orte_rml_channel_t *channel, orte_qos_send_channel (channel->qos, channel->qos_channel_ptr, send); } -void orte_rml_base_process_recv_channel (orte_rml_channel_t *channel, +int orte_rml_base_process_recv_channel (orte_rml_channel_t *channel, orte_rml_recv_t *recv) { // call qos for recv post processing - orte_qos_recv_channel (channel->qos, channel->qos_channel_ptr, 
recv); + return (orte_qos_recv_channel (channel->qos, channel->qos_channel_ptr, recv)); +} + +void orte_rml_close_channel_recv_callback (int status, + orte_process_name_t* peer, + struct opal_buffer_t* buffer, + orte_rml_tag_t tag, + void* cbdata) +{ + // find the channel and close it or log error + orte_rml_channel_t *channel; + int32_t count =1, rc; + orte_rml_channel_num_t channel_num =5; + OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, + "%s rml_close_channel_recv_callback from peer %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(peer))); + /* unpack channel number */ + if(ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &channel_num, + &count, OPAL_UINT32))) { + ORTE_ERROR_LOG(rc); + return; + } + channel = orte_rml_base_get_channel(channel_num); + OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, + "%s rml_close_channel_recv_callback for channel num =%d channel=%p", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + channel_num, channel)); + if (NULL != channel) { + orte_qos_close_channel ( channel->qos, channel->qos_channel_ptr); + opal_pointer_array_set_item ( &orte_rml_base.open_channels, channel->channel_num, NULL); + OBJ_RELEASE(channel); + } else { + ORTE_ERROR_LOG(OPAL_ERR_BAD_PARAM); + } } diff --git a/orte/mca/rml/base/rml_base_frame.c b/orte/mca/rml/base/rml_base_frame.c index 1eef10f55b1..dbaeb39e2fb 100644 --- a/orte/mca/rml/base/rml_base_frame.c +++ b/orte/mca/rml/base/rml_base_frame.c @@ -216,6 +216,10 @@ int orte_rml_base_select(void) orte_rml.recv_buffer_nb (ORTE_NAME_WILDCARD, ORTE_RML_TAG_OPEN_CHANNEL_REQ, ORTE_RML_PERSISTENT, orte_rml_open_channel_recv_callback, NULL); + /* post a persistent recieve for close channel request */ + orte_rml.recv_buffer_nb (ORTE_NAME_WILDCARD, ORTE_RML_TAG_CLOSE_CHANNEL_REQ, + ORTE_RML_PERSISTENT, orte_rml_close_channel_recv_callback, + NULL); return ORTE_SUCCESS; } @@ -266,11 +270,11 @@ static void channel_cons(orte_rml_channel_t *ptr) ptr->channel_num = 
ORTE_RML_INVALID_CHANNEL_NUM; ptr->qos = NULL; ptr->qos_channel_ptr = NULL; - ptr->receive = false; + ptr->recv = false; } OBJ_CLASS_INSTANCE(orte_rml_channel_t, - opal_object_t, + opal_list_item_t, channel_cons, NULL); static void open_channel_cons(orte_rml_open_channel_t *ptr) @@ -282,10 +286,19 @@ OBJ_CLASS_INSTANCE(orte_rml_open_channel_t, opal_list_item_t, open_channel_cons, NULL); +static void close_channel_cons(orte_rml_close_channel_t *ptr) +{ + ptr->cbdata = NULL; + ptr->channel = NULL; +} +OBJ_CLASS_INSTANCE(orte_rml_close_channel_t, + opal_list_item_t, + close_channel_cons, NULL); + static void send_req_cons(orte_rml_send_request_t *ptr) { OBJ_CONSTRUCT(&ptr->post.send, orte_rml_send_t); - OBJ_CONSTRUCT(&ptr->post.channel, orte_rml_open_channel_t); + OBJ_CONSTRUCT(&ptr->post.open_channel, orte_rml_open_channel_t); } OBJ_CLASS_INSTANCE(orte_rml_send_request_t, opal_object_t, diff --git a/orte/mca/rml/base/rml_base_msg_handlers.c b/orte/mca/rml/base/rml_base_msg_handlers.c index 6481f5ba909..f35e4922c83 100644 --- a/orte/mca/rml/base/rml_base_msg_handlers.c +++ b/orte/mca/rml/base/rml_base_msg_handlers.c @@ -53,6 +53,7 @@ static void msg_match_recv(orte_rml_posted_recv_t *rcv, bool get_all); + void orte_rml_base_post_recv(int sd, short args, void *cbdata) { orte_rml_recv_request_t *req = (orte_rml_recv_request_t*)cbdata; @@ -119,58 +120,12 @@ void orte_rml_base_post_recv(int sd, short args, void *cbdata) OBJ_RELEASE(req); } -static void msg_match_recv(orte_rml_posted_recv_t *rcv, bool get_all) -{ - opal_list_item_t *item, *next; - orte_rml_recv_t *msg; - orte_ns_cmp_bitmask_t mask = ORTE_NS_CMP_ALL | ORTE_NS_CMP_WILD; - - /* scan thru the list of unmatched recvd messages and - * see if any matches this spec - if so, push the first - * into the recvd msg queue and look no further - */ - item = opal_list_get_first(&orte_rml_base.unmatched_msgs); - while (item != opal_list_get_end(&orte_rml_base.unmatched_msgs)) { - next = opal_list_get_next(item); - msg = 
(orte_rml_recv_t*)item; - opal_output_verbose(5, orte_rml_base_framework.framework_output, - "%s checking recv for %s against unmatched msg from %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&rcv->peer), - ORTE_NAME_PRINT(&msg->sender)); - - /* since names could include wildcards, must use - * the more generalized comparison function - */ - if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &msg->sender, &rcv->peer) && - msg->tag == rcv->tag) { - ORTE_RML_ACTIVATE_MESSAGE(msg); - opal_list_remove_item(&orte_rml_base.unmatched_msgs, item); - if (!get_all) { - break; - } - } - item = next; - } -} - -void orte_rml_base_process_msg(int fd, short flags, void *cbdata) +void orte_rml_base_complete_recv_msg (orte_rml_recv_t **recv_msg) { - orte_rml_recv_t *msg = (orte_rml_recv_t*)cbdata; orte_rml_posted_recv_t *post; orte_ns_cmp_bitmask_t mask = ORTE_NS_CMP_ALL | ORTE_NS_CMP_WILD; opal_buffer_t buf; - - OPAL_OUTPUT_VERBOSE((5, orte_rml_base_framework.framework_output, - "%s message received from %s for tag %d on channel=%d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&msg->sender), - msg->tag, - msg->channel_num)); - - OPAL_TIMING_EVENT((&tm_rml,"from %s %d bytes", - ORTE_NAME_PRINT(&msg->sender), msg->iov.iov_len)); - + orte_rml_recv_t *msg = *recv_msg; /* see if we have a waiting recv for this message */ OPAL_LIST_FOREACH(post, &orte_rml_base.posted_recvs, orte_rml_posted_recv_t) { /* since names could include wildcards, must use @@ -178,15 +133,6 @@ void orte_rml_base_process_msg(int fd, short flags, void *cbdata) */ if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &msg->sender, &post->peer) && msg->tag == post->tag) { - if ((ORTE_RML_INVALID_CHANNEL_NUM != msg->channel_num) && - (NULL != orte_rml_base_get_channel(msg->channel_num) )) { - OPAL_OUTPUT_VERBOSE((5, orte_rml_base_framework.framework_output, - "%s calling recv msg on channel=%d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - msg->channel_num)); - // call channel for recv post 
processing - orte_rml_base_process_recv_channel (orte_rml_base_get_channel(msg->channel_num), msg); - } /* deliver the data to this location */ if (post->buffer_data) { /* deliver it in a buffer */ @@ -198,13 +144,13 @@ void orte_rml_base_process_msg(int fd, short flags, void *cbdata) /* the user must have unloaded the buffer if they wanted * to retain ownership of it, so release whatever remains */ - /* OPAL_OUTPUT_VERBOSE((5, orte_rml_base_framework.framework_output, + OPAL_OUTPUT_VERBOSE((5, orte_rml_base_framework.framework_output, "%s message received bytes from %s for tag %d on channel=%d called callback", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&msg->sender), msg->tag, - msg->channel_num));*/ - OBJ_DESTRUCT(&buf); + msg->channel_num)); + OBJ_DESTRUCT(&buf); } else { /* deliver as an iovec */ post->cbfunc.iov(ORTE_SUCCESS, &msg->sender, &msg->iov, 1, msg->tag, post->cbdata); @@ -232,16 +178,96 @@ void orte_rml_base_process_msg(int fd, short flags, void *cbdata) return; } } - /* we get here if no matching recv was found - we then hold * the message until such a recv is issued */ + OPAL_OUTPUT_VERBOSE((5, orte_rml_base_framework.framework_output, + "%s message received bytes from %s for tag %d on channel=%d Not Matched adding to unmatched msgs", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&msg->sender), + msg->tag, + msg->channel_num)); + opal_list_append(&orte_rml_base.unmatched_msgs, &msg->super); +} + +static void msg_match_recv(orte_rml_posted_recv_t *rcv, bool get_all) +{ + opal_list_item_t *item, *next; + orte_rml_recv_t *msg; + orte_ns_cmp_bitmask_t mask = ORTE_NS_CMP_ALL | ORTE_NS_CMP_WILD; + + /* scan thru the list of unmatched recvd messages and + * see if any matches this spec - if so, push the first + * into the recvd msg queue and look no further + */ + item = opal_list_get_first(&orte_rml_base.unmatched_msgs); + while (item != opal_list_get_end(&orte_rml_base.unmatched_msgs)) { + next = opal_list_get_next(item); + msg = 
(orte_rml_recv_t*)item; + opal_output_verbose(5, orte_rml_base_framework.framework_output, + "%s checking recv for %s against unmatched msg from %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&rcv->peer), + ORTE_NAME_PRINT(&msg->sender)); + + /* since names could include wildcards, must use + * the more generalized comparison function + */ + if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &msg->sender, &rcv->peer) && + msg->tag == rcv->tag) { + ORTE_RML_REACTIVATE_MESSAGE(msg); + opal_list_remove_item(&orte_rml_base.unmatched_msgs, item); + if (!get_all) { + break; + } + } + item = next; + } +} + +void orte_rml_base_process_msg(int fd, short flags, void *cbdata) +{ + orte_rml_recv_t *msg = (orte_rml_recv_t*)cbdata; OPAL_OUTPUT_VERBOSE((5, orte_rml_base_framework.framework_output, - "%s message received bytes from %s for tag %d on channel=%d Not Matched adding to unmatched msgs", + "%s message received from %s for tag %d on channel=%d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&msg->sender), msg->tag, msg->channel_num)); - opal_list_append(&orte_rml_base.unmatched_msgs, &msg->super); + + OPAL_TIMING_EVENT((&tm_rml,"from %s %d bytes", + ORTE_NAME_PRINT(&msg->sender), msg->iov.iov_len)); + if ((ORTE_RML_INVALID_CHANNEL_NUM != msg->channel_num) && + (NULL != orte_rml_base_get_channel(msg->channel_num) )) { + + // call channel for recv post processing + if (ORTE_SUCCESS != (orte_rml_base_process_recv_channel (orte_rml_base_get_channel(msg->channel_num), msg))) + { + /* the qos channel has determined an error so we cannot complete this msg to the caller */ + OPAL_OUTPUT_VERBOSE((5, orte_rml_base_framework.framework_output, + "%s QoS channel receive error - cannot complete msg on channel=%d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + msg->channel_num)); + return; + } + + } + orte_rml_base_complete_recv_msg (&msg); } +void orte_rml_base_reprocess_msg(int fd, short flags, void *cbdata) +{ + orte_rml_recv_t *msg = (orte_rml_recv_t*)cbdata; + 
OPAL_OUTPUT_VERBOSE((5, orte_rml_base_framework.framework_output, + "%s reprocessing msg received from %s for tag %d on channel=%d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&msg->sender), + msg->tag, + msg->channel_num)); + + OPAL_TIMING_EVENT((&tm_rml,"from %s %d bytes", + ORTE_NAME_PRINT(&msg->sender), msg->iov.iov_len)); + orte_rml_base_complete_recv_msg ( &msg); + /* the msg should be matched and released in this path + add an assert (msg!= NULL) ?? */ +} diff --git a/orte/mca/rml/base/rml_base_receive.c b/orte/mca/rml/base/rml_base_receive.c index bc77fbe3de9..b2557c56bcc 100644 --- a/orte/mca/rml/base/rml_base_receive.c +++ b/orte/mca/rml/base/rml_base_receive.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007-2012 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -55,7 +55,7 @@ void orte_rml_base_comm_start(void) if (recv_issued) { return; } - + orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_RML_INFO_UPDATE, ORTE_RML_PERSISTENT, @@ -70,7 +70,7 @@ void orte_rml_base_comm_stop(void) if (!recv_issued) { return; } - + orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_RML_INFO_UPDATE); recv_issued = false; } @@ -88,19 +88,19 @@ orte_rml_base_recv(int status, orte_process_name_t* sender, orte_std_cntr_t count; opal_buffer_t *buf; int rc; - + OPAL_OUTPUT_VERBOSE((5, orte_rml_base_framework.framework_output, "%s rml:base:recv: processing message from %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(sender))); - + count = 1; if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &command, &count, ORTE_RML_CMD))) { ORTE_ERROR_LOG(rc); return; } - + switch (command) { case ORTE_RML_UPDATE_CMD: if (ORTE_SUCCESS != (rc = orte_rml_base_update_contact_info(buffer))) { @@ -108,11 +108,11 @@ orte_rml_base_recv(int status, orte_process_name_t* sender, return; } break; - + default: 
ORTE_ERROR_LOG(ORTE_ERR_VALUE_OUT_OF_BOUNDS); } - + /* send an ack back - this is REQUIRED to ensure that the routing * info gets updated -before- a message intending to use that info * arrives. Because message ordering is NOT preserved in the OOB, it diff --git a/orte/mca/rml/ftrm/rml_ftrm_component.c b/orte/mca/rml/ftrm/rml_ftrm_component.c index 7cd5a69b232..96a028b3e9c 100644 --- a/orte/mca/rml/ftrm/rml_ftrm_component.c +++ b/orte/mca/rml/ftrm/rml_ftrm_component.c @@ -5,14 +5,14 @@ * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * $COPYRIGHT$ - * + * * Additional copyrights may follow - * + * * $HEADER$ */ @@ -79,8 +79,7 @@ orte_rml_module_t orte_rml_ftrm_module = { orte_rml_ftrm_del_exception_handler, orte_rml_ftrm_ft_event, - - orte_rml_ftrm_purge + orte_rml_ftrm_purge, }; int rml_ftrm_output_handle; diff --git a/orte/mca/rml/oob/rml_oob_component.c b/orte/mca/rml/oob/rml_oob_component.c index a70c619b9e2..966ebe50b2b 100644 --- a/orte/mca/rml/oob/rml_oob_component.c +++ b/orte/mca/rml/oob/rml_oob_component.c @@ -80,7 +80,6 @@ orte_rml_component_t mca_rml_oob_component = { }; orte_rml_oob_module_t orte_rml_oob_module = { - { orte_rml_oob_init, orte_rml_oob_fini, @@ -99,15 +98,13 @@ orte_rml_oob_module_t orte_rml_oob_module = { orte_rml_oob_add_exception, orte_rml_oob_del_exception, - orte_rml_oob_ft_event, - orte_rml_oob_purge, + orte_rml_oob_open_channel, orte_rml_oob_send_channel_nb, orte_rml_oob_send_buffer_channel_nb, orte_rml_oob_close_channel - } }; /* Local variables */ diff --git a/orte/mca/rml/oob/rml_oob_send.c b/orte/mca/rml/oob/rml_oob_send.c index 48652d81ab7..7c2160c9511 100644 
--- a/orte/mca/rml/oob/rml_oob_send.c +++ b/orte/mca/rml/oob/rml_oob_send.c @@ -302,10 +302,10 @@ int orte_rml_oob_open_channel(orte_process_name_t * peer, return ORTE_ERROR_QOS_UNAVAILABLE;*/ /* process the request in an event to be safe */ req = OBJ_NEW(orte_rml_send_request_t); - req->post.channel.dst = *peer; - req->post.channel.qos_attributes = qos_attributes; - req->post.channel.cbfunc = cbfunc; - req->post.channel.cbdata = cbdata; + req->post.open_channel.dst = *peer; + req->post.open_channel.qos_attributes = qos_attributes; + req->post.open_channel.cbfunc = cbfunc; + req->post.open_channel.cbdata = cbdata; /* setup the event for the open callback */ opal_event_set(orte_event_base, &req->ev, -1, OPAL_EV_WRITE, orte_rml_base_open_channel, req); opal_event_set_priority(&req->ev, ORTE_MSG_PRI); @@ -317,14 +317,46 @@ int orte_rml_oob_open_channel(orte_process_name_t * peer, return ORTE_SUCCESS; } -int orte_rml_oob_send_channel_nb (orte_rml_channel_num_t channel, +int orte_rml_oob_send_channel_nb (orte_rml_channel_num_t channel_num, struct iovec* msg, int count, orte_rml_tag_t tag, orte_rml_send_channel_callback_fn_t cbfunc, void* cbdata) { - // TO DO + orte_rml_send_request_t *req; + orte_rml_channel_t *channel; + OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, + "%s rml_send_buffer to channel %d at tag %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + channel_num, tag)); + + if (ORTE_RML_TAG_INVALID == tag) { + /* cannot send to an invalid tag */ + ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); + return ORTE_ERR_BAD_PARAM; + } + channel = (orte_rml_channel_t*) orte_rml_base_get_channel (channel_num); + if (NULL == channel) { + /* cannot send to a non existing or closed channel */ + ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); + return ORTE_ERR_BAD_PARAM; + } + /* get ourselves into an event to protect against + * race conditions and threads + */ + req = OBJ_NEW(orte_rml_send_request_t); + req->post.send.dst = channel->peer; + req->post.send.iov = msg; + 
req->post.send.count = count; + req->post.send.tag = tag; + req->post.send.cbfunc.iov = cbfunc; + req->post.send.cbdata = cbdata; + req->post.send.channel = channel; + /* setup the event for the send callback */ + opal_event_set(orte_event_base, &req->ev, -1, OPAL_EV_WRITE, send_msg, req); + opal_event_set_priority(&req->ev, ORTE_MSG_PRI); + opal_event_active(&req->ev, OPAL_EV_WRITE, 1); return ORTE_SUCCESS; } @@ -374,9 +406,22 @@ int orte_rml_oob_close_channel (orte_rml_channel_num_t channel_num, void* cbdata) { orte_rml_channel_t *channel; + orte_rml_send_request_t *req; + OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output, + "%s rml_close_channel channel num %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + channel_num)); channel = orte_rml_base_get_channel (channel_num); - if (NULL != channel) { - // TO DO - } + if (NULL == channel) + return ORTE_ERR_BAD_PARAM; + /* process the request in an event to be safe */ + req = OBJ_NEW(orte_rml_send_request_t); + req->post.close_channel.channel = channel; + req->post.close_channel.cbfunc = cbfunc; + req->post.close_channel.cbdata = cbdata; + /* setup the event for the open callback */ + opal_event_set(orte_event_base, &req->ev, -1, OPAL_EV_WRITE, orte_rml_base_close_channel, req); + opal_event_set_priority(&req->ev, ORTE_MSG_PRI); + opal_event_active(&req->ev, OPAL_EV_WRITE, 1); return ORTE_SUCCESS; } diff --git a/orte/mca/rml/rml.h b/orte/mca/rml/rml.h index bec0886466d..ef603f3e9dc 100644 --- a/orte/mca/rml/rml.h +++ b/orte/mca/rml/rml.h @@ -79,6 +79,10 @@ ORTE_DECLSPEC void orte_rml_open_channel_recv_callback(int status, orte_process_name_t* sender, opal_buffer_t *buffer, orte_rml_tag_t tag, void *cbdata); +ORTE_DECLSPEC void orte_rml_close_channel_recv_callback(int status, + orte_process_name_t* sender, + opal_buffer_t *buffer, + orte_rml_tag_t tag, void *cbdata); /* ******************************************************************** */ @@ -693,7 +697,6 @@ struct orte_rml_module_t { /** close a qos 
messaging channel */ orte_rml_module_close_channel_fn_t close_channel; - }; /** Convienence typedef */ typedef struct orte_rml_module_t orte_rml_module_t; diff --git a/orte/test/system/oob_stress_channel.c b/orte/test/system/oob_stress_channel.c index 62361825dda..0cee39c9ed0 100644 --- a/orte/test/system/oob_stress_channel.c +++ b/orte/test/system/oob_stress_channel.c @@ -22,10 +22,26 @@ static volatile bool msgs_recvd; static volatile bool channel_inactive = false; +static volatile bool channel_active = false; static volatile bool msg_active = false; static volatile orte_rml_channel_num_t channel; static volatile int num_msgs_recvd = 0; -static void send_channel_callback(int status, +static volatile int num_msgs_sent = 0; + +static void close_channel_callback(int status, + orte_rml_channel_num_t channel_num, + orte_process_name_t * peer, + opal_list_t *qos_attributes, + void * cbdata) +{ + if (ORTE_SUCCESS != status) + opal_output(0, "close channel not successful status =%d", status); + else + opal_output(0, "close channel successful - channel num = %d", channel_num); + channel_active = false; +} + +static void open_channel_callback(int status, orte_rml_channel_num_t channel_num, orte_process_name_t * peer, opal_list_t *qos_attributes, @@ -41,16 +57,19 @@ static void send_channel_callback(int status, } channel_inactive = false; } + static void send_callback(int status, orte_process_name_t *peer, opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata) { OBJ_RELEASE(buffer); + num_msgs_sent++; if (ORTE_SUCCESS != status) { opal_output(0, "rml_send_nb not successful status =%d", status); } - msg_active = false; + if(num_msgs_sent == 5) + msg_active = false; } static void recv_callback(int status, orte_process_name_t *sender, @@ -64,16 +83,8 @@ static void recv_callback(int status, orte_process_name_t *sender, if ( num_msgs_recvd == 5) { num_msgs_recvd =0; msgs_recvd = false; - /* transfer the sender */ - // blob->name.jobid = sender->jobid; - // blob->name.vpid = 
sender->vpid; - /* just copy the payload to the buf */ - //opal_dss.copy_payload(&blob->data, buffer); - /* flag as complete */ - // blob->active = false; + } - //else - // OBJ_DESTRUCT(blob); } @@ -126,54 +137,45 @@ int main(int argc, char *argv[]){ type = &type_val; window = 5; count =3; - if (ORTE_PROC_MY_NAME->vpid == 0) { - qos_attributes = OBJ_NEW (opal_list_t); - if (ORTE_SUCCESS == (rc = orte_set_attribute( qos_attributes, + qos_attributes = OBJ_NEW (opal_list_t); + if (ORTE_SUCCESS == (rc = orte_set_attribute( qos_attributes, ORTE_QOS_TYPE, ORTE_ATTR_GLOBAL, (void*)type, OPAL_UINT8))) { - type = &window; - if (ORTE_SUCCESS == (rc = orte_set_attribute(qos_attributes, ORTE_QOS_WINDOW_SIZE, + type = &window; + if (ORTE_SUCCESS == (rc = orte_set_attribute(qos_attributes, ORTE_QOS_WINDOW_SIZE, ORTE_ATTR_GLOBAL, (void*) type, OPAL_UINT32))) { - // orte_get_attribute( &qos_attributes, ORTE_QOS_WINDOW_SIZE, (void**)&type, OPAL_UINT32); - // opal_output(0, "%s set attribute window =%d complete \n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), *type ); - type = &timeout; - orte_set_attribute (qos_attributes, ORTE_QOS_ACK_NACK_TIMEOUT, ORTE_ATTR_GLOBAL, + // orte_get_attribute( &qos_attributes, ORTE_QOS_WINDOW_SIZE, (void**)&type, OPAL_UINT32); + // opal_output(0, "%s set attribute window =%d complete \n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), *type ); + type = &timeout; + orte_set_attribute (qos_attributes, ORTE_QOS_ACK_NACK_TIMEOUT, ORTE_ATTR_GLOBAL, (void*)type, OPAL_UINT32); - - orte_set_attribute (qos_attributes, ORTE_QOS_MSG_RETRY, ORTE_ATTR_GLOBAL, + orte_set_attribute (qos_attributes, ORTE_QOS_MSG_RETRY, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); - /* Uncomment following lines to print channel attributes */ - /* - opal_output(0, "%s set attribute retry =%d complete \n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), retry ); - - orte_get_attribute( qos_attributes, ORTE_QOS_TYPE, (void**)&type, OPAL_UINT8); - opal_output(0, "%s set attribute type =%d complete \n", 
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), *type ); - orte_get_attribute( qos_attributes, ORTE_QOS_WINDOW_SIZE, (void**)&type, OPAL_UINT32); - opal_output(0, "%s set attribute window =%d complete \n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), *type ); - orte_get_attribute( qos_attributes, ORTE_QOS_ACK_NACK_TIMEOUT, (void**)&type, OPAL_UINT32); - opal_output(0, "%s set attribute timeout =%d complete \n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), *type );*/ - - channel_inactive = true; - orte_rml.open_channel ( &peer, qos_attributes, send_channel_callback, NULL); - opal_output(0, "%s process sent open channel request %d waiting for completion \n", + /* Uncomment following lines to print channel attributes */ + /* + opal_output(0, "%s set attribute retry =%d complete \n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), retry ); + orte_get_attribute( qos_attributes, ORTE_QOS_TYPE, (void**)&type, OPAL_UINT8); + opal_output(0, "%s set attribute type =%d complete \n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), *type ); + orte_get_attribute( qos_attributes, ORTE_QOS_WINDOW_SIZE, (void**)&type, OPAL_UINT32); + opal_output(0, "%s set attribute window =%d complete \n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), *type ) + orte_get_attribute( qos_attributes, ORTE_QOS_ACK_NACK_TIMEOUT, (void**)&type, OPAL_UINT32); + opal_output(0, "%s set attribute timeout =%d complete \n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), *type );*/ + channel_inactive = true; + orte_rml.open_channel ( &peer, qos_attributes, open_channel_callback, NULL); + opal_output(0, "%s process sent open channel request %d waiting for completion \n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j); - - ORTE_WAIT_FOR_COMPLETION(channel_inactive); - opal_output(0, "%s open channel complete", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) ); - } - } - } - else { - // other process waits to recv a buffer from rank 0 - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, MY_TAG, - ORTE_RML_PERSISTENT, - recv_callback, NULL); + ORTE_WAIT_FOR_COMPLETION(channel_inactive); + opal_output(0, "%s open 
channel complete to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&peer)); + } } - /* send a window of messages to peer on the channel */ - for (j=1; j < count+1; j++) { - if (ORTE_PROC_MY_NAME->vpid == 0) { + for (j = 0; j< count; j++) + { + if (ORTE_PROC_MY_NAME->vpid == 0) + { /* rank0 starts ring */ msg_active = true; - for (n = 0; n< window; n++ ) { + for (n = 0; n< window; n++ ) + { buf = OBJ_NEW(opal_buffer_t); maxpower = (double)(j%7); msgsize = (int)pow(10.0, maxpower); @@ -182,40 +184,49 @@ int main(int argc, char *argv[]){ opal_dss.pack(buf, msg, msgsize, OPAL_BYTE); free(msg); orte_rml.send_buffer_channel_nb(channel, buf, MY_TAG, channel_send_callback, NULL); + OBJ_CONSTRUCT(&blob, orte_rml_recv_cb_t); + blob.active = true; + orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, MY_TAG, + ORTE_RML_NON_PERSISTENT, + orte_rml_recv_callback, &blob); + ORTE_WAIT_FOR_COMPLETION(blob.active); + OBJ_DESTRUCT(&blob); //orte_rml.send_buffer_nb(&peer, buf,MY_TAG, send_callback, NULL) } - //orte_rml.send_buffer_nb(&peer, buf,MY_TAG, send_callback, NULL) - /* wait for it to come around */ - OBJ_CONSTRUCT(&blob, orte_rml_recv_cb_t); - blob.active = true; - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, MY_TAG, - ORTE_RML_NON_PERSISTENT, - orte_rml_recv_callback, &blob); - ORTE_WAIT_FOR_COMPLETION(blob.active); - OBJ_DESTRUCT(&blob); ORTE_WAIT_FOR_COMPLETION(msg_active); opal_output(0, "%s Ring %d completed", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j); - sleep(2); - } - else { - msgs_recvd = true; - ORTE_WAIT_FOR_COMPLETION(msgs_recvd); - buf = OBJ_NEW(opal_buffer_t); - /* send it along */ + //sleep(2); + } + else + { msg_active = true; - maxpower = (double)(j%7); - msgsize = (int)pow(10.0, maxpower); - opal_output(0, "Ring %d message %d size %d bytes", j,n, msgsize); - msg = (uint8_t*)malloc(msgsize); - opal_dss.pack(buf, msg, msgsize, OPAL_BYTE); - free(msg); - orte_rml.send_buffer_nb(&peer, buf, MY_TAG, send_callback, NULL); + for (n =0; n < window; n++) { + 
OBJ_CONSTRUCT(&blob, orte_rml_recv_cb_t); + blob.active = true; + orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, MY_TAG, + ORTE_RML_NON_PERSISTENT, + orte_rml_recv_callback, &blob); + ORTE_WAIT_FOR_COMPLETION(blob.active); + opal_output(0, "%s received message %d from %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j, + ORTE_NAME_PRINT(&blob.name)); + /* send it along */ + buf = OBJ_NEW(opal_buffer_t); + opal_dss.copy_payload(buf, &blob.data); + OBJ_DESTRUCT(&blob); + orte_rml.send_buffer_channel_nb(channel, buf, MY_TAG, channel_send_callback, NULL); + } ORTE_WAIT_FOR_COMPLETION(msg_active); - sleep (2); + opal_output(0, "%s Ring %d completed", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j); + //sleep (2); } } - + channel_active = true; + orte_rml.close_channel ( channel,close_channel_callback, NULL); + opal_output(0, "%s process sent close channel request waiting for completion \n", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + ORTE_WAIT_FOR_COMPLETION(channel_active); + opal_output(0, "%s close channel complete to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(&peer)); orte_finalize(); - return 0; } From 13adb9cdc01d71883e65013e2c5af8cd503bf2ca Mon Sep 17 00:00:00 2001 From: annu13 Date: Tue, 28 Apr 2015 12:56:59 -0700 Subject: [PATCH 13/14] clean up and auto merge fixups --- .../topo/base/topo_base_dist_graph_create.c | 18 +-- opal/class/opal_object.h | 2 - orte/mca/ess/base/ess_base_std_app.c | 27 +--- orte/mca/ess/base/ess_base_std_orted.c | 56 ------- orte/mca/ess/base/ess_base_std_tool.c | 9 -- orte/mca/ess/hnp/ess_hnp_module.c | 49 ------- orte/mca/oob/tcp/oob_tcp.c | 5 +- orte/mca/oob/tcp/oob_tcp_component.c | 31 ++-- orte/mca/oob/tcp/oob_tcp_sendrecv.c | 5 - orte/mca/oob/tcp/oob_tcp_sendrecv.h | 2 +- orte/mca/oob/usock/oob_usock_connection.c | 10 +- orte/mca/oob/usock/oob_usock_hdr.h | 2 +- orte/mca/oob/usock/oob_usock_sendrecv.c | 6 +- orte/mca/oob/usock/oob_usock_sendrecv.h | 2 +- orte/mca/qos/base/oob_base_select.c | 137 ------------------ orte/mca/qos/qos.h | 11 
+- orte/mca/rml/base/base.h | 3 +- orte/mca/rml/base/rml_base_frame.c | 6 +- orte/mca/rml/base/rml_base_receive.c | 7 - orte/mca/rml/oob/rml_oob.h | 2 +- orte/mca/rml/oob/rml_oob_send.c | 21 ++- orte/mca/rml/rml.h | 62 +------- orte/mca/rml/rml_types.h | 15 +- orte/util/attr.h | 2 +- 24 files changed, 68 insertions(+), 422 deletions(-) delete mode 100644 orte/mca/qos/base/oob_base_select.c diff --git a/ompi/mca/topo/base/topo_base_dist_graph_create.c b/ompi/mca/topo/base/topo_base_dist_graph_create.c index 74db0a44ee4..8b61680a33b 100644 --- a/ompi/mca/topo/base/topo_base_dist_graph_create.c +++ b/ompi/mca/topo/base/topo_base_dist_graph_create.c @@ -32,9 +32,9 @@ typedef struct _dist_graph_elem { } mca_topo_base_dist_graph_elem_t; int mca_topo_base_dist_graph_distribute(mca_topo_base_module_t* module, - ompi_communicator_t *comm, + ompi_communicator_t *comm, int n, int nodes[], - int degrees[], int targets[], + int degrees[], int targets[], int weights[], mca_topo_base_comm_dist_graph_2_2_0_t** ptopo) { @@ -279,11 +279,11 @@ int mca_topo_base_dist_graph_distribute(mca_topo_base_module_t* module, } int mca_topo_base_dist_graph_create(mca_topo_base_module_t* module, - ompi_communicator_t *comm_old, + ompi_communicator_t *comm_old, int n, int nodes[], - int degrees[], int targets[], + int degrees[], int targets[], int weights[], - ompi_info_t *info, int reorder, + ompi_info_t *info, int reorder, ompi_communicator_t **newcomm) { int err; @@ -292,22 +292,20 @@ int mca_topo_base_dist_graph_create(mca_topo_base_module_t* module, ompi_communicator_t *new_comm; mca_topo_base_comm_dist_graph_2_2_0_t* topo; - num_procs = ompi_comm_size(comm_old); topo_procs = (ompi_proc_t**)malloc(num_procs * sizeof(ompi_proc_t *)); if (NULL == topo_procs) { return OMPI_ERR_OUT_OF_RESOURCE; } - num_procs = ompi_comm_size(comm_old); new_comm = ompi_comm_allocate(num_procs, 0); if (NULL == new_comm) { free(topo_procs); return OMPI_ERR_OUT_OF_RESOURCE; } err = 
mca_topo_base_dist_graph_distribute(module, - comm_old, + comm_old, n, nodes, - degrees, targets, + degrees, targets, weights, &topo); if( OMPI_SUCCESS != err ) { @@ -320,7 +318,7 @@ int mca_topo_base_dist_graph_create(mca_topo_base_module_t* module, must be set before invoking ompi_comm_enable */ rank = ompi_comm_rank(comm_old); if(OMPI_GROUP_IS_DENSE(comm_old->c_local_group)) { - memcpy(topo_procs, + memcpy(topo_procs, comm_old->c_local_group->grp_proc_pointers, num_procs * sizeof(ompi_proc_t *)); } else { diff --git a/opal/class/opal_object.h b/opal/class/opal_object.h index dd9c33cc33a..02d9b17ada7 100644 --- a/opal/class/opal_object.h +++ b/opal/class/opal_object.h @@ -11,9 +11,7 @@ * All rights reserved. * Copyright (c) 2007-2014 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ - * * Additional copyrights may follow - * * $HEADER$ */ diff --git a/orte/mca/ess/base/ess_base_std_app.c b/orte/mca/ess/base/ess_base_std_app.c index 4762c69f07b..b6b4068e8d9 100644 --- a/orte/mca/ess/base/ess_base_std_app.c +++ b/orte/mca/ess/base/ess_base_std_app.c @@ -12,7 +12,7 @@ * Copyright (c) 2010-2012 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -117,7 +117,6 @@ int orte_ess_base_app_setup(bool db_restrict_local) /* get a separate orte event base */ orte_event_base = opal_start_progress_thread("orte", true); progress_thread_running = true; - /* open and setup the state machine */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -144,7 +143,6 @@ int orte_ess_base_app_setup(bool db_restrict_local) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (NULL == orte_process_info.tmpdir_base) ? 
"UNDEF" : orte_process_info.tmpdir_base, orte_process_info.nodename)); - if (ORTE_SUCCESS != (ret = orte_session_dir(true, orte_process_info.tmpdir_base, orte_process_info.nodename, NULL, @@ -153,13 +151,11 @@ int orte_ess_base_app_setup(bool db_restrict_local) error = "orte_session_dir"; goto error; } - /* Once the session directory location has been established, set the opal_output env file location to be in the proc-specific session directory. */ opal_output_set_output_file_info(orte_process_info.proc_session_dir, "output-", NULL, NULL); - /* store the session directory location in the database */ OBJ_CONSTRUCT(&kv, opal_value_t); kv.key = strdup(OPAL_DSTORE_JOB_SDIR); @@ -188,7 +184,6 @@ int orte_ess_base_app_setup(bool db_restrict_local) } OBJ_DESTRUCT(&kv); } - /* Setup the communication infrastructure */ /* * OOB Layer @@ -203,7 +198,6 @@ int orte_ess_base_app_setup(bool db_restrict_local) error = "orte_oob_base_select"; goto error; } - /* Runtime Messaging Layer */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rml_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -215,7 +209,6 @@ int orte_ess_base_app_setup(bool db_restrict_local) error = "orte_rml_base_select"; goto error; } - /* Messaging QoS Layer */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_qos_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -227,14 +220,12 @@ int orte_ess_base_app_setup(bool db_restrict_local) error = "orte_qos_base_select"; goto error; } - /* setup the errmgr */ if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_errmgr_base_select"; goto error; } - /* Routed system */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -246,7 +237,6 @@ int orte_ess_base_app_setup(bool db_restrict_local) error = "orte_routed_base_select"; goto error; } - /* * Group communications */ @@ -260,7 +250,6 @@ int orte_ess_base_app_setup(bool db_restrict_local) error = 
"orte_grpcomm_base_select"; goto error; } - /* non-daemon/HNP apps can only have the default proxy PLM * module open - provide a chance for it to initialize */ @@ -269,22 +258,18 @@ int orte_ess_base_app_setup(bool db_restrict_local) error = "orte_plm_init"; goto error; } - /* enable communication via the rml */ if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) { ORTE_ERROR_LOG(ret); error = "orte_rml.enable_comm"; goto error; } - /* setup the routed info */ if (ORTE_SUCCESS != (ret = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, NULL))) { ORTE_ERROR_LOG(ret); error = "orte_routed.init_routes"; goto error; } - - #if OPAL_ENABLE_FT_CR == 1 /* * Setup the SnapC @@ -309,13 +294,11 @@ int orte_ess_base_app_setup(bool db_restrict_local) error = "orte_sstore_base_select"; goto error; } - /* apps need the OPAL CR stuff */ opal_cr_set_enabled(true); #else opal_cr_set_enabled(false); #endif - /* Initalize the CR setup * Note: Always do this, even in non-FT builds. * If we don't some user level tools may hang. @@ -325,7 +308,6 @@ int orte_ess_base_app_setup(bool db_restrict_local) error = "orte_cr_init"; goto error; } - /* open the distributed file system */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_dfs_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -337,9 +319,7 @@ int orte_ess_base_app_setup(bool db_restrict_local) error = "orte_dfs_base_select"; goto error; } - return ORTE_SUCCESS; - error: if (!progress_thread_running) { /* can't send the help message, so ensure it @@ -350,7 +330,6 @@ int orte_ess_base_app_setup(bool db_restrict_local) orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); - return ret; } @@ -424,10 +403,8 @@ void orte_ess_base_app_abort(int status, bool report) * clean environment. Taken from orte_finalize(): * - Assume errmgr cleans up child processes before we exit. 
*/ - /* CRS cleanup since it may have a named pipe and thread active */ orte_cr_finalize(); - /* If we were asked to report this termination, do so. * Since singletons don't start an HNP unless necessary, and * direct-launched procs don't have daemons at all, only send @@ -443,11 +420,9 @@ void orte_ess_base_app_abort(int status, bool report) * have a chance to be sent */ nanosleep(&tp, NULL); } - /* - Clean out the global structures * (not really necessary, but good practice) */ orte_proc_info_finalize(); - /* Now Exit */ _exit(status); } diff --git a/orte/mca/ess/base/ess_base_std_orted.c b/orte/mca/ess/base/ess_base_std_orted.c index 89cb33219fa..526c7732d19 100644 --- a/orte/mca/ess/base/ess_base_std_orted.c +++ b/orte/mca/ess/base/ess_base_std_orted.c @@ -124,7 +124,6 @@ int orte_ess_base_orted_setup(char **hosts) opal_proc_local_set(&orte_process_info.super); plm_in_use = false; - /* setup callback for SIGPIPE */ setup_sighandler(SIGPIPE, &epipe_handler, epipe_signal_callback); /* Set signal handlers to catch kill signals so we can properly clean up @@ -132,18 +131,14 @@ int orte_ess_base_orted_setup(char **hosts) */ setup_sighandler(SIGTERM, &term_handler, shutdown_signal); setup_sighandler(SIGINT, &int_handler, shutdown_signal); - /** setup callbacks for signals we should ignore */ setup_sighandler(SIGUSR1, &sigusr1_handler, signal_callback); setup_sighandler(SIGUSR2, &sigusr2_handler, signal_callback); - signals_set = true; - #if OPAL_HAVE_HWLOC { hwloc_obj_t obj; unsigned i, j; - /* get the local topology */ if (NULL == opal_hwloc_topology) { if (OPAL_SUCCESS != (ret = opal_hwloc_base_get_topology())) { @@ -153,7 +148,6 @@ int orte_ess_base_orted_setup(char **hosts) } /* generate the signature */ orte_topo_signature = opal_hwloc_base_get_topo_signature(opal_hwloc_topology); - /* remove the hostname from the topology. Unfortunately, hwloc * decided to add the source hostname to the "topology", thus * rendering it unusable as a pure topological description. 
So @@ -178,14 +172,12 @@ int orte_ess_base_orted_setup(char **hosts) break; } } - if (15 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) { opal_output(0, "%s Topology Info:", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); opal_dss.dump(0, opal_hwloc_topology, OPAL_HWLOC_TOPO); } } #endif - /* open and setup the opal_pstat framework so we can provide * process stats if requested */ @@ -199,7 +191,6 @@ int orte_ess_base_orted_setup(char **hosts) error = "opal_pstat_base_select"; goto error; } - /* open and setup the state machine */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -211,14 +202,12 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_state_base_select"; goto error; } - /* open the errmgr */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_errmgr_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_errmgr_base_open"; goto error; } - /* some environments allow remote launches - e.g., ssh - so * open and select something -only- if we are given * a specific module to use @@ -241,7 +230,6 @@ int orte_ess_base_orted_setup(char **hosts) goto error; } } - /* setup my session directory here as the OOB may need it */ if (orte_create_session_dirs) { OPAL_OUTPUT_VERBOSE((2, orte_ess_base_framework.framework_output, @@ -265,7 +253,6 @@ int orte_ess_base_orted_setup(char **hosts) * stale directories laying around */ orte_session_dir_cleanup(ORTE_JOBID_WILDCARD); - /* now actually create the directory tree */ if (ORTE_SUCCESS != (ret = orte_session_dir(true, orte_process_info.tmpdir_base, @@ -275,18 +262,15 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_session_dir"; goto error; } - /* set the opal_output env file location to be in the * proc-specific session directory. 
*/ opal_output_set_output_file_info(orte_process_info.proc_session_dir, "output-", NULL, NULL); - /* setup stdout/stderr */ if (orte_debug_daemons_file_flag) { /* if we are debugging to a file, then send stdout/stderr to * the orted log file */ - /* get my jobid */ if (ORTE_SUCCESS != (ret = orte_util_convert_jobid_to_string(&jobidstring, ORTE_PROC_MY_NAME->jobid))) { @@ -294,7 +278,6 @@ int orte_ess_base_orted_setup(char **hosts) error = "convert_jobid"; goto error; } - /* define a log file name in the session directory */ snprintf(log_file, PATH_MAX, "output-orted-%s-%s.log", jobidstring, orte_process_info.nodename); @@ -319,7 +302,6 @@ int orte_ess_base_orted_setup(char **hosts) } } } - /* Setup the communication infrastructure */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -341,7 +323,6 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_rml_base_select"; goto error; } - /* Messaging QoS Layer */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_qos_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -353,14 +334,12 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_qos_base_select"; goto error; } - /* select the errmgr */ if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_errmgr_base_select"; goto error; } - /* Routed system */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -372,7 +351,6 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_routed_base_select"; goto error; } - /* * Group communications */ @@ -386,7 +364,6 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_grpcomm_base_select"; goto error; } - /* Open/select the odls */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_odls_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -398,7 +375,6 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_odls_base_select"; goto error; } - /* 
Open/select the rtc */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rtc_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -410,14 +386,12 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_rtc_base_select"; goto error; } - /* enable communication with the rml */ if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) { ORTE_ERROR_LOG(ret); error = "orte_rml.enable_comm"; goto error; } - #if ORTE_ENABLE_STATIC_PORTS /* if we are using static ports, then we need to setup * the daemon info so the RML can function properly @@ -430,7 +404,6 @@ int orte_ess_base_orted_setup(char **hosts) * if we are trying to setup common or static ports */ orte_routed.update_routing_plan(); - /* extract the node info from the environment and * build a nidmap from it */ @@ -446,7 +419,6 @@ int orte_ess_base_orted_setup(char **hosts) * need to do it anyway just to initialize things */ orte_routed.update_routing_plan(); - /* Now provide a chance for the PLM * to perform any module-specific init functions. 
This * needs to occur AFTER the communications are setup @@ -461,7 +433,6 @@ int orte_ess_base_orted_setup(char **hosts) goto error; } } - /* setup the global job and node arrays */ orte_job_data = OBJ_NEW(opal_pointer_array_t); if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_job_data, @@ -472,7 +443,6 @@ int orte_ess_base_orted_setup(char **hosts) error = "setup job array"; goto error; } - orte_node_pool = OBJ_NEW(opal_pointer_array_t); if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_node_pool, ORTE_GLOBAL_ARRAY_BLOCK_SIZE, @@ -491,18 +461,15 @@ int orte_ess_base_orted_setup(char **hosts) error = "setup node topologies array"; goto error; } - /* Setup the job data object for the daemons */ /* create and store the job data object */ jdata = OBJ_NEW(orte_job_t); jdata->jobid = ORTE_PROC_MY_NAME->jobid; opal_pointer_array_set_item(orte_job_data, 0, jdata); - /* every job requires at least one app */ app = OBJ_NEW(orte_app_context_t); opal_pointer_array_set_item(jdata->apps, 0, app); jdata->num_apps++; - /* create and store a node object where we are */ node = OBJ_NEW(orte_node_t); node->name = strdup(orte_process_info.nodename); @@ -511,17 +478,14 @@ int orte_ess_base_orted_setup(char **hosts) /* point our topology to the one detected locally */ node->topology = opal_hwloc_topology; #endif - /* create and store a proc object for us */ proc = OBJ_NEW(orte_proc_t); proc->name.jobid = ORTE_PROC_MY_NAME->jobid; proc->name.vpid = ORTE_PROC_MY_NAME->vpid; - proc->pid = orte_process_info.pid; proc->rml_uri = orte_rml.get_contact_info(); proc->state = ORTE_PROC_STATE_RUNNING; opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc); - /* record that the daemon (i.e., us) is on this node * NOTE: we do not add the proc object to the node's * proc array because we are not an application proc. 
@@ -532,24 +496,20 @@ int orte_ess_base_orted_setup(char **hosts) node->daemon = proc; ORTE_FLAG_SET(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED); node->state = ORTE_NODE_STATE_UP; - /* now point our proc node field to the node */ OBJ_RETAIN(node); /* keep accounting straight */ proc->node = node; - /* record that the daemon job is running */ jdata->num_procs = 1; jdata->state = ORTE_JOB_STATE_RUNNING; /* obviously, we have "reported" */ jdata->num_reported = 1; - /* setup the PMIx server */ if (ORTE_SUCCESS != (ret = pmix_server_init())) { ORTE_ERROR_LOG(ret); error = "pmix server init"; goto error; } - /* setup the routed info - the selected routed component * will know what to do. */ @@ -558,7 +518,6 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_routed.init_routes"; goto error; } - /* setup I/O forwarding system - must come after we init routes */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_iof_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -570,7 +529,6 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_iof_base_select"; goto error; } - /* setup the FileM */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_filem_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -613,7 +571,6 @@ int orte_ess_base_orted_setup(char **hosts) #else opal_cr_set_enabled(false); #endif - /* * Initalize the CR setup * Note: Always do this, even in non-FT builds. 
@@ -624,7 +581,6 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_cr_init"; goto error; } - /* setup the DFS framework */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_dfs_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -636,7 +592,6 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_dfs_select"; goto error; } - /* setup the SCHIZO framework */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_schizo_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -648,14 +603,11 @@ int orte_ess_base_orted_setup(char **hosts) error = "orte_schizo_select"; goto error; } - return ORTE_SUCCESS; - error: orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); - return ORTE_ERR_SILENT; } @@ -669,15 +621,12 @@ int orte_ess_base_orted_finalize(void) opal_event_signal_del(&sigusr1_handler); opal_event_signal_del(&sigusr2_handler); } - /* cleanup */ if (NULL != log_path) { unlink(log_path); } - /* shutdown the pmix server */ pmix_server_finalize(); - /* close frameworks */ (void) mca_base_framework_close(&orte_schizo_base_framework); (void) mca_base_framework_close(&orte_filem_base_framework); @@ -685,10 +634,8 @@ int orte_ess_base_orted_finalize(void) (void) mca_base_framework_close(&orte_iof_base_framework); (void) mca_base_framework_close(&orte_errmgr_base_framework); (void) mca_base_framework_close(&orte_plm_base_framework); - /* close the dfs so its threads can exit */ (void) mca_base_framework_close(&orte_dfs_base_framework); - /* make sure our local procs are dead */ orte_odls.kill_local_procs(NULL); (void) mca_base_framework_close(&orte_rtc_base_framework); @@ -697,12 +644,9 @@ int orte_ess_base_orted_finalize(void) (void) mca_base_framework_close(&orte_rml_base_framework); (void) mca_base_framework_close(&orte_oob_base_framework); (void) mca_base_framework_close(&orte_state_base_framework); - (void) mca_base_framework_close(&opal_dstore_base_framework); - /* cleanup any lingering 
session directories */ orte_session_dir_cleanup(ORTE_JOBID_WILDCARD); - return ORTE_SUCCESS; } diff --git a/orte/mca/ess/base/ess_base_std_tool.c b/orte/mca/ess/base/ess_base_std_tool.c index 069848d8abd..8c8cefa7bee 100644 --- a/orte/mca/ess/base/ess_base_std_tool.c +++ b/orte/mca/ess/base/ess_base_std_tool.c @@ -89,7 +89,6 @@ int orte_ess_base_tool_setup(void) progress_thread_running = true; orte_event_base_active = true; } - /* open and setup the state machine */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -101,7 +100,6 @@ int orte_ess_base_tool_setup(void) error = "orte_state_base_select"; goto error; } - /* open and setup the error manager */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_errmgr_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -113,7 +111,6 @@ int orte_ess_base_tool_setup(void) error = "orte_errmgr_base_select"; goto error; } - /* Setup the communication infrastructure */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -125,7 +122,6 @@ int orte_ess_base_tool_setup(void) error = "orte_oob_base_select"; goto error; } - /* Runtime Messaging Layer */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rml_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -137,7 +133,6 @@ int orte_ess_base_tool_setup(void) error = "orte_rml_base_select"; goto error; } - /* Messaging QoS Layer */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_qos_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -149,7 +144,6 @@ int orte_ess_base_tool_setup(void) error = "orte_qos_base_select"; goto error; } - /* Routed system */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -161,20 +155,17 @@ int orte_ess_base_tool_setup(void) error = "orte_routed_base_select"; goto error; } - /* since I am a tool, then all I really want to do is communicate. 
* So setup communications and be done - finding the HNP * to which I want to communicate and setting up a route for * that link is my responsibility */ - /* enable communication via the rml */ if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) { ORTE_ERROR_LOG(ret); error = "orte_rml.enable_comm"; goto error; } - /* we -may- need to know the name of the head * of our session directory tree, particularly the * tmp base where any other session directories on diff --git a/orte/mca/ess/hnp/ess_hnp_module.c b/orte/mca/ess/hnp/ess_hnp_module.c index 7808554d0f6..5914490974f 100644 --- a/orte/mca/ess/hnp/ess_hnp_module.c +++ b/orte/mca/ess/hnp/ess_hnp_module.c @@ -232,7 +232,6 @@ static int rte_init(void) error = "opal_pstat_base_select"; goto error; } - /* open and setup the state machine */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -262,7 +261,6 @@ static int rte_init(void) error = "orte_plm_base_open"; goto error; } - if (ORTE_SUCCESS != (ret = orte_plm_base_select())) { ORTE_ERROR_LOG(ret); error = "orte_plm_base_select"; @@ -297,7 +295,6 @@ static int rte_init(void) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (NULL == orte_process_info.tmpdir_base) ? 
"UNDEF" : orte_process_info.tmpdir_base, orte_process_info.nodename)); - /* take a pass thru the session directory code to fillin the * tmpdir names - don't create anything yet */ @@ -326,7 +323,6 @@ static int rte_init(void) } /* Setup the communication infrastructure */ - /* * OOB Layer */ @@ -372,7 +368,6 @@ static int rte_init(void) error = "orte_errmgr_base_select"; goto error; } - /* setup the global job and node arrays */ orte_job_data = OBJ_NEW(opal_pointer_array_t); if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_job_data, @@ -383,7 +378,6 @@ static int rte_init(void) error = "setup job array"; goto error; } - orte_node_pool = OBJ_NEW(opal_pointer_array_t); if (ORTE_SUCCESS != (ret = opal_pointer_array_init(orte_node_pool, ORTE_GLOBAL_ARRAY_BLOCK_SIZE, @@ -402,7 +396,6 @@ static int rte_init(void) error = "setup node topologies array"; goto error; } - /* Setup the job data object for the daemons */ /* create and store the job data object */ jdata = OBJ_NEW(orte_job_t); @@ -418,7 +411,6 @@ static int rte_init(void) app = OBJ_NEW(orte_app_context_t); opal_pointer_array_set_item(jdata->apps, 0, app); jdata->num_apps++; - /* create and store a node object where we are */ node = OBJ_NEW(orte_node_t); node->name = strdup(orte_process_info.nodename); @@ -433,19 +425,16 @@ static int rte_init(void) opal_pointer_array_add(orte_node_topologies, t); } #endif - /* create and store a proc object for us */ proc = OBJ_NEW(orte_proc_t); proc->name.jobid = ORTE_PROC_MY_NAME->jobid; proc->name.vpid = ORTE_PROC_MY_NAME->vpid; - proc->pid = orte_process_info.pid; proc->rml_uri = orte_rml.get_contact_info(); proc->state = ORTE_PROC_STATE_RUNNING; OBJ_RETAIN(node); /* keep accounting straight */ proc->node = node; opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc); - /* record that the daemon (i.e., us) is on this node * NOTE: we do not add the proc object to the node's * proc array because we are not an application proc. 
@@ -456,7 +445,6 @@ static int rte_init(void) node->daemon = proc; ORTE_FLAG_SET(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED); node->state = ORTE_NODE_STATE_UP; - /* if we are to retain aliases, get ours */ if (orte_retain_aliases) { aliases = NULL; @@ -468,13 +456,11 @@ static int rte_init(void) orte_set_attribute(&node->attributes, ORTE_NODE_ALIAS, ORTE_ATTR_LOCAL, aptr, OPAL_STRING); free(aptr); } - /* record that the daemon job is running */ jdata->num_procs = 1; jdata->state = ORTE_JOB_STATE_RUNNING; /* obviously, we have "reported" */ jdata->num_reported = 1; - /* * Routed system */ @@ -488,8 +474,6 @@ static int rte_init(void) error = "orte_routed_base_select"; goto error; } - - /* * Group communications */ @@ -503,7 +487,6 @@ static int rte_init(void) error = "orte_grpcomm_base_select"; goto error; } - /* Now provide a chance for the PLM * to perform any module-specific init functions. This * needs to occur AFTER the communications are setup @@ -514,7 +497,6 @@ static int rte_init(void) error = "orte_plm_init"; goto error; } - /* * Setup the remaining resource * management and errmgr frameworks - application procs @@ -531,7 +513,6 @@ static int rte_init(void) error = "orte_ras_base_find_available"; goto error; } - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rmaps_base_framework, 0))) { ORTE_ERROR_LOG(ret); error = "orte_rmaps_base_open"; @@ -589,7 +570,6 @@ static int rte_init(void) } } #endif - /* Open/select the odls */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_odls_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -601,7 +581,6 @@ static int rte_init(void) error = "orte_odls_base_select"; goto error; } - /* Open/select the rtc */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rtc_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -613,21 +592,18 @@ static int rte_init(void) error = "orte_rtc_base_select"; goto error; } - /* enable communication with the rml */ if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) { 
ORTE_ERROR_LOG(ret); error = "orte_rml.enable_comm"; goto error; } - /* we are an hnp, so update the contact info field for later use */ orte_process_info.my_hnp_uri = orte_rml.get_contact_info(); proc->rml_uri = strdup(orte_process_info.my_hnp_uri); /* we are also officially a daemon, so better update that field too */ orte_process_info.my_daemon_uri = strdup(orte_process_info.my_hnp_uri); - /* setup the orte_show_help system to recv remote output */ orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_SHOW_HELP, ORTE_RML_PERSISTENT, orte_show_help_recv, NULL); @@ -637,12 +613,10 @@ static int rte_init(void) * proc-specific session directory. */ opal_output_set_output_file_info(orte_process_info.proc_session_dir, "output-", NULL, NULL); - /* save my contact info in a file for others to find */ jobfam_dir = opal_dirname(orte_process_info.job_session_dir); contact_path = opal_os_path(false, jobfam_dir, "contact.txt", NULL); free(jobfam_dir); - OPAL_OUTPUT_VERBOSE((2, orte_debug_output, "%s writing contact file %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -660,14 +634,12 @@ static int rte_init(void) } free(contact_path); } - /* setup the PMIx server */ if (ORTE_SUCCESS != (ret = pmix_server_init())) { ORTE_ERROR_LOG(ret); error = "pmix server init"; goto error; } - /* setup the routed info - the selected routed component * will know what to do. 
*/ @@ -676,7 +648,6 @@ static int rte_init(void) error = "orte_routed.init_routes"; goto error; } - /* setup I/O forwarding system - must come after we init routes */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_iof_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -688,7 +659,6 @@ static int rte_init(void) error = "orte_iof_base_select"; goto error; } - /* setup the FileM */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_filem_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -700,7 +670,6 @@ static int rte_init(void) error = "orte_filem_base_select"; goto error; } - #if OPAL_ENABLE_FT_CR == 1 /* * Setup the SnapC @@ -731,7 +700,6 @@ static int rte_init(void) #else opal_cr_set_enabled(false); #endif - /* * Initalize the CR setup * Note: Always do this, even in non-FT builds. @@ -742,7 +710,6 @@ static int rte_init(void) error = "orte_cr_init"; goto error; } - /* setup the dfs framework */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_dfs_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -754,7 +721,6 @@ static int rte_init(void) error = "orte_dfs_select"; goto error; } - /* setup the schizo framework */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_schizo_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -766,7 +732,6 @@ static int rte_init(void) error = "orte_schizo_select"; goto error; } - /* if a tool has launched us and is requesting event reports, * then set its contact info into the comm system */ @@ -776,14 +741,12 @@ static int rte_init(void) goto error; } } - /* We actually do *not* want an HNP to voluntarily yield() the processor more than necessary. Orterun already blocks when it is doing nothing, so it doesn't use any more CPU cycles than it should; but when it *is* doing something, we do not want it to be unnecessarily delayed because it voluntarily yielded the processor in the middle of its work. 
- For example: when a message arrives at orterun, we want the OS to wake us up in a timely fashion (which most OS's seem good about doing) and then we want orterun to process @@ -796,7 +759,6 @@ static int rte_init(void) problematic in some scenarios (e.g., COMM_SPAWN, BTL's that require OOB messages for wireup, etc.). */ opal_progress_set_yield_when_idle(false); - return ORTE_SUCCESS; error: @@ -805,7 +767,6 @@ static int rte_init(void) "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); } - return ORTE_ERR_SILENT; } @@ -877,7 +838,6 @@ static int rte_finalize(void) fclose(orte_xml_fp); } } - return ORTE_SUCCESS; } @@ -894,15 +854,12 @@ static void rte_abort(int status, bool report) /* CRS cleanup since it may have a named pipe and thread active */ orte_cr_finalize(); - /* ensure we scrub the session directory tree */ orte_session_dir_cleanup(ORTE_JOBID_WILDCARD); - /* - Clean out the global structures * (not really necessary, but good practice) */ orte_proc_info_finalize(); - /* just exit */ exit(status); } @@ -916,13 +873,10 @@ static void clean_abort(int fd, short flags, void *arg) if (forcibly_die) { /* kill any local procs */ orte_odls.kill_local_procs(NULL); - /* whack any lingering session directory files from our jobs */ orte_session_dir_cleanup(ORTE_JOBID_WILDCARD); - /* cleanup our data server */ orte_data_server_finalize(); - /* exit with a non-zero status */ exit(ORTE_ERROR_DEFAULT_EXIT_CODE); } @@ -937,17 +891,14 @@ static void clean_abort(int fd, short flags, void *arg) /* ensure that the forwarding of stdin stops */ orte_job_term_ordered = true; - /* tell us to be quiet - hey, the user killed us with a ctrl-c, * so need to tell them that! 
*/ orte_execute_quiet = true; - if (!orte_never_launched) { /* cleanup our data server */ orte_data_server_finalize(); } - /* We are in an event handler; the job completed procedure will delete the signal handler that is currently running (which is a Bad Thing), so we can't call it directly. diff --git a/orte/mca/oob/tcp/oob_tcp.c b/orte/mca/oob/tcp/oob_tcp.c index 9cd56aa068f..fa07a9573e6 100644 --- a/orte/mca/oob/tcp/oob_tcp.c +++ b/orte/mca/oob/tcp/oob_tcp.c @@ -239,7 +239,7 @@ static int parse_uri(const uint16_t af_family, else if (AF_INET6 == af_family) { struct sockaddr_in6 *in6; memset(inaddr, 0, sizeof(struct sockaddr_in6));= - in6 = (struct sockaddr_in6*) inaddr; + in6 = (struct sockaddr_in6*) inaddr; if (0 == inet_pton(AF_INET6, host, (void*)&in6->sin6_addr)) { opal_output (0, "oob_tcp_parse_uri: Could not convert %s\n", host); return ORTE_ERR_BAD_PARAM; @@ -249,8 +249,6 @@ static int parse_uri(const uint16_t af_family, else { return ORTE_ERR_NOT_SUPPORTED; } - - return ORTE_SUCCESS; } @@ -582,7 +580,6 @@ static void recv_handler(int sd, short flg, void *cbdata) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), strerror(opal_socket_errno), opal_socket_errno); } } - /* is the peer instance willing to accept this connection */ peer->sd = sd; if (mca_oob_tcp_peer_accept(peer) == false) { diff --git a/orte/mca/oob/tcp/oob_tcp_component.c b/orte/mca/oob/tcp/oob_tcp_component.c index 7d46571a67c..e513ef6aa88 100644 --- a/orte/mca/oob/tcp/oob_tcp_component.c +++ b/orte/mca/oob/tcp/oob_tcp_component.c @@ -162,7 +162,6 @@ static int tcp_component_open(void) "open" failing is not printed */ return ORTE_ERR_NOT_AVAILABLE; } - return ORTE_SUCCESS; } @@ -320,7 +319,6 @@ static int tcp_component_register(void) } #endif #endif - dyn_port_string = NULL; (void)mca_base_component_var_register(component, "dynamic_ipv4_ports", "Range of ports to be dynamically used by daemons and procs (IPv4)", @@ -406,7 +404,7 @@ static int tcp_component_register(void) 
&mca_oob_tcp_component.disable_ipv6_family); #endif - + mca_oob_tcp_component.keepalive_time = 10; (void)mca_base_component_var_register(component, "keepalive_time", "Idle time in seconds before starting to send keepalives (num <= 0 ----> disable keepalive)", @@ -495,7 +493,6 @@ static bool component_available(void) i, opal_ifindextokindex(i)); continue; } - /* ignore non-ip4/6 interfaces */ if (AF_INET != my_ss.ss_family #if OPAL_ENABLE_IPV6 @@ -504,7 +501,6 @@ static bool component_available(void) ) { continue; } - kindex = opal_ifindextokindex(i); if (kindex <= 0) { continue; @@ -638,7 +634,7 @@ static int component_startup(void) static void component_shutdown(void) { - int i; + int i = 0; opal_list_item_t *item; opal_output_verbose(2, orte_oob_base_framework.framework_output, @@ -745,20 +741,26 @@ static int component_set_addr(orte_process_name_t *peer, found = false; for (i=0; NULL != uris[i]; i++) { + tcpuri = strdup(uris[i]); + if (NULL == tcpuri) { + opal_output_verbose(2, orte_oob_base_framework.framework_output, + "%s oob:tcp: out of memory", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + continue; + } if (0 == strncmp(uris[i], "tcp:", 4)) { af_family = AF_INET; - tcpuri = strdup(uris[i]); host = tcpuri + strlen("tcp://"); } else if (0 == strncmp(uris[i], "tcp6:", 5)) { #if OPAL_ENABLE_IPV6 af_family = AF_INET6; - tcpuri = strdup(uris[i]); host = tcpuri + strlen("tcp6://"); #else /* we don't support this connection type */ opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s oob:tcp: address %s not supported", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), uris[i]); + free(tcpuri); continue; #endif } else { @@ -766,6 +768,7 @@ static int component_set_addr(orte_process_name_t *peer, opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s oob:tcp: ignoring address %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), uris[i]); + free(tcpuri); continue; } @@ -774,26 +777,18 @@ static int component_set_addr(orte_process_name_t *peer, "%s oob:tcp: working peer 
%s address %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(peer), uris[i]); - /* separate the ports from the network addrs */ ports = strrchr(tcpuri, ':'); *ports = '\0'; ports++; /* split the addrs */ - if (NULL == host || 0 == strlen(host)) { - opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, - "FORMAT ERROR IN ADDR: %s", - (NULL == host) ? "NULL" : "ZERO LENGTH"); - free(tcpuri); - return ORTE_ERR_BAD_PARAM; - } - /* if this is a tcp6 connection, the first one will have a '[' * at the beginning of it, and the last will have a ']' at the * end - we need to remove those extra characters */ hptr = host; +#if OPAL_ENABLE_IPV6 if (AF_INET6 == af_family) { if ('[' == host[0]) { hptr = &host[1]; @@ -802,6 +797,7 @@ static int component_set_addr(orte_process_name_t *peer, host[strlen(host)-1] = '\0'; } } +#endif addrs = opal_argv_split(hptr, ','); @@ -1179,7 +1175,6 @@ static char **split_and_resolve(char **orig_str, char *name) break; } } - /* If we didn't find a match, keep trying */ if (if_index < 0) { orte_show_help("help-oob-tcp.txt", "invalid if_inexclude", diff --git a/orte/mca/oob/tcp/oob_tcp_sendrecv.c b/orte/mca/oob/tcp/oob_tcp_sendrecv.c index a5e6a7ac8ae..f75827a7f37 100644 --- a/orte/mca/oob/tcp/oob_tcp_sendrecv.c +++ b/orte/mca/oob/tcp/oob_tcp_sendrecv.c @@ -582,11 +582,6 @@ void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata) peer->recv_msg->hdr.channel, peer->recv_msg->hdr.seq_num, peer->recv_msg->data, peer->recv_msg->hdr.nbytes); - opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, - "%s DELIVERED TO RML tag = %d channel = %d seq_num = %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - peer->recv_msg->hdr.tag, peer->recv_msg->hdr.channel, - peer->recv_msg->hdr.seq_num); OBJ_RELEASE(peer->recv_msg); } else { /* promote this to the OOB as some other transport might diff --git a/orte/mca/oob/tcp/oob_tcp_sendrecv.h b/orte/mca/oob/tcp/oob_tcp_sendrecv.h index 
658e4fd8f14..d8ac555b966 100644 --- a/orte/mca/oob/tcp/oob_tcp_sendrecv.h +++ b/orte/mca/oob/tcp/oob_tcp_sendrecv.h @@ -12,7 +12,7 @@ * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/orte/mca/oob/usock/oob_usock_connection.c b/orte/mca/oob/usock/oob_usock_connection.c index 821e561fa4a..3f247c9bd25 100644 --- a/orte/mca/oob/usock/oob_usock_connection.c +++ b/orte/mca/oob/usock/oob_usock_connection.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -94,7 +94,6 @@ static int usock_peer_create_socket(mca_oob_usock_peer_t* peer) "%s oob:usock:peer creating socket to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->name)))); - peer->sd = socket(PF_UNIX, SOCK_STREAM, 0); if (peer->sd < 0) { @@ -248,7 +247,6 @@ void mca_oob_usock_peer_try_connect(int fd, short args, void *cbdata) "Connection across to proc %s succeeded", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&peer->name)); - /* setup our recv to catch the return ack call */ if (!peer->recv_ev_active) { opal_event_add(&peer->recv_event, 0); @@ -279,7 +277,7 @@ static int usock_peer_send_connect_ack(mca_oob_usock_peer_t* peer) size_t sdsize; char *cred; size_t credsize; - + opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s SEND CONNECT ACK", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); @@ -316,7 +314,7 @@ static int usock_peer_send_connect_ack(mca_oob_usock_peer_t* peer) memcpy(msg+sizeof(hdr), orte_version_string, strlen(orte_version_string)); memcpy(msg+sizeof(hdr)+strlen(orte_version_string)+1, cred, credsize); free(cred); - + if (ORTE_SUCCESS != usock_peer_send_blocking(peer, peer->sd, msg, sdsize)) { ORTE_ERROR_LOG(ORTE_ERR_UNREACH); free(msg); @@ -343,7 +341,6 @@ static void usock_peer_event_init(mca_oob_usock_peer_t* peer) opal_event_del(&peer->recv_event); peer->recv_ev_active = false; } - opal_event_set(mca_oob_usock_module.ev_base, &peer->send_event, peer->sd, @@ -871,7 +868,6 @@ void mca_oob_usock_peer_dump(mca_oob_usock_peer_t* peer, const char* msg) strerror(opal_socket_errno), opal_socket_errno); } - #if defined(USOCK_NODELAY) optlen = sizeof(nodelay); if (getsockopt(peer->sd, IPPROTO_USOCK, USOCK_NODELAY, (char *)&nodelay, &optlen) < 0) { diff --git a/orte/mca/oob/usock/oob_usock_hdr.h b/orte/mca/oob/usock/oob_usock_hdr.h index 010d69289ef..c7cad2d998b 100644 --- a/orte/mca/oob/usock/oob_usock_hdr.h +++ b/orte/mca/oob/usock/oob_usock_hdr.h @@ -12,7 +12,7 @@ * Copyright (c) 
2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/orte/mca/oob/usock/oob_usock_sendrecv.c b/orte/mca/oob/usock/oob_usock_sendrecv.c index 2ae8a561af8..b07e42956a3 100644 --- a/orte/mca/oob/usock/oob_usock_sendrecv.c +++ b/orte/mca/oob/usock/oob_usock_sendrecv.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -292,7 +292,6 @@ void mca_oob_usock_send_handler(int sd, short flags, void *cbdata) peer->send_msg = (mca_oob_usock_send_t*) opal_list_remove_first(&peer->send_queue); } - /* if nothing else to do unregister for send event notifications */ if (NULL == peer->send_msg && peer->send_ev_active) { opal_event_del(&peer->send_event); @@ -526,6 +525,9 @@ void mca_oob_usock_recv_handler(int sd, short flags, void *cbdata) if (peer->recv_msg->hdr.dst.jobid == ORTE_PROC_MY_NAME->jobid && peer->recv_msg->hdr.dst.vpid == ORTE_PROC_MY_NAME->vpid) { /* yes - post it to the RML for delivery */ + opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output, + "%s DELIVERING TO RML", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); ORTE_RML_POST_MESSAGE(&peer->recv_msg->hdr.origin, peer->recv_msg->hdr.tag, peer->recv_msg->hdr.channel, peer->recv_msg->hdr.seq_num, peer->recv_msg->data, diff --git a/orte/mca/oob/usock/oob_usock_sendrecv.h b/orte/mca/oob/usock/oob_usock_sendrecv.h index 8614b5530e7..65658da08c7 100644 --- a/orte/mca/oob/usock/oob_usock_sendrecv.h +++ b/orte/mca/oob/usock/oob_usock_sendrecv.h 
@@ -12,7 +12,7 @@ * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/orte/mca/qos/base/oob_base_select.c b/orte/mca/qos/base/oob_base_select.c deleted file mode 100644 index 0cb01b05e49..00000000000 --- a/orte/mca/qos/base/oob_base_select.c +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include -#include - -#include "opal/mca/mca.h" -#include "opal/util/output.h" -#include "opal/mca/base/base.h" - -#include "orte/util/show_help.h" - -#include "orte/runtime/orte_globals.h" -#include "orte/mca/oob/oob.h" -#include "orte/mca/oob/base/base.h" - - -/** - * Function for selecting all runnable modules from those that are - * available. - * - * Call the init function on all available modules. 
- */ -int orte_oob_base_select(void) -{ - mca_base_component_list_item_t *cli, *cmp, *c2; - mca_oob_base_component_t *component, *c3; - bool added; - int i; - - /* Query all available components and ask if their transport is available */ - OPAL_LIST_FOREACH(cli, &orte_oob_base_framework.framework_components, mca_base_component_list_item_t) { - component = (mca_oob_base_component_t *) cli->cli_component; - - opal_output_verbose(5, orte_oob_base_framework.framework_output, - "mca:oob:select: checking available component %s", - component->oob_base.mca_component_name); - - /* If there's no query function, skip it */ - if (NULL == component->available) { - opal_output_verbose(5, orte_oob_base_framework.framework_output, - "mca:oob:select: Skipping component [%s]. It does not implement a query function", - component->oob_base.mca_component_name ); - continue; - } - - /* Query the component */ - opal_output_verbose(5, orte_oob_base_framework.framework_output, - "mca:oob:select: Querying component [%s]", - component->oob_base.mca_component_name); - - /* If the component is not available, then skip it as - * it has no available interfaces - */ - if (!component->available()) { - opal_output_verbose(5, orte_oob_base_framework.framework_output, - "mca:oob:select: Skipping component [%s] - no available interfaces", - component->oob_base.mca_component_name ); - continue; - } - - /* if it fails to startup, then skip it */ - if (ORTE_SUCCESS != component->startup()) { - opal_output_verbose(5, orte_oob_base_framework.framework_output, - "mca:oob:select: Skipping component [%s] - failed to startup", - component->oob_base.mca_component_name ); - continue; - } - - /* record it, but maintain priority order */ - added = false; - OPAL_LIST_FOREACH(cmp, &orte_oob_base.actives, mca_base_component_list_item_t) { - c3 = (mca_oob_base_component_t *) cmp->cli_component; - if (c3->priority > component->priority) { - continue; - } - opal_output_verbose(5, 
orte_oob_base_framework.framework_output, - "mca:oob:select: Inserting component"); - c2 = OBJ_NEW(mca_base_component_list_item_t); - c2->cli_component = (mca_base_component_t*)component; - opal_list_insert_pos(&orte_oob_base.actives, - &cmp->super, &c2->super); - added = true; - break; - } - if (!added) { - /* add to end */ - opal_output_verbose(5, orte_oob_base_framework.framework_output, - "mca:oob:select: Adding component to end"); - c2 = OBJ_NEW(mca_base_component_list_item_t); - c2->cli_component = (mca_base_component_t*)component; - opal_list_append(&orte_oob_base.actives, &c2->super); - } - } - - if (0 == opal_list_get_size(&orte_oob_base.actives)) { - /* no support available means we really cannot run */ - opal_output_verbose(5, orte_oob_base_framework.framework_output, - "mca:oob:select: Init failed to return any available transports"); - orte_show_help("help-oob-base.txt", "no-interfaces-avail", true); - return ORTE_ERR_SILENT; - } - - /* provide them an index so we can track their usability in a bitmap */ - i=0; - OPAL_LIST_FOREACH(cmp, &orte_oob_base.actives, mca_base_component_list_item_t) { - c3 = (mca_oob_base_component_t *) cmp->cli_component; - c3->idx = i++; - } - - opal_output_verbose(5, orte_oob_base_framework.framework_output, - "mca:oob:select: Found %d active transports", - (int)opal_list_get_size(&orte_oob_base.actives)); - return ORTE_SUCCESS; -} diff --git a/orte/mca/qos/qos.h b/orte/mca/qos/qos.h index 03bab52f152..378a8b9d93c 100644 --- a/orte/mca/qos/qos.h +++ b/orte/mca/qos/qos.h @@ -152,17 +152,8 @@ typedef struct { * Macro for use in components that are of type oob */ #define MCA_QOS_BASE_VERSION_2_0_0 \ -MCA_BASE_VERSION_2_0_0, \ -"qos", 2, 0, 0 +ORTE_MCA_BASE_VERSION_2_1_0 ("qos", 2, 0, 0) END_C_DECLS #endif - - - - - - - - diff --git a/orte/mca/rml/base/base.h b/orte/mca/rml/base/base.h index 3965a401dd8..e4d9119ede8 100644 --- a/orte/mca/rml/base/base.h +++ b/orte/mca/rml/base/base.h @@ -12,7 +12,7 @@ * All rights reserved. 
* Copyright (c) 2007-2014 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014 -2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -112,7 +112,6 @@ ORTE_DECLSPEC extern opal_list_t orte_rml_base_components; * Component structure pointer for the currently selected RML * component. Useable between calls to orte_rml_base_select() and * orte_rml_base_close(). - * * @note This pointer should not be used outside the RML base. It is * available outside the RML base only for the F/T component. */ diff --git a/orte/mca/rml/base/rml_base_frame.c b/orte/mca/rml/base/rml_base_frame.c index 1cae2c3e965..0d763a3adc8 100644 --- a/orte/mca/rml/base/rml_base_frame.c +++ b/orte/mca/rml/base/rml_base_frame.c @@ -5,7 +5,7 @@ * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Intel Corporation. All rights reserved. + * Copyright (c) 2014 -2015 Intel Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -80,7 +80,6 @@ static int orte_rml_base_close(void) while (NULL != (item = opal_list_remove_first(&orte_rml_base.posted_recvs))) { OBJ_RELEASE(item); - } OBJ_DESTRUCT(&orte_rml_base.posted_recvs); @@ -165,7 +164,6 @@ int orte_rml_base_select(void) if (NULL != selected_module && NULL != selected_module->finalize) { selected_module->finalize(); } - selected_priority = priority; selected_component = component; selected_module = module; @@ -354,5 +352,3 @@ static void prq_des(orte_rml_recv_request_t *ptr) OBJ_CLASS_INSTANCE(orte_rml_recv_request_t, opal_object_t, prq_cons, prq_des); - - diff --git a/orte/mca/rml/base/rml_base_receive.c b/orte/mca/rml/base/rml_base_receive.c index b2557c56bcc..3fbc2516c72 100644 --- a/orte/mca/rml/base/rml_base_receive.c +++ b/orte/mca/rml/base/rml_base_receive.c @@ -55,7 +55,6 @@ void orte_rml_base_comm_start(void) if (recv_issued) { return; } - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_RML_INFO_UPDATE, ORTE_RML_PERSISTENT, @@ -70,7 +69,6 @@ void orte_rml_base_comm_stop(void) if (!recv_issued) { return; } - orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_RML_INFO_UPDATE); recv_issued = false; } @@ -89,12 +87,10 @@ orte_rml_base_recv(int status, orte_process_name_t* sender, opal_buffer_t *buf; int rc; - OPAL_OUTPUT_VERBOSE((5, orte_rml_base_framework.framework_output, "%s rml:base:recv: processing message from %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(sender))); - count = 1; if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &command, &count, ORTE_RML_CMD))) { ORTE_ERROR_LOG(rc); @@ -108,11 +104,9 @@ orte_rml_base_recv(int status, orte_process_name_t* sender, return; } break; - default: ORTE_ERROR_LOG(ORTE_ERR_VALUE_OUT_OF_BOUNDS); } - /* send an ack back - this is REQUIRED to ensure that the routing * info gets updated -before- a message intending to use that info * arrives. 
Because message ordering is NOT preserved in the OOB, it @@ -124,7 +118,6 @@ orte_rml_base_recv(int status, orte_process_name_t* sender, "%s rml:base:recv: sending ack to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(sender))); - buf = OBJ_NEW(opal_buffer_t); if (0 > (rc = orte_rml.send_buffer_nb(sender, buf, ORTE_RML_TAG_UPDATE_ROUTE_ACK, orte_rml_send_callback, NULL))) { diff --git a/orte/mca/rml/oob/rml_oob.h b/orte/mca/rml/oob/rml_oob.h index 2ec3b5b1861..6c7741ac39a 100644 --- a/orte/mca/rml/oob/rml_oob.h +++ b/orte/mca/rml/oob/rml_oob.h @@ -14,7 +14,7 @@ * All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014 -2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/orte/mca/rml/oob/rml_oob_send.c b/orte/mca/rml/oob/rml_oob_send.c index 7c2160c9511..57cb05d81b8 100644 --- a/orte/mca/rml/oob/rml_oob_send.c +++ b/orte/mca/rml/oob/rml_oob_send.c @@ -233,6 +233,12 @@ int orte_rml_oob_send_nb(orte_process_name_t* peer, ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); return ORTE_ERR_BAD_PARAM; } + if( NULL == peer || + OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_NAME_INVALID, peer) ) { + /* cannot send to an invalid peer */ + ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); + return ORTE_ERR_BAD_PARAM; + } /* get ourselves into an event to protect against * race conditions and threads */ @@ -270,7 +276,12 @@ int orte_rml_oob_send_buffer_nb(orte_process_name_t* peer, ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); return ORTE_ERR_BAD_PARAM; } - + if( NULL == peer || + OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_NAME_INVALID, peer) ) { + /* cannot send to an invalid peer */ + ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); + return ORTE_ERR_BAD_PARAM; + } /* get ourselves into an event to protect against * race conditions and threads */ @@ -298,8 +309,12 @@ int 
orte_rml_oob_open_channel(orte_process_name_t * peer, "%s rml_open_channel to peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(peer))); - /*if (!(orte_qos_base_have_qos_component_for_channel(qos_attributes))) - return ORTE_ERROR_QOS_UNAVAILABLE;*/ + if( NULL == peer || + OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_NAME_INVALID, peer) ) { + /* cannot send to an invalid peer */ + ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); + return ORTE_ERR_BAD_PARAM; + } /* process the request in an event to be safe */ req = OBJ_NEW(orte_rml_send_request_t); req->post.open_channel.dst = *peer; diff --git a/orte/mca/rml/rml.h b/orte/mca/rml/rml.h index 9770b5e4bd6..acd064c1489 100644 --- a/orte/mca/rml/rml.h +++ b/orte/mca/rml/rml.h @@ -13,7 +13,7 @@ * Copyright (c) 2015 Intel, Inc. All rights reserved. * * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights - * reserved. + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -77,9 +77,9 @@ ORTE_DECLSPEC void orte_rml_recv_callback(int status, orte_process_name_t* sende orte_rml_tag_t tag, void *cbdata); ORTE_DECLSPEC void orte_rml_open_channel_recv_callback(int status, - orte_process_name_t* sender, - opal_buffer_t *buffer, - orte_rml_tag_t tag, void *cbdata); + orte_process_name_t* sender, + opal_buffer_t *buffer, + orte_rml_tag_t tag, void *cbdata); ORTE_DECLSPEC void orte_rml_close_channel_recv_callback(int status, orte_process_name_t* sender, opal_buffer_t *buffer, @@ -442,9 +442,8 @@ typedef void (*orte_rml_module_purge_fn_t)(orte_process_name_t *peer); /********* NEW RML QOS MESSAGING APIS *****************/ /***** Questions *****/ /* - 1: Should the send and recv fns take the peer param as well for validation? - 2: Should we provide a func for the user to get qos attributes of a channel? (do we allow for sets??) - 3: Should open channel - have a channel error callback function? +1 : Should we provide a func for the user to get qos attributes of a channel? 
(do we allow for sets??) +2: Should open channel - have a channel error callback function? */ typedef void (*orte_rml_channel_callback_fn_t) (int status, orte_rml_channel_num_t channel_num, @@ -573,55 +572,6 @@ typedef int (*orte_rml_module_send_buffer_channel_nb_fn_t) (orte_rml_channel_num orte_rml_send_buffer_channel_callback_fn_t cbfunc, void* cbdata); -/** - * Receive an iovec non-blocking message - * - * @param[in] channel specific channel established with the peer of receiving msgs. - * @param[in] tag User defined tag for matching send/recv - * @param[in] persistent Boolean flag indicating whether or not this is a one-time recv - * @param[in] cbfunc Callback function on message comlpetion - * @param[in] cbdata User data to provide during completion callback - * - * @retval ORTE_SUCCESS - succesfully posted a recv request for the channel - * @retval ORTE_CHANNEL_UNAVAILABLE - the specific channel does not exist or is not available for receiving msgs. - */ -typedef int (*orte_rml_module_recv_channel_nb_fn_t)(orte_rml_channel_num_t channel, - orte_rml_tag_t tag, - bool persistent, - orte_rml_callback_fn_t cbfunc, - void* cbdata); - - -/** - * Receive a buffer non-blocking message - * - * @param[in] channel specific channel established with the peer of receiving msgs. - * @param[in] tag User defined tag for matching send/recv - * @param[in] persistent Boolean flag indicating whether or not this is a one-time recv - * @param[in] cbfunc Callback function on message comlpetion - * @param[in] cbdata User data to provide during completion callback - * - * @retval ORTE_SUCCESS - succesfully posted a recv request for the channel - * @retval ORTE_CHANNEL_UNAVAILABLE - the specific channel does not exist or is not available for receiving msgs. 
- */ -typedef int (*orte_rml_module_recv_buffer_channel_nb_fn_t)(orte_rml_channel_num_t channel, - orte_rml_tag_t tag, - bool persistent, - orte_rml_buffer_callback_fn_t cbfunc, - void* cbdata); - - -/** - * Cancel a posted non-blocking receive - * - * Attempt to cancel a posted non-blocking receive. - * - * @param[in] channel Specific channel or ORTE_ANY_CHANNEL for wild card receive - * @param[in] tag Posted receive tag - */ -typedef void (*orte_rml_module_recv_channel_cancel_fn_t)(orte_rml_channel_num_t channel, - orte_rml_tag_t tag); - /** * * close a messaging channel with specified QoS to a specific peer * diff --git a/orte/mca/rml/rml_types.h b/orte/mca/rml/rml_types.h index b434e6c0a1e..cba9f07fc67 100644 --- a/orte/mca/rml/rml_types.h +++ b/orte/mca/rml/rml_types.h @@ -12,7 +12,7 @@ * Copyright (c) 2007-2012 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2009-2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -154,14 +154,11 @@ BEGIN_C_DECLS #define ORTE_RML_TAG_CONFIRM_SPAWN 53 /*** QOS specific RML TAGS ***/ -#define ORTE_RML_TAG_OPEN_CHANNEL_REQ 54 -#define ORTE_RML_TAG_OPEN_CHANNEL_RESP 55 -#define ORTE_RML_TAG_MSG_ACK 56 -#define ORTE_RML_TAG_CLOSE_CHANNEL_REQ 57 -#define ORTE_RML_TAG_CLOSE_CHANNEL_ACCEPT 58 - - - +#define ORTE_RML_TAG_OPEN_CHANNEL_REQ 54 +#define ORTE_RML_TAG_OPEN_CHANNEL_RESP 55 +#define ORTE_RML_TAG_MSG_ACK 56 +#define ORTE_RML_TAG_CLOSE_CHANNEL_REQ 57 +#define ORTE_RML_TAG_CLOSE_CHANNEL_ACCEPT 58 #define ORTE_RML_TAG_MAX 100 diff --git a/orte/util/attr.h b/orte/util/attr.h index 168dd2e423b..f8d6fc6aac3 100644 --- a/orte/util/attr.h +++ b/orte/util/attr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Intel, Inc. All rights reserved + * Copyright (c) 2014-2015 Intel, Inc. 
All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow From e75963eee9fe574355f24f0a60ba638c4e4d0079 Mon Sep 17 00:00:00 2001 From: annu13 Date: Wed, 29 Apr 2015 16:48:28 -0700 Subject: [PATCH 14/14] Update topo_base_dist_graph_create.c undoing whitespace auto insertions --- ompi/mca/topo/base/topo_base_dist_graph_create.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ompi/mca/topo/base/topo_base_dist_graph_create.c b/ompi/mca/topo/base/topo_base_dist_graph_create.c index 8b61680a33b..fdbeb0f82e1 100644 --- a/ompi/mca/topo/base/topo_base_dist_graph_create.c +++ b/ompi/mca/topo/base/topo_base_dist_graph_create.c @@ -281,7 +281,7 @@ int mca_topo_base_dist_graph_distribute(mca_topo_base_module_t* module, int mca_topo_base_dist_graph_create(mca_topo_base_module_t* module, ompi_communicator_t *comm_old, int n, int nodes[], - int degrees[], int targets[], + int degrees[], int targets[], int weights[], ompi_info_t *info, int reorder, ompi_communicator_t **newcomm)