diff --git a/ompi/mca/osc/base/osc_base_obj_convert.c b/ompi/mca/osc/base/osc_base_obj_convert.c index 98105236b1c..a5c3a694ecb 100644 --- a/ompi/mca/osc/base/osc_base_obj_convert.c +++ b/ompi/mca/osc/base/osc_base_obj_convert.c @@ -13,7 +13,8 @@ * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -114,7 +115,7 @@ int ompi_osc_base_process_op (void *outbuf, void *inbuf, size_t inbuflen, iov_count = OMPI_OSC_BASE_DECODE_MAX; done = opal_convertor_raw (&convertor, iov, &iov_count, &size); - for (int i = 0 ; i < iov_count ; ++i) { + for (uint32_t i = 0 ; i < iov_count ; ++i) { int primitive_count = iov[i].iov_len / primitive_size; ompi_op_reduce (op, inbuf, iov[i].iov_base, primitive_count, primitive_datatype); inbuf = (void *)((intptr_t) inbuf + iov[i].iov_len); diff --git a/orte/mca/oob/tcp/oob_tcp_common.c b/orte/mca/oob/tcp/oob_tcp_common.c index fb552bc4766..9e5b36ed75e 100644 --- a/orte/mca/oob/tcp/oob_tcp_common.c +++ b/orte/mca/oob/tcp/oob_tcp_common.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -42,32 +42,28 @@ #ifdef HAVE_NETINET_IN_H #include #endif +#ifdef HAVE_NETINET_TCP_H +#include +#endif #ifdef HAVE_ARPA_INET_H #include #endif #ifdef HAVE_NETDB_H #include #endif +#ifdef HAVE_SYS_SOCKET_H +#include +#endif #include -#include "opal/util/show_help.h" #include "opal/util/error.h" #include "opal/util/output.h" #include "opal/opal_socket_errno.h" #include "opal/util/if.h" #include "opal/util/net.h" -#include "opal/util/argv.h" #include "opal/class/opal_hash_table.h" -#include "opal/class/opal_list.h" #include "opal/mca/backtrace/backtrace.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/ess/ess.h" -#include "orte/util/name_fns.h" -#include "orte/util/parse_options.h" -#include "orte/util/show_help.h" -#include "orte/runtime/orte_globals.h" - #include "orte/mca/oob/tcp/oob_tcp.h" #include "orte/mca/oob/tcp/oob_tcp_component.h" #include "oob_tcp_peer.h" @@ -77,12 +73,81 @@ * Set socket buffering */ +static void set_keepalive(int sd) +{ + int option; + socklen_t optlen; + + /* Enable keepalive on socket sd and apply the component's keepalive_time, keepalive_intvl and keepalive_probes values through whichever TCP_* options this platform defines; a failed setsockopt() is reported via opal_output() and ends the setup. First, see if the keepalive option is available by querying it */ + optlen = sizeof(option); + if (getsockopt(sd, SOL_SOCKET, SO_KEEPALIVE, &option, &optlen) < 0) { + /* not available, so just return without reporting anything */ + return; + } + + /* Set the option active, passing the optlen that went through getsockopt() above */ + option = 1; + if (setsockopt(sd, SOL_SOCKET, SO_KEEPALIVE, &option, optlen) < 0) { + opal_output(0, "[%s:%d] setsockopt(SO_KEEPALIVE) failed: %s (%d)", + __FILE__, __LINE__, + strerror(opal_socket_errno), + opal_socket_errno); + return; + } +#if defined(TCP_KEEPALIVE) + /* set the idle time from keepalive_time via TCP_KEEPALIVE when that option is defined */ + if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPALIVE, + &mca_oob_tcp_component.keepalive_time, + sizeof(mca_oob_tcp_component.keepalive_time)) < 0) { + opal_output(0, "[%s:%d] setsockopt(TCP_KEEPALIVE) failed: %s (%d)", + __FILE__, __LINE__, + strerror(opal_socket_errno), + opal_socket_errno); + return; + } +#elif defined(TCP_KEEPIDLE) + /* otherwise set the idle time from keepalive_time via TCP_KEEPIDLE */ + if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPIDLE, 
&mca_oob_tcp_component.keepalive_time, + sizeof(mca_oob_tcp_component.keepalive_time)) < 0) { + opal_output(0, "[%s:%d] setsockopt(TCP_KEEPIDLE) failed: %s (%d)", + __FILE__, __LINE__, + strerror(opal_socket_errno), + opal_socket_errno); + return; + } +#endif // TCP_KEEPALIVE || TCP_KEEPIDLE +#if defined(TCP_KEEPINTVL) + /* set the keepalive interval from keepalive_intvl */ + if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPINTVL, + &mca_oob_tcp_component.keepalive_intvl, + sizeof(mca_oob_tcp_component.keepalive_intvl)) < 0) { + opal_output(0, "[%s:%d] setsockopt(TCP_KEEPINTVL) failed: %s (%d)", + __FILE__, __LINE__, + strerror(opal_socket_errno), + opal_socket_errno); + return; + } +#endif // TCP_KEEPINTVL +#if defined(TCP_KEEPCNT) + /* set the number of keepalives that can be missed (keepalive_probes) */ + if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPCNT, + &mca_oob_tcp_component.keepalive_probes, + sizeof(mca_oob_tcp_component.keepalive_probes)) < 0) { + opal_output(0, "[%s:%d] setsockopt(TCP_KEEPCNT) failed: %s (%d)", + __FILE__, __LINE__, + strerror(opal_socket_errno), + opal_socket_errno); + } +#endif // TCP_KEEPCNT +} + void orte_oob_tcp_set_socket_options(int sd) { #if defined(TCP_NODELAY) int optval; optval = 1; - if(setsockopt(sd, IPPROTO_TCP, TCP_NODELAY, (char *)&optval, sizeof(optval)) < 0) { + if (setsockopt(sd, IPPROTO_TCP, TCP_NODELAY, (char *)&optval, sizeof(optval)) < 0) { opal_backtrace_print(stderr, NULL, 1); opal_output(0, "[%s:%d] setsockopt(TCP_NODELAY) failed: %s (%d)", __FILE__, __LINE__, @@ -91,8 +156,8 @@ void orte_oob_tcp_set_socket_options(int sd) } #endif #if defined(SO_SNDBUF) - if(mca_oob_tcp_component.tcp_sndbuf > 0 && - setsockopt(sd, SOL_SOCKET, SO_SNDBUF, (char *)&mca_oob_tcp_component.tcp_sndbuf, sizeof(int)) < 0) { + if (mca_oob_tcp_component.tcp_sndbuf > 0 && + setsockopt(sd, SOL_SOCKET, SO_SNDBUF, (char *)&mca_oob_tcp_component.tcp_sndbuf, sizeof(int)) < 0) { opal_output(0, "[%s:%d] setsockopt(SO_SNDBUF) failed: %s (%d)", __FILE__, __LINE__, strerror(opal_socket_errno), @@ -100,14 +165,19 @@ void 
orte_oob_tcp_set_socket_options(int sd) } #endif #if defined(SO_RCVBUF) - if(mca_oob_tcp_component.tcp_rcvbuf > 0 && - setsockopt(sd, SOL_SOCKET, SO_RCVBUF, (char *)&mca_oob_tcp_component.tcp_rcvbuf, sizeof(int)) < 0) { + if (mca_oob_tcp_component.tcp_rcvbuf > 0 && + setsockopt(sd, SOL_SOCKET, SO_RCVBUF, (char *)&mca_oob_tcp_component.tcp_rcvbuf, sizeof(int)) < 0) { opal_output(0, "[%s:%d] setsockopt(SO_RCVBUF) failed: %s (%d)", __FILE__, __LINE__, strerror(opal_socket_errno), opal_socket_errno); } #endif +#if defined(SO_KEEPALIVE) + if (0 < mca_oob_tcp_component.keepalive_time) { + set_keepalive(sd); + } +#endif // SO_KEEPALIVE } mca_oob_tcp_peer_t* mca_oob_tcp_peer_lookup(const orte_process_name_t *name) diff --git a/orte/mca/oob/tcp/oob_tcp_component.c b/orte/mca/oob/tcp/oob_tcp_component.c index 383a078e5d9..5206bb5eafc 100644 --- a/orte/mca/oob/tcp/oob_tcp_component.c +++ b/orte/mca/oob/tcp/oob_tcp_component.c @@ -410,6 +410,29 @@ static int tcp_component_register(void) &mca_oob_tcp_component.disable_ipv6_family); #endif + + mca_oob_tcp_component.keepalive_time = 10; + (void)mca_base_component_var_register(component, "keepalive_time", + "Idle time in seconds before starting to send keepalives (num <= 0 ----> disable keepalive)", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_oob_tcp_component.keepalive_time); + + mca_oob_tcp_component.keepalive_intvl = 60; + (void)mca_base_component_var_register(component, "keepalive_intvl", + "Time between keepalives, in seconds", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_oob_tcp_component.keepalive_intvl); + mca_oob_tcp_component.keepalive_probes = 3; + (void)mca_base_component_var_register(component, "keepalive_probes", + "Number of keepalives that can be missed before declaring error", + MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &mca_oob_tcp_component.keepalive_probes); return 
ORTE_SUCCESS; } diff --git a/orte/mca/oob/tcp/oob_tcp_component.h b/orte/mca/oob/tcp/oob_tcp_component.h index a6657caace1..cd48d2e639f 100644 --- a/orte/mca/oob/tcp/oob_tcp_component.h +++ b/orte/mca/oob/tcp/oob_tcp_component.h @@ -12,7 +12,7 @@ * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -77,6 +77,9 @@ typedef struct { bool listen_thread_active; struct timeval listen_thread_tv; /**< Timeout when using listen thread */ int stop_thread[2]; /**< pipe used to exit the listen thread */ + int keepalive_probes; /**< number of keepalives that can be missed before declaring error */ + int keepalive_time; /**< idle time in seconds before starting to send keepalives */ + int keepalive_intvl; /**< time between keepalives, in seconds */ } mca_oob_tcp_component_t; ORTE_MODULE_DECLSPEC extern mca_oob_tcp_component_t mca_oob_tcp_component;