diff --git a/orte/mca/oob/tcp/oob_tcp_common.c b/orte/mca/oob/tcp/oob_tcp_common.c
index fb552bc476..41c3ad1b62 100644
--- a/orte/mca/oob/tcp/oob_tcp_common.c
+++ b/orte/mca/oob/tcp/oob_tcp_common.c
@@ -77,6 +77,87 @@
  * Set socket buffering
  */
 
+#if defined(SO_KEEPALIVE)
+/*
+ * Turn on TCP keepalive probing for socket sd using the idle time,
+ * probe interval and probe count stored in mca_oob_tcp_component.
+ * Each tunable is applied only if the platform defines the matching
+ * socket option.  A setsockopt() failure is reported via opal_output()
+ * and the remaining options are skipped.
+ *
+ * Compiled only when SO_KEEPALIVE exists, matching the guard around
+ * the call in orte_oob_tcp_set_socket_options().
+ */
+static void set_keepalive(int sd)
+{
+    int option;
+    socklen_t optlen;
+
+    /* see if the keepalive option is available */
+    optlen = sizeof(option);
+    if (getsockopt(sd, SOL_SOCKET, SO_KEEPALIVE, &option, &optlen) < 0) {
+        /* not available, so just return */
+        return;
+    }
+
+    /* Set the option active */
+    option = 1;
+    if (setsockopt(sd, SOL_SOCKET, SO_KEEPALIVE, &option, optlen) < 0) {
+        opal_output(0, "[%s:%d] setsockopt(SO_KEEPALIVE) failed: %s (%d)",
+                    __FILE__, __LINE__,
+                    strerror(opal_socket_errno),
+                    opal_socket_errno);
+        return;
+    }
+#if defined(TCP_KEEPALIVE)
+    /* set the idle time */
+    if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPALIVE,
+                   &mca_oob_tcp_component.keepalive_time,
+                   sizeof(mca_oob_tcp_component.keepalive_time)) < 0) {
+        opal_output(0, "[%s:%d] setsockopt(TCP_KEEPALIVE) failed: %s (%d)",
+                    __FILE__, __LINE__,
+                    strerror(opal_socket_errno),
+                    opal_socket_errno);
+        return;
+    }
+#elif defined(TCP_KEEPIDLE)
+    /* set the idle time */
+    if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPIDLE,
+                   &mca_oob_tcp_component.keepalive_time,
+                   sizeof(mca_oob_tcp_component.keepalive_time)) < 0) {
+        opal_output(0, "[%s:%d] setsockopt(TCP_KEEPIDLE) failed: %s (%d)",
+                    __FILE__, __LINE__,
+                    strerror(opal_socket_errno),
+                    opal_socket_errno);
+        return;
+    }
+#endif  // TCP_KEEPIDLE
+#if defined(TCP_KEEPINTVL)
+    /* set the keepalive interval */
+    if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPINTVL,
+                   &mca_oob_tcp_component.keepalive_intvl,
+                   sizeof(mca_oob_tcp_component.keepalive_intvl)) < 0) {
+        opal_output(0, "[%s:%d] setsockopt(TCP_KEEPINTVL) failed: %s (%d)",
+                    __FILE__, __LINE__,
+                    strerror(opal_socket_errno),
+                    opal_socket_errno);
+        return;
+    }
+#endif  // TCP_KEEPINTVL
+#if defined(TCP_KEEPCNT)
+    /* set the miss rate */
+    if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPCNT,
+                   &mca_oob_tcp_component.keepalive_probes,
+                   sizeof(mca_oob_tcp_component.keepalive_probes)) < 0) {
+        opal_output(0, "[%s:%d] setsockopt(TCP_KEEPCNT) failed: %s (%d)",
+                    __FILE__, __LINE__,
+                    strerror(opal_socket_errno),
+                    opal_socket_errno);
+    }
+#endif  // TCP_KEEPCNT
+}
+#endif  // SO_KEEPALIVE
+
 void orte_oob_tcp_set_socket_options(int sd)
 {
 #if defined(TCP_NODELAY)
@@ -108,6 +189,11 @@ void orte_oob_tcp_set_socket_options(int sd)
                     opal_socket_errno);
     }
 #endif
+#if defined(SO_KEEPALIVE)
+    if (0 < mca_oob_tcp_component.keepalive_time) {
+        set_keepalive(sd);
+    }
+#endif  // SO_KEEPALIVE
 }
 
 mca_oob_tcp_peer_t* mca_oob_tcp_peer_lookup(const orte_process_name_t *name)
diff --git a/orte/mca/oob/tcp/oob_tcp_component.c b/orte/mca/oob/tcp/oob_tcp_component.c
index 08c19c4a67..ec020319c2 100644
--- a/orte/mca/oob/tcp/oob_tcp_component.c
+++ b/orte/mca/oob/tcp/oob_tcp_component.c
@@ -404,6 +404,29 @@ static int tcp_component_register(void)
                                           &mca_oob_tcp_component.disable_ipv6_family);
 #endif
 
+    mca_oob_tcp_component.keepalive_time = 10;
+    (void)mca_base_component_var_register(component, "keepalive_time",
+                                          "Idle time in seconds before starting to send keepalives (num <= 0 ----> disable keepalive)",
+                                          MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
+                                          OPAL_INFO_LVL_9,
+                                          MCA_BASE_VAR_SCOPE_READONLY,
+                                          &mca_oob_tcp_component.keepalive_time);
+
+    mca_oob_tcp_component.keepalive_intvl = 5;
+    (void)mca_base_component_var_register(component, "keepalive_intvl",
+                                          "Time between keepalives, in seconds",
+                                          MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
+                                          OPAL_INFO_LVL_9,
+                                          MCA_BASE_VAR_SCOPE_READONLY,
+                                          &mca_oob_tcp_component.keepalive_intvl);
+
+    mca_oob_tcp_component.keepalive_probes = 3;
+    (void)mca_base_component_var_register(component, "keepalive_probes",
+                                          "Number of keepalives that can be missed before declaring error",
+                                          MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
+                                          OPAL_INFO_LVL_9,
+                                          MCA_BASE_VAR_SCOPE_READONLY,
+                                          &mca_oob_tcp_component.keepalive_probes);
     return ORTE_SUCCESS;
 }
 
diff --git a/orte/mca/oob/tcp/oob_tcp_component.h b/orte/mca/oob/tcp/oob_tcp_component.h
index a6657caace..f3ecbbcf49 100644
--- a/orte/mca/oob/tcp/oob_tcp_component.h
+++ b/orte/mca/oob/tcp/oob_tcp_component.h
@@ -77,6 +77,9 @@ typedef struct {
     bool               listen_thread_active;
     struct timeval     listen_thread_tv;  /**< Timeout when using listen thread */
     int                stop_thread[2];    /**< pipe used to exit the listen thread */
+    int                keepalive_probes;  /**< number of keepalives that can be missed before declaring error */
+    int                keepalive_time;    /**< idle time in seconds before starting to send keepalives */
+    int                keepalive_intvl;   /**< time between keepalives, in seconds */
 } mca_oob_tcp_component_t;
 
 ORTE_MODULE_DECLSPEC extern mca_oob_tcp_component_t mca_oob_tcp_component;