Skip to content
This repository was archived by the owner on Sep 30, 2022. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 74 additions & 0 deletions orte/mca/oob/tcp/oob_tcp_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,75 @@
* Set socket buffering
*/

#if defined(SO_KEEPALIVE)
/*
 * Enable keepalive probing on a socket.
 *
 * Turns on SO_KEEPALIVE and then applies, for each tuning option the
 * platform headers define, the corresponding component parameter:
 *
 *   TCP_KEEPALIVE or TCP_KEEPIDLE  <- mca_oob_tcp_component.keepalive_time
 *   TCP_KEEPINTVL                  <- mca_oob_tcp_component.keepalive_intvl
 *   TCP_KEEPCNT                    <- mca_oob_tcp_component.keepalive_probes
 *
 * The whole definition is guarded by SO_KEEPALIVE, matching the guard
 * around the call site, so the file still builds on platforms that do
 * not provide the option.
 *
 * @param sd  socket descriptor to configure
 *
 * Nothing is returned. If the initial getsockopt() probe fails, the
 * function returns silently. Any setsockopt() failure is reported via
 * opal_output() and, apart from the final TCP_KEEPCNT step, stops
 * further tuning.
 */
static void set_keepalive(int sd)
{
    int option;
    socklen_t optlen;

    /* see if the keepalive option is available */
    optlen = sizeof(option);
    if (getsockopt(sd, SOL_SOCKET, SO_KEEPALIVE, &option, &optlen) < 0) {
        /* not available, so just return */
        return;
    }

    /* Set the option active. Pass the real size of the buffer rather
     * than optlen, which getsockopt() treats as a value-result argument
     * and may have rewritten. */
    option = 1;
    if (setsockopt(sd, SOL_SOCKET, SO_KEEPALIVE, &option, sizeof(option)) < 0) {
        opal_output(0, "[%s:%d] setsockopt(SO_KEEPALIVE) failed: %s (%d)",
                    __FILE__, __LINE__,
                    strerror(opal_socket_errno),
                    opal_socket_errno);
        return;
    }

    /* The idle-time option is spelled differently across platforms;
     * use whichever name the headers provide. */
#if defined(TCP_KEEPALIVE)
    /* set the idle time */
    if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPALIVE,
                   &mca_oob_tcp_component.keepalive_time,
                   sizeof(mca_oob_tcp_component.keepalive_time)) < 0) {
        opal_output(0, "[%s:%d] setsockopt(TCP_KEEPALIVE) failed: %s (%d)",
                    __FILE__, __LINE__,
                    strerror(opal_socket_errno),
                    opal_socket_errno);
        return;
    }
#elif defined(TCP_KEEPIDLE)
    /* set the idle time */
    if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPIDLE,
                   &mca_oob_tcp_component.keepalive_time,
                   sizeof(mca_oob_tcp_component.keepalive_time)) < 0) {
        opal_output(0, "[%s:%d] setsockopt(TCP_KEEPIDLE) failed: %s (%d)",
                    __FILE__, __LINE__,
                    strerror(opal_socket_errno),
                    opal_socket_errno);
        return;
    }
#endif  // TCP_KEEPIDLE
#if defined(TCP_KEEPINTVL)
    /* set the keepalive interval */
    if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPINTVL,
                   &mca_oob_tcp_component.keepalive_intvl,
                   sizeof(mca_oob_tcp_component.keepalive_intvl)) < 0) {
        opal_output(0, "[%s:%d] setsockopt(TCP_KEEPINTVL) failed: %s (%d)",
                    __FILE__, __LINE__,
                    strerror(opal_socket_errno),
                    opal_socket_errno);
        return;
    }
#endif  // TCP_KEEPINTVL
#if defined(TCP_KEEPCNT)
    /* set the miss rate */
    if (setsockopt(sd, IPPROTO_TCP, TCP_KEEPCNT,
                   &mca_oob_tcp_component.keepalive_probes,
                   sizeof(mca_oob_tcp_component.keepalive_probes)) < 0) {
        opal_output(0, "[%s:%d] setsockopt(TCP_KEEPCNT) failed: %s (%d)",
                    __FILE__, __LINE__,
                    strerror(opal_socket_errno),
                    opal_socket_errno);
    }
#endif  // TCP_KEEPCNT
}
#endif  // SO_KEEPALIVE

void orte_oob_tcp_set_socket_options(int sd)
{
#if defined(TCP_NODELAY)
Expand Down Expand Up @@ -108,6 +177,11 @@ void orte_oob_tcp_set_socket_options(int sd)
opal_socket_errno);
}
#endif
#if defined(SO_KEEPALIVE)
if (0 < mca_oob_tcp_component.keepalive_time) {
set_keepalive(sd);
}
#endif // SO_KEEPALIVE
}

mca_oob_tcp_peer_t* mca_oob_tcp_peer_lookup(const orte_process_name_t *name)
Expand Down
22 changes: 22 additions & 0 deletions orte/mca/oob/tcp/oob_tcp_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,28 @@ static int tcp_component_register(void)
&mca_oob_tcp_component.disable_ipv6_family);
#endif

mca_oob_tcp_component.keepalive_time = 10;
(void)mca_base_component_var_register(component, "keepalive_time",
"Idle time in seconds before starting to send keepalives (num <= 0 ----> disable keepalive)",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_oob_tcp_component.keepalive_time);

mca_oob_tcp_component.keepalive_intvl = 5;
(void)mca_base_component_var_register(component, "keepalive_intvl",
"Time between keepalives, in seconds",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_oob_tcp_component.keepalive_intvl);
mca_oob_tcp_component.keepalive_probes = 3;
(void)mca_base_component_var_register(component, "keepalive_probes",
"Number of keepalives that can be missed before declaring error",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_oob_tcp_component.keepalive_probes);
return ORTE_SUCCESS;
}

Expand Down
3 changes: 3 additions & 0 deletions orte/mca/oob/tcp/oob_tcp_component.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ typedef struct {
bool listen_thread_active;
struct timeval listen_thread_tv; /**< Timeout when using listen thread */
int stop_thread[2]; /**< pipe used to exit the listen thread */
int keepalive_probes; /**< number of keepalives that can be missed before declaring error */
int keepalive_time; /**< idle time in seconds before starting to send keepalives */
int keepalive_intvl; /**< time between keepalives, in seconds */
} mca_oob_tcp_component_t;

ORTE_MODULE_DECLSPEC extern mca_oob_tcp_component_t mca_oob_tcp_component;
Expand Down