Skip to content

Commit

Permalink
[links] stabilize latency calculation when nodes are not responsive
Browse files Browse the repository at this point in the history
The following scenario is more of a corner case than normal, but
this change allows to better deal with this situation:

1) 2 nodes cluster (corosync) (node A and node B)
2) kill -stop $(pidof corosync) on node A
3) node B will continue to send ping packets to node A
4) node A is accumulating those ping packets in the kernel network socket
5) wait some seconds and unpause node A
6) node A will start processing the ping packets in the queue
   and send pong replies to node B
7) node B will see an extreme increase of latency due
   those "obsoleted" ping/pong packets
8) node B, as latency increases, will take longer and longer
   to notice that node A is down due to the pong_timeout adjustment
   for latency (required for initial cluster spike).

the solution:

1) Use average latency to calculate pong_timeout_adj vs latency_max.
   Averate latency will go down again in time, while latency_max is never
   reset.

2) RX thread will filter out all pong packets that have higher latency
   than currently configure pong_timeout. This barrier should have
   been in place even before.

this solution reduces the latency spike on node B to a perfectly
reasonable level and it will all eventually stabilize over time
as latency samples increase and latency will reduce.

Please be aware that using a pong_timeout smaller than latency will
simply mark the link down now.

Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
  • Loading branch information
fabbione committed Sep 9, 2019
1 parent eba56bb commit 4df82e5
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 29 deletions.
2 changes: 1 addition & 1 deletion libknet/threads_heartbeat.c
Expand Up @@ -177,7 +177,7 @@ static void _adjust_pong_timeouts(knet_handle_t knet_h)
dst_link->pong_timeout_backoff--;
}

dst_link->pong_timeout_adj = (dst_link->pong_timeout * dst_link->pong_timeout_backoff) + (dst_link->status.stats.latency_max * KNET_LINK_PONG_TIMEOUT_LAT_MUL);
dst_link->pong_timeout_adj = (dst_link->pong_timeout * dst_link->pong_timeout_backoff) + (dst_link->status.latency * KNET_LINK_PONG_TIMEOUT_LAT_MUL);
}
}

Expand Down
61 changes: 33 additions & 28 deletions libknet/threads_rx.c
Expand Up @@ -615,37 +615,42 @@ static void _parse_recv_from_links(knet_handle_t knet_h, int sockfd, const struc
timespec_diff(recvtime,
src_link->status.pong_last, &latency_last);

src_link->status.latency =
((src_link->status.latency * src_link->latency_exp) +
((latency_last / 1000llu) *
(src_link->latency_fix - src_link->latency_exp))) /
src_link->latency_fix;

if (src_link->status.latency < src_link->pong_timeout_adj) {
if (!src_link->status.connected) {
if (src_link->received_pong >= src_link->pong_count) {
log_info(knet_h, KNET_SUB_RX, "host: %u link: %u is up",
src_host->host_id, src_link->link_id);
_link_updown(knet_h, src_host->host_id, src_link->link_id, src_link->status.enabled, 1);
} else {
src_link->received_pong++;
log_debug(knet_h, KNET_SUB_RX, "host: %u link: %u received pong: %u",
src_host->host_id, src_link->link_id, src_link->received_pong);
if ((latency_last / 1000llu) > src_link->pong_timeout) {
log_debug(knet_h, KNET_SUB_RX,
"Incoming pong packet from host: %u link: %u has higher latency than pong_timeout. Discarding",
src_host->host_id, src_link->link_id);
} else {
src_link->status.latency =
((src_link->status.latency * src_link->latency_exp) +
((latency_last / 1000llu) *
(src_link->latency_fix - src_link->latency_exp))) /
src_link->latency_fix;

if (src_link->status.latency < src_link->pong_timeout_adj) {
if (!src_link->status.connected) {
if (src_link->received_pong >= src_link->pong_count) {
log_info(knet_h, KNET_SUB_RX, "host: %u link: %u is up",
src_host->host_id, src_link->link_id);
_link_updown(knet_h, src_host->host_id, src_link->link_id, src_link->status.enabled, 1);
} else {
src_link->received_pong++;
log_debug(knet_h, KNET_SUB_RX, "host: %u link: %u received pong: %u",
src_host->host_id, src_link->link_id, src_link->received_pong);
}
}
}
/* Calculate latency stats */
if (src_link->status.latency > src_link->status.stats.latency_max) {
src_link->status.stats.latency_max = src_link->status.latency;
}
if (src_link->status.latency < src_link->status.stats.latency_min) {
src_link->status.stats.latency_min = src_link->status.latency;
}
src_link->status.stats.latency_ave =
(src_link->status.stats.latency_ave * src_link->status.stats.latency_samples +
src_link->status.latency) / (src_link->status.stats.latency_samples+1);
src_link->status.stats.latency_samples++;
}
/* Calculate latency stats */
if (src_link->status.latency > src_link->status.stats.latency_max) {
src_link->status.stats.latency_max = src_link->status.latency;
}
if (src_link->status.latency < src_link->status.stats.latency_min) {
src_link->status.stats.latency_min = src_link->status.latency;
}
src_link->status.stats.latency_ave =
(src_link->status.stats.latency_ave * src_link->status.stats.latency_samples +
src_link->status.latency) / (src_link->status.stats.latency_samples+1);
src_link->status.stats.latency_samples++;

break;
case KNET_HEADER_TYPE_PMTUD:
src_link->status.stats.rx_pmtu_packets++;
Expand Down

0 comments on commit 4df82e5

Please sign in to comment.