Skip to content

Commit

Permalink
[PMTUd] fix external API and PMTUd interaction
Browse files Browse the repository at this point in the history
The problem:

PMTUd can take a long time to release the global read lock, mostly due
to the pthread_cond_timedwait required to ack/nack packets from the
other hosts. This delay could block any wrlock operation for several seconds
if not more.

The solution:

each call to the global pthread_rwlock_wrlock has been changed to a wrapper
that will notify the PMTUd to interrupt its operations (and restart) first,
then get a global write lock that is queued as soon as PMTUd is going out.

This solution also improves a lot shutdown speed.

How to test:

This is not super simple to test and verify. I used 2 VMs with known MTU of
1500. Start knet_bench on both (normal ping_data -C is more than enough).
Once they have established data exchange, change the MTU on one of the nodes
to 1600 (or higher). This should guarantee that the PMTUd process will take
a very long time to complete.
First verify that the PMTUd process takes several seconds.
Once the next PMTUd run starts, hit ctrl+c on the node that is executing
the PMTUd and the process should exit much faster than before this patch.

Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
  • Loading branch information
fabbione committed Dec 17, 2017
1 parent 9deffd5 commit 718d1e8
Show file tree
Hide file tree
Showing 12 changed files with 101 additions and 38 deletions.
30 changes: 12 additions & 18 deletions libknet/handle.c
Original file line number Diff line number Diff line change
Expand Up @@ -520,12 +520,6 @@ static void _stop_threads(knet_handle_t knet_h)
pthread_join(knet_h->dst_link_handler_thread, &retval);
}

pthread_mutex_lock(&knet_h->pmtud_mutex);
pthread_cond_signal(&knet_h->pmtud_cond);
pthread_mutex_unlock(&knet_h->pmtud_mutex);

sleep(1);

if (knet_h->pmtud_link_handler_thread) {
pthread_cancel(knet_h->pmtud_link_handler_thread);
pthread_join(knet_h->pmtud_link_handler_thread, &retval);
Expand Down Expand Up @@ -710,7 +704,7 @@ int knet_handle_free(knet_handle_t knet_h)
goto exit_nolock;
}

savederrno = pthread_rwlock_wrlock(&knet_h->global_rwlock);
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_HANDLE, "Unable to get write lock: %s",
strerror(savederrno));
Expand Down Expand Up @@ -774,7 +768,7 @@ int knet_handle_enable_sock_notify(knet_handle_t knet_h,
return -1;
}

savederrno = pthread_rwlock_wrlock(&knet_h->global_rwlock);
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_HANDLE, "Unable to get write lock: %s",
strerror(savederrno));
Expand Down Expand Up @@ -817,7 +811,7 @@ int knet_handle_add_datafd(knet_handle_t knet_h, int *datafd, int8_t *channel)
return -1;
}

savederrno = pthread_rwlock_wrlock(&knet_h->global_rwlock);
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_HANDLE, "Unable to get write lock: %s",
strerror(savederrno));
Expand Down Expand Up @@ -949,7 +943,7 @@ int knet_handle_remove_datafd(knet_handle_t knet_h, int datafd)
return -1;
}

savederrno = pthread_rwlock_wrlock(&knet_h->global_rwlock);
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_HANDLE, "Unable to get write lock: %s",
strerror(savederrno));
Expand Down Expand Up @@ -1107,7 +1101,7 @@ int knet_handle_enable_filter(knet_handle_t knet_h,
return -1;
}

savederrno = pthread_rwlock_wrlock(&knet_h->global_rwlock);
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_HANDLE, "Unable to get write lock: %s",
strerror(savederrno));
Expand Down Expand Up @@ -1142,7 +1136,7 @@ int knet_handle_setfwd(knet_handle_t knet_h, unsigned int enabled)
return -1;
}

savederrno = pthread_rwlock_wrlock(&knet_h->global_rwlock);
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_HANDLE, "Unable to get write lock: %s",
strerror(savederrno));
Expand Down Expand Up @@ -1206,7 +1200,7 @@ int knet_handle_pmtud_setfreq(knet_handle_t knet_h, unsigned int interval)
return -1;
}

savederrno = pthread_rwlock_wrlock(&knet_h->global_rwlock);
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_HANDLE, "Unable to get write lock: %s",
strerror(savederrno));
Expand Down Expand Up @@ -1235,7 +1229,7 @@ int knet_handle_enable_pmtud_notify(knet_handle_t knet_h,
return -1;
}

savederrno = pthread_rwlock_wrlock(&knet_h->global_rwlock);
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_HANDLE, "Unable to get write lock: %s",
strerror(savederrno));
Expand Down Expand Up @@ -1301,7 +1295,7 @@ int knet_handle_crypto(knet_handle_t knet_h, struct knet_handle_crypto_cfg *knet
return -1;
}

savederrno = pthread_rwlock_wrlock(&knet_h->global_rwlock);
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_HANDLE, "Unable to get write lock: %s",
strerror(savederrno));
Expand Down Expand Up @@ -1362,7 +1356,7 @@ int knet_handle_compress(knet_handle_t knet_h, struct knet_handle_compress_cfg *
return -1;
}

savederrno = pthread_rwlock_wrlock(&knet_h->global_rwlock);
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_HANDLE, "Unable to get write lock: %s",
strerror(savederrno));
Expand Down Expand Up @@ -1521,7 +1515,7 @@ int knet_handle_get_stats(knet_handle_t knet_h, struct knet_handle_stats *stats,
return -1;
}

savederrno = pthread_rwlock_wrlock(&knet_h->global_rwlock);
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_HANDLE, "Unable to get write lock: %s",
strerror(savederrno));
Expand Down Expand Up @@ -1569,7 +1563,7 @@ int knet_handle_clear_stats(knet_handle_t knet_h, int clear_option)
return -1;
}

savederrno = pthread_rwlock_wrlock(&knet_h->global_rwlock);
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_HANDLE, "Unable to get write lock: %s",
strerror(savederrno));
Expand Down
11 changes: 6 additions & 5 deletions libknet/host.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "host.h"
#include "internals.h"
#include "logging.h"
#include "threads_common.h"

static void _host_list_update(knet_handle_t knet_h)
{
Expand All @@ -41,7 +42,7 @@ int knet_host_add(knet_handle_t knet_h, knet_node_id_t host_id)
return -1;
}

savederrno = pthread_rwlock_wrlock(&knet_h->global_rwlock);
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_HOST, "Unable to get write lock: %s",
strerror(savederrno));
Expand Down Expand Up @@ -121,7 +122,7 @@ int knet_host_remove(knet_handle_t knet_h, knet_node_id_t host_id)
return -1;
}

savederrno = pthread_rwlock_wrlock(&knet_h->global_rwlock);
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_HOST, "Unable to get write lock: %s",
strerror(savederrno));
Expand Down Expand Up @@ -192,7 +193,7 @@ int knet_host_set_name(knet_handle_t knet_h, knet_node_id_t host_id, const char
return -1;
}

savederrno = pthread_rwlock_wrlock(&knet_h->global_rwlock);
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_HOST, "Unable to get write lock: %s",
strerror(savederrno));
Expand Down Expand Up @@ -374,7 +375,7 @@ int knet_host_set_policy(knet_handle_t knet_h, knet_node_id_t host_id,
return -1;
}

savederrno = pthread_rwlock_wrlock(&knet_h->global_rwlock);
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_HOST, "Unable to get write lock: %s",
strerror(savederrno));
Expand Down Expand Up @@ -505,7 +506,7 @@ int knet_host_enable_status_change_notify(knet_handle_t knet_h,
return -1;
}

savederrno = pthread_rwlock_wrlock(&knet_h->global_rwlock);
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_HOST, "Unable to get write lock: %s",
strerror(savederrno));
Expand Down
2 changes: 2 additions & 0 deletions libknet/internals.h
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,8 @@ struct knet_handle {
pthread_mutex_t tx_mutex; /* used to protect knet_send_sync and TX thread */
pthread_mutex_t hb_mutex; /* used to protect heartbeat thread and seq_num broadcasting */
pthread_mutex_t backoff_mutex; /* used to protect dst_link->pong_timeout_adj */
int pmtud_running;
int pmtud_abort;
struct crypto_instance *crypto_instance;
size_t sec_header_size;
size_t sec_block_size;
Expand Down
15 changes: 8 additions & 7 deletions libknet/links.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "links.h"
#include "transports.h"
#include "host.h"
#include "threads_common.h"

int _link_updown(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t link_id,
unsigned int enabled, unsigned int connected)
Expand Down Expand Up @@ -103,7 +104,7 @@ int knet_link_set_config(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t l
return -1;
}

savederrno = pthread_rwlock_wrlock(&knet_h->global_rwlock);
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_LINK, "Unable to get write lock: %s",
strerror(savederrno));
Expand Down Expand Up @@ -351,7 +352,7 @@ int knet_link_clear_config(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t
return -1;
}

savederrno = pthread_rwlock_wrlock(&knet_h->global_rwlock);
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_LINK, "Unable to get write lock: %s",
strerror(savederrno));
Expand Down Expand Up @@ -434,7 +435,7 @@ int knet_link_set_enable(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t l
return -1;
}

savederrno = pthread_rwlock_wrlock(&knet_h->global_rwlock);
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_LINK, "Unable to get read lock: %s",
strerror(savederrno));
Expand Down Expand Up @@ -561,7 +562,7 @@ int knet_link_set_pong_count(knet_handle_t knet_h, knet_node_id_t host_id, uint8
return -1;
}

savederrno = pthread_rwlock_wrlock(&knet_h->global_rwlock);
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_LINK, "Unable to get write lock: %s",
strerror(savederrno));
Expand Down Expand Up @@ -689,7 +690,7 @@ int knet_link_set_ping_timers(knet_handle_t knet_h, knet_node_id_t host_id, uint
return -1;
}

savederrno = pthread_rwlock_wrlock(&knet_h->global_rwlock);
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_LINK, "Unable to get write lock: %s",
strerror(savederrno));
Expand Down Expand Up @@ -819,7 +820,7 @@ int knet_link_set_priority(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t
return -1;
}

savederrno = pthread_rwlock_wrlock(&knet_h->global_rwlock);
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_LINK, "Unable to get write lock: %s",
strerror(savederrno));
Expand Down Expand Up @@ -1010,7 +1011,7 @@ int knet_link_get_status(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t l
return -1;
}

savederrno = pthread_rwlock_wrlock(&knet_h->global_rwlock);
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, KNET_SUB_LINK, "Unable to get read lock: %s",
strerror(savederrno));
Expand Down
3 changes: 2 additions & 1 deletion libknet/logging.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include "internals.h"
#include "logging.h"
#include "threads_common.h"

struct pretty_names {
const char *name;
Expand Down Expand Up @@ -152,7 +153,7 @@ int knet_log_set_loglevel(knet_handle_t knet_h, uint8_t subsystem,
return -1;
}

savederrno = pthread_rwlock_wrlock(&knet_h->global_rwlock);
savederrno = get_global_wrlock(knet_h);
if (savederrno) {
log_err(knet_h, subsystem, "Unable to get write lock: %s",
strerror(savederrno));
Expand Down
3 changes: 2 additions & 1 deletion libknet/tests/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -66,4 +66,5 @@ knet_bench_test_SOURCES = knet_bench.c \
../common.c \
../logging.c \
../compat.c \
../transport_common.c
../transport_common.c \
../threads_common.c
25 changes: 25 additions & 0 deletions libknet/threads_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,28 @@ int shutdown_in_progress(knet_handle_t knet_h)

return ret;
}

static int pmtud_reschedule(knet_handle_t knet_h)
{
if (pthread_mutex_lock(&knet_h->pmtud_mutex) != 0) {
log_debug(knet_h, KNET_SUB_PMTUD, "Unable to get mutex lock");
return -1;
}

knet_h->pmtud_abort = 1;

if (knet_h->pmtud_running) {
pthread_cond_signal(&knet_h->pmtud_cond);
}

pthread_mutex_unlock(&knet_h->pmtud_mutex);
return 0;
}

int get_global_wrlock(knet_handle_t knet_h)
{
if (pmtud_reschedule(knet_h) < 0) {
log_info(knet_h, KNET_SUB_PMTUD, "Unable to notify PMTUd to reschedule. Expect delays in executing API calls");
}
return pthread_rwlock_wrlock(&knet_h->global_rwlock);
}
1 change: 1 addition & 0 deletions libknet/threads_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,6 @@ do { \
} while (0);

int shutdown_in_progress(knet_handle_t knet_h);
int get_global_wrlock(knet_handle_t knet_h);

#endif
3 changes: 2 additions & 1 deletion libknet/threads_dsthandler.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "logging.h"
#include "threads_common.h"
#include "threads_dsthandler.h"
#include "threads_pmtud.h"

static void _handle_dst_link_updates(knet_handle_t knet_h)
{
Expand All @@ -28,7 +29,7 @@ static void _handle_dst_link_updates(knet_handle_t knet_h)
return;
}

if (pthread_rwlock_wrlock(&knet_h->global_rwlock) != 0) {
if (get_global_wrlock(knet_h) != 0) {
log_debug(knet_h, KNET_SUB_DSTCACHE, "Unable to get read lock");
return;
}
Expand Down

0 comments on commit 718d1e8

Please sign in to comment.