From 70cfb6de0f8ec9a03e12aae2f21ddfdf73ec1582 Mon Sep 17 00:00:00 2001
From: Artem Polyakov
Date: Thu, 17 Dec 2015 17:26:24 +0600
Subject: [PATCH] Fix add_proc deadlock.

ompi_proc_for_name_nolock() keeps returning NULL when
ompi_proc_complete_init_single() fails, as the inline code it replaces did.

(cherry picked from open-mpi/ompi@d0b4aa1f9af61c4f7abcadbc2d903f8d4e669f21)
---
 ompi/proc/proc.c | 62 ++++++++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 40 insertions(+), 22 deletions(-)

diff --git a/ompi/proc/proc.c b/ompi/proc/proc.c
index e03b4c4cf8..aac794151f 100644
--- a/ompi/proc/proc.c
+++ b/ompi/proc/proc.c
@@ -50,6 +50,7 @@ ompi_proc_t* ompi_proc_local_proc = NULL;
 
 static void ompi_proc_construct(ompi_proc_t* proc);
 static void ompi_proc_destruct(ompi_proc_t* proc);
+static ompi_proc_t *ompi_proc_for_name_nolock (const opal_process_name_t proc_name);
 
 OBJ_CLASS_INSTANCE(
     ompi_proc_t,
@@ -198,6 +199,43 @@ opal_proc_t *ompi_proc_lookup (const opal_process_name_t proc_name)
     return NULL;
 }
 
+/**
+ * Look up the proc for proc_name in ompi_proc_hash, creating it if absent.
+ *
+ * Does not take ompi_proc_lock itself: ompi_proc_for_name() calls this with
+ * the lock held, and other callers are expected to hold it already.
+ *
+ * @return the proc, or NULL if ompi_proc_complete_init_single() failed
+ */
+static ompi_proc_t *ompi_proc_for_name_nolock (const opal_process_name_t proc_name)
+{
+    ompi_proc_t *proc = NULL;
+    int ret;
+
+    /* double-check that another competing thread has not added this proc */
+    ret = opal_hash_table_get_value_ptr (&ompi_proc_hash, &proc_name, sizeof (proc_name), (void **) &proc);
+    if (OPAL_SUCCESS == ret) {
+        goto exit;
+    }
+
+    /* allocate a new ompi_proc_t object for the process and insert it into the process table */
+    ret = ompi_proc_allocate (proc_name.jobid, proc_name.vpid, &proc);
+    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
+        /* allocation fail */
+        goto exit;
+    }
+
+    /* finish filling in the important proc data fields */
+    ret = ompi_proc_complete_init_single (proc);
+    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
+        /* do not hand a partially initialized proc back to the caller */
+        proc = NULL;
+        goto exit;
+    }
+exit:
+    return proc;
+}
+
 opal_proc_t *ompi_proc_for_name (const opal_process_name_t proc_name)
 {
     ompi_proc_t *proc = NULL;
@@ -210,27 +248,7 @@ opal_proc_t *ompi_proc_for_name (const opal_process_name_t proc_name)
     }
 
     opal_mutex_lock (&ompi_proc_lock);
-    do {
-        /* double-check that another competing thread has not added this proc */
-        ret = opal_hash_table_get_value_ptr (&ompi_proc_hash, &proc_name, sizeof (proc_name), (void **) &proc);
-        if (OPAL_SUCCESS == ret) {
-            break;
-        }
-
-        /* allocate a new ompi_proc_t object for the process and insert it into the process table */
-        ret = ompi_proc_allocate (proc_name.jobid, proc_name.vpid, &proc);
-        if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
-            /* allocation fail */
-            break;
-        }
-
-        /* finish filling in the important proc data fields */
-        ret = ompi_proc_complete_init_single (proc);
-        if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
-            proc = NULL;
-            break;
-        }
-    } while (0);
+    proc = ompi_proc_for_name_nolock (proc_name);
     opal_mutex_unlock (&ompi_proc_lock);
 
     return (opal_proc_t *) proc;
@@ -633,7 +651,7 @@ ompi_proc_pack(ompi_proc_t **proclist, int proclistsize,
         ompi_proc_t *proc = proclist[i];
 
         if (ompi_proc_is_sentinel (proc)) {
-            proc = ompi_proc_for_name (ompi_proc_sentinel_to_name ((intptr_t) proc));
+            proc = ompi_proc_for_name_nolock (ompi_proc_sentinel_to_name ((intptr_t) proc));
         }
 
         /* send proc name */